Commit 2348efb7 authored by petter

Tesseract 3.03 changes

git-svn-id: https://www2.in.tum.de/repos/ttt/trunk@161 0463f305-d864-43cb-8a47-61cf597d4139
parent 2d8ad896
......@@ -808,7 +808,7 @@ public class Index {
// write input for Optical Character Recognition
if ((mode & ScriptCreator.OCR_OPTIMIZED) != 0){
String filename = scriptCreator.writeOCRScreenshot(i, screenshot);
readSearchBaseFromHOCRFile(filename+".hocr.html", i);
readSearchBaseFromHOCRFile(filename+".hocr.hocr", i);
// cleanup mess
{
File f = new File(filename);
......
......@@ -161,12 +161,12 @@ public class hOCRHandler extends DefaultHandler {
// reset searchtext
searchText = null;
String[] coords = attrs.getValue("title").substring(5).split(" ");
String[] coords = attrs.getValue("title").substring(5).split("[ ;]");
left = Integer.parseInt(coords[0]);
top = Integer.parseInt(coords[1]);
right = Integer.parseInt(coords[2]);
bottom = Integer.parseInt(coords[3]);
System.out.println("Durch!"+left+"/q"+top);
//System.out.println("Durch!"+left+"/q"+top);
}
if (localName.equals("div") && attrs.getValue("class").equals("ocr_page")) {
......@@ -207,4 +207,4 @@ public class hOCRHandler extends DefaultHandler {
}
}
}
}
\ No newline at end of file
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment