Commit 2348efb7 authored by petter

Tesseract 3.03 changes

git-svn-id: https://www2.in.tum.de/repos/ttt/trunk@161 0463f305-d864-43cb-8a47-61cf597d4139
parent 2d8ad896
...@@ -808,7 +808,7 @@ public class Index { ...@@ -808,7 +808,7 @@ public class Index {
// write input for Optical Character Recognition // write input for Optical Character Recognition
if ((mode & ScriptCreator.OCR_OPTIMIZED) != 0){ if ((mode & ScriptCreator.OCR_OPTIMIZED) != 0){
String filename = scriptCreator.writeOCRScreenshot(i, screenshot); String filename = scriptCreator.writeOCRScreenshot(i, screenshot);
readSearchBaseFromHOCRFile(filename+".hocr.html", i); readSearchBaseFromHOCRFile(filename+".hocr.hocr", i);
// cleanup mess // cleanup mess
{ {
File f = new File(filename); File f = new File(filename);
......
...@@ -161,12 +161,12 @@ public class hOCRHandler extends DefaultHandler { ...@@ -161,12 +161,12 @@ public class hOCRHandler extends DefaultHandler {
// reset searchtext // reset searchtext
searchText = null; searchText = null;
String[] coords = attrs.getValue("title").substring(5).split(" "); String[] coords = attrs.getValue("title").substring(5).split("[ ;]");
left = Integer.parseInt(coords[0]); left = Integer.parseInt(coords[0]);
top = Integer.parseInt(coords[1]); top = Integer.parseInt(coords[1]);
right = Integer.parseInt(coords[2]); right = Integer.parseInt(coords[2]);
bottom = Integer.parseInt(coords[3]); bottom = Integer.parseInt(coords[3]);
System.out.println("Durch!"+left+"/q"+top); //System.out.println("Durch!"+left+"/q"+top);
} }
if (localName.equals("div") && attrs.getValue("class").equals("ocr_page")) { if (localName.equals("div") && attrs.getValue("class").equals("ocr_page")) {
...@@ -207,4 +207,4 @@ public class hOCRHandler extends DefaultHandler { ...@@ -207,4 +207,4 @@ public class hOCRHandler extends DefaultHandler {
} }
} }
} }
} }
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment