I want to get the coordinates of each line of text on a page of a PDF using PDFBox. I can get character-level information, but I am unable to get line-level coordinates.
The following is my code:
/**
 * A {@link PDFTextStripper} subclass that prints, for every string handed to
 * {@link #writeString(String, List)}, the string itself followed by the font
 * name and font size (in points) of each of its {@link TextPosition}s.
 *
 * <p>All output goes to {@code System.out}; nothing is written to the writer
 * passed to {@code writeText}.
 */
public class PDFFontExtractor extends PDFTextStripper {

    /**
     * Creates the extractor by delegating to the superclass constructor.
     *
     * @throws IOException if the superclass constructor fails
     */
    public PDFFontExtractor() throws IOException {
        super();
    }

    /**
     * Prints the given string, then the font name and font size in points of
     * every text position belonging to it.
     *
     * <p>This override does not call the superclass implementation, so the
     * string is not forwarded to the output writer.
     *
     * @param str           the text chunk being written
     * @param textPositions the individual positions that make up {@code str}
     * @throws IOException declared by the overridden method; not thrown here
     */
    @Override
    protected void writeString(String str, List<TextPosition> textPositions) throws IOException {
        System.out.println(str);
        for (TextPosition text : textPositions) {
            System.out.println(text.getFont().getName());
            System.out.println(text.getFontSizeInPt());
        }
    }

    /**
     * Loads a hard-coded PDF file and runs the extractor over it with
     * position sorting enabled.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        File file = new File("/home/neha/Downloads/docs/General.pdf");
        // try-with-resources guarantees the document is closed even when
        // extraction fails; the original code never closed it.
        try (PDDocument document = PDDocument.load(file)) {
            PDFFontExtractor textStripper = new PDFFontExtractor();
            textStripper.setSortByPosition(true);
            // NOTE(review): NullWriter presumably discards everything written
            // to it; the useful output is printed from writeString instead.
            textStripper.writeText(document, NullWriter.NULL_WRITER);
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report and exit normally.
            e.printStackTrace();
        }
    }
}