While reading a Word file element by element with the Apache POI library, how can I keep track of the page number and line number at which the current element is located? I use the code below to read the document element by element:
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.poi.xwpf.usermodel.*;
/**
 * Reads a Word ({@code .docx}) file with Apache POI and prints the text of every
 * body-level paragraph and every table cell to standard output, in document order.
 */
public class ReadWordFile {

    /**
     * Opens {@code file.docx} (resolved against the working directory), walks its
     * body elements and prints their text. An {@link IOException} raised while
     * opening, parsing or closing the file is reported via its stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the document and the stream on every path,
        // including when parsing fails part-way through.
        try (FileInputStream fis = new FileInputStream("file.docx");
             XWPFDocument document = new XWPFDocument(fis)) {
            for (IBodyElement element : document.getBodyElements()) {
                if (element instanceof XWPFParagraph) {
                    XWPFParagraph paragraph = (XWPFParagraph) element;
                    System.out.println(paragraph.getText());
                    // Runs are never body elements themselves; embedded pictures
                    // are reachable only through the runs of a paragraph.
                    readPictures(paragraph);
                } else if (element instanceof XWPFTable) {
                    printTable((XWPFTable) element);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the text of each cell of the table, row by row, one cell per line.
     *
     * @param table the table whose cells are printed
     */
    private static void printTable(XWPFTable table) {
        for (XWPFTableRow row : table.getRows()) {
            for (XWPFTableCell cell : row.getTableCells()) {
                System.out.println(cell.getText());
            }
        }
    }

    /**
     * Visits every picture embedded in the runs of the paragraph and reads its
     * raw bytes. The bytes are not written anywhere yet.
     *
     * @param paragraph the paragraph whose runs are scanned for pictures
     */
    private static void readPictures(XWPFParagraph paragraph) {
        for (XWPFRun run : paragraph.getRuns()) {
            for (XWPFPicture picture : run.getEmbeddedPictures()) {
                XWPFPictureData pictureData = picture.getPictureData();
                if (pictureData == null) {
                    // Picture whose image part could not be resolved; nothing to read.
                    continue;
                }
                byte[] data = pictureData.getData();
                // TODO: persist `data` (e.g. under pictureData.getFileName()) if
                // the images need to be saved to disk.
            }
        }
    }
}