My aim is to load a document from a web server and then parse its DOM for specific content. Loading the DOM is my problem.
I am trying to use a `javafx.scene.web.WebEngine`, as it seems it should be able to handle all the necessary mechanics, including JavaScript execution, which may affect the final DOM.
When loading a document, the load worker appears to get stuck in the `RUNNING` state and never reaches the `SUCCEEDED` state, which I believe is required before accessing the DOM via `WebEngine.getDocument()`.
This occurs whether loading from a URL or literal content (as used in this minimal example).
Can anyone see what I’m doing wrong, or misunderstanding?
Thanks in advance for any help.
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import javafx.application.Platform;
import javafx.concurrent.Task;
import javafx.concurrent.Worker;
import javafx.embed.swing.JFXPanel;
import javafx.scene.web.WebEngine;
import org.w3c.dom.Document;
public class WebEngineProblem {
private static Task<WebEngine> getEngineTask() {
Task<WebEngine> task = new Task<>() {
@Override
protected WebEngine call() throws Exception {
WebEngine webEngine = new WebEngine();
final Worker<Void> loadWorker = webEngine.getLoadWorker();
loadWorker.stateProperty().addListener((obs, oldValue, newValue) -> {
System.out.println("state:" + newValue);
if (newValue == State.SUCCEEDED) {
System.out.println("finished loading");
}
});
webEngine.loadContent("<!DOCTYPE html>\r\n" + "<html>\r\n" + "<head>\r\n" + "<meta charset=\"UTF-8\">\r\n"
+ "<title>Content Title</title>\r\n" + "</head>\r\n" + "<body>\r\n" + "<p>Body</p>\r\n" + "</body>\r\n"
+ "</html>\r\n");
State priorState = State.CANCELLED; //should never be CANCELLED
double priorWork = Double.NaN;
while (loadWorker.isRunning()) {
final double workDone = loadWorker.getWorkDone();
if (loadWorker.getState() != priorState || priorWork != workDone) {
priorState = loadWorker.stateProperty().getValue();
priorWork = workDone;
System.out.println(priorState + " " + priorWork + "/" + loadWorker.getTotalWork());
}
Thread.sleep(1000);
}
return webEngine;
}
};
return task;
}
public static void main(String[] args) {
new JFXPanel(); // Initialise the JavaFx Platform
WebEngine engine = null;
Task<WebEngine> task = getEngineTask();
try {
Platform.runLater(task);
Thread.sleep(1000);
engine = task.get(); // Never completes as always RUNNING
}
catch (InterruptedException | ExecutionException e) {
e.printStackTrace();
}
// This code is never reached as the content never completes loading
// It would fail as it's not on the FX thread.
Document doc = engine.getDocument();
String content = doc.getTextContent();
System.out.println(content);
}
}