0

I have huge XML files, and as a result I have a lot of XPath expressions to pull entries out of them. So I am trying to spawn multiple threads so that each XPath expression can be evaluated in a different thread. But I am getting errors. Below is a code snippet that should give a fair idea of the problem; I have used a very small XML document here for brevity. I am creating 3 threads and queueing 10 tasks.

import java.io.File;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import com.ximpleware.AutoPilot;
import com.ximpleware.EOFException;
import com.ximpleware.EncodingException;
import com.ximpleware.EntityException;
import com.ximpleware.ParseException;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;

/**
 * Failing reproduction: parses one small XML document with a single VTDGen and
 * then hands that same VTDGen instance to every task submitted to a fixed
 * thread pool.
 */
public class MultiThread {
    /**
     * Parses the inline XML once, builds 10 identical XPath tasks that all share
     * the one VTDGen, runs them on a 3-thread pool and shuts the pool down.
     * The returned futures are collected but never inspected.
     */
    public static void main(String args[]) throws InterruptedException, ExecutionException, EncodingException, EOFException, EntityException, ParseException
    {
        String str="<library><booked>book</booked> <book id=\"1\"> <title>Googled By God</title>  </book> </library>";
        // NOTE: f and ba are declared but never used in this method.
        File f = new File("/home/cloudera/wos.xml");
        byte[] ba =null;;
        ExecutorService executor = Executors.newFixedThreadPool(3);
        List<Task> extractorTasks = new ArrayList<Task>();
        // Parse the document a single time, up front.
        // NOTE(review): the 'false' argument presumably disables namespace awareness - confirm against the VTD-XML docs.
        VTDGen vg = new VTDGen();
        vg.setDoc(str.getBytes());
        vg.parse(false);


        //add 10 tasks
        for(int i=0;i<10;i++)
        {

            // Every task receives its own copy of the bytes but the SAME vg instance.
            Task d = new Task(str.getBytes(),vg,"/library/book/title");
            extractorTasks.add(d);
        }

        // invokeAll blocks until all tasks have completed; the results in 'output' are not read.
        List<Future<String>> output = executor.invokeAll(extractorTasks);
        executor.shutdown();    
    }
}
/**
 * Callable that evaluates one XPath expression and prints the text of each
 * match. In this version the task is given a VTDGen (shared with the other
 * tasks by the caller) and asks it for a navigator inside call().
 */
class Task implements Callable<String> {

    VTDGen vg = null;      // parser supplied by the caller
    String xpath = "";     // XPath expression to evaluate
    byte [] ba=null;       // raw document bytes; only needed by the commented-out re-parse below
    AutoPilot ap = null;   // created inside call()
    /**
     * @param _ba    raw XML bytes
     * @param _vg    VTDGen to obtain the navigator from
     * @param _xpath XPath expression to evaluate
     */
    Task(byte[] _ba,VTDGen _vg,String _xpath)
    {
        ba = _ba;
        vg = _vg;
        xpath = _xpath;
    }
    /**
     * Evaluates the XPath and prints "Title is ..." for every match.
     *
     * @return the text of the last match, or "" if nothing matched or an
     *         exception was caught (exceptions are printed, not rethrown)
     */
    public String call() throws Exception 
    {

        String title = "";
        try 
        {
            /* If the 3 lines below are uncommented, everything works, because each task then re-parses the whole document with its own VTDGen. */
            //vg = new VTDGen();
            //vg.setDoc(ba);
            //vg.parse(false);

            // Each task calls getNav() on the VTDGen it was given; the reported
            // IllegalArgumentException stack trace points at VTDGen.getNav.
            VTDNav vn = vg.getNav();
            ap = new AutoPilot(vn);
            ap.selectXPath(xpath);

            //Get all the titles and print each of those
            while(ap.evalXPath() != -1)
            {
                //getText returns the index of the text record of the current element
                int titleIndex = vn.getText();
                //Get the text of the VTDRecord
                title = vn.toNormalizedString(titleIndex);
                System.out.println("Title is "+title);
            }

            // NOTE(review): presumably moves the navigator back to the root element; the return value is ignored.
            vn.toElement(VTDNav.ROOT);

        }  
        catch (Exception e) {
            // Any failure is only printed; the method still returns normally.
            e.printStackTrace();
        }

        return title;
    }

}
BJC
  • 491
  • 3
  • 21
  • 1
    What's the problem? – xingbin Feb 03 '18 at 03:56
  • I am running into exceptions like java.lang.IllegalArgumentException at com.ximpleware.UniByteBuffer.<init>(UniByteBuffer.java:32) at com.ximpleware.VTDGen.getNav(VTDGen.java:1745) – BJC Feb 03 '18 at 04:12
  • Please read [How do I ask a good question?](http://stackoverflow.com/help/how-to-ask) before attempting to ask more questions. –  Feb 03 '18 at 05:28

1 Answers1

1

I was able to find a fix. I am storing the VTDNav in a variable and passing a duplicate of it to each task. The getNav() call clears the VTDGen's internal state, which perhaps also invalidates the VTDNav, so keeping a copy of the navigator and passing a duplicate of it to each task did the trick.

import java.io.File;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import com.ximpleware.AutoPilot;
import com.ximpleware.EOFException;
import com.ximpleware.EncodingException;
import com.ximpleware.EntityException;
import com.ximpleware.ParseException;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;

/**
 * Demo driver: parses a small XML document once with VTD-XML, obtains a single
 * navigator from the parser, and evaluates the same XPath expression from many
 * pooled tasks, giving every task its own duplicate of that navigator.
 */
public class MultiThread {
    /** Number of worker threads in the pool. */
    private static final int THREAD_COUNT = 5;
    /** Number of XPath tasks submitted to the pool. */
    private static final int TASK_COUNT = 100;

    /**
     * Parses the inline document, runs {@code TASK_COUNT} XPath tasks on a fixed
     * pool and waits for all of them.
     *
     * @param args unused
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws ExecutionException   if any task terminated with an exception
     * @throws EncodingException    propagated from VTD-XML parsing
     * @throws EOFException         propagated from VTD-XML parsing
     * @throws EntityException      propagated from VTD-XML parsing
     * @throws ParseException       propagated from VTD-XML parsing
     */
    public static void main(String[] args) throws InterruptedException, ExecutionException, EncodingException, EOFException, EntityException, ParseException
    {
        String str="<library><booked>book</booked> <book id=\"1\"> <title>Googled By God</title>  </book> </library>";

        // Encode once, with an explicit charset, instead of once per task with
        // the platform default.
        byte[] doc = str.getBytes(java.nio.charset.StandardCharsets.UTF_8);

        VTDGen vg = new VTDGen();
        vg.setDoc(doc);
        vg.parse(false);

        //The getNav() call cleans internal state, so call it exactly once and keep the VTDNav
        VTDNav vn = vg.getNav();

        List<Task> extractorTasks = new ArrayList<Task>(TASK_COUNT);
        for (int i = 0; i < TASK_COUNT; i++)
        {
            //pass a duplicate of the navigator so no two tasks share one
            extractorTasks.add(new Task(doc, vn.duplicateNav(), "/library/book/title"));
        }

        // Create the pool only after parsing succeeded, and always shut it down.
        ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
        try
        {
            List<Future<String>> output = executor.invokeAll(extractorTasks);
            // invokeAll has already waited for completion; get() is called only
            // so that a failed task surfaces here as an ExecutionException.
            for (Future<String> result : output)
            {
                result.get();
            }
        }
        finally
        {
            executor.shutdown();
        }
    }
}
/**
 * Callable that evaluates one XPath expression against the navigator it was
 * given and prints the text of every matching element. The AutoPilot is created
 * per call, so nothing XPath-related is shared between tasks.
 */
class Task implements Callable<String> {

    /** XPath expression to evaluate. */
    private final String xpath;
    /** Raw document bytes as passed by the caller; not read by {@link #call()}. */
    private final byte[] ba;
    /** Navigator used for evaluation; the caller is expected to pass one per task. */
    private final VTDNav vn;

    /**
     * @param _ba    raw XML bytes (stored, not used during evaluation)
     * @param _vn    navigator to evaluate against; must not be null
     * @param _xpath XPath expression to evaluate; must not be null
     */
    Task(byte[] _ba,VTDNav _vn,String _xpath)
    {
        ba = _ba;
        vn = Objects.requireNonNull(_vn, "_vn");
        xpath = Objects.requireNonNull(_xpath, "_xpath");
    }

    /**
     * Evaluates the XPath and prints "Title is ..." for every match that has text.
     *
     * @return the text of the last match that had text, or "" if there was none
     * @throws Exception any failure raised while compiling or evaluating the
     *                   XPath; it is printed and then rethrown so that the
     *                   task's Future reports the failure
     */
    @Override
    public String call() throws Exception
    {
        String title = "";
        try
        {
            // Local, not a field: the AutoPilot is only meaningful for this call.
            AutoPilot ap = new AutoPilot(vn);
            ap.selectXPath(xpath);

            //Get all the titles and print each of those
            while (ap.evalXPath() != -1)
            {
                //getText returns the index of the element's text record, or -1 if it has none
                int titleIndex = vn.getText();
                if (titleIndex == -1)
                {
                    // Matched element has no text; skip instead of passing -1 on.
                    continue;
                }
                //Get the text of the VTDRecord
                title = vn.toNormalizedString(titleIndex);
                System.out.println("Title is "+title);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            System.out.println("Message is "+e.getMessage());
            // Rethrow so a failed task is not mistaken for one that found nothing.
            throw e;
        }

        return title;
    }

}
BJC
  • 491
  • 3
  • 21
  • That is the right fix for your case... I must point out that I was once asked to share an XPath object for the same XPath between multiple threads... my answer was to compile the same XPath expression into multiple AutoPilot objects... do not share them at all... hopefully you don't have this issue... – vtd-xml-author Feb 03 '18 at 07:36