0

I am using the code below to process a large CSV file. It splits the CSV into chunks of 2000 records and submits one task per chunk to a thread pool; once every chunk has been submitted, the result of each task is retrieved from its Future.

I am facing the following problem: with a CSV of 2000 records, 1 thread takes only 32 seconds, but with a CSV of 8000 records, 4 threads take around 1.3 minutes. I can see that all the threads start at the same time and finish at the same time.

What is the problem here? As far as I understand, when the work is split across threads, the time taken by each thread should stay the same as the number of threads grows. But in my case the total time increases as I increase the number of threads.

/**
 * Reads CSV rows from {@code inputStream}, groups them into batches and
 * submits one {@code uploadCSV} task per batch to a fixed-size thread pool.
 * After the whole input has been read, the result of every task is printed
 * to standard output in submission order.
 *
 * <p>The pool is always shut down before this method waits for the results,
 * so its worker threads terminate once the submitted batches are done
 * instead of lingering after every call.
 *
 * @param inputStream the CSV data; decoded with the platform default charset
 * @throws ExecutionException   if a batch task terminated with an exception
 * @throws InterruptedException if the calling thread is interrupted while
 *                              waiting for a batch result
 * @throws IOException          if reading the CSV data fails
 */
public void parseCSVandUpload(InputStream inputStream) throws ExecutionException, InterruptedException, IOException
{
    final int batchSize = 2000;
    final int poolSize = 10;

    List<Future<String>> results = new ArrayList<Future<String>>();
    ExecutorService csvScheduler = Executors.newFixedThreadPool(poolSize);
    CSVReader reader = null;
    try
    {
        List<String[]> csvPart = new ArrayList<String[]>(batchSize);
        reader = new CSVReader(new InputStreamReader(inputStream));
        String[] nextLine;
        while ((nextLine = reader.readNext()) != null)
        {
            csvPart.add(nextLine);
            if (csvPart.size() == batchSize)
            {
                count++;
                results.add(csvScheduler.submit(new uploadCSV(csvPart)));
                // Hand the full batch over to the task and start a fresh list;
                // the submitted list must not be modified afterwards.
                csvPart = new ArrayList<String[]>(batchSize);
            }
        }
        // NOTE(review): count is declared outside this method and is incremented
        // here even when there is no trailing partial batch -- confirm whether
        // that is intended.
        count++;
        if (!csvPart.isEmpty())
        {
            results.add(csvScheduler.submit(new uploadCSV(csvPart)));
        }
    }
    finally
    {
        // shutdown() lets already-submitted batches run to completion but
        // releases the pool threads afterwards; without it every call leaks
        // the pool's worker threads.
        csvScheduler.shutdown();
        if (reader != null)
        {
            try
            {
                reader.close();
            }
            catch (IOException ignored)
            {
                // Best-effort close: all rows have already been read (or a
                // more relevant exception is already propagating).
            }
        }
    }

    // Future.get() blocks until the corresponding batch has finished.
    for (Future<String> result : results)
    {
        System.out.println(result.get());
    }
}

 class uploadCSV implements Callable<BulkResult>
{
    protected final Logger logger = LoggerFactory.getLogger(uploadCSV.class);
    private List<String[]> rows ;

    public uploadCSV(List<String[]> rows)
    {
        this.rows = rows;
    }

    private List<Map<String,Object>> parseColumn5(String profileName, final String[] nextLine) throws RepositoryException
    {
        List<String> lines = new ArrayList<String>();
        for(int column = 4; column <nextLine.length; column++)
        {
            String lineInfo =  this.parseLine(nextLine, column);
            if(column == 4 && Util.isNullOrEmpty(lineInfo))
            {
                return lines;
            }
            if(!Util.isNullOrEmpty(lineInfo))
            {
                lines.add(lineInfo);
            }
        }

        return lines;
    }

    private String parseColumn1(final String[] csvRow)
    {
        String column = null;
        if (csvRow != null && csvRow.length>0)
        {
            column = csvRow[0];             
        }
        return column;
    }

    private String parseColumn2(final String[] csvRow)
    {
        String column = null;
        if (csvRow != null && csvRow.length>1)
        {
            column = csvRow[1];

        }
        return column;
    }

    private String parseColumn3(final String[] csvRow)
    {
        String column = null;
        if (csvRow != null && csvRow.length>2)
        {
            column = csvRow[2];

        }
        return column;
    }

    private String parseColumn4(final String[] csvRow)
    {
        String column = null;
        if (csvRow != null && csvRow.length>3)
        {
            column = csvRow[3];

        }
        return column;
    }

    private String parseLine(final String[] csvRow, final int column)
    {
        String dnInfo = null;
        if (csvRow != null && csvRow.length>column)
        {
            dnInfo = csvRow[column];

        }
        return dnInfo;
    }




    public String call()  throws IOException, RepositoryException
    {
        String result = "Error";
        try
        {
            for(String[] nextLine : rows)
            {
                try
                {
                    String macAddress = this.parseColumn1(nextLine);
                    if (macAddress != null)
                    {
                        String vendor = this.parseColumn2(nextLine);
                        if(vendor == null)
                        {
                            continue;
                        }

                        //Get model
                        String model = this.parseColumn3(nextLine);
                        if(model == null)
                        {
                            continue;
                        }

                        //Get profileName
                        String profileName = this.parseColumn4(nextLine);
                        if(profileName == null)
                        {
                            continue;
                        }

                        //Get the extensions
                        List<Map<String,Object>> lines = this.parseColumn5(profileName, nextLine);

                        //Error if the mandatory extension is not provided
                        if (lines.isEmpty())
                        {
                            //error
                            return
                        }
                        else // process only if there is a extension
                        {

                            deviceParams.put(Device.MAC_ADDRESS, macAddress);
                            deviceParams.put(Device.VENDOR, vendor);
                            deviceParams.put(Device.MODEL, model);
                            deviceParams.put(Device.PROFILE_NAME, profileName);

                            ReturnCode responseCode = handleOperation(device, Device.UPDATE_DEVICE, deviceParams);
                            this.addException(responseCode, macAddress, Device.UPDATE_DEVICE, ret,true);
                            if(responseCode == ReturnCode.Ok)
                            {
                                result = "Success";
                            }

                        }

                    }
                    else
                    {
                        this.logger.error("[DM-Bulk] Cannot process empty row:{}",StringUtils.join(nextLine, ","));


                }
                catch (Exception e)
                {
                    this.logger.error(String.format("[DM-Bulk] Cannot process input row %s: %s", StringUtils.join(nextLine, ","), e.getMessage()));
                }

            }
        }
        catch (Exception e)
        {
            throw e;
        }

        finally
        {
            try
            {
                if(reader!=null)
                {
                    reader.close();
                }
            }
            catch (IOException ioe)
            {
            }
        }

        return result;

    }
}
Dinesh Kumar
  • 141
  • 10
  • how many cpu's does your computer have? –  Mar 05 '15 at 10:47
  • @DineshKumar Can you show what `new uploadCSV(csvPart))` does? – assylias Mar 05 '15 at 10:52
  • @assylias added the uploadCSV() class – Dinesh Kumar Mar 05 '15 at 11:03
  • @DineshKumar then what does `handleOperation` do? Is it synchronized for example, which could create a bottleneck? – assylias Mar 05 '15 at 11:26
  • At a glance it appears that the parsing of the CSV data is being done in the main thread by the CSVReader. Consider passing raw CSV Strings to the Callable and defer the responsibility for parsing the CSV data to the Callable. – Palamino Mar 05 '15 at 14:27
  • @assylias It is a method that writes the records into the DB. Yes, it is synchronized. – Dinesh Kumar Mar 05 '15 at 14:29
  • @DineshKumar there you go - if it's synchronized your threads can't run it concurrently so you get no performance improvement... – assylias Mar 05 '15 at 15:10
  • @assylias Even when it is not synchronized, there is no difference. – Dinesh Kumar Mar 06 '15 at 11:13
  • @DineshKumar You should run the same piece of code (same amount of records) with one thread and 4 threads and see how much you gain. If there is no gain then you can conclude that your task is not parallelisable as it is. This can either be because it can't be split in chunks that run in parallel or it can be because your code does not allow parallelisation (for example some synchronized method in your algo that forces all threads to wait for one another). Without seeing your code it is hard to tell what is going on... – assylias Mar 06 '15 at 11:17

0 Answers0