
I have developed an ETL application using Spring Batch, in which data is fetched through REST requests. The application comprises around 15 Spring Batch configuration classes, each responsible for loading a different dataset. One of these classes/jobs, FitsVendorJob, is posted below; all the other classes are designed in the same way.

I've encountered a performance problem: while one job's readers are retrieving data, the readers of the other jobs have to wait. Some readers take considerable time (minutes) to fetch their data, so even the faster readers end up waiting. It seems to happen not only with the readers but with the processors and writers too. I believe I designed all the classes/jobs to be independent of each other, yet there still appears to be a dependency between them.

My expected job flow, sketched in code just after this list, would be:

  1. Execute the truncateFitsVendorTable and truncateSrcVendorTable steps in parallel.
  2. Proceed with the processVendorKeyConstraints step.
  3. Run the loadWFitsVendorTable and loadSrcVendorTable steps in parallel.
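
This is what runFitsVendorJob() in the class below builds; here it is condensed, with the single-step flows and the executor passed in as parameters just to keep the sketch self-contained (the FAILED branch of the real code is omitted):

// Condensed sketch of runFitsVendorJob() below: f1/f2 wrap the truncate
// steps, f3 the key-constraints step, f4/f5 the load steps.
public Job sketch(JobBuilderFactory jobs, TaskExecutor executor,
        Flow f1, Flow f2, Flow f3, Flow f4, Flow f5, Flow emailFlow) {
    Flow truncateFlow = new FlowBuilder<Flow>("truncateFlow").split(executor).add(f1, f2).build();
    Flow loadFlow = new FlowBuilder<Flow>("loadFlow").split(executor).add(f4, f5).build();
    return jobs.get("FitsVendorJob")
            .start(truncateFlow)   // 1. truncates in parallel
            .next(f3)              // 2. processVendorKeyConstraints
            .next(loadFlow)        // 3. loads in parallel
            .on("*").to(emailFlow) // then the email step
            .end().build();
}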

I have googled and read the docs, but I haven't yet found a solution to this problem. I would greatly appreciate any insights or guidance on how to overcome this.

@Configuration
@EnableBatchProcessing
@RequiredArgsConstructor
public class FitsVendorJob {

    @NonNull
    private JobBuilderFactory jobBuilderFactory;

    @NonNull
    private StepBuilderFactory stepBuilderFactory;

    @Autowired
    @NonNull
    private HikariDataSource dataSource;

    @Autowired
    @NonNull
    StreamReader streamReader;

    @Autowired
    @NonNull
    FetchRecordsService fetchRecords;

    @Autowired
    @NonNull
    QueryBuilder queryBuilder;

    @Autowired
    @NonNull
    EmailService emailService;

    @NonNull
    private BatchConfig batchConfig;

    private Long jobExecutionId;

    private String reportUrl;

    @Bean
    @StepScope
    public FlatFileItemReader<FitsVendor> fitsVendorReader() {
        FlatFileItemReader<FitsVendor> itemReader = new FlatFileItemReader<>();
        // initiateFetch issues the REST request for the report; this is the
        // call that can take minutes for the larger datasets.
        itemReader.setResource(
                new InputStreamResource(fetchRecords.initiateFetch(Constants.VENDOR, reportUrl, jobExecutionId)));
        itemReader.setName("csvReader");
        itemReader.setLinesToSkip(1);
        itemReader.setLineMapper(vendorLineMapper());
        itemReader.setRecordSeparatorPolicy(vendorRecordSeparator());
        return itemReader;
    }

    @Bean
    @StepScope
    public FlatFileItemReader<FitsVendor> fitsVendorReaderSrc() {
        FlatFileItemReader<FitsVendor> itemReader = new FlatFileItemReader<>();
        itemReader.setResource(
                new InputStreamResource(fetchRecords.initiateFetch(Constants.VENDOR, reportUrl, jobExecutionId)));
        itemReader.setName("csvReaderSrc");
        itemReader.setLinesToSkip(1);
        itemReader.setLineMapper(vendorLineMapper());
        itemReader.setRecordSeparatorPolicy(vendorRecordSeparator());
        return itemReader;
    }

    @Bean
    public LineMapper<FitsVendor> vendorLineMapper() {
        DefaultLineMapper<FitsVendor> lineMapper = new DefaultLineMapper<>();
        var lineTokenizer = new DelimitedLineTokenizer();

        lineTokenizer.setDelimiter(",");
        lineTokenizer.setNames(Constants.WFITS_VENDOR_CSV_HEADERS.split(","));
        lineTokenizer.setStrict(true);
        lineTokenizer.setIncludedFields(0, 1, 2, 3);

        BeanWrapperFieldSetMapper<FitsVendor> fieldSetMapper = new BeanWrapperFieldSetMapper<>();
        fieldSetMapper.setTargetType(FitsVendor.class);

        lineMapper.setLineTokenizer(lineTokenizer);
        lineMapper.setFieldSetMapper(fieldSetMapper);

        return lineMapper;
    }

    @Bean
    public FitsVendorProcessor fitsVendorProcessor() {
        return new FitsVendorProcessor();
    }

    @Bean
    public ReaderPolicy vendorRecordSeparator() {
        return new ReaderPolicy();
    }

    @Bean
    public JdbcBatchItemWriter<FitsVendor> fitsVendorWriter() {
        JdbcBatchItemWriter<FitsVendor> databaseItemWriter = new JdbcBatchItemWriter<>();
        databaseItemWriter.setDataSource(dataSource);
        databaseItemWriter.setSql(queryBuilder.buildInsertQuery(Constants.WFITS_VENDOR_TABLE));
        ItemPreparedStatementSetter<FitsVendor> valueSetter = new FitsVendorPreparedStatementSetter();
        databaseItemWriter.setItemPreparedStatementSetter(valueSetter);
        return databaseItemWriter;
    }

    @Bean
    public JdbcBatchItemWriter<FitsVendor> srcVendorWriter() {
        JdbcBatchItemWriter<FitsVendor> databaseItemWriter = new JdbcBatchItemWriter<>();
        databaseItemWriter.setDataSource(dataSource);
        databaseItemWriter.setSql(queryBuilder.buildInsertQuery(Constants.SRC_WFITS_VENDOR_TABLE));
        ItemPreparedStatementSetter<FitsVendor> valueSetter = new FitsVendorPreparedStatementSetterSrc();
        databaseItemWriter.setItemPreparedStatementSetter(valueSetter);
        return databaseItemWriter;
    }

    @Bean
    public TaskExecutor fitsVendorTaskExecutor() {
        ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
        taskExecutor.setMaxPoolSize(10);
        taskExecutor.setCorePoolSize(5);
        taskExecutor.setQueueCapacity(25);
        taskExecutor.setThreadNamePrefix("FitsVendorJob Thread");
        taskExecutor.initialize();
        return taskExecutor;
    }

    @Bean
    public JobSkipPolicy vendorSkipPolicy() {
        return new JobSkipPolicy();
    }

    @Bean
    public Step truncateFitsVendorTable() {
        return stepBuilderFactory.get("truncate-wfitsvendor-table").tasklet(truncateFitsVendorTasklet()).build();
    }

    @Bean
    public Step truncateSrcVendorTable() {
        return stepBuilderFactory.get("truncate-srcvendor-table").tasklet(truncateSrcVendorTasklet()).build();
    }

    @Bean
    public Step processVendorKeyConstraints() {
        return stepBuilderFactory.get(Constants.PROCESS_KEY_CONSTRAINTS_STEP).<FitsVendor, FitsVendor>chunk(10000)
                .reader(fitsVendorReader()).processor(fitsVendorProcessor()).writer(new NoOpWriter<FitsVendor>())
                .faultTolerant().skipPolicy(vendorSkipPolicy()).retryLimit(0).processorNonTransactional()
                .listener(new FitsPricingItemReaderListener()).taskExecutor(fitsVendorTaskExecutor()).build();
    }

    @Bean
    public Step loadWFitsVendorTable() {
        return stepBuilderFactory.get(Constants.LOAD_WFITS_VENDOR_TABLE).<FitsVendor, FitsVendor>chunk(10000)
                .reader(fitsVendorReader()).processor(fitsVendorProcessor()).writer(fitsVendorWriter()).faultTolerant()
                .skipPolicy(vendorSkipPolicy()).retryLimit(0).processorNonTransactional()
                .listener(new FitsPricingItemReaderListener()).taskExecutor(fitsVendorTaskExecutor()).build();
    }

    @Bean
    public Step loadSrcVendorTable() {
        return stepBuilderFactory.get(Constants.LOAD_WFITS_VENDOR_SRC_TABLE).<FitsVendor, FitsVendor>chunk(10000)
                .reader(fitsVendorReaderSrc()).writer(srcVendorWriter()).faultTolerant().skipPolicy(vendorSkipPolicy())
                .retryLimit(0).processorNonTransactional().listener(new VendorItemReaderListener())
                .taskExecutor(fitsVendorTaskExecutor()).build();
    }

    @Bean(name = "vendorSendEmail")
    public Step sendEmail() {
        return stepBuilderFactory.get("send-email-vendor").tasklet(batchConfig.sendEmailTasklet()).build();
    }

    public Tasklet truncateFitsVendorTasklet() {
        return (contribution, chunkContext) -> {
            extractRuntimeParameters(chunkContext);
            new JdbcTemplate(dataSource).execute(queryBuilder.buildTruncateQuery(Constants.WFITS_VENDOR_TABLE));
            return RepeatStatus.FINISHED;
        };
    }

    public Tasklet truncateSrcVendorTasklet() {
        return (contribution, chunkContext) -> {
            new JdbcTemplate(dataSource).execute(queryBuilder.buildTruncateQuery(Constants.SRC_WFITS_VENDOR_TABLE));
            return RepeatStatus.FINISHED;
        };
    }

    /**
     * Captures the job execution id and the report URL job parameter in
     * instance fields so the step-scoped readers can use them later.
     */
    private void extractRuntimeParameters(ChunkContext chunkContext) {
        jobExecutionId = chunkContext.getStepContext().getStepExecution().getJobExecution().getId();
        reportUrl = chunkContext.getStepContext().getStepExecution().getJobParameters().getString(Constants.REPORT_URL);
    }

    @Bean
    @Qualifier("WFitsVendorJob")
    public Job runFitsVendorJob() {
        final var flow1 = new FlowBuilder<Flow>("flow1").from(truncateFitsVendorTable()).end();
        final var flow2 = new FlowBuilder<Flow>("flow2").from(truncateSrcVendorTable()).end();
        final var flow3 = new FlowBuilder<Flow>("flow3").from(processVendorKeyConstraints()).end();
        final var flow4 = new FlowBuilder<Flow>("flow4").from(loadWFitsVendorTable()).end();
        final var flow5 = new FlowBuilder<Flow>("flow5").from(loadSrcVendorTable()).end();

        SimpleAsyncTaskExecutor sate = new SimpleAsyncTaskExecutor();
        sate.setConcurrencyLimit(10);
        sate.setThreadNamePrefix("Vendor Job Flow: ");

        final var emailFlow = new FlowBuilder<Flow>("emailFlow").from(sendEmail()).end();

        final var truncateFlow = new FlowBuilder<Flow>("truncateFlow").split(sate).add(flow1, flow2).build();

        final var processKeyConstraintsFlow = new FlowBuilder<Flow>("processKeyConstraintsFlow").split(sate).add(flow3)
                .build();

        final var loadFlow = new FlowBuilder<Flow>("loadFlow").split(sate).add(flow4, flow5).build();

        final var decisionState = new FlowBuilder<Flow>("decisionState").from(loadFlow).on("FAILED").to(flow1).end();

        return jobBuilderFactory.get("FitsVendorJob").listener(new JobCompletionListener()).start(truncateFlow)
                .next(processKeyConstraintsFlow).next(decisionState).on("*").to(emailFlow).end().build();
    }
}
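
A note on the mutable jobExecutionId and reportUrl fields above: since the readers are @StepScope, the standard alternative would be to late-bind the job parameters directly into the reader bean, roughly as sketched here. This is not my current code, and the SpEL parameter key stands in for whatever Constants.REPORT_URL resolves to:

// Sketch of the late-binding alternative: job parameters are injected
// when the step-scoped bean is created, so no mutable fields are needed.
@Bean
@StepScope
public FlatFileItemReader<FitsVendor> fitsVendorReader(
        @Value("#{jobParameters['reportURL']}") String reportUrl, // key = Constants.REPORT_URL
        @Value("#{stepExecution.jobExecutionId}") Long jobExecutionId) {
    FlatFileItemReader<FitsVendor> itemReader = new FlatFileItemReader<>();
    itemReader.setResource(new InputStreamResource(
            fetchRecords.initiateFetch(Constants.VENDOR, reportUrl, jobExecutionId)));
    itemReader.setName("csvReader");
    itemReader.setLinesToSkip(1);
    itemReader.setLineMapper(vendorLineMapper());
    itemReader.setRecordSeparatorPolicy(vendorRecordSeparator());
    return itemReader;
}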

JobLauncher Bean

@Bean(name = "batchJobLauncher")
public JobLauncher simpleJobLauncher() throws Exception {
    ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
    taskExecutor.setMaxPoolSize(10);
    taskExecutor.setCorePoolSize(5);
    taskExecutor.setQueueCapacity(25);
    taskExecutor.setThreadNamePrefix("batchJobLauncher Thread: ");
    taskExecutor.initialize();

    SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
    jobLauncher.setJobRepository(jobRepository); // autowired in the surrounding configuration class (not shown)
    jobLauncher.setTaskExecutor(taskExecutor);
    jobLauncher.afterPropertiesSet();
    return jobLauncher;
}

Since the launcher has its own task executor, jobLauncher.run(...) returns almost immediately (with the execution still in a STARTING/STARTED state) and the job continues on the batchJobLauncher threads, which is why several jobs can be in flight at once.

ImportJobLauncher.java

@Component
@Slf4j
public class ImportJobLauncher {

    private final Map<String, Job> jobsMap = new HashMap<>();

    @Autowired
    private CommonUtility comUtility;

    @Autowired
    @Qualifier("batchJobLauncher")
    JobLauncher jobLauncher;

    @Autowired
    @Qualifier("WFitsVendorJob")
    Job fitsVendorJob;

    @Autowired
    @Qualifier("WFitsServiceJob")
    Job fitsServiceJob;

    @Autowired
    @Qualifier("WFitsContractJob")
    Job fitsContractJob;

    @Autowired
    @Qualifier("WFitsCurrencyJob")
    Job fitsCurrencyJob;

    @Autowired
    @Qualifier("WFitsProductJob")
    Job fitsProductJob;

    @Autowired
    @Qualifier("WFitsPricingJob")
    Job fitsPricingJob;

    @Autowired
    @Qualifier("WFitsPricingAsrJob")
    Job fitsPricingAsrJob;

    @Autowired
    @Qualifier("WFitsPlatformJob")
    Job fitsPlatformJob;

    @Autowired
    @Qualifier("WFitsPiUserJob")
    Job fitsPiUserJob;

    @Autowired
    @Qualifier("WFitsPiJob")
    Job fitsPiJob;

    @Autowired
    @Qualifier("WFitsInventoryJob")
    Job fitsInventoryJob;

    @Autowired
    @Qualifier("WFitsPricingMapJob")
    Job fitsPricingMapJob;

    @Autowired
    @Qualifier("WFitsPricingVpcJob")
    Job fitsPricingVpcJob;

    @Autowired
    @Qualifier("WFitsBuildingJob")
    Job fitsBuildingJob;

    @Autowired
    @Qualifier("WFitsBuGroupJob")
    Job fitsBuGroupJob;

    @Autowired
    @Qualifier("WFitsProfileJob")
    Job fitsProfileJob;

    @Autowired
    @Qualifier("WFitsAccountJob")
    Job fitsAccountJob;

    @Autowired
    @Qualifier("WFitsServiceMapJob")
    Job fitsServiceMapJob;

    @Autowired
    @Qualifier("WFitsNcRuleJob")
    Job fitsNcRuleJob;

    @PostConstruct
    public void initialize() {
        jobsMap.put(Constants.VENDOR, fitsVendorJob);
        jobsMap.put(Constants.SERVICE, fitsServiceJob);
        jobsMap.put(Constants.CURRENCY, fitsCurrencyJob);
        jobsMap.put(Constants.PRODUCT, fitsProductJob);
        jobsMap.put(Constants.PLATFORM, fitsPlatformJob);
        jobsMap.put(Constants.CONTRACT, fitsContractJob);
        jobsMap.put(Constants.PRICING, fitsPricingJob);
        jobsMap.put(Constants.PRICINGVPC, fitsPricingVpcJob);
        jobsMap.put(Constants.PRICINGMAP, fitsPricingMapJob);
        jobsMap.put(Constants.PRICINGASR, fitsPricingAsrJob);
        jobsMap.put(Constants.BUGROUP, fitsBuGroupJob);
        jobsMap.put(Constants.PROFILE, fitsProfileJob);
        jobsMap.put(Constants.ACCOUNT, fitsAccountJob);
        jobsMap.put(Constants.INVENTORY, fitsInventoryJob);
        jobsMap.put(Constants.BUILDING, fitsBuildingJob);
        jobsMap.put(Constants.PIUSER, fitsPiUserJob);
        jobsMap.put(Constants.SERVICEMAP, fitsServiceMapJob);
        jobsMap.put(Constants.NCRULE, fitsNcRuleJob);
        jobsMap.put(Constants.PI, fitsPiJob);
    }

    public BatchJobResponse importFitsData(String jobName, String reportURL, String sendEmail) {
        var job = jobsMap.get(jobName);
        var jobParameters = createJobParameters(jobName, reportURL, sendEmail);
        var jobExecution = runJob(job, jobParameters);

        if (jobExecution == null) {
            log.error("Failed to trigger the job '{}'.", jobName);
            return new BatchJobResponse(jobName, BatchStatus.STOPPED.toString(), 0L);
        }

        return new BatchJobResponse(jobExecution.getJobInstance().getJobName(), jobExecution.getStatus().toString(),
                jobExecution.getId());
    }

    private JobParameters createJobParameters(String jobName, String reportURL, String sendEmail) {
        JobParametersBuilder jobParametersBuilder = new JobParametersBuilder()
                .addLong(Constants.START_AT, comUtility.currentTimeMillis()).addString(Constants.REPORT_URL, reportURL)
                .addString(Constants.SEND_EMAIL, sendEmail);

        if (Constants.PI.equals(jobName)) {
            String reportURL2 = comUtility.getReportUrl("piactive");
            jobParametersBuilder.addString("reportURL2", reportURL2);
        }

        return jobParametersBuilder.toJobParameters();
    }

    private JobExecution runJob(Job job, JobParameters jobParameters) {
        try {
            var jobExecution = jobLauncher.run(job, jobParameters);
            Thread.sleep(2000);
            return jobExecution;
        } catch (InterruptedException e) {
            log.error("Interrupted while triggering the {} job", job.getName(), e);
            Thread.currentThread().interrupt();
            return null;
        } catch (Exception e) {
            log.error("Error occurred while trying to trigger the {} job", job.getName(), e);
            return null;
        }
    }
}

The thread dump is attached here: https://pastebin.com/Z3FJgBKL

Scenario captured in the thread dump: I first triggered the Inventory job, and while it was in progress I triggered the Vendor and Pricing jobs. The first step of each of these two jobs (the truncate-table tasklet) executes in parallel, but when control reaches the readers of these two jobs, they are blocked until the Inventory job's reader finishes processing.
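
For reference, that trigger sequence boils down to calls like the following (the REST layer that invokes ImportJobLauncher is omitted from the question, so the wrapper method and URL variables here are placeholders):

// Placeholder snippet mirroring the scenario above: Inventory starts first,
// then Vendor and Pricing are triggered while Inventory is still running.
void reproduce(ImportJobLauncher launcher, String inventoryUrl, String vendorUrl, String pricingUrl) {
    launcher.importFitsData(Constants.INVENTORY, inventoryUrl, "true"); // long-running reader
    launcher.importFitsData(Constants.VENDOR, vendorUrl, "true");       // truncate runs, reader blocks
    launcher.importFitsData(Constants.PRICING, pricingUrl, "true");     // truncate runs, reader blocks
}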

Sagar SN
  • Have you tried analyzing thread dumps to see where those readers are waiting? https://stackoverflow.com/a/59297776/175554 If you create a small reproducer, it would be easier to investigate the issue. – ozkanpakdil Aug 20 '23 at 11:22
  • @ozkanpakdil I have captured a thread dump using jstack and attached it to the question. Kindly check. – Sagar SN Aug 22 '23 at 13:19
  • I checked the thread dump; you don't seem to have many threads. One thread, "SimpleAsyncTaskExecutor-1" #50, looks locked and is waiting on some HTTP response. I think your batch configuration is not running very parallel or with enough threads. If you prepare a reproducer I can dig more. – ozkanpakdil Aug 22 '23 at 19:14
  • @ozkanpakdil I updated the thread dump, can you please check now? Thanks – Sagar SN Aug 24 '23 at 14:46
