2

I have a spring boot project with apache camel (Using maven dependencies: camel-spring-boot-starter, camel-jpa-starter, camel-endpointdsl).

I have the following 3 entities:

@Entity
@Table(name = RawDataDelivery.TABLE_NAME)
@BatchSize(size = 10)
public class RawDataDelivery extends PersistentObjectWithCreationDate {
    protected static final String TABLE_NAME = "raw_data_delivery";

    private static final String COLUMN_CONFIGURATION_ID = "configuration_id";
    private static final String COLUMN_SCOPED_CALCULATED = "scopes_calculated";

    @Column(nullable = false, name = COLUMN_SCOPED_CALCULATED)
    private boolean scopesCalculated;

    @OneToMany(mappedBy = "rawDataDelivery", fetch = FetchType.LAZY)
    private Set<RawDataFile> files = new HashSet<>();

    @CollectionTable(name = "processed_scopes_per_delivery")
    @ElementCollection(targetClass = String.class)
    private Set<String> processedScopes = new HashSet<>();
    
    // Getter/Setter
}

@Entity
@Table(name = RawDataFile.TABLE_NAME)
@BatchSize(size = 100)
public class RawDataFile extends PersistentObjectWithCreationDate {
    protected static final String TABLE_NAME = "raw_data_files";

    private static final String COLUMN_CONFIGURATION_ID = "configuration_id";
    private static final String COLUMN_RAW_DATA_DELIVERY_ID = "raw_data_delivery_id";
    private static final String COLUMN_PARENT_ID = "parent_file_id";
    private static final String COLUMN_IDENTIFIER = "identifier";
    private static final String COLUMN_CONTENT = "content";
    private static final String COLUMN_FILE_SIZE_IN_BYTES = "file_size_in_bytes";

    @ManyToOne(optional = true, fetch = FetchType.LAZY)
    @JoinColumn(name = COLUMN_RAW_DATA_DELIVERY_ID)
    private RawDataDelivery rawDataDelivery;

    @Column(name = COLUMN_IDENTIFIER, nullable = false)
    private String identifier;

    @Lob
    @Column(name = COLUMN_CONTENT, nullable = true)
    private Blob content;

    @Column(name = COLUMN_FILE_SIZE_IN_BYTES, nullable = false)
    private long fileSizeInBytes;
    
    // Getter/Setter
}

@Entity
@TypeDef(name = "jsonb", typeClass = JsonBinaryType.class)
@Table(name = RawDataRecord.TABLE_NAME, uniqueConstraints = ...)
public class RawDataRecord extends PersistentObjectWithCreationDate {
    public static final String TABLE_NAME = "raw_data_records";

    static final String COLUMN_RAW_DATA_FILE_ID = "raw_data_file_id";
    static final String COLUMN_INDEX = "index";
    static final String COLUMN_CONTENT = "content";
    static final String COLUMN_HASHCODE = "hashcode";
    static final String COLUMN_SCOPE = "scope";

    @ManyToOne(optional = false)
    @JoinColumn(name = COLUMN_RAW_DATA_FILE_ID)
    private RawDataFile rawDataFile;

    @Column(name = COLUMN_INDEX, nullable = false)
    private long index;

    @Lob
    @Type(type = "jsonb")
    @Column(name = COLUMN_CONTENT, nullable = false, columnDefinition = "jsonb")
    private String content;

    @Column(name = COLUMN_HASHCODE, nullable = false)
    private String hashCode;

    @Column(name = COLUMN_SCOPE, nullable = true)
    private String scope;
}

What I am trying to do is build a route with Apache Camel that selects all deliveries having the flag "scopesCalculated" == false and calculates/updates the scope variable of all records attached to the files of these deliveries. This should happen in one database transaction. Once all scopes are updated, I want to set the scopesCalculated flag to true and commit the changes to the database (in my case PostgreSQL).

What I have so far is this:

String r3RouteId = ...;

var dataSource3 = jpa(RawDataDelivery.class.getName())
        .lockModeType(LockModeType.NONE)
        .delay(60).timeUnit(TimeUnit.SECONDS)
        .consumeDelete(false)
        .query("select rdd from RawDataDelivery rdd where rdd.scopesCalculated is false and rdd.configuration.id = " + configuration.getId())
;

from(dataSource3)
        .routeId(r3RouteId)
        .routeDescription(configuration.getName())
        .messageHistory()
        .transacted()
        .process(exchange -> {
            RawDataDelivery rawDataDelivery = exchange.getIn().getBody(RawDataDelivery.class);
            rawDataDelivery.setScopesCalculated(true);
        })
        .transform(new Expression() {
            @Override
            public <T> T evaluate(Exchange exchange, Class<T> type) {
                RawDataDelivery rawDataDelivery = exchange.getIn().getBody(RawDataDelivery.class);
                return (T)rawDataDelivery.getFiles();
            }
        })
        .split(bodyAs(Iterator.class)).streaming()
            .transform(new Expression() {
                @Override
                public <T> T evaluate(Exchange exchange, Class<T> type) {
                    RawDataFile rawDataFile = exchange.getIn().getBody(RawDataFile.class);
                    
                    // rawDataRecordJpaRepository is an autowired interface by spring with the following method:
                    // @Lock(value = LockModeType.NONE)
                    // Stream<RawDataRecord> findByRawDataFile(RawDataFile rawDataFile);
                    
                    // we may have many records per file (100k and more), so we don't want to keep them all in memory.
                    // instead we try to stream the resultset and aggregate them by 500 partitions for processing
                    return (T)rawDataRecordJpaRepository.findByRawDataFile(rawDataFile);
                }
            })
            .split(bodyAs(Iterator.class)).streaming()
                .aggregate(constant("all"), new GroupedBodyAggregationStrategy())
                    .completionSize(500)
                    .completionTimeout(TimeUnit.SECONDS.toMillis(5))
                    .process(exchange -> {
                        List<RawDataRecord> rawDataRecords = exchange.getIn().getBody(List.class);

                        for (RawDataRecord rawDataRecord : rawDataRecords) {
                            rawDataRecord.setScope("abc");
                        }
                    })
;

Basically this is working, but I have the problem that the records of the last partition are not updated. In my example I have 43782 records but only 43500 are updated; 282 remain with scope == null. I really don't understand the JPA transaction and session management of Camel, and I can't find any examples of how to update JPA/Hibernate entities with Camel (without using the SQL component).

I have already tried several solutions, but none of them worked. Most attempts end with "EntityManager/Session closed", "no transaction is in progress", or "Batch update failed. Expected result 1 but was 0", ...

I tried the following:

  1. to set jpa(...).joinTransaction(false).advanced().sharedEntityManager(true)
  2. use .enrich(jpa(RawDataRecord.class.getName()).query("select rec from RawDataRecord rec where rawDataFile = ${body}")) instead of .transform(...) with JPA repository for the records
  3. using hibernate session from camel headers to update/save/flush entities: "Session session = exchange.getIn().getHeader(JpaConstants.ENTITY_MANAGER, Session.class);"
  4. try to update over new jpa component at the end of the route: .split(bodyAs(Iterator.class)).streaming() .to(jpa(RawDataRecord.class.getName()).usePersist(false).flushOnSend(false))

Do you have any other ideas / recommendations?

MatWein
  • 167
  • 1
  • 13
  • Have you included a JTA transaction manager as the PlatformTransactionManager in your Spring Boot project? Something like 'spring-boot-starter-jta-narayana'? – Doug Grove Aug 04 '22 at 14:12
  • no, i have not. is this necessary? – MatWein Aug 04 '22 at 17:30
  • It is hard to tell from your code. If you are seeing "no transaction is in progress", and do not have a transaction manager configured, it could be an issue. – Doug Grove Aug 08 '22 at 14:56
  • But the above code is working and most of the records are saved with the new scope (as described above). only the last partition is not saved. – MatWein Aug 08 '22 at 21:00
  • "In my example I have 43782 records but only 43500 are updated. 282 remain with scope == null" <- Is this always 282 records that problematic? Or the number is vary? – xsalefter Aug 13 '22 at 02:39
  • Always 282, because this is the last partition of 43782 (43500 + 282) – MatWein Aug 13 '22 at 15:20
  • @MatWein My guess is that, probably the problem is not in jpa/hibernate at all? Probably bad input? incompatible escape character? – xsalefter Aug 15 '22 at 03:12

0 Answers