1

I'm using JDBC db2 driver, a.k.a. JT400 to connect to db2 server on Application System/400, a midrange computer system.

My goal is to insert into three Tables, from outside of IBM mainframe, which would be cloud instance(eg. Amazon WS).

To make the performance better

1) I am using already established connections to connect to db2. (using org.apache.commons.dbcp.BasicDataSource or com.ibm.as400.access.AS400JDBCManagedConnectionPoolDataSource, both are working fine.)

public class AS400JDBCManagedConnectionPoolDataSource extends AS400JDBCManagedDataSource implements ConnectionPoolDataSource, Referenceable, Serializable {
}

public class AS400JDBCManagedDataSource extends ToolboxWrapper implements DataSource, Referenceable, Serializable, Cloneable {
}

2) I want to cache the insert into statements for all three tables, so that I don't have to send query every time and compile every time, which is expensive. I would instead just pass the parameters only. (Obviously I am doing this using JDBC prepared statements)

Based on an official IBM document Optimize Access to DB2 for i5/OS from Java and WebSphere, page 17-20 - Enabling Extended Dynamic Support, it's possible to cache the statement with AS400JDBCManagedConnectionPoolDataSource.

BUT, the problem is the insert into queries are being compiled each time, which is taking 200ms * 3 queries = 600ms each time.

Example I'm using,

public class CustomerOrderEventHandler extends MultiEventHandler {

    private static Logger logger = LogManager.getLogger(CustomerOrderEventHandler.class);

    //private BasicDataSource establishedConnections = new BasicDataSource();

    //private DB2SimpleDataSource nativeEstablishedConnections = new DB2SimpleDataSource();

    private AS400JDBCManagedConnectionPoolDataSource dynamicEstablishedConnections =
            new AS400JDBCManagedConnectionPoolDataSource();

    private State3 orderState3;
    private State2 orderState2;
    private State1 orderState1;

    public CustomerOrderEventHandler() throws SQLException {
        dynamicEstablishedConnections.setServerName(State.server);
        dynamicEstablishedConnections.setDatabaseName(State.DATABASE);
        dynamicEstablishedConnections.setUser(State.user);
        dynamicEstablishedConnections.setPassword(State.password);
        dynamicEstablishedConnections.setSavePasswordWhenSerialized(true);
        dynamicEstablishedConnections.setPrompt(false);
        dynamicEstablishedConnections.setMinPoolSize(3);
        dynamicEstablishedConnections.setInitialPoolSize(5);
        dynamicEstablishedConnections.setMaxPoolSize(50);
        dynamicEstablishedConnections.setExtendedDynamic(true);
        Connection connection = dynamicEstablishedConnections.getConnection();
        connection.close();
    }

    public void onEvent(CustomerOrder orderEvent){
        long start =  System.currentTimeMillis();
        Connection dbConnection = null;
        try {
            dbConnection = dynamicEstablishedConnections.getConnection();
            long connectionSetupTime = System.currentTimeMillis() - start;

            state3 = new State3(dbConnection);
            state2 = new State2(dbConnection);
            state1 = new State1(dbConnection);

            long initialisation = System.currentTimeMillis() - start - connectionSetupTime;

            int[] state3Result = state3.apply(orderEvent);
            int[] state2Result = state2.apply(orderEvent);
            long state1Result = state1.apply(orderEvent);

            dbConnection.commit();

            logger.info("eventId="+ getEventId(orderEvent) +
                    ",connectionSetupTime=" + connectionSetupTime +
                    ",queryPreCompilation=" + initialisation +
                    ",insertionOnlyTimeTaken=" +
                    (System.currentTimeMillis() - (start + connectionSetupTime + initialisation)) +
                    ",insertionTotalTimeTaken=" + (System.currentTimeMillis() - start));
        } catch (SQLException e) {
            logger.error("Error updating the order states.", e);
            if(dbConnection != null) {
                try {
                    dbConnection.rollback();
                } catch (SQLException e1) {
                    logger.error("Error rolling back the state.", e1);
                }
            }
            throw new CustomerOrderEventHandlerRuntimeException("Error updating the customer order states.", e);
        }
    }

    private Long getEventId(CustomerOrder order) {
        return Long.valueOf(order.getMessageHeader().getCorrelationId());
    }
}

And the States with insert commands look like below,

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;

public class State2 extends State {

    private static Logger logger = LogManager.getLogger(DetailState.class);

    Connection connection;
    PreparedStatement preparedStatement;

    String detailsCompiledQuery = "INSERT INTO " + DATABASE + "." + getStateName() +
            "(" + DetailState.EVENT_ID + ", " +
            State2.ORDER_NUMBER + ", " +
            State2.SKU_ID + ", " +
            State2.SKU_ORDERED_QTY + ") VALUES(?, ?, ?, ?)";

    public State2(Connection connection) throws SQLException {
        this.connection = connection;
        this.preparedStatement = this.connection.prepareStatement(detailsCompiledQuery); // this is taking ~200ms each time
        this.preparedStatement.setPoolable(true); //might not be required, not sure
    }

    public int[] apply(CustomerOrder event) throws StateException {

        event.getMessageBody().getDetails().forEach(detail -> {
            try {
                preparedStatement.setLong(1, getEventId(event));
                preparedStatement.setString(2, getOrderNo(event));
                preparedStatement.setInt(3, detail.getSkuId());
                preparedStatement.setInt(4, detail.getQty());
                preparedStatement.addBatch();
            } catch (SQLException e) {
                logger.error(e);
                throw new StateException("Error setting up data", e);
            }
        });

        long startedTime = System.currentTimeMillis();
        int[] inserted = new int[0];
        try {
            inserted = preparedStatement.executeBatch();
        } catch (SQLException e) {
            throw new StateException("Error updating allocations data", e);
        }
        logger.info("eventId="+ getEventId(event) +
                ",state=details,insertionTimeTaken=" + (System.currentTimeMillis() - startedTime));
        return inserted;
    }

    @Override
    protected String getStateName() {
        return properties.getProperty("state.order.details.name");
    }
}

So the flow is each time an event is received(eg. CustomerOrder), it gets the establishedConnection and then asks the states to initialise their statements.

The metrics for timing look as below,

for the first event, it takes 580ms to create the preparedStatements for 3 tables.

{"timeMillis":1489982655836,"thread":"ScalaTest-run-running-CustomerOrderEventHandlerSpecs","level":"INFO","loggerName":"com.xyz.customerorder.events.handler.CustomerOrderEventHandler",
"message":"eventId=1489982654314,connectionSetupTime=1,queryPreCompilation=580,insertionOnlyTimeTaken=938,insertionTotalTimeTaken=1519","endOfBatch":false,"loggerFqcn":"org.apache.logging.log4j.spi.AbstractLogger","threadId":1,"threadPriority":5}

for the second event, takes 470ms to prepare the statements for 3 tables, which is less than the first event but just < 100ms, I assume it to be drastically less, as it should not even make it to compilation.

{"timeMillis":1489982667243,"thread":"ScalaTest-run-running-PurchaseOrderEventHandlerSpecs","level":"INFO","loggerName":"com.xyz.customerorder.events.handler.CustomerOrderEventHandler",
"message":"eventId=1489982665456,connectionSetupTime=0,queryPreCompilation=417,insertionOnlyTimeTaken=1363,insertionTotalTimeTaken=1780","endOfBatch":false,"loggerFqcn":"org.apache.logging.log4j.spi.AbstractLogger","threadId":1,"threadPriority":5}

What I'm thinking is since I'm closing preparedStatement for that particular connection, it does not even exist for new connection. If thats the case whats the point of having statement caching at all in multi-threaded environment.

The documentation has similar example, where its making transactions inside the same connection which is not the case for me, as I need to have multiple connections at the same time.

snapshot

Questions

Primary

Q1) Is DB2 JDBC driver caching the statements at all, between multiple connections? Because I don't see much difference while preparing the statement. (see example, first one takes ~600ms, second one takes ~500ms)

References

ODP = Open Data Path

SQL packages

SQL packages are permanent objects used to store information related to prepared SQL statements. They can be used by the IBM iSeries Access for the IBM Toolbox for Java JDBC driver. They are also used by applications which use the QSQPRCED (SQL Process Extended Dynamic) API interface.

In the case JDBC, the existence of the SQL package is checked when the client application issues the first prepare of a SQL Statement. If the package does not exist, it is created at that time (even though it may not yet contain any SQL statements)

Tomcat jdbc connection pool configuration - DB2 on iSeries(AS400)

IBM Data Server Driver for JDBC and SQLJ statement caching

prayagupa
  • 30,204
  • 14
  • 155
  • 192
  • very detailed post, but what is your question? – Andy Guibert Mar 20 '17 at 04:41
  • Hi @AndyGuibert My primary question is are the JDBC prepared statements are cached at all? because I only see difference of `~100ms` while preparing the statements. Every time it takes at least `~150ms * 3 = 450ms` for three queries compilation. – prayagupa Mar 20 '17 at 05:09
  • Your question is too broad: you are asking at least three different questions. Please focus on one question. Also: in JDBC a `ConnectionPoolDataSource` **is not** a connection pool (it is a data source **for** a connection pool). Though sometimes implementations get that wrong, so `AS400JDBCManagedConnectionPoolDataSource` might be one after all. – Mark Rotteveel Mar 20 '17 at 07:53
  • Thanks for giving your valuable time. Obviously Q2 & Q3 are optional.Did not understand what you mean `ConnectionPoolDataSource` is not a connection pool?It provides the Pooled physical connection. I'm using `AS400JDBCManagedConnectionPoolDataSource` which implements `ConnectionPoolDataSource` & `DataSource`, and is [recommend by the documentation](ftp://www.redbooks.ibm.com/redbooks/2007_System_i_Technical_Summit/dbp07/Optimize%20Access%20to%20DB2%20for%20i5OS%20from%20Java%20and%20WebSphere.pdf) as it is supposed to support statement caching. I can see it working as connection pool. – prayagupa Mar 20 '17 at 08:36

2 Answers2

2

A couple of important things to note regarding statement caching:

  • Because Statement objects are child objects of a given Connection, once the Connection is closed all child objects (e.g. all Statement objects) must also be closed.
  • It is not possible to associate a statement from one connection with a different connection.
  • Statement pooling may or may not be done be by a given JDBC driver. Statement pooling may also be performed by a connection management layer (i.e. application server)
  • Per JDBC spec, default value for Statement.isPoolable() == false and PreparedStatement.isPoolable() == true, however this flag is only a hint to the JDBC driver. There is no guarantee from the spec that statement pooling will occur.

First off, I am not sure if the JT400 driver does statement caching. The document you referenced in your question comment, Optimize Access to DB2 for i5/OS from Java and WebSphere, is specific to using the JT400 JDBC driver with WebSphere application server, and on slide #3 it indicates that statement caching comes from the WebSphere connection management layer, not the native JDBC driver layer. Given that, I'm going to assume that the JT400 JDBC driver doesn't support statement caching on its own.

So at this point you are probably going to want to plug into some sort of app server (unless you want to implement statement caching on your own, which is sort of re-inventing the wheel). I know for sure that both WebSphere Application Server products (traditional and Liberty) support statement caching for any JDBC driver.

For WebSphere Liberty (the newer product), the data source config is the following:

<dataSource jndiName="jdbc/myDS" statementCacheSize="10">
    <jdbcDriver libraryRef="DB2iToolboxLib"/>
    <properties.db2.i.toolbox databaseName="YOURDB" serverName="localhost"/>
</dataSource>

<library id="DB2iToolboxLib">
    <fileset dir="/path/to/jdbc/driver/dir" includes="jt400.jar"/>
</library>

The key bit being the statementCacheSize attribute of <dataSource>, which has a default value of 10.

(Disclaimer, I'm a WebSphere dev, so I'm going to talk about what I know)

Andy Guibert
  • 41,446
  • 8
  • 38
  • 61
  • Hi Andy, so when you say statements are not cached across multiple connections, does it still apply if I use app servers (be it Tomcat or Webserver)? In my case, the application I'm using is a standalone app, but if the app server allows `global statements caching` I'm up for a app server, I already have Tomcat in my stack. I just checked [tomcat statement cache](https://tomcat.apache.org/tomcat-8.0-doc/jdbc-pool.html#org.apache.tomcat.jdbc.pool.interceptor.StatementCache) which mentions `StatementCache#max = cached statements across the connection pool. The default value is 50.` – prayagupa Mar 21 '17 at 03:21
  • statements are not cached across connections, even on app servers. However, app servers can give the illusion of caching statements across multiple connections by storing open connections in an app server pool, and providing the application with wrappered connections. This way, when the application closes the connection, it only closes a wrapper and the app server hangs on to the real connection. JDBC drivers may also do similar things internally but I'm not sure. – Andy Guibert Mar 21 '17 at 03:24
  • and regarding Tomcat: the doc you linked looks like the equivalent mechanism for what I was describing with Liberty. – Andy Guibert Mar 21 '17 at 03:26
  • 1
    Ok, that makes quite sense to me now. Possibly the return value of `AS400JDBCManagedConnectionPoolDataSource#getConnection` does it too then. I will revert back to a single connection(not pooled one) and then compare the metrics to prepare the statements. Quite not sure if tomcat's [statement cache](https://tomcat.apache.org/tomcat-8.0-doc/jdbc-pool.html#org.apache.tomcat.jdbc.pool.interceptor.StatementCache) is across connections, as the doc says `The statements are cached PER CONNECTION.` I will also give it a shot. Your answer is very very very valuable. Thanks a lot – prayagupa Mar 21 '17 at 03:48
0

First off, the IBM i Java documentation is here: IBM Toolbox for Java

Secondly, I don't see where you are setting the "extended dynamic" property to true which provides

a mechanism for caching dynamic SQL statements on the server. The first time a particular SQL statement is prepared, it is stored in a SQL package on the server. If the package does not exist, it is automatically created. On subsequent prepares of the same SQL statement, the server can skip a significant part of the processing by using information stored in the SQL package. If this is set to "true", then a package name must be set using the "package" property.

I think you're missing some steps in using the managed pool...here's the first example in the IBM docs

import javax.naming.Context;
import javax.naming.InitialContext;
import javax.sql.DataSource;

import com.ibm.as400.access.AS400JDBCManagedConnectionPoolDataSource;
import com.ibm.as400.access.AS400JDBCManagedDataSource;


public class TestJDBCConnPoolSnippet
{
    void test()
    {
        AS400JDBCManagedConnectionPoolDataSource cpds0 = new AS400JDBCManagedConnectionPoolDataSource();

        // Set general datasource properties.  Note that both connection pool datasource (CPDS) and managed
               // datasource (MDS) have these properties, and they might have different values.
        cpds0.setServerName(host);
        cpds0.setDatabaseName(host);//iasp can be here
        cpds0.setUser(userid);
        cpds0.setPassword(password);


        cpds0.setSavePasswordWhenSerialized(true);

        // Set connection pooling-specific properties.
        cpds0.setInitialPoolSize(initialPoolSize_);
        cpds0.setMinPoolSize(minPoolSize_);
        cpds0.setMaxPoolSize(maxPoolSize_);
        cpds0.setMaxLifetime((int)(maxLifetime_/1000));  // convert to seconds
        cpds0.setMaxIdleTime((int)(maxIdleTime_/1000));  // convert to seconds
        cpds0.setPropertyCycle((int)(propertyCycle_/1000));  // convert to seconds
        //cpds0.setReuseConnections(false);  // do not re-use connections

        // Set the initial context factory to use.
        System.setProperty(Context.INITIAL_CONTEXT_FACTORY, "com.sun.jndi.fscontext.RefFSContextFactory");


        // Get the JNDI Initial Context.
        Context ctx = new InitialContext();

        // Note: The following is an alternative way to set context properties locally:
        //   Properties env = new Properties();
        //   env.put(Context.INITIAL_CONTEXT_FACTORY, "com.sun.jndi.fscontext.RefFSContextFactory");
        //   Context ctx = new InitialContext(env);

        ctx.rebind("mydatasource", cpds0);  // We can now do lookups on cpds, by the name "mydatasource".

        // Create a standard DataSource object that references it.

        AS400JDBCManagedDataSource mds0 = new AS400JDBCManagedDataSource();
        mds0.setDescription("DataSource supporting connection pooling");
        mds0.setDataSourceName("mydatasource");
        ctx.rebind("ConnectionPoolingDataSource", mds0);

        DataSource dataSource_ = (DataSource)ctx.lookup("ConnectionPoolingDataSource");

        AS400JDBCManagedDataSource mds_ = (AS400JDBCManagedDataSource)dataSource_;

        boolean isHealthy = mds_.checkPoolHealth(false);  //check pool health

        Connection c = dataSource_.getConnection(); 

    }

}
Charles
  • 21,637
  • 1
  • 20
  • 44
  • I'm using the `AS400JDBCManagedConnectionPoolDataSource#setExtendedDynamic(true)` as well. One difference I see in your example is its using `AS400JDBCManagedDataSource`. I'm using `AS400JDBCManagedConnectionPoolDataSource` alone to get the established connection. – prayagupa Mar 20 '17 at 18:24