I have a write-heavy Spring Boot application that integrates with BigQuery under heavy load, and some entries take up to 10 minutes to insert. Here is my configuration:
Number of entries stored: 1 million/min
Number of pods: 100
Insertion type: streaming data (using JsonStreamWriter)
Deployed cloud: Azure
Average time taken for an insert: 650 ms
Max time taken: 22 mins (for a single insert)
Number of threads per pod: 15
Each pod holds a BigQuery connection and performs the inserts. Since roughly 10% of the inserts take minutes to complete, we are hitting a lot of timeouts and performance issues. Is there a more efficient way to write data to BigQuery at this load?
We use the following Google client libraries:
<dependency>
    <groupId>com.google.cloud</groupId>
    <artifactId>google-cloud-storage</artifactId>
</dependency>
<dependency>
    <groupId>com.google.cloud</groupId>
    <artifactId>google-cloud-bigquerystorage</artifactId>
</dependency>
<dependency>
    <groupId>com.google.cloud</groupId>
    <artifactId>google-cloud-bigquery</artifactId>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </exclusion>
    </exclusions>
</dependency>

<!-- BOM import (this block sits under <dependencyManagement> in the pom) -->
<dependencies>
    <dependency>
        <groupId>com.google.cloud</groupId>
        <artifactId>libraries-bom</artifactId>
        <version>25.4.0</version>
        <type>pom</type>
        <scope>import</scope>
    </dependency>
</dependencies>
private void updateRequestMetadataOperations(JSONArray requestMetaDataArr) {
    JSONArray firstObjArr = new JSONArray();
    JSONObject firstTableJsonObj = new JSONObject();
    firstTableJsonObj.put("firstColumn", firstColumnVal);
    firstTableJsonObj.put("secondColumn", secondColumnVal);
    firstTableJsonObj.put("thirdColumn", thirdColumnVal);
    firstTableJsonObj.put("fourthColumn", fourthColumnVal);
    firstTableJsonObj.put("fifthColumn", fifthColumnVal);
    firstTableJsonObj.put("sixthColumn", sixthColumnVal);
    // ... puts for the remaining columns omitted ...
    firstTableJsonObj.put("twentyColumn", twentyColumnVal);
    firstObjArr.put(firstTableJsonObj);
}
public void insertIntoBigQuery(String tableName, JSONArray jsonArr) throws Exception {
    if (jsonArr.length() == 0) {
        return;
    }
    JsonStreamWriter jsonStreamWriter = JsonStreamWriterUtil.getWriteStreamMap(tableName);
    if (jsonStreamWriter != null) {
        // append() returns an ApiFuture<AppendRowsResponse>; the result is not consumed here
        jsonStreamWriter.append(jsonArr);
    }
}
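For context, JsonStreamWriter.append() returns an ApiFuture<AppendRowsResponse>, so the slow inserts show up wherever that future is awaited. A minimal sketch of waiting on a single append with a timeout (the helper name and the 30-second timeout are illustrative, not from our code):

import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.storage.v1.AppendRowsResponse;
import com.google.cloud.bigquery.storage.v1.JsonStreamWriter;
import org.json.JSONArray;

import java.util.concurrent.TimeUnit;

public class AppendHelper {
    // Illustrative only: appends one batch and blocks until BigQuery acknowledges it,
    // failing fast instead of waiting minutes for a stuck append.
    public static AppendRowsResponse appendAndWait(JsonStreamWriter writer, JSONArray rows)
            throws Exception {
        ApiFuture<AppendRowsResponse> future = writer.append(rows);
        // The 30-second timeout is an assumed value, not from our configuration.
        return future.get(30, TimeUnit.SECONDS);
    }
}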
public JsonStreamWriter createWriteStream(String table)
        throws IOException, Descriptors.DescriptorValidationException, InterruptedException {
    // A new BigQueryWriteClient is created for every stream and is not closed here.
    BigQueryWriteClient bqClient = BigQueryWriteClient.create();
    WriteStream stream = WriteStream.newBuilder().setType(WriteStream.Type.COMMITTED).build();
    TableName tableName = TableName.of("ProjectId", "DataSet", table);
    CreateWriteStreamRequest createWriteStreamRequest =
            CreateWriteStreamRequest.newBuilder()
                    .setParent(tableName.toString())
                    .setWriteStream(stream)
                    .build();
    WriteStream writeStream = bqClient.createWriteStream(createWriteStreamRequest);
    JsonStreamWriter jsonStreamWriter = JsonStreamWriter
            .newBuilder(writeStream.getName(), writeStream.getTableSchema())
            .build();
    return jsonStreamWriter;
}
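JsonStreamWriterUtil.getWriteStreamMap(tableName), used in insertIntoBigQuery above, hands back a per-table JsonStreamWriter. Its implementation isn't shown here; roughly, it keeps one writer per table in a map, something like the sketch below (class, field, and helper names are illustrative; createWriteStream(...) from above would be plugged in as the factory):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import com.google.cloud.bigquery.storage.v1.JsonStreamWriter;

public class JsonStreamWriterUtil {

    // One shared writer per table; the worker threads in a pod reuse the same instance.
    private static final ConcurrentMap<String, JsonStreamWriter> WRITERS = new ConcurrentHashMap<>();

    public static JsonStreamWriter getWriteStreamMap(String tableName) {
        return WRITERS.computeIfAbsent(tableName, table -> {
            try {
                return newWriter(table);
            } catch (Exception e) {
                // A null result here lines up with the null check in insertIntoBigQuery.
                return null;
            }
        });
    }

    // Placeholder for the createWriteStream(...) method shown earlier.
    private static JsonStreamWriter newWriter(String table) throws Exception {
        throw new UnsupportedOperationException("use createWriteStream(table) from above");
    }
}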