I have created a script that uploads a list of 1 million points in batches of 4,000 points, one batch per second, so the upload should take 250 batches.
I have placed 2 timing functions in the script:

- one around each write_api.write() call, to measure the time each batch takes
- one around the outer loop, to measure the time it takes to upload the whole 1 million points

Each individual timing function reports that a batch takes about 1 second on average, so in my opinion the upload should take about 250 seconds. However, the outer loop reports a total of about 400 seconds, which is almost double the figure I get even if I sum up the 250 individual write times.
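The expected figure is simple arithmetic:

batches = 1_000_000 // 4_000       # 250 batches
expected_seconds = batches * 1.0   # roughly 250 s at ~1 s per batch

The full script is below.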
import random
import copy
import datetime
import time

from influxdb_client import InfluxDBClient, Point, WriteOptions
from influxdb_client.client.exceptions import InfluxDBError


class BatchingCallback(object):
    def success(self, conf: (str, str, str), data: str):
        print(f"Written batch: {conf}, data: {data}")

    def error(self, conf: (str, str, str), data: str, exception: InfluxDBError):
        print(f"Cannot write batch: {conf}, data: {data} due: {exception}")

    def retry(self, conf: (str, str, str), data: str, exception: InfluxDBError):
        print(f"Retryable error occurs for batch: {conf}, data: {data} retry: {exception}")


def write_data(url, token, bucket, org):
    callback = BatchingCallback()
    data = generate_list_dictionary()  # Generates the data: a list of 1 million points with 100 fields each (not shown)
    total_start = time.perf_counter()
    points_per_batch = 4000
    with InfluxDBClient(url=url, token=token, org=org) as client:
        with client.write_api(write_options=WriteOptions(batch_size=points_per_batch),
                              success_callback=callback.success,
                              error_callback=callback.error,
                              retry_callback=callback.retry) as write_api:
            time_start = datetime.datetime.now()
            for data_point in range(0, len(data), points_per_batch):
                upper_index = data_point + points_per_batch  # Allows us to slice the data into batches of 4000
                seconds = datetime.timedelta(seconds=int(upper_index / points_per_batch))  # Allows us to send one batch per second
                while True:
                    if datetime.datetime.now() >= time_start + seconds:
                        # Writing the data in batches and performing individual timings
                        start = time.perf_counter()
                        write_api.write(bucket=bucket, org=org, record=data[data_point:upper_index])
                        end = time.perf_counter()
                        print(f"Individual time is: {end - start}")
                        break
    total_end = time.perf_counter()
    print(f"Total time is: {total_end - total_start}")


if __name__ == '__main__':
    url = ""
    token = ""
    bucket = ""
    org = ""
    write_data(url, token, bucket, org)
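To be explicit about how I compare the two measurements, this is a sketch of the bookkeeping I do around the writes (time_one_write and individual_times are my own names, not part of the client library):

import time

def time_one_write(write_api, bucket, org, batch):
    # Times a single write_api.write() call and returns the elapsed seconds
    start = time.perf_counter()
    write_api.write(bucket=bucket, org=org, record=batch)
    return time.perf_counter() - start

# Usage inside the loop:
#   individual_times.append(time_one_write(write_api, bucket, org, data[data_point:upper_index]))
# Summing individual_times after the loop gives me roughly 250 s,
# while the outer timer reports roughly 400 s.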
As you can see, the total time prints as around 450 seconds on average, while each individual time is about 1 second on average; by that logic it should take 250 seconds to upload, but it takes almost double.
So my question is: is the individual time that Python is calculating wrong? If so, how do I calculate the time taken by every single write() I do?
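For reference, this is the kind of blocking measurement I'm after; a minimal sketch assuming the client's synchronous write API, where write() does not return until the HTTP request completes (timed_sync_writes is my own helper name):

import time
from influxdb_client import InfluxDBClient
from influxdb_client.client.write_api import SYNCHRONOUS

def timed_sync_writes(url, token, bucket, org, data, points_per_batch=4000):
    timings = []
    with InfluxDBClient(url=url, token=token, org=org) as client:
        write_api = client.write_api(write_options=SYNCHRONOUS)
        for lower in range(0, len(data), points_per_batch):
            start = time.perf_counter()
            # With SYNCHRONOUS options this call blocks until the server responds
            write_api.write(bucket=bucket, org=org, record=data[lower:lower + points_per_batch])
            timings.append(time.perf_counter() - start)
    return timings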
The client version I'm using is 1.31.0, my Python version is 3.7.7, and I'm on Windows.