I am using binance websocket to create sqlite database after closing of each candle on 1min interval. For this purpose I've create the following piece of code.
import pandas as pd
from binance.client import Client
from binance import BinanceSocketManager
from binance.enums import *
import sqlalchemy
import json
import random
import datetime
import time
engine= sqlalchemy.create_engine('sqlite:///app.db')
client = Client(api_public_key, api_private_key)
bsm = BinanceSocketManager(client)
socket = bsm.kline_socket('BTCUSDT', interval=KLINE_INTERVAL_1MINUTE)
def createframe(msg):
df = pd.DataFrame([msg['k']])
df = df.loc[:, ['t', 'o', 'h', 'l', 'c', 'V']]
df.columns = ['time', 'open', 'high', 'low', 'close', 'volume']
df.astype('float')
df['time'] = datetime.datetime.fromtimestamp(df['time'] / 1000).strftime('%Y-%m-%d %H:%M:%S')
return df
while True:
await socket.__aenter__()
msg = await socket.recv()
if msg['k']['x']:
frame = createframe(msg)
frame.to_sql(msg['k']['s'], engine, if_exists='append', index=False)
Json object returned for kline websocket (msg):
{'e': 'kline', 'E': 1634470338549, 's': 'BTCUSDT', 'k': {'t': 1634470320000, 'T': 1634470379999, 's': 'BTCUSDT', 'i': '1m', 'f': 1101708308, 'L': 1101708646, 'o': '61104.36000000', 'c': '61110.07000000', 'h': '61118.44000000', 'l': '61104.35000000', 'v': '5.48572000', 'n': 339, 'x': False, 'q': '335219.68678330', 'V': '3.23284000', 'Q': '197550.98708780', 'B': '0'}}
After running the code for few minutes I get the following output:
pd.read_sql('BTCUSDT',engine)
time open high low close volume
0 2021-10-17 06:27:00 61019.99000000 61020.00000000 61013.51000000 61015.01000000 3.07002000
1 2021-10-17 06:28:00 61015.00000000 61062.52000000 61000.01000000 61056.70000000 13.05177000
2 2021-10-17 06:28:00 61015.00000000 61062.52000000 61000.01000000 61056.70000000 13.05177000
3 2021-10-17 06:29:00 61056.71000000 61084.60000000 61053.57000000 61060.35000000 6.09200000
4 2021-10-17 06:29:00 61056.71000000 61084.60000000 61053.57000000 61060.35000000 6.09200000
5 2021-10-17 06:30:00 61060.35000000 61069.00000000 61050.00000000 61050.01000000 4.71704000
6 2021-10-17 06:30:00 61060.35000000 61069.00000000 61050.00000000 61050.01000000 4.71704000
7 2021-10-17 06:31:00 61050.01000000 61059.78000000 61000.00000000 61033.33000000 4.17527000
8 2021-10-17 06:31:00 61050.01000000 61059.78000000 61000.00000000 61033.33000000 4.17527000
9 2021-10-17 06:32:00 61033.33000000 61047.42000000 61013.00000000 61013.01000000 4.55346000
10 2021-10-17 06:33:00 61013.01000000 61040.36000000 61013.00000000 61024.17000000 12.86187000
11 2021-10-17 06:33:00 61013.01000000 61040.36000000 61013.00000000 61024.17000000 12.86187000
12 2021-10-17 06:34:00 61024.17000000 61024.18000000 60963.04000000 60967.06000000 7.99450000
Any idea on how to avoid timeframe data duplicates?