I have a script that reads one time series from a CSV file, drops the time column and some other information from a second time series, and then appends the second series' columns to the end of each row before writing the result to a new CSV file. The problem is that the last 3 lines of my output file always end in blank fields (,,,,,,,) where the appended columns should be; as the script keeps running, earlier lines get filled in, but the newest 3 lines are always blank. The code is this:
import pandas as pd
import time
def compiler():
    """Repeatedly merge the market-data CSV with the OHLC CSV and save the result.

    Each of the 1000 rounds re-reads both input files, drops the columns
    that are not wanted, appends the OHLC columns to the right of the
    market-data columns, writes the combined table indexed by ``datetime``
    and then sleeps for 15 minutes.

    The two tables are matched by row position (the default integer index
    that ``read_csv`` assigns), not by timestamp.  Because the join is an
    outer join, a row that exists in only one of the files is kept and the
    columns coming from the other file are left empty for that row.
    """
    for _ in range(1000):
        # Read File
        df = pd.read_csv(r'C:/Users/J/Desktop/dropmarketdata/xz.csv')
        # Remove useless info
        df = df.drop(
            columns=['cached', 'id', 'name', 'last_updated', 'max_supply']
        )
        # Read 2nd file
        ohlc = pd.read_csv(r'C:/Users/J/Desktop/dropmarketdata/ohlc/ohlc.csv')
        # Drop datetime because the other file already provides the index
        ohlc = ohlc.drop(columns='datetime')
        # Join the OHLC columns onto the end of each line; rows line up
        # only where both files have the same number of lines.
        main_df = df.join(ohlc, how='outer')
        main_df.set_index('datetime', inplace=True)
        # The path must be a single literal: a string split across two
        # source lines is a syntax error.
        main_df.to_csv(
            r'C:/Users/J/Desktop/dropmarketdata/ohlcomp.csv',
            float_format='%.8f',
        )
        print('saving....')
        time.sleep(900)
        print('15m has surpassed....')
# Start the merge loop when the script is run.
compiler()
The problem is my file always looks like this:
2018-04-16 01:57:09.021924,85409.30000000,18473609990.00000000,77146350.00000000,-0.11000000,-1.92000000,-7.11000000,0.00000052,0.00417603,147,DROP,30000000000.00000000,,,,,
2018-04-16 02:12:10.098678,85061.30000000,18473609990.00000000,74266498.00000000,-4.09000000,-5.59000000,-10.38000000,0.00000050,0.00402014,148,DROP,30000000000.00000000,,,,,
2018-04-16 02:27:10.916329,87757.50000000,18473609990.00000000,76921156.00000000,1.22000000,-2.24000000,-6.99000000,0.00000052,0.00416384,147,DROP,30000000000.00000000,,,,,
Each row is indexed by date, and the empty ,,,,, fields at the end of each row are where the H, L, O, C data is supposed to be. I'm quite new to Python, so sorry if this sounds like a dumb question. Thanks for the help.
EDIT:
For anyone who needs to stream the data on their own, this code should work:
import pandas as pd
import time
from datetime import datetime
import coinmarketcap
from coinmarketcap import Market
import ccxt
def compiler():
    """Merge ``other.csv`` with ``ohlc.csv`` and write the result to ``file.csv``.

    The bookkeeping columns are removed from ``other.csv``, the ``datetime``
    column is removed from ``ohlc.csv``, and the remaining OHLC columns are
    appended to the right of the market-data columns.  The combined table is
    indexed by the ``datetime`` column of ``other.csv`` and written with
    floats formatted to eight decimals.

    The two tables are matched by row position (the default integer index
    that ``read_csv`` assigns), not by timestamp.  Because the join is an
    outer join, a row present in only one file is kept and the columns from
    the other file are left empty for it.

    Raises:
        KeyError: if one of the columns to be removed is missing.
    """
    # Read files
    df = pd.read_csv('other.csv')
    ohlc = pd.read_csv('ohlc.csv')
    # Remove useless info
    df = df.drop(
        columns=['cached', 'id', 'name', 'last_updated', 'max_supply']
    )
    # Drop datetime because the other file already provides the index
    ohlc = ohlc.drop(columns='datetime')
    # Join the OHLC columns onto the end of each line; rows line up only
    # where both files have the same number of lines.
    main_df = df.join(ohlc, how='outer')
    main_df.set_index('datetime', inplace=True)
    main_df.to_csv('file.csv', float_format='%.8f')
    print('saving compiled list....')
def collect1():
    """Fetch the current DROP/BTC ticker from Tidex and save it to ``ohlc.csv``.

    The file is overwritten on every call.  The ask, bid, close, high and
    low prices are stored as fixed-point strings with eight decimals, and
    the row is indexed by the local time of the call (the ``datetime``
    value reported by the exchange is replaced).
    """
    # pulling from tidex
    tidex = ccxt.tidex()
    tidex.load_markets(True)
    ticker = tidex.fetch_ticker('DROP/BTC')
    price_columns = ['ask', 'bid', 'close', 'high', 'low']
    ticker_df = pd.DataFrame(
        ticker, index=['f'], columns=price_columns + ['datetime']
    )
    # Format value by value instead of applying '%' to a whole Series,
    # which depends on implicitly coercing a one-element Series to float.
    for column in price_columns:
        ticker_df[column] = ticker_df[column].map('{:.8f}'.format)
    # Stamp the row with the local clock and use it as the index.
    ticker_df['datetime'] = datetime.now()
    ticker_df.set_index('datetime', inplace=True)
    ticker_df.to_csv('ohlc.csv', float_format='%.8f')
def collect2():
    """Fetch the CoinMarketCap ticker for Dropil and save it to ``other.csv``.

    The file is overwritten on every call.  Every returned record is
    indexed by the local time of the call under the name ``datetime``.
    """
    # pulling information from coinmarketcap
    market = Market()
    ticker2 = market.ticker("dropil")
    drop_frame = pd.DataFrame(ticker2)
    # Stamp the rows with the local clock and use it as the index.  The
    # former bare ``reset_index()`` call discarded its result and has been
    # removed.
    drop_frame['datetime'] = datetime.now()
    drop_frame.set_index('datetime', inplace=True)
    drop_frame.to_csv('other.csv', float_format='%.8f')
# Poll both data sources, merge the two files, then wait 15 minutes
# (900 seconds) before the next round; 1000 rounds in total.
for _round in range(1000):
    for step in (collect1, collect2, compiler):
        step()
    time.sleep(900)