I have Python code that uses trade data as input. The trade data is taken from here: https://data.binance.vision/?prefix=data/spot/daily/trades/ETHUSDT/.
import pandas as pd
df = pd.read_csv("ETHUSDT-trades-2022-12-16.csv",
names = ["id", "price", "qty", "quoteQty", "time", "makerBuy", "bestPrice"])
df["time"] = pd.to_datetime(df["time"], unit = "ms")
df.set_index("time", inplace = True)
df.price.resample("240T").agg({
"open": "first",
"high": "max",
"low": "min",
"close": "last"
})
Input:
id price qty quoteQty makerBuy bestPrice
time
2022-12-16 00:00:00.000 1042612060 1266.53 0.0102 12.918606 True True
2022-12-16 00:00:00.000 1042612061 1266.53 0.0012 1.519836 True True
2022-12-16 00:00:00.005 1042612062 1266.54 0.0111 14.058594 False True
2022-12-16 00:00:00.008 1042612063 1266.53 0.0097 12.285341 True True
2022-12-16 00:00:00.011 1042612064 1266.53 0.0181 22.924193 True True
... ... ... ... ... ... ...
2022-12-16 23:59:59.991 1043495126 1166.82 0.0548 63.941736 True True
2022-12-16 23:59:59.994 1043495127 1166.83 0.0105 12.251715 False True
2022-12-16 23:59:59.995 1043495128 1166.83 0.0263 30.687629 False True
2022-12-16 23:59:59.997 1043495129 1166.82 0.0089 10.384698 True True
2022-12-16 23:59:59.999 1043495130 1166.83 0.0090 10.501470 False True
883071 rows × 6 columns
Output:
time open high low close
2022-12-16 00:00:00 1266.53 1273.87 1265.65 1271.79
2022-12-16 04:00:00 1271.78 1280.06 1267.13 1277.77
2022-12-16 08:00:00 1277.77 1279.27 1205.50 1212.83
2022-12-16 12:00:00 1212.84 1218.23 1195.04 1203.63
2022-12-16 16:00:00 1203.64 1204.58 1183.37 1202.50
2022-12-16 20:00:00 1202.50 1211.99 1155.32 1166.83
Question
I want to migrate the Python code into C#. In fact, I actually did but the thing is, I want to use an actual DataFrame
framework and not just do everything manually. Microsoft.Data.Analysis
lacks a lot of features, so I decided to give Deedle
a try.
using Deedle;
var df = Frame.ReadCsv("ETHUSDT-trades-2022-12-16.csv",
hasHeaders: false);
var series = df["Column2"]; // Columns is supposed to be price
series.ResampleEquivalence(date => TimeSpan.FromMinutes(240), aggregate =>
{
});
df.Print();
The problem is that I'm unable to set actual names for the columns and do the resampling part. How is that going to work here?
FYI - that's the manual way
using System.Globalization;
using CsvHelper;
using CsvHelper.Configuration;
using TickDataCustom;
var configuration = new CsvConfiguration(CultureInfo.InvariantCulture) { HasHeaderRecord = false };
using var reader = new StreamReader("ETHUSDT-trades-2022-12-16.csv");
using var csv = new CsvReader(reader, configuration);
csv.Context.RegisterClassMap<TradeMap>();
var records = csv.GetRecords<Trade>();
var source = records
.Select(x => new Trade2
{
Id = x.Id,
Price = x.Price,
Quantity = x.Quantity,
QuoteQuantity = x.QuoteQuantity,
Time = DateTimeOffset.FromUnixTimeMilliseconds(x.Timestamp).UtcDateTime,
MakerBuy = x.MakerBuy,
BestPrice = x.BestPrice
})
.ToList();
var period = TimeSpan.FromMinutes(240);
var ohlcv = source
.AsParallel()
.OrderBy(p => p.Time)
.Select(d => new
{
d.Time,
d.Price,
Span = d.Time.Ticks / period.Ticks
})
.GroupBy(d => d.Span)
.Select(g => new Ohlc
{
Timestamp = new DateTime(period.Ticks * g.Key),
Open = g.OrderBy(s => s.Time).First().Price,
Close = g.OrderBy(s => s.Time).Last().Price,
Low = g.Min(d => d.Price),
High = g.Max(d => d.Price)
})
.OrderBy(o => o.Timestamp);
foreach (var ohlc in ohlcv)
{
Console.WriteLine($"T:{ohlc.Timestamp}, O:{ohlc.Open:f2}, H:{ohlc.High:f2}, L:{ohlc.Low:f2}, C:{ohlc.Close:f2}");
}
Console.ReadLine();
public class TradeMap : ClassMap<Trade>
{
public TradeMap()
{
Map(p => p.Id).Index(0);
Map(p => p.Price).Index(1);
Map(p => p.Quantity).Index(2);
Map(p => p.QuoteQuantity).Index(3);
Map(p => p.Timestamp).Index(4);
Map(p => p.MakerBuy).Index(5);
Map(p => p.BestPrice).Index(6);
}
}
public class Trade
{
[Index(0)] public string Id { get; set; } = default!;
[Index(1)] public decimal Price { get; set; }
[Index(2)] public decimal Quantity { get; set; }
[Index(3)] public decimal QuoteQuantity { get; set; }
[Index(4)] public long Timestamp { get; set; }
[Index(5)] public bool MakerBuy { get; set; }
[Index(6)] public bool BestPrice { get; set; }
}
public class Ohlc
{
public DateTime Timestamp { get; set; }
public decimal Open { get; set; }
public decimal High { get; set; }
public decimal Low { get; set; }
public decimal Close { get; set; }
public decimal Volume { get; set; }
}
public class Trade2
{
public string Id { get; set; } = default!;
public decimal Price { get; set; }
public decimal Quantity { get; set; }
public decimal QuoteQuantity { get; set; }
public DateTime Time { get; set; }
public bool MakerBuy { get; set; }
public bool BestPrice { get; set; }
}