This might help:
from datetime import datetime
from itertools import groupby
from pprint import pprint
# Sample input; the collection of data objects is assumed to be a list of
# dicts that each carry a "value" and a "timestamp" string.
datas = [
    {
        "id": "abcd",
        "value": 100.0,
        "timestamp": "2011-07-14 19:43:37",
    },
    {
        "id": "abcd",
        "value": 500.0,
        "timestamp": "2011-07-15 20:30:37",
    },
    {
        "id": "abcd",
        "value": 400.0,
        "timestamp": "2011-07-15 20:30:38",
    },
]

# Decorate every record with the key used for sorting and grouping:
# (calendar date, hour of day, original record).
decorated_datas = []
for data in datas:
    # Assumes every timestamp is formatted as "%Y-%m-%d %H:%M:%S";
    # strptime raises ValueError for anything else.
    timestamp = datetime.strptime(data["timestamp"], "%Y-%m-%d %H:%M:%S")
    decorated_datas.append((timestamp.date(), timestamp.hour, data))

# Sort on (date, hour) only, never on the record dict itself. sorted() is
# stable, so records sharing a key keep their input order.
sorted_decorated_datas = sorted(
    decorated_datas, key=lambda entry: (entry[0], entry[1])
)
def calculate_stats(collection_of_numbers):
    """Return the maximum, minimum and arithmetic mean of some numbers.

    Args:
        collection_of_numbers: A non-empty iterable of numbers. It is copied
            into a list first, so one-shot iterables such as generators work
            as well as lists and tuples.

    Returns:
        A ``(max, min, average)`` tuple -- note that the maximum comes first.

    Raises:
        ValueError: If the input is empty (raised by ``max``).
    """
    # Materialise once: max(), min(), sum() and len() each need the data.
    numbers = list(collection_of_numbers)
    return (max(numbers), min(numbers), sum(numbers) / len(numbers))
# Group the sorted records by (date, hour) and compute the statistics of each
# group. groupby() only merges adjacent items with equal keys, which is why
# the input has to be sorted by the same key beforehand.
results = []
for key, group_iter in groupby(
    sorted_decorated_datas, lambda entry: (entry[0], entry[1])
):
    # entry[2] is the original record dict; only its "value" is aggregated.
    group_values = [entry[2]["value"] for entry in group_iter]
    max_value, min_value, avg_value = calculate_stats(group_values)
    results.append(
        {
            "date": key[0],
            "hour": key[1],
            "minVal": min_value,
            "maxVal": max_value,
            "avgVal": avg_value,
        }
    )
pprint(results)
And the output was:
[{'avgVal': 100.0,
'date': datetime.date(2011, 7, 14),
'hour': 19,
'maxVal': 100.0,
'minVal': 100.0},
{'avgVal': 450.0,
'date': datetime.date(2011, 7, 15),
'hour': 20,
'maxVal': 500.0,
'minVal': 400.0}]
EDIT
After giving it some thought, I realized that your timestamp string format is a perfect case where conversion to datetime objects isn't needed:
the timestamp strings can be sorted as they are, without converting them first, so here is the updated code:
from itertools import groupby
from pprint import pprint
# Sample input: the data objects are assumed to arrive as a list of dicts.
datas = [
    {"id": "abcd", "value": 100.0, "timestamp": "2011-07-14 19:43:37"},
    {"id": "abcd", "value": 500.0, "timestamp": "2011-07-15 20:30:37"},
    {"id": "abcd", "value": 400.0, "timestamp": "2011-07-15 20:30:38"},
]
def get_date_and_hour(timestamp_str):
    """Extract the date fields and the hour from a timestamp string.

    Args:
        timestamp_str: Text made of a dash-separated date, whitespace and a
            colon-separated time, e.g. ``"2011-07-14 19:43:37"``.

    Returns:
        A tuple of ints holding the date fields followed by the hour; for
        the example above that is ``(2011, 7, 14, 19)``.

    Raises:
        ValueError: If the string does not split into exactly two
            whitespace-separated parts, or a date field or the hour is not
            an integer.
    """
    date_part, time_part = timestamp_str.split()
    date_fields = tuple(map(int, date_part.split("-")))
    hour = int(time_part.split(":")[0])
    # The unpacking expression already builds a tuple; no tuple() call needed.
    return (*date_fields, hour)
def calculate_stats(collection_of_numbers):
    """Return the maximum, minimum and arithmetic mean of some numbers.

    Args:
        collection_of_numbers: A non-empty iterable of numbers. It is copied
            into a list first, so one-shot iterables such as generators work
            as well as lists and tuples.

    Returns:
        A ``(max, min, average)`` tuple -- note that the maximum comes first.

    Raises:
        ValueError: If the input is empty (raised by ``max``).
    """
    # Materialise once: max(), min(), sum() and len() each need the data.
    numbers = list(collection_of_numbers)
    return (max(numbers), min(numbers), sum(numbers) / len(numbers))
results = []
# Zero-padded "YYYY-MM-DD HH:MM:SS" strings sort lexicographically in
# chronological order, so the raw timestamp string serves as the sort key
# without any datetime conversion.
sorted_datas = sorted(datas, key=lambda record: record["timestamp"])
# groupby() only merges adjacent records, hence the sort above; each key is
# the tuple returned by get_date_and_hour (date fields followed by the hour).
for key, group_iter in groupby(
    sorted_datas, lambda record: get_date_and_hour(record["timestamp"])
):
    group_values = [record["value"] for record in group_iter]
    max_value, min_value, avg_value = calculate_stats(group_values)
    results.append(
        {
            "date": key[0:3],
            "hour": key[3],
            "minVal": min_value,
            "maxVal": max_value,
            "avgVal": avg_value,
        }
    )
pprint(results)
and the output was:
[{'avgVal': 100.0,
'date': (2011, 7, 14),
'hour': 19,
'maxVal': 100.0,
'minVal': 100.0},
{'avgVal': 450.0,
'date': (2011, 7, 15),
'hour': 20,
'maxVal': 500.0,
'minVal': 400.0}]
This version is much shorter and more maintainable than the previous one.