I have a complicated json file that take me a lot of time to make it in Excel format, I do my best but i think I miss something so can someone help please,
the json file is below and my script did half of work :
json file : {"logFormatVersion": "log_security_v3", "data": [ { "logAlertUid": "a2fee3b7e2824c", "request": { "body": "", "cookies": [ { "key": "info_1", "value": "info_2" }, { "key": "info_3", "value": "info_4" }, { "key": "info_5", "value": "info_6" } ], "headers": [ { "key": "Host", "value": "ip_address" }, { "key": "Accept-Charset", "value": "iso-8859-1,utf-8;q=0.9,*;q=0.1" }, { "key": "Accept-Language", "value": "info_7" }, { "key": "Connection", "value": "Keep-Alive" }, { "key": "Referer", "value": "info_8" } ], "hostname": "FQDN", "ipDst": "Y.Y.Y.Y", "ipSrc": "X.X.X.X", "method": "GET", "path": "/xampp/cgi.cgi", "portDst": 443, "protocol": "HTTP/1.1", "query": "", "requestUid": "info_9" }, "websocket": [], "context": { "tags": "", "geoipCode": "", "geoipName": "", "applianceName": "name_device", "applianceUid": "18539", "backendHost": "ip_address", "backendPort": 80, "reverseProxyName": "FQDN", "reverseProxyUid": "info_10", "tunnelName": "info_11", "tunnelUid": "6d531c", "workflowName": "name-workflow", "workflowUid": "77802" }, "events": [ { "eventUid": "e62d8b", "tokens": { "date": "time", "matchingParts": [ { "part": "info_17", "partKey": "info_18", "partKeyOperator": "info_19", "partKeyPattern": "info_20", "partKeyMatch": "info_21", "partValue": "info_21", "partValueOperator": "info_22", "partValuePatternUid": "info_23", "partValuePatternName": "info_24", "partValuePatternVersion": "00614", "partValueMatch": "info_25", "attackFamily": "info_26", "riskLevel": 80, "riskLevelOWASP": 8, "cwe": "CWE-name" } ], "reason": "info_27", "securityExceptionConfigurationUids": [ "info_28" ], "securityExceptionMatchedRuleUids": [ "info_24" ] } }, { "eventUid": "e62d8b", "tokens": { "date": "time", "matchingParts": [ { "part": "info_17", "partKey": "info_18", "partKeyOperator": "info_19", "partKeyPattern": "info_20", "partKeyMatch": "info_21", "partValue": "info_21", "partValueOperator": "info_22", "partValuePatternUid": "info_23", "partValuePatternName": "info_24", "partValuePatternVersion": "00614", "partValueMatch": "info_25", "attackFamily": "info_26", "riskLevel": 80, "riskLevelOWASP": 8, "cwe": "CWE-name" } ], "reason": "info_27", "securityExceptionConfigurationUids": [ "info_28" ], "securityExceptionMatchedRuleUids": [ "info_24" ] } }
],
"timestampImport": null,
"timestamp": "info30",
"uid": "AYYm"
} ]}
cordially
my script is like that :
import json
import pandas as pd
with open("eventlogs.json", "r") as f:
objectfile = json.load(f)
data = objectfile["data"]
df =pd.DataFrame(data)
df_request = pd.json_normalize(df["request"])
df_headers = pd.DataFrame(df_request["headers"])
df_headers = df_headers.explode("headers")
df_headers[["header-key", "header-value"]] = df_headers["headers"].apply(pd.Series)
df_headers.drop("headers", axis=1, inplace=True)
df = df.drop("request", axis=1)
new_df = pd.concat([df, df_request, df_headers], axis=1)
new_df.to_excel("all-data.xlsx", index=False)
if you execute the script it give a result like this : result without flatten "events" columns and "cookies" with a lot of line.
like I said the result is not really nice specially when I want to have result like this : Dream - File
I will be very thankful if you can help with idea to arrive to flatten this json to excel with multiindexed columns .