If every JSON object is on a single line, then you can open() the file and use for line in file: to read it line by line - next you can convert each line to a dictionary with the json module and process it.
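A minimal sketch of that loop (assuming the file is named data.json - the name is only an example):

import json

with open("data.json") as file_obj:       # "data.json" is an assumed filename
    for line in file_obj:
        data = json.loads(line)           # one dictionary per line
        print(data)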
You can use for key, value in data.items() to work with every item separately. You can use key to build the filename f"{key}.column", open it in append mode "a", and write str(value) + "\n" to this file.
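As a sketch for a flat dictionary (no nesting yet), using sample values from the question:

data = {"timestamp": "2022-01-14T00:12:21.000", "Field1": 10}   # flat example

for key, value in data.items():
    with open(f"{key}.column", "a") as f:     # append mode, one file per key
        f.write(str(value) + "\n")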
Because you have nested dictionaries, you need isinstance(value, dict) to check whether a value is another dictionary like {"f1": 0, "f2": 1.7, "f3": 2} and repeat the code for it - which is easiest to do with recursion.
Minimal working code. I use io only to simulate a file in memory, but you should use open(filename).
file_data = '''{"timestamp": "2022-01-14T00:12:21.000", "Field1": 10, "Field_Doc": {"f1": 0}}
{"timestamp": "2022-01-18T00:15:51.000", "Field_Doc": {"f1": 0, "f2": 1.7, "f3": 2}}'''
import json
# --- functions ---
def process_dict(data, prefix=""):
for key, value in data.items():
if prefix:
key = prefix + "." + key
if isinstance(value, dict):
process_dict(value, key)
else:
with open(key + '.column', "a") as f:
f.write(str(value) + "\n")
# --- main ---
#file_obj = open("filename")
import io
file_obj = io.StringIO(file_data) # emulate file in memory
for line in file_obj:
data = json.loads(line)
print(data)
process_dict(data)
#process_dict(data, "some prefix for all files")
EDIT:

More universal version - it takes a function as an argument, so it can be used with different functions (an example with a different callback is shown after the code).
file_data = '''{"timestamp": "2022-01-14T00:12:21.000", "Field1": 10, "Field_Doc": {"f1": 0}}
{"timestamp": "2022-01-18T00:15:51.000", "Field_Doc": {"f1": 0, "f2": 1.7, "f3": 2}}'''
import json
# --- functions ---
def process_dict(data, func, prefix=""):
for key, value in data.items():
if prefix:
key = prefix + "." + key
if isinstance(value, dict):
process_dict(value, func, key)
else:
func(key, value)
def write_func(key, value):
with open(key + '.column', "a") as f:
f.write(str(value) + "\n")
# --- main ---
#file_obj = open("filename")
import io
file_obj = io.StringIO(file_data) # emulate file in memory
for line in file_obj:
data = json.loads(line)
print(data)
process_dict(data, write_func)
#process_dict(data, write_func, "some prefix for all files")
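For example, the same process_dict can collect values in memory instead of writing files - a sketch with a hypothetical collect_func (the name is only an example):

collected = {}

def collect_func(key, value):
    collected[key] = value    # keep the flattened key/value in memory instead of writing a file

data = {"Field1": 10, "Field_Doc": {"f1": 0}}    # sample record like in the question
process_dict(data, collect_func)                 # process_dict defined in the code above
print(collected)   # {'Field1': 10, 'Field_Doc.f1': 0}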
Another idea to make it more universal is to create a function which flattens the dict and produces

{'timestamp': '2022-01-14T00:12:21.000', 'Field1': 10, 'Field_Doc.f1': 0}
{'timestamp': '2022-01-18T00:15:51.000', 'Field_Doc.f1': 0, 'Field_Doc.f2': 1.7, 'Field_Doc.f3': 2}

and later use a loop to write the elements.
file_data = '''{"timestamp": "2022-01-14T00:12:21.000", "Field1": 10, "Field_Doc": {"f1": 0}}
{"timestamp": "2022-01-18T00:15:51.000", "Field_Doc": {"f1": 0, "f2": 1.7, "f3": 2}}'''
import json
# --- functions ---
def flatten_dict(data, prefix=""):
result = {}
for key, value in data.items():
if prefix:
key = prefix + "." + key
if isinstance(value, dict):
result.update( process_dict(value, key) )
else:
result[key] = value
#result.update( {key: value} )
return result
# --- main ---
#file_obj = open("filename")
import io
file_obj = io.StringIO(file_data) # emulate file in memory
for line in file_obj:
data = json.loads(line)
print('before:', data)
data = flatten_dict(data)
#data = flatten_dict(data, "some prefix for all items")
print('after :', data)
print('---')
for key, value in data.items():
with open(key + '.column', "a") as f:
f.write(str(value) + "\n")
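Because every version opens files in append mode "a", running the script again appends the values a second time - remove the old .column files first if you need a fresh result.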