My questions could be complicated, but I can explain further if someone helps me in resolving.
I'm writing a udf. I have a dictionary called _readingType={0:("a","b")} there are 300 keys in this dictionary. There is a variabale called reading = int(strIn[j + 2:j + 6], 16) depending on the value of the reading it grabs the value corresponding to that key and appends to the result which is a list.When I'm giving a dummy value in reading it is readingTypeMapping = _readingType.get(readingType) is giving desired output. But when I do it dynamically it is not. And also this is my schema
parse_data_udf = udf(parse_data, ArrayType(StructType([
StructField("i", StringType(), False),
StructField("timeStamp", StringType(), False),
StructField("numEvents", IntegerType(), False),
StructField("numReadings", IntegerType(), False),
StructField("readingType", IntegerType(), False),
StructField("readingTypeMapping", IntegerType(), False),
StructField("readingValue", StringType(), False)
])))
If I define readingTypeMapping as interger then output would be {"i": "0", "timeStamp": "", "numEvents": 0, "numReadings": 5, "readingType": 63, "readingTypeMapping": 0, "readingValue": "5E"} If I define readingTypeMapping as String then output is NullPointException If I keep readingTypeMapping as String and the pass a test value in the reading let say reading = 83 then the code is executing perfectly and resulting the desired output. What should I do?
This is my code
def parse_data(strIn):
result = []
j = 0
_readingQuality = = {0:("","")}
_readingType = {0:("","")}
while j < len(strIn) - 1:
ts = int(strIn[j:j + 8], 16)
timeStamp = str(datetime.datetime.utcfromtimestamp(ts))
numEvents = int(strIn[j + 8], 16)
numReadings = int(strIn[j + 9], 16)
j += 10
for i in range(numReadings):
binary = binary_repr(int(strIn[j:j + 2], 16), 8)
timeStampPresent = bool(int(binary[0], 2))
readingQualitiesPresent = bool(int(binary[1], 2))
pendingPowerOfTen = int(binary[2:5], 2)
if pendingPowerOfTen == 7:
pendingPowerOfTen = 9
readingsValueSizeInBytes = int(binary[5:], 2) + 1
readingType = int(strIn[j + 2:j + 6], 16)
readingTypeMapping = _readingType.get(readingType)
rc = readingTypeMapping[0]
rd = readingTypeMapping[1]
if timeStampPresent:
ts = int(strIn[j + 6:j + 14], 16)
timeStamp = str(datetime.datetime.utcfromtimestamp(ts))
j = j + 14
else:
timeStamp = ''
j = j + 6
# If reading qualities are present
if readingQualitiesPresent:
q = []
c = []
d = []
finished = False
while not finished:
binary = binary_repr(int(strIn[j:j + 2], 16), 8)
num = int(binary[1:], 2)
q.append(num)
t = _readingQuality.get(num,(None,None))
c.append(t[0])
d.append(t[1])
if binary[0] == '0':
finished = True # No more reading quality codes present, can stop
else:
j = j + 2
j = j + 2
# 2 characters of string = 1 byte
readingValue = strIn[j:j + readingsValueSizeInBytes * 2]
j = j + readingsValueSizeInBytes * 2
result.append({
"i": str(i),
"timeStamp": timeStamp,
"numEvents": numEvents,
"numReadings": numReadings,
"readingType": readingType,
"readingTypeMapping" : readingTypeMapping,
"rc":rc,
"rd":rd,
"readingValue": readingValue
})
return result
Create UDF
parse_data_udf = udf(parse_data, ArrayType(StructType([
StructField("i", StringType(), False),
StructField("timeStamp", StringType(), False),
StructField("numEvents", IntegerType(), False),
StructField("numReadings", IntegerType(), False),
StructField("readingType", IntegerType(), False),
StructField("readingTypeMapping", IntegerType(), False),
StructField("rc", StringType(), False),
StructField("rc", StringType(), False),
StructField("readingValue", StringType(), False)
])))
# Apply the UDF and explode the array of structs into separate rows
df_parsed = df.withColumn("parsed", parse_data_udf(col("PL")))
df_exploded = df_parsed.select("PL", explode(col("parsed")).alias("parsed"))
# Show the resulting DataFrame
display(df_exploded)
PythonException: 'TypeError: 'NoneType' object is not subscriptable', from , line 386. Full traceback below: