I work with Dataiku. I have a Jupyter notebook that works, and now I want to move its code into a Python recipe. The goal is to write a pandas DataFrame to a dataset.
`data_df` is the name of my DataFrame and `output_gen_python` is the name of my dataset in Dataiku.
I get this error:
Job failed: Error in Python process: At line 158: <class 'NameError'>: name 'data_df' is not defined
Here is my code:
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from datetime import datetime, timedelta
# Read recipe inputs
# Each input is opened as a Dataiku dataset handle, then loaded into memory
# with get_dataframe(). The two resulting frames are bound at module level as
# batches_types_copy_df and last_hour_extract_df.
batches_types_copy = dataiku.Dataset("batches_types_copy")
batches_types_copy_df = batches_types_copy.get_dataframe()
Last_hour_extract = dataiku.Dataset("Last_hour_extract")
last_hour_extract_df = Last_hour_extract.get_dataframe()
class OutputMode(object):
    # Body elided ("...") in the original post. From its use elsewhere in this
    # file, the real class is constructed with six positional arguments and
    # exposes getCaseID(), whose result provides generateID(outputMode, data_df).
    ...
class IDCalculation_I:
    # Base class that IDCase1 and IDCase2 derive from.
    def _preGenerateID(self,outputMode,data_df):
        # Implementation elided ("...") in the original post.
        ...
    def generateID(self,outputMode,data_df):
        # No-op placeholder: returns None unless a subclass overrides it.
        pass
class IDCase1(IDCalculation_I):
    # IDCalculation_I subclass; an instance of it is the last argument passed
    # to OutputMode in the script section of this file.
    def generateID(self,outputMode,data_df):
        # Implementation elided ("...") in the original post; only the final
        # return of data_df is visible.
        ...
        return data_df
class IDCase2(IDCalculation_I):
    # Second IDCalculation_I subclass; not instantiated in the visible code.
    def generateID(self,outputMode,data_df):
        # Implementation elided ("...") in the original post; only the final
        # return of data_df is visible.
        ...
        return data_df
class Fingerprinter(object):
    """Apply the ID-generation strategy held by an output mode to a dataset.

    The output mode object must provide ``getCaseID()``, returning an object
    with a ``generateID(outputMode, data_df)`` method.
    """

    def __init__(self, outputMode):
        """Keep a reference to the output mode that drives ID generation."""
        self._outputMode = outputMode

    def _generateID(self, data_df):
        """Delegate to the output mode's case object and return its result."""
        case_id = self._outputMode.getCaseID()
        return case_id.generateID(self._outputMode, data_df)

    def run(self, data_df):
        """Run the pipeline on ``data_df`` and return the processed data.

        ``data_df`` is presumably a pandas DataFrame; this class only passes
        it through to the strategy, so confirm against the caller.
        """
        # GenerateID
        return self._generateID(data_df)

    def __str__(self):
        """Return the string form of the wrapped output mode."""
        return str(self._outputMode)
# Build the pipeline: an OutputMode configured with the IDCase1 strategy,
# wrapped in a Fingerprinter.
outputMode = OutputMode('EEA','06:00:00','08:00:00',pytz.timezone('Europe/Paris'),CONST_MODE_CONT,IDCase1())
fp_calculator = Fingerprinter(outputMode)

# `data_df` only exists as a parameter name inside the methods defined in this
# file; it was never bound at module level, which is what raised the NameError.
# The Fingerprinter has to be run on an input DataFrame to produce the output.
# NOTE(review): last_hour_extract_df is assumed to be the frame to process;
# use batches_types_copy_df (or a combination of both inputs) if that is what
# the notebook fed to run() -- confirm against the notebook.
output_gen_python_df = fp_calculator.run(last_hour_extract_df)  # Compute a Pandas dataframe to write into output_gen_python

# Write recipe outputs
output_gen_python = dataiku.Dataset("output_gen_python")
output_gen_python.write_with_schema(output_gen_python_df)