Write pandas dataframe in dataset

Options
Data_ing_solv
Data_ing_solv Registered Posts: 7 ✭✭✭

I work with Dataiku. I have a Jupyter notebook that works, and now I want to include its code in a Python recipe.

`data_df` is the name of my dataframe and `output_gen_python` is the name of my dataset in Dataiku.

I have this error :

> Job failed: Error in Python process: At line 158: <class 'NameError'>: name 'data_df' is not defined

Here is my code :

import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from datetime import datetime, timedelta

# Read recipe inputs
batches_types_copy = dataiku.Dataset("batches_types_copy")
batches_types_copy_df = batches_types_copy.get_dataframe()
Last_hour_extract = dataiku.Dataset("Last_hour_extract")
last_hour_extract_df = Last_hour_extract.get_dataframe()


class OutputMode(object):
    # Body elided in the post.
    ...


class IDCalculation_I:
    """Base class for the ID-generation cases used by ``Fingerprinter``."""

    def _preGenerateID(self, outputMode, data_df):
        # Body elided in the post.
        ...

    def generateID(self, outputMode, data_df):
        # Placeholder; the subclasses below provide their own generateID.
        pass


class IDCase1(IDCalculation_I):
    def generateID(self, outputMode, data_df):
        # Body elided in the post.
        ...
        return data_df


class IDCase2(IDCalculation_I):
    def generateID(self, outputMode, data_df):
        # Body elided in the post.
        ...
        return data_df


class Fingerprinter(object):
    """Apply the ID-generation case selected by an output mode to a dataframe."""

    def __init__(self, outputMode):
        self._outputMode = outputMode

    def _generateID(self, data_df):
        # Delegate to whichever case object the output mode returns.
        return self._outputMode.getCaseID().generateID(self._outputMode, data_df)

    def run(self, data_df):
        """Return ``data_df`` after the ID-generation step."""
        # GenerateID
        data_df = self._generateID(data_df)
        return data_df

    def __str__(self):
        return str(self._outputMode)


# NOTE(review): `pytz` and `CONST_MODE_CONT` are not defined in this excerpt;
# presumably they come from the elided parts of the recipe -- confirm.
outputMode = OutputMode('EEA', '06:00:00', '08:00:00', pytz.timezone('Europe/Paris'), CONST_MODE_CONT, IDCase1())
fp_calculator = Fingerprinter(outputMode)

# NOTE(review): this is the line that raises the reported NameError. `data_df`
# is only ever a parameter name inside the methods above; nothing binds it at
# module level, and `fp_calculator.run(...)` is never called. Presumably the
# intent is `fp_calculator.run(<one of the input dataframes read above>)` --
# confirm which input should be processed.
output_gen_python_df = data_df  # Compute a Pandas dataframe to write into output_gen_python

# Write recipe outputs
output_gen_python = dataiku.Dataset("output_gen_python")
output_gen_python.write_with_schema(output_gen_python_df)

Answers

Setup Info
    Tags
      Help me…