Write pandas dataframe in dataset

Options
Data_ing_solv
Data_ing_solv Registered Posts: 7 ✭✭✭
edited July 16 in Using Dataiku

I work with Dataiku and I have a Jupyter notebook that works; now I want to include its code in a Python recipe.

`data_df` is the name of my dataframe and `output_gen_python` is the name of my dataset in Dataiku.

I get this error:

> Job failed: Error in Python process: At line 158: <class 'NameError'>: name 'data_df' is not defined

Here is my code :

import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from datetime import datetime, timedelta

# Read recipe inputs
# Each input is opened by name as a dataiku.Dataset handle, then loaded with
# get_dataframe(); the *_df variables hold the loaded data.
batches_types_copy = dataiku.Dataset("batches_types_copy")
batches_types_copy_df = batches_types_copy.get_dataframe()
# NOTE: the handle keeps the dataset's capitalised name, while the dataframe
# variable below is lower-case.
Last_hour_extract = dataiku.Dataset("Last_hour_extract")
last_hour_extract_df = Last_hour_extract.get_dataframe()


class OutputMode(object):
...

class IDCalculation_I:
    """Base class for the ID-generation cases (IDCase1 and IDCase2 subclass it)."""

    def _preGenerateID(self, outputMode, data_df):
        # Body elided in the original post.
        ...

    def generateID(self, outputMode, data_df):
        """Do nothing and return None; subclasses override this method.

        Args:
            outputMode: the output-mode object passed through by the caller.
            data_df: the dataframe the subclasses operate on.
        """

class IDCase1(IDCalculation_I):
    """First ID-generation case; overrides generateID and returns the dataframe."""

    def generateID(self, outputMode, data_df):
        """Return ``data_df`` after the ID computation.

        Args:
            outputMode: the output-mode object passed through by the caller.
            data_df: the dataframe to process.
        """
        # ID computation elided in the original post.
        ...
        return data_df

class IDCase2(IDCalculation_I):
    """Second ID-generation case; overrides generateID and returns the dataframe."""

    def generateID(self, outputMode, data_df):
        """Return ``data_df`` after the ID computation.

        Args:
            outputMode: the output-mode object passed through by the caller.
            data_df: the dataframe to process.
        """
        # ID computation elided in the original post.
        ...
        return data_df

class Fingerprinter(object):
    """Run the ID generation selected by an output-mode object on a dataframe."""

    def __init__(self, outputMode):
        """Store the output mode whose ``getCaseID()`` supplies the ID strategy."""
        self._outputMode = outputMode

    def _generateID(self, data_df):
        """Delegate to ``generateID`` of the object returned by ``getCaseID()``."""
        return self._outputMode.getCaseID().generateID(self._outputMode, data_df)

    def run(self, data_df):
        """Return the result of the ID generation applied to ``data_df``.

        ``data_df`` is a parameter of this method only; callers must pass the
        dataframe in and use the returned value.
        """
        # GenerateID
        return self._generateID(data_df)

    def __str__(self):
        """Return the string form of the wrapped output mode."""
        return str(self._outputMode)


# NOTE(review): `pytz` and `CONST_MODE_CONT` are not imported/defined in the
# code shown here; both must be in scope before this line runs.
outputMode = OutputMode('EEA','06:00:00','08:00:00',pytz.timezone('Europe/Paris'),CONST_MODE_CONT,IDCase1())
fp_calculator = Fingerprinter(outputMode)

# `data_df` is only a parameter name inside the methods, so it does not exist
# at module level (that is the reported NameError). The output frame has to be
# produced by actually running the calculator on an input dataframe.
# NOTE(review): assumes last_hour_extract_df is the frame to process; use
# batches_types_copy_df instead if that is the intended input.
output_gen_python_df = fp_calculator.run(last_hour_extract_df)

# Write recipe outputs
output_gen_python = dataiku.Dataset("output_gen_python")
output_gen_python.write_with_schema(output_gen_python_df)

Answers

Setup Info
    Tags
      Help me…