Running the script in analysis using the API

Mohammed · January 15

I have multiple dataiku models deployed in the flow. I want to refresh the model periodically using the API.
The currently deployed model has custom features defined inside the analysis section of the lab. When refreshing the model, I want to copy these steps in the deployed model to the current model trial, create the same custom features, and then run multiple trials ( different algorithms, features, parameters, etc.)

I have developed the code as given below. Here, I am retrieving the analysis from the current model and copying it to the current ML task. However, I am unable to access these features during the settings.use_feature(feature_name) step, it gives a key error indicating that it is missing in the feature dictionary.

But when I manually open the API created lab session, I see custom feature preparation steps in the analysis. I can use these features to train the model. But doing the same in API gives me an error.

What am I missing here? Any help is highly appreciated.
Thanks in advance!

def get_current_features(project, model_id):

    """

    Retrieves the list of current input features for a deployed model by its ID.

    :param project: A DSSProject object representing the Dataiku project.

    :param model_id: String, the ID of the model for which features are to be retrieved.

    :return: A list of feature names that are used as inputs in the model.

    """

    # Get the saved model

    deployed_model = project.get_saved_model(model_id)

    # Get active version ID from model settings

    active_version_id = deployed_model.get_settings().get_raw()['activeVersion']

    # Get the details for the active version

    version_details = deployed_model.get_version_details(active_version_id).get_raw()

    # Extract feature details from the preprocessing info

    feature_details = version_details['preprocessing']['per_feature']

    # Filter and collect current input features

    current_features = [key for key, value in feature_details.items() if value.get('role') == "INPUT"]

    return current_features




#get the model id of the deployed model in flow 

model_id = #YOUR_MODEL_ID


#get the current features of the deployed model

current_features = get_current_features(project,model_id)


# Retrieve the deployed model

model = project.get_saved_model(model_id)


# Get the ML task from which this saved model was created

original_mltask = model.get_origin_ml_task()

original_mltask_settings = original_mltask.get_settings()


# Retrieve the analysis script associated with this ML task

analysis_id = original_mltask_settings.analysis_id

analysis = project.get_analysis(analysis_id)

analysis_def = analysis.get_definition()

#Custom Features build using the script 

original_script = analysis_def.get_raw_script()


# Load datasets 

master_data = project.get_dataset(master_data_name) #TRAIN DATA

upcoming_data = project.get_dataset(upcoming_data_name) #TEST DATA


mltask = project.create_prediction_ml_task(

    input_dataset=master_data_name,

    target_variable=DV,

    ml_backend_type='PY_MEMORY',  # ML backend to use

    guess_policy='DEFAULT'        # Template to use for setting default parameters

)

###################

#API MODEL TRAINING 

###################


#Copy analysis scripts from the existing model to get the custom features

new_analysis = project.get_analysis(mltask.analysis_id)

new_an_def = new_analysis.get_definition()

new_an_def.get_raw()["script"] = original_script

new_an_def.save()


# Wait for the ML task to be ready

mltask.wait_guess_complete()


train_selection_builder = dataikuapi.dss.utils.DSSDatasetSelectionBuilder()

test_selection_builder = dataikuapi.dss.utils.DSSDatasetSelectionBuilder()


# Obtain settings, enable GBT, and save settings

settings = mltask.get_settings()

settings.disable_all_algorithms()

settings.set_algorithm_enabled("GBT_REGRESSION", True)


settings.get_split_params().set_split_explicit(train_selection=train_selection_builder,

                                               test_selection=test_selection_builder,

                                               dataset_name=master_data_name,

                                               test_dataset_name=upcoming_data_name)


features_to_reject = []


def handle_feature(feature_name, feature_params):

    if feature_name not in current_features and feature_params["role"] == 'INPUT':

        features_to_reject.append(feature_name)

    return feature_params


settings.foreach_feature(handle_feature)


for feature_name in current_features:

    settings.use_feature(feature_name)


for feature_name in features_to_reject:

    settings.reject_feature(feature_name)



# Save the settings

settings.save()


# Start training and wait for it to be complete

mltask.start_train(session_name="Test")

mltask.wait_train_complete()

Operating system used: Windows 10

Running the script in analysis using the API

Categories

Setup Info

Tags