How can I check the last run time of a recipe with the Python API?

Haoran
Haoran Registered Posts: 10 ✭✭✭
edited March 26 in Using Dataiku
import re
import dataiku
import urllib3
import datetime
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def get_project(client, project_key):
    """Return the project handle for ``project_key``.

    Thin wrapper that delegates to ``client.get_project``.

    Args:
        client: API client exposing ``get_project(project_key)``.
        project_key: Key of the project to fetch.

    Returns:
        Whatever ``client.get_project(project_key)`` returns.
    """
    return client.get_project(project_key)

def get_datasets(client, project_key):
    """Return the datasets listed by the project identified by ``project_key``.

    Args:
        client: API client, passed through to ``get_project``.
        project_key: Key of the project whose datasets are listed.

    Returns:
        The result of calling ``list_datasets()`` on the project handle.
    """
    project = get_project(client, project_key)
    return project.list_datasets()

def get_shared_datasets(client, project_key=None, direction='from'):
    """Collect recipes that read datasets referenced as ``<project>.<dataset>``.

    Walks every project returned by ``client.list_projects()``, looks at the
    ``main`` inputs of each recipe, and keeps the inputs whose ``ref`` starts
    with a ``<project>.<dataset>`` pattern.

    Args:
        client: API client exposing ``list_projects()`` and
            ``get_project(key)``.
        project_key: A single project key, a list of project keys, or
            ``None`` to disable filtering by project.
        direction: ``'from'`` keeps refs whose source project (the part
            before the dot) is in ``project_key``; ``'to'`` keeps recipes
            whose own project is in ``project_key``.

    Returns:
        dict: ``{dataset_name: {consuming_project_key: [recipe_name, ...]}}``.

    Side effects:
        Prints one summary line plus the recipe status for each recipe the
        first time it is recorded.
    """
    # NOTE(review): hard-coded debugging filter carried over from the original
    # snippet -- only this dataset/project/recipe combination is ever recorded.
    # Remove or parameterise it before using the function more widely.
    debug_dataset = 'TTH_MONTHLY'
    debug_project = 'CNBAIDATAANALYSIS'
    debug_recipe = 'compute_TTH_MONTHLY_upper'

    # Normalise ``project_key`` to a list so membership tests work for both
    # the str and the list form; any other type leaves the list empty.
    projects = []
    if isinstance(project_key, str):
        projects = [project_key]
    if isinstance(project_key, list):
        projects = project_key

    patt = re.compile(r'\w+\.\w+')
    # Fixed UTC+8 offset used only for the printed timestamp.
    report_tz = datetime.timezone(datetime.timedelta(hours=8))
    shared_datasets = {}

    for project in client.list_projects():
        consumer_key = project['projectKey']
        prj = client.get_project(consumer_key)
        for r in prj.list_recipes():
            has_main_items = (
                'inputs' in r
                and 'main' in r['inputs']
                and 'items' in r['inputs']['main']
            )
            if not has_main_items:
                continue

            for inp in r['inputs']['main']['items']:
                if not patt.match(inp['ref']):
                    continue
                proj_ds = inp['ref'].split('.')
                dataset_name = proj_ds[1]

                selected = (
                    project_key is None
                    or (proj_ds[0] in projects and direction == 'from')
                    or (consumer_key in projects and direction == 'to')
                )
                if not selected:
                    continue

                is_debug_target = (
                    dataset_name == debug_dataset
                    and consumer_key == debug_project
                    and r['name'] == debug_recipe
                )
                if not is_debug_target:
                    continue

                recipe_names = (
                    shared_datasets
                    .setdefault(dataset_name, {})
                    .setdefault(consumer_key, [])
                )
                if r['name'] in recipe_names:
                    # Already recorded (and reported) for this dataset/project.
                    continue
                recipe_names.append(r['name'])

                # 'lastModifiedOn' is read as epoch milliseconds. Going by the
                # key name this is a modification time, not a run time.
                timestamp_ms = r['versionTag']['lastModifiedOn']
                dt = datetime.datetime.fromtimestamp(timestamp_ms / 1000.0,
                                                     tz=datetime.timezone.utc)
                dt_local = dt.astimezone(report_tz)
                formatted_time = dt_local.strftime('%Y-%m-%d %H:%M:%S')

                print(f"*** dataset '{dataset_name}' used in project '{consumer_key}' by recipe '{r['name']}' (modified at {formatted_time})")

                # Reuse the project handle already fetched for this iteration
                # instead of asking the client for it a second time.
                curr_recipe = prj.get_recipe(r['name'])

                print(curr_recipe.get_status())

    return shared_datasets



if __name__ == "__main__":
    # Example driver: connect to a remote DSS instance and print, for each
    # dataset returned by get_shared_datasets, the projects and recipes
    # recorded for it.

    # Dataiku env (placeholders -- replace with real values; avoid committing
    # a real API key to source control)
    url = "https://dss-xxxxxxxxxxx.com:10000/"
    api_key = "dkuaps-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

    # set up client
    # NOTE: no_check_certificate=True is passed here, and the file also
    # silences urllib3's InsecureRequestWarning at import time.
    dataiku.set_remote_dss(url, api_key, no_check_certificate=True)
    client = dataiku.api_client()

    # project key
    project_key = "my_project"
    print("project_key: " + project_key)

    # shared-out datasets
    shared_from_datasets = get_shared_datasets(client, project_key, 'from')

    for dataset_name, projects in shared_from_datasets.items():
        print(f"@@ DATASET: {dataset_name}")
        # Use a distinct loop name so the outer ``project_key`` is not
        # overwritten while iterating.
        for consumer_key, recipe_names in projects.items():
            print(f" PROJECT: {consumer_key}")
            for recipe_name in recipe_names:
                print(f" - {recipe_name}")

Please take a look at this line:

print(curr_recipe.get_status())

Do you have any ideas on how to find out the last run time of a recipe, or are there any other solutions? Thanks!

Dataiku version used: 14.1

Tagged:

Answers

  • OlgaO
    OlgaO Registered Posts: 2 ✭✭

    Can you check whether the code below works for you?

    import dataikuapi
    from datetime import datetime

    # Connection details
    host = "http://localhost:11200"  # Replace with your DSS instance URL
    api_key = "YOUR_API_KEY"  # Replace with your API key
    project_key = "YOUR_PROJECT_KEY"  # Replace with your project key
    dataset_name = "YOUR_OUTPUT_DATASET"  # Replace with the name of the dataset the recipe outputs

    # 1. Connect to the DSS instance and get the project
    client = dataikuapi.DSSClient(host, api_key)
    project = client.get_project(project_key)

    # 2. Get the dataset object
    dataset = project.get_dataset(dataset_name)

    # 3. Retrieve the last metrics
    last_metrics = dataset.get_last_metric_values()

    # 4. Get the last build date metric (it's in UTC).
    # The 'reporting:BUILD_START_DATE' metric gives the last build start time.
    # NOTE(review): get_metric_by_id may return the full metric record rather
    # than a plain string -- confirm against the API reference; if so,
    # get_global_value("reporting:BUILD_START_DATE") is probably the call
    # that yields the bare value.
    last_build_datetime_str = last_metrics.get_metric_by_id("reporting:BUILD_START_DATE")

    if last_build_datetime_str:
        print(f"The last build date/time (UTC) for dataset '{dataset_name}' is: {last_build_datetime_str}")
        # You can convert this string to a datetime object if needed for further processing
    else:
        print(f"Dataset '{dataset_name}' has no recorded build start date.")

  • OlgaO
    OlgaO Registered Posts: 2 ✭✭

    If the project has multiple recipes, then specific calls are needed.

  • FlorentD
    FlorentD Dataiker, Dataiku DSS Core Designer, Registered, Moderator Posts: 46 Dataiker

    Hi,

    Looking at the jobs of DSS should provide this information.

    client = dataiku.api_client()
    project = client.get_default_project()  # or get_project(KEY) if you prefer
    # Fetch the job list once and reuse it for the filter below.
    jobs = project.list_jobs()
    # Keep only the jobs whose definition names the recipe of interest.
    # ``job.get('def') or {}`` guards against a job with no 'def' entry.
    recipe_jobs = [
        job for job in jobs
        if (job.get('def') or {}).get('recipe', "") == "<RECIPE_NAME>"
    ]
    

    With this information, you should be able to find what you need. If you prefer relying on the Dataset name, you can.

    Hope this helps

    Best

Setup Info
    Tags
      Help me…