How can I check the last run time of a recipe with the Python API?
import re
import dataiku
import urllib3
import datetime
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def get_project(client, project_key):
    """Return the project handle for ``project_key``.

    Thin wrapper around ``client.get_project``.

    Args:
        client: API client object exposing ``get_project(project_key)``.
        project_key: Key of the project to fetch.

    Returns:
        Whatever ``client.get_project(project_key)`` returns.
    """
    return client.get_project(project_key)
def get_datasets(client, project_key):
    """Return the datasets of the project identified by ``project_key``.

    Args:
        client: API client object, passed through to ``get_project``.
        project_key: Key of the project whose datasets are listed.

    Returns:
        The result of calling ``list_datasets()`` on the project handle.
    """
    project = get_project(client, project_key)
    return project.list_datasets()
def get_shared_datasets(client, project_key=None, direction='from'):
    """Collect datasets that recipes read through a ``PROJECT.dataset`` ref.

    Every project returned by ``client.list_projects()`` is scanned. For each
    recipe, the items of its ``main`` input role are inspected; a ref of the
    form ``PROJECT.dataset`` is split into a source project and a dataset
    name.

    Args:
        client: API client exposing ``list_projects()`` and ``get_project()``.
        project_key: A project key, a list of project keys, or ``None``.
            ``None`` disables the project filter.
        direction: ``'from'`` keeps refs whose source project is one of the
            given keys; ``'to'`` keeps refs used by recipes located in one of
            the given projects.

    Returns:
        A dict ``{dataset_name: {consuming_project_key: [recipe_name, ...]}}``.

    Side effects:
        Prints one line per match plus the recipe's ``get_status()`` result.
    """
    if isinstance(project_key, str):
        projects = [project_key]
    elif isinstance(project_key, list):
        projects = project_key
    else:
        projects = []

    patt = re.compile(r'\w+\.\w+')
    # Modification times are displayed with a fixed UTC+8 offset.
    display_tz = datetime.timezone(datetime.timedelta(hours=8))
    shared_datasets = {}

    for project in client.list_projects():
        consumer_key = project['projectKey']
        prj = client.get_project(consumer_key)
        for r in prj.list_recipes():
            has_main_items = (
                'inputs' in r
                and 'main' in r['inputs']
                and 'items' in r['inputs']['main']
            )
            if not has_main_items:
                continue

            for inp in r['inputs']['main']['items']:
                if not patt.match(inp['ref']):
                    continue

                proj_ds = inp['ref'].split('.')
                dataset_name = proj_ds[1]
                selected = (
                    project_key is None
                    or (proj_ds[0] in projects and direction == 'from')
                    or (consumer_key in projects and direction == 'to')
                )
                if not selected:
                    continue

                # NOTE(review): hard-coded filter kept from the original; it
                # limits the result to one dataset/project/recipe and looks
                # like a debugging aid -- confirm before relying on the
                # function for general use.
                if not (
                    dataset_name == 'TTH_MONTHLY'
                    and consumer_key == 'CNBAIDATAANALYSIS'
                    and r['name'] == 'compute_TTH_MONTHLY_upper'
                ):
                    continue

                recipe_names = (
                    shared_datasets
                    .setdefault(dataset_name, {})
                    .setdefault(consumer_key, [])
                )
                if r['name'] not in recipe_names:
                    recipe_names.append(r['name'])

                # 'lastModifiedOn' is a millisecond epoch; it is reported as
                # the recipe's modification time, not as a run time.
                timestamp_ms = r['versionTag']['lastModifiedOn']
                dt = datetime.datetime.fromtimestamp(
                    timestamp_ms / 1000.0, tz=datetime.timezone.utc
                )
                formatted_time = dt.astimezone(display_tz).strftime(
                    '%Y-%m-%d %H:%M:%S'
                )
                print(f"*** dataset '{dataset_name}' used in project '{consumer_key}' by recipe '{r['name']}' (modified at {formatted_time})")

                # Reuse the project handle already fetched for this loop.
                curr_recipe = prj.get_recipe(r['name'])
                print(curr_recipe.get_status())

    return shared_datasets
if __name__ == "__main__":
    # Dataiku env
    url = "https://dss-xxxxxxxxxxx.com:10000/"
    api_key = "dkuaps-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
    # set up client (certificate verification is disabled for this host)
    dataiku.set_remote_dss(url, api_key, no_check_certificate=True)
    client = dataiku.api_client()
    # project key
    project_key = "my_project"
    print("project_key: " + project_key)
    # shared-out datasets
    shared_from_datasets = get_shared_datasets(client, project_key, 'from')
    for dataset_name, projects in shared_from_datasets.items():
        print(f"@@ DATASET: {dataset_name}")
        # Use a distinct loop name so the configured project_key above is
        # not overwritten while iterating over consuming projects.
        for consumer_key, recipe_names in projects.items():
            print(f" PROJECT: {consumer_key}")
            for recipe_name in recipe_names:
                print(f" - {recipe_name}")
Please take a look at this line:
print(curr_recipe.get_status())
Do you have any ideas on how to find out the last run time of a recipe, or are there any other solutions? Thanks!
Dataiku version used: 14.1
Answers
-
Can you check whether you can use the code below?
import dataikuapi
from datetime import datetime

# Connection details
host = "http://localhost:11200"  # Replace with your DSS instance URL
api_key = "YOUR_API_KEY"  # Replace with your API key
project_key = "YOUR_PROJECT_KEY"  # Replace with your project key
dataset_name = "YOUR_OUTPUT_DATASET"  # Replace with the name of the dataset the recipe outputs

# 1. Connect to the DSS instance and get the project
client = dataikuapi.DSSClient(host, api_key)
project = client.get_project(project_key)

# 2. Get the dataset object
dataset = project.get_dataset(dataset_name)

# 3. Retrieve the last metrics
last_metrics = dataset.get_last_metric_values()

# 4. Get the last build date metric (it's in UTC)
# The 'reporting:BUILD_START_DATE' metric gives the last build start time
last_build_datetime_str = last_metrics.get_metric_by_id("reporting:BUILD_START_DATE")
if last_build_datetime_str:
    print(f"The last build date/time (UTC) for dataset '{dataset_name}' is: {last_build_datetime_str}")
    # You can convert this string to a datetime object if needed for further processing
else:
    print(f"Dataset '{dataset_name}' has no recorded build start date.")
If the project has multiple recipes, then specific calls are needed.
-
Hi,
Looking at the jobs of DSS should provide this information.
client = dataiku.api_client()
project = client.get_default_project()  # or get_project(KEY) if you prefer
jobs = project.list_jobs()
[job for job in project.list_jobs() if job.get('def').get('recipe',"") == "<RECIPE_NAME>"]
With this information, you should be able to find what you need. If you prefer relying on the dataset name, you can.
Hope this helps
Best

