Not able to build missing partition

Amarnath
Amarnath Partner, Dataiku DSS Core Designer, Dataiku DSS ML Practitioner, Dataiku DSS Adv Designer, Registered Posts: 1 Partner

Hi Everyone,

I'm facing an issue creating the missing partitions for folder-based datasets. Please see the attached image for the partition pattern. After partitioning, some partitions are missing; we use the code below to build them, but it is not able to create the missing partitions.

from dea_common.hp_scenario import build_dataset
from dataiku.scenario import Scenario
import dataiku
from datetime import timedelta, date, datetime

# Generate one identifier per calendar month between two dates (both months included).
def dates_range(start, end, fmt="%Y-%m-%d"):
    """Return the first day of every month from ``start`` to ``end``, formatted.

    Both the month containing ``start`` and the month containing ``end`` are
    included; the day-of-month of either argument is ignored.

    Args:
        start: Object exposing ``year`` and ``month`` (``date`` or ``datetime``).
        end: Object exposing ``year`` and ``month`` (``date`` or ``datetime``).
        fmt: ``strftime`` pattern applied to the first day of each month.
            The default ``"%Y-%m-%d"`` yields e.g. ``"2021-07-01"``; pass
            ``"%Y-%m"`` to obtain ``"2021-07"`` instead.

    Returns:
        A list of strings in chronological order. The list is empty when
        ``start`` falls in a later month than ``end``.
    """
    def month_index(dt):
        # Zero-based count of months since year 0, so that divmod-style
        # arithmetic maps back to (year, month - 1) without special-casing
        # December.
        return dt.year * 12 + dt.month - 1

    return [
        datetime(index // 12, index % 12 + 1, 1).strftime(fmt)
        for index in range(month_index(start), month_index(end) + 1)
    ]

def get_missing_partition(dataset_name, date1, date2):
    """Return the expected monthly partitions that the dataset does not have.

    The expected identifiers are produced by ``dates_range`` for every month
    between ``date1`` and ``date2`` and compared against what
    ``dataiku.Dataset(dataset_name).list_partitions()`` reports.

    Args:
        dataset_name: Name of the dataset passed to ``dataiku.Dataset``.
        date1: Start date as a string parseable with ``'%Y-%m-%d'``.
        date2: End date as a string parseable with ``'%Y-%m-%d'``.

    Returns:
        A list of expected partition identifiers (in chronological order)
        that are absent from the dataset's current partition list.

    Raises:
        ValueError: If ``date1`` or ``date2`` does not match ``'%Y-%m-%d'``.
    """
    # Partitions that currently exist on the dataset.
    existing = dataiku.Dataset(dataset_name).list_partitions()
    print("Existing partitions:")
    print(existing)

    # Every partition that should exist for the requested date window.
    start = datetime.strptime(date1, '%Y-%m-%d').date()
    end = datetime.strptime(date2, '%Y-%m-%d').date()
    expected = dates_range(start, end)
    print("Partitions that should exist:")
    print(expected)

    # NOTE(review): the comparison below is exact string equality, so the
    # identifiers returned by list_partitions() must have the same shape as
    # the ones dates_range() generates -- confirm against the "Existing
    # partitions" output above if everything is reported as missing.
    existing_ids = set(existing)  # built once for O(1) membership tests

    missing = []
    for partition in expected:
        if partition not in existing_ids:
            print("%s : missing partition" % partition)
            missing.append(partition)

    return missing

# Dataset whose missing partitions are detected and then rebuilt; kept in one
# place so the lookup and the build always target the same dataset.
DATASET_NAME = 'Campaign_1'

emea_partitions = get_missing_partition(DATASET_NAME, '2021-7-15', '2021-10-5')
#us_partitions = get_missing_partition('gcw_us_sell_to_pos_joined','2014-11-01','2021-08-09')

# Only trigger a build when at least one partition is missing; the missing
# identifiers are passed to the scenario as a single comma-separated string.
if emea_partitions:
    scenario = Scenario()
    scenario.build_dataset(DATASET_NAME, partitions=",".join(emea_partitions))

Answers

Setup Info
    Tags
      Help me…