[2023/12/20-17:30:39.242] [ActivityExecutor-36] [DEBUG] [dku.datasets.fsbased] running compute_flight_reviews_joined_NP - getReadiness: enumerated partition, found 1 paths, computing hash [2023/12/20-17:30:39.243] [ActivityExecutor-36] [INFO] [dku.flow.activity] running compute_flight_reviews_joined_NP - Checked source readiness DKU_TUT_FUZZY_JOIN.flight_reviews -> true [2023/12/20-17:30:39.243] [ActivityExecutor-36] [INFO] [dku.flow.activity] running compute_flight_reviews_joined_NP - Will check readiness of DKU_TUT_FUZZY_JOIN.world_cities p=NP [2023/12/20-17:30:39.250] [ActivityExecutor-36] [INFO] [dku.datasets.file] running compute_flight_reviews_joined_NP - Building Filesystem handler config: {"path":"/data/dataiku/datadir/uploads/DKU_TUT_FUZZY_JOIN/datasets/world_cities","notReadyIfEmpty":false,"filesSelectionRules":{"mode":"ALL","excludeRules":[],"includeRules":[],"explicitFiles":[]}} [2023/12/20-17:30:39.250] [ActivityExecutor-36] [DEBUG] [dku.datasets.fsbased] running compute_flight_reviews_joined_NP - getReadiness: will enumerate partition [2023/12/20-17:30:39.250] [ActivityExecutor-36] [INFO] [dku.datasets.ftplike] running compute_flight_reviews_joined_NP - Enumerating Filesystem dataset prefix= [2023/12/20-17:30:39.250] [ActivityExecutor-36] [DEBUG] [dku.datasets.fsbased] running compute_flight_reviews_joined_NP - Building FS provider for dataset handler: DKU_TUT_FUZZY_JOIN.world_cities [2023/12/20-17:30:39.273] [ActivityExecutor-36] [DEBUG] [dku.datasets.fsbased] running compute_flight_reviews_joined_NP - FS Provider built [2023/12/20-17:30:39.273] [ActivityExecutor-36] [DEBUG] [dku.fs.local] running compute_flight_reviews_joined_NP - Enumerating local filesystem prefix=/ [2023/12/20-17:30:39.273] [ActivityExecutor-36] [DEBUG] [dku.fs.local] running compute_flight_reviews_joined_NP - Enumeration done nb_paths=1 size=1168196 [2023/12/20-17:30:39.273] [ActivityExecutor-36] [DEBUG] [dku.datasets.fsbased] running compute_flight_reviews_joined_NP - getReadiness: enumerated partition, found 1 paths, computing hash [2023/12/20-17:30:39.274] [ActivityExecutor-36] [INFO] [dku.flow.activity] running compute_flight_reviews_joined_NP - Checked source readiness DKU_TUT_FUZZY_JOIN.world_cities -> true [2023/12/20-17:30:39.275] [ActivityExecutor-36] [DEBUG] [dku.flow.activity] running compute_flight_reviews_joined_NP - Computing hashes to propagate BEFORE activity [2023/12/20-17:30:39.275] [ActivityExecutor-36] [DEBUG] [dku.flow.activity] running compute_flight_reviews_joined_NP - Recorded 2 hashes before activity run [2023/12/20-17:30:39.275] [ActivityExecutor-36] [DEBUG] [dku.flow.activity] running compute_flight_reviews_joined_NP - Building recipe runner of type [2023/12/20-17:30:39.281] [ActivityExecutor-36] [INFO] [dku.recipe.fuzzyjoin.runner] running compute_flight_reviews_joined_NP - SET PAYLOAD: { "joins": [ { "table2": 1, "table1": 0, "conditionsMode": "AND", "type": "LEFT", "on": [ { "column1": { "name": "arrival", "table": 0 }, "column2": { "name": "city", "table": 1 }, "fuzzyMatchDesc": { "distanceType": "LEVENSHTEIN", "threshold": 1 }, "normaliseDesc": { "caseInsensitive": true, "normaliseText": true } } ] } ], "selectedColumns": [ { "name": "review", "type": "string", "table": 0 }, { "name": "score", "type": "string", "table": 0 }, { "name": "date", "type": "date", "table": 0 }, { "name": "departure", "type": "string", "table": 0 }, { "name": "layover", "type": "string", "table": 0 }, { "name": "arrival", "type": "string", "table": 0 }, { "name": "city", "type": "string", "table": 1 }, { "name": "country", "type": "string", "table": 1 }, { "name": "geopoint", "type": "geopoint", "table": 1 } ], "resolvedSelectedColumns": [], "engineParams": { "hive": { "skipPrerunValidate": false, "hiveconf": [], "inheritConf": "default", "addDkuUdf": false, "executionEngine": "HIVESERVER2" }, "sqlPipelineParams": { "pipelineAllowMerge": true, "pipelineAllowStart": true }, "impala": { "forceStreamMode": true }, "lowerCaseSchemaIfEngineRequiresIt": true, "sparkSQL": { "skipPrerunValidate": false, "pipelineAllowMerge": true, "useGlobalMetastore": false, "pipelineAllowStart": true, "readParams": { "mode": "AUTO", "autoModeRepartitionInto": 10, "map": {} }, "overwriteOutputSchema": false, "executionEngine": "SPARK_SUBMIT", "sparkConfig": { "inheritConf": "default", "conf": [] } } }, "engineType": "DSS", "virtualInputs": [ { "outputColumnsSelectionMode": "AUTO_NON_CONFLICTING", "preFilter": { "distinct": false, "enabled": false }, "originLabel": "flight_reviews", "index": 0, "computedColumns": [] }, { "outputColumnsSelectionMode": "MANUAL", "preFilter": { "distinct": false, "enabled": false }, "originLabel": "world_cities", "prefix": "arrival", "index": 1, "computedColumns": [] } ], "withMetaColumn": false, "debugMode": false, "computedColumns": [], "postFilter": { "$status": { "schema": { "columns": [ { "name": "review", "type": "string" }, { "name": "score", "type": "string" }, { "name": "date", "type": "date" }, { "name": "departure", "type": "string" }, { "name": "layover", "type": "string" }, { "name": "arrival", "type": "string" }, { "name": "arrival_city", "type": "string" }, { "name": "arrival_country", "type": "string" }, { "name": "arrival_geopoint", "type": "geopoint" } ], "userModified": false } } } } [2023/12/20-17:30:39.289] [ActivityExecutor-36] [DEBUG] [dku.flow.recipes.prerunpropagate] running compute_flight_reviews_joined_NP - Pre-run schema propagation not enabled in this job [2023/12/20-17:30:39.294] [Thread-21] [INFO] [dku.datasets.pull] - pull background thread starting for flight_reviews [2023/12/20-17:30:39.296] [Thread-22] [INFO] [dku.datasets.pull] - pull background thread starting for world_cities [2023/12/20-17:30:39.305] [Thread-22] [INFO] [dku.datasets.file] - Building Filesystem handler config: {"path":"/data/dataiku/datadir/uploads/DKU_TUT_FUZZY_JOIN/datasets/world_cities","notReadyIfEmpty":false,"filesSelectionRules":{"mode":"ALL","excludeRules":[],"includeRules":[],"explicitFiles":[]}} [2023/12/20-17:30:39.305] [Thread-21] [INFO] [dku.datasets.file] - Building Filesystem handler config: {"path":"/data/dataiku/datadir/uploads/DKU_TUT_FUZZY_JOIN/datasets/flight_reviews","notReadyIfEmpty":false,"filesSelectionRules":{"mode":"ALL","excludeRules":[],"includeRules":[],"explicitFiles":[]}} [2023/12/20-17:30:39.305] [Thread-22] [INFO] [dku.datasets.ftplike] - Enumerating Filesystem dataset prefix= [2023/12/20-17:30:39.305] [Thread-21] [INFO] [dku.datasets.ftplike] - Enumerating Filesystem dataset prefix= [2023/12/20-17:30:39.305] [Thread-22] [DEBUG] [dku.datasets.fsbased] - Building FS provider for dataset handler: DKU_TUT_FUZZY_JOIN.world_cities [2023/12/20-17:30:39.306] [Thread-21] [DEBUG] [dku.datasets.fsbased] - Building FS provider for dataset handler: DKU_TUT_FUZZY_JOIN.flight_reviews [2023/12/20-17:30:39.306] [Thread-22] [DEBUG] [dku.datasets.fsbased] - FS Provider built [2023/12/20-17:30:39.307] [Thread-22] [DEBUG] [dku.fs.local] - Enumerating local filesystem prefix=/ [2023/12/20-17:30:39.307] [Thread-21] [DEBUG] [dku.datasets.fsbased] - FS Provider built [2023/12/20-17:30:39.307] [Thread-21] [DEBUG] [dku.fs.local] - Enumerating local filesystem prefix=/ [2023/12/20-17:30:39.307] [Thread-22] [DEBUG] [dku.fs.local] - Enumeration done nb_paths=1 size=1168196 [2023/12/20-17:30:39.308] [Thread-21] [DEBUG] [dku.fs.local] - Enumeration done nb_paths=1 size=74543 [2023/12/20-17:30:39.308] [Thread-22] [INFO] [dku.input.push] - USTP: push selection.method=FULL records=-1 ratio=0.02 col=null [2023/12/20-17:30:39.308] [Thread-21] [INFO] [dku.input.push] - USTP: push selection.method=FULL records=-1 ratio=0.02 col=null [2023/12/20-17:30:39.313] [Thread-22] [INFO] [dku.format] - Extractor run: limit={"maxBytes":-1,"maxRecords":-1,"ordering":{"enabled":false,"rules":[]}} totalRecords=0 [2023/12/20-17:30:39.313] [Thread-21] [INFO] [dku.format] - Extractor run: limit={"maxBytes":-1,"maxRecords":-1,"ordering":{"enabled":false,"rules":[]}} totalRecords=0 [2023/12/20-17:30:39.317] [ActivityExecutor-36] [INFO] [dku.datasets.bloblike] running compute_flight_reviews_joined_NP - Clear partitions [2023/12/20-17:30:39.317] [Thread-21] [INFO] [dku] - getCompression filename=**flight_reviews.csv** [2023/12/20-17:30:39.317] [Thread-22] [INFO] [dku] - getCompression filename=**world_cities.csv** [2023/12/20-17:30:39.318] [Thread-21] [INFO] [dku] - getCompression filename=**flight_reviews.csv** [2023/12/20-17:30:39.318] [Thread-22] [INFO] [dku] - getCompression filename=**world_cities.csv** [2023/12/20-17:30:39.318] [Thread-21] [INFO] [dku.format] - Start uncompressed stream: /data/dataiku/datadir/uploads/DKU_TUT_FUZZY_JOIN/datasets/flight_reviews/flight_reviews.csv / totalRecsBefore=0 [2023/12/20-17:30:39.318] [Thread-22] [INFO] [dku.format] - Start uncompressed stream: /data/dataiku/datadir/uploads/DKU_TUT_FUZZY_JOIN/datasets/world_cities/world_cities.csv / totalRecsBefore=0 [2023/12/20-17:30:39.319] [Thread-22] [INFO] [dku] - getCompression filename=**world_cities.csv** [2023/12/20-17:30:39.318] [Thread-21] [INFO] [dku] - getCompression filename=**flight_reviews.csv** [2023/12/20-17:30:39.319] [ActivityExecutor-36] [DEBUG] [dku.datasets.fsbased] running compute_flight_reviews_joined_NP - Building FS provider for dataset handler: DKU_TUT_FUZZY_JOIN.flight_reviews_joined [2023/12/20-17:30:39.327] [ActivityExecutor-36] [INFO] [dku.fs.s3] running compute_flight_reviews_joined_NP - Created S3 FS provider bucket=gis-data-us-east-1 effectivePath=/space-e9d099df-dku/node-dbbb02f6/managed-dss-data/DKU_TUT_FUZZY_JOIN/flight_reviews_joined from 'space-e9d099df-dku/node-dbbb02f6' and '/managed-dss-data/DKU_TUT_FUZZY_JOIN/flight_reviews_joined' [2023/12/20-17:30:39.327] [ActivityExecutor-36] [DEBUG] [dku.datasets.fsbased] running compute_flight_reviews_joined_NP - FS Provider built [2023/12/20-17:30:39.372] [ActivityExecutor-36] [INFO] [dku.datasets.bloblike] running compute_flight_reviews_joined_NP - Clearing partition as a folder : 'NP' [2023/12/20-17:30:39.411] [ActivityExecutor-36] [INFO] [dku.aws.credentials] running compute_flight_reviews_joined_NP - AWS connection=dataiku-managed-storage authCtx=aron.elston@phytonconsulting.com assuming role=arn:aws:iam::538701811630:role/GIS/useast1/dku/s3-space-e9d099df-dku-us-east-1-dku [2023/12/20-17:30:39.411] [ActivityExecutor-36] [DEBUG] [dku.aws.credentials] running compute_flight_reviews_joined_NP - Credentials cache miss [2023/12/20-17:30:39.482] [Thread-21] [INFO] [dku.format] - after stream totalComp=74543 totalUncomp=74543 totalRec=998 [2023/12/20-17:30:39.482] [Thread-21] [INFO] [dku.format] - Extractor run done, totalCompressed=74543 totalRecords=998 [2023/12/20-17:30:39.482] [Thread-21] [DEBUG] [dku.datasets.pull] - pull background thread: ending queue, cursize=998 [2023/12/20-17:30:39.483] [Thread-21] [INFO] [dku.datasets.pull] - pull background thread finished for flight_reviews [2023/12/20-17:30:39.945] [ActivityExecutor-36] [DEBUG] [dku.aws.credentials] running compute_flight_reviews_joined_NP - Using context creds access=ASIAX23JBJ6XCGVKO2LQ [2023/12/20-17:30:39.948] [ActivityExecutor-36] [DEBUG] [dku.aws.credentials] running compute_flight_reviews_joined_NP - Truncated/slugified session name to 64 chars: dss-conn-dataiku-managed-storage-assumed-for-aron.elsto-0CBiIKMS [2023/12/20-17:30:40.061] [ActivityExecutor-36] [DEBUG] [dku.aws.credentials] running compute_flight_reviews_joined_NP - Got assumed role credentials from STS [2023/12/20-17:30:40.061] [ActivityExecutor-36] [DEBUG] [dku.aws.credentials] running compute_flight_reviews_joined_NP - Caching assumed STS credential [2023/12/20-17:30:40.141] [ActivityExecutor-36] [INFO] [dku.fs.s3] running compute_flight_reviews_joined_NP - Retrieving location from bucket [2023/12/20-17:30:40.253] [ActivityExecutor-36] [INFO] [dku.fs.s3] running compute_flight_reviews_joined_NP - Bucket location is US, using us-east-1 [2023/12/20-17:30:40.256] [ActivityExecutor-36] [DEBUG] [dku.fs.s3] running compute_flight_reviews_joined_NP - Done create S3 client [2023/12/20-17:30:40.269] [ActivityExecutor-36] [INFO] [dku.fs.s3] running compute_flight_reviews_joined_NP - Clearing S3 bucket gis-data-us-east-1 under space-e9d099df-dku/node-dbbb02f6/managed-dss-data/DKU_TUT_FUZZY_JOIN/flight_reviews_joined/ [2023/12/20-17:30:40.292] [ActivityExecutor-36] [INFO] [dku.fs.s3] running compute_flight_reviews_joined_NP - Deleting 1 entries [2023/12/20-17:30:40.388] [ActivityExecutor-36] [INFO] [dku.fs.s3] running compute_flight_reviews_joined_NP - Done clearing: deleted 1 entries [2023/12/20-17:30:40.388] [ActivityExecutor-36] [INFO] [dku.datasets.bloblike] running compute_flight_reviews_joined_NP - Done clearing partition 'NP' [2023/12/20-17:30:40.394] [ActivityExecutor-36] [DEBUG] [dku.datasets.fsbased] running compute_flight_reviews_joined_NP - Building FS provider for dataset handler: DKU_TUT_FUZZY_JOIN.flight_reviews_joined [2023/12/20-17:30:40.395] [ActivityExecutor-36] [INFO] [dku.fs.s3] running compute_flight_reviews_joined_NP - Created S3 FS provider bucket=gis-data-us-east-1 effectivePath=/space-e9d099df-dku/node-dbbb02f6/managed-dss-data/DKU_TUT_FUZZY_JOIN/flight_reviews_joined from 'space-e9d099df-dku/node-dbbb02f6' and '/managed-dss-data/DKU_TUT_FUZZY_JOIN/flight_reviews_joined' [2023/12/20-17:30:40.395] [ActivityExecutor-36] [DEBUG] [dku.datasets.fsbased] running compute_flight_reviews_joined_NP - FS Provider built [2023/12/20-17:30:40.397] [ActivityExecutor-36] [INFO] [dku.aws.credentials] running compute_flight_reviews_joined_NP - AWS connection=dataiku-managed-storage authCtx=aron.elston@phytonconsulting.com assuming role=arn:aws:iam::538701811630:role/GIS/useast1/dku/s3-space-e9d099df-dku-us-east-1-dku [2023/12/20-17:30:40.398] [ActivityExecutor-36] [DEBUG] [dku.aws.credentials] running compute_flight_reviews_joined_NP - Credentials cache hit [2023/12/20-17:30:40.412] [ActivityExecutor-36] [DEBUG] [dku.fsproviders.hdfs] running compute_flight_reviews_joined_NP - Build HDFSProvider conn=null cpr=s3a://gis-data-us-east-1/space-e9d099df-dku/node-dbbb02f6/managed-dss-data/DKU_TUT_FUZZY_JOIN/flight_reviews_joined pWCR=/ crSA=s3a://gis-data-us-east-1 crWSA=/space-e9d099df-dku/node-dbbb02f6/managed-dss-data/DKU_TUT_FUZZY_JOIN/flight_reviews_joined rpWSA=/space-e9d099df-dku/node-dbbb02f6/managed-dss-data/DKU_TUT_FUZZY_JOIN/flight_reviews_joined [2023/12/20-17:30:40.416] [ActivityExecutor-36] [INFO] [com.dataiku.dip.input.formats.parquet.ParquetOutputWriter] running compute_flight_reviews_joined_NP - Creating UGI [2023/12/20-17:30:40.655] [ActivityExecutor-36] [WARN] [org.apache.hadoop.util.NativeCodeLoader] running compute_flight_reviews_joined_NP - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable [2023/12/20-17:30:40.699] [ActivityExecutor-36] [DEBUG] [dku.hadoop] running compute_flight_reviews_joined_NP - Initializing Hadoop FS with context UGI: dataiku_user (auth:PROXY) via dataiku (auth:SIMPLE) (login: dataiku (auth:SIMPLE)) rootPathURI=s3a://gis-data-us-east-1 [2023/12/20-17:30:40.714] [ActivityExecutor-36] [INFO] [dku.hadoop] running compute_flight_reviews_joined_NP - Forcing fs..impl.disable.cache because the config might not be taken into account [2023/12/20-17:30:41.290] [ActivityExecutor-36] [INFO] [org.apache.hadoop.conf.Configuration.deprecation] running compute_flight_reviews_joined_NP - fs.s3a.server-side-encryption-algorithm is deprecated. Instead, use fs.s3a.encryption.algorithm [2023/12/20-17:30:41.392] [ActivityExecutor-36] [WARN] [org.apache.hadoop.metrics2.impl.MetricsConfig] running compute_flight_reviews_joined_NP - Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties [2023/12/20-17:30:41.406] [ActivityExecutor-36] [INFO] [org.apache.hadoop.metrics2.impl.MetricsSystemImpl] running compute_flight_reviews_joined_NP - Scheduled Metric snapshot period at 10 second(s). [2023/12/20-17:30:41.407] [ActivityExecutor-36] [INFO] [org.apache.hadoop.metrics2.impl.MetricsSystemImpl] running compute_flight_reviews_joined_NP - s3a-file-system metrics system started [2023/12/20-17:30:42.168] [ActivityExecutor-36] [DEBUG] [dku.fsproviders.hdfs] running compute_flight_reviews_joined_NP - Built Hadoop FS for: s3a://gis-data-us-east-1 -> S3AFileSystem{uri=s3a://gis-data-us-east-1, workingDir=s3a://gis-data-us-east-1/user/dataiku_user, partSize=67108864, enableMultiObjectsDelete=true, maxKeys=5000, OpenFileSupport{changePolicy=ETagChangeDetectionPolicy mode=Server, defaultReadAhead=65536, defaultBufferSize=4096, defaultAsyncDrainThreshold=16000, defaultInputPolicy=default}, blockSize=33554432, multiPartThreshold=134217728, s3EncryptionAlgorithm='SSE_S3', blockFactory=org.apache.hadoop.fs.s3a.S3ADataBlocks$DiskBlockFactory@3fed124d, auditManager=Service ActiveAuditManagerS3A in state ActiveAuditManagerS3A: STARTED, auditor=LoggingAuditor{ID='414667c7-336d-4f54-a634-c1ac79f64fe7', headerEnabled=true, rejectOutOfSpan=false, isMultipartUploadEnabled=true}}, authoritativePath=[], useListV1=false, magicCommitter=true, boundedExecutor=BlockingThreadPoolExecutorService{SemaphoredDelegatingExecutor{permitCount=160, available=160, waiting=0}, activeCount=0}, unboundedExecutor=java.util.concurrent.ThreadPoolExecutor@7c6d6c1d[Running, pool size = 0, active threads = 0, queued tasks = 0, completed tasks = 0], credentials=AWSCredentialProviderList[refcount= 1: [TemporaryAWSCredentialsProvider], delegation tokens=disabled, DirectoryMarkerRetention{policy='delete'}, instrumentation {S3AInstrumentation{}}, ClientSideEncryption=false} [2023/12/20-17:30:42.871] [ActivityExecutor-36] [INFO] [org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter] running compute_flight_reviews_joined_NP - File Output Committer Algorithm version is 2 [2023/12/20-17:30:42.871] [ActivityExecutor-36] [INFO] [org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter] running compute_flight_reviews_joined_NP - FileOutputCommitter skip cleanup _temporary folders under output directory:false, ignore cleanup failures: false [2023/12/20-17:30:43.104] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.codec.CodecConfig] running compute_flight_reviews_joined_NP - Compression: SNAPPY [2023/12/20-17:30:43.118] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.ParquetOutputFormat] running compute_flight_reviews_joined_NP - Parquet block size to 134217728 [2023/12/20-17:30:43.118] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.ParquetOutputFormat] running compute_flight_reviews_joined_NP - Parquet page size to 1048576 [2023/12/20-17:30:43.119] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.ParquetOutputFormat] running compute_flight_reviews_joined_NP - Parquet dictionary page size to 1048576 [2023/12/20-17:30:43.119] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.ParquetOutputFormat] running compute_flight_reviews_joined_NP - Dictionary is on [2023/12/20-17:30:43.119] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.ParquetOutputFormat] running compute_flight_reviews_joined_NP - Validation is off [2023/12/20-17:30:43.119] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.ParquetOutputFormat] running compute_flight_reviews_joined_NP - Writer version is: PARQUET_1_0 [2023/12/20-17:30:43.119] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.ParquetOutputFormat] running compute_flight_reviews_joined_NP - Maximum row group padding size is 8388608 bytes [2023/12/20-17:30:43.119] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.ParquetOutputFormat] running compute_flight_reviews_joined_NP - Page size checking is: estimated [2023/12/20-17:30:43.119] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.ParquetOutputFormat] running compute_flight_reviews_joined_NP - Min row count for page size check is: 100 [2023/12/20-17:30:43.119] [ActivityExecutor-36] [INFO] [org.apache.parquet.hadoop.ParquetOutputFormat] running compute_flight_reviews_joined_NP - Max row count for page size check is: 10000 [2023/12/20-17:30:43.136] [ActivityExecutor-36] [ERROR] [dku.flow.jobrunner] running compute_flight_reviews_joined_NP - Activity unexpectedly failed com.dataiku.dip.utils.NotImplementedException: DSS type geopoint is not supported by Parquet writer at com.dataiku.dip.input.formats.parquet.ParquetSchemaBuilder.dssTypeToParquet(ParquetSchemaBuilder.java:94) at com.dataiku.dip.input.formats.parquet.ParquetSchemaBuilder.dssTypeToParquet(ParquetSchemaBuilder.java:60) at com.dataiku.dip.input.formats.parquet.ParquetSchemaBuilder.dssTypesToParquet(ParquetSchemaBuilder.java:54) at com.dataiku.dip.input.formats.parquet.ParquetSchemaBuilder.dssSchemaToParquet(ParquetSchemaBuilder.java:47) at com.dataiku.dip.input.formats.parquet.DSSRowWriteSupport.init(DSSRowWriteSupport.java:56) at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:388) at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:349) at com.dataiku.dip.input.formats.parquet.ParquetOutputWriter$1.run(ParquetOutputWriter.java:148) at com.dataiku.dip.input.formats.parquet.ParquetOutputWriter$1.run(ParquetOutputWriter.java:127) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899) at com.dataiku.dip.input.formats.parquet.ParquetOutputWriter.init(ParquetOutputWriter.java:127) at com.dataiku.dip.dataflow.exec.stream.ToDatasetStreamer.init(ToDatasetStreamer.java:125) at com.dataiku.dip.dataflow.exec.stream.ToDatasetStreamer.getAsProcessor(ToDatasetStreamer.java:108) at com.dataiku.dip.dataflow.exec.AbstractInitializedRunner.initOutputs(AbstractInitializedRunner.java:128) at com.dataiku.dip.dataflow.exec.fuzzyjoin.FuzzyJoinRecipeRunner.init(FuzzyJoinRecipeRunner.java:57) at com.dataiku.dip.dataflow.jobrunner.ExecutionRunnablesBuilder.getRunnables(ExecutionRunnablesBuilder.java:89) at com.dataiku.dip.dataflow.jobrunner.ActivityRunner.runActivity(ActivityRunner.java:568) at com.dataiku.dip.dataflow.jobrunner.JobRunner.runActivity(JobRunner.java:165) at com.dataiku.dip.dataflow.jobrunner.JobRunner.access$900(JobRunner.java:45) at com.dataiku.dip.dataflow.jobrunner.JobRunner$ActivityExecutorThread.run(JobRunner.java:360)