Good Morning,
I have created a geoprocessing tool and deployed it to ArcGIS Server (Enterprise portal). The script does the following:
- asks the user to enter the URL for the rest endpoint of a feature service
- script takes that feature service, runs various processes on it in order to get the data into a pandas dataframe
- convert pandas dataframe to csv
The tricky part is getting that csv to the user.
Any help/guidance is greatly appreciated.
Here is the code for the script:
# -*- coding: utf-8 -*-
"""rest_to_csv.pyt -- ArcGIS Python Toolbox.

Given the URL of a Feature Service REST endpoint, downloads every layer
and table (paging by objectid when the record count exceeds the service's
maxRecordCount), loads the attributes into pandas DataFrames, and writes
one CSV per layer/table into a user-supplied folder.

NOTE: written for Python 2.7 (urllib2 / urllib.urlencode), which is what
ArcGIS Server 10.x geoprocessing services run under.
"""
import json
import os
import time
from datetime import datetime

import urllib
import urllib2

import numpy as np
import pandas as pd

import arcpy

# Date stamp appended to every generated CSV filename, e.g. '20240115'.
todays_date = datetime.date(datetime.now()).strftime("%Y%m%d")


def get_json(url, params):
    """POST url-encoded `params` to `url` and return the decoded JSON dict."""
    request = urllib2.Request(url, params)
    response = urllib2.urlopen(request)
    json_response = response.read()
    return json.loads(json_response.decode('utf-8'))


def get_json_with_query(url, query):
    """GET `url + query` (query string already encoded) and return the JSON dict."""
    request = urllib2.Request(url + query)
    response = urllib2.urlopen(request)
    json_response = response.read()
    return json.loads(json_response.decode('utf-8'))


def df_to_csv(df, filename):
    """Write `df` to `filename` as comma-separated UTF-8, without the index."""
    df.to_csv(filename, sep=',', index=False, encoding='utf-8')
    arcpy.AddMessage(' ')
    arcpy.AddMessage('csv creation complete')


def convert_floats_to_ints(df):
    """Cast every float64 column of `df` to int, in place, and return `df`.

    NOTE(review): currently unused by the tool; will raise if a float column
    contains NaN, since NaN cannot be cast to int.
    """
    float_cols = df.select_dtypes(['float64']).columns
    df[float_cols] = df.select_dtypes(['float64']).apply(lambda x: x.astype(int))
    return df


def get_date_column_names(fs):
    """Return the names of all esriFieldTypeDate fields in layer JSON `fs`."""
    return [item['name'] for item in fs['fields']
            if item['type'] == 'esriFieldTypeDate']


def convert_date(col_val):
    """Convert an Esri epoch-milliseconds value to 'YYYY-MM-DD HH:MM:SS' (UTC).

    Returns `col_val` unchanged when it is falsy (empty/0/None) or when the
    conversion fails, so unexpected values pass through rather than crash.
    """
    if not col_val:
        return col_val
    try:
        # Esri date fields are milliseconds since the Unix epoch.
        return time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(col_val / 1000.))
    except Exception:
        return col_val


def fs_to_dataframe(fs):
    """Build a DataFrame from the 'features' of an Esri JSON feature set.

    BUGFIX: the original `while count <= len(fs['features'])` wrapper ran the
    inner for-loop twice (after the first pass count == len, so the while
    condition was still true), appending every feature a second time at
    shifted keys; only drop_duplicates masked it. A single enumerate pass is
    the intended behavior.
    """
    data_dict = dict()
    for count, item in enumerate(fs['features']):
        data_dict[count] = item['attributes']
    df = pd.DataFrame.from_dict(data_dict, orient='index')
    df.fillna('', inplace=True)
    df.drop_duplicates(inplace=True)
    return df


def dataframe_to_csv(df, layer_filename):
    """Announce and write `df` to `layer_filename` via df_to_csv."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('creating ' + layer_filename)
    df_to_csv(df, layer_filename)


def layers_to_csv(cont, fs_url, csv_path):
    """Export every entry of the service JSON's 'layers' list to a CSV."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('converting layers to csv files')
    for layer in cont['layers']:
        url = fs_url + '/' + str(layer['id'])
        fs = get_fs(url)
        layer_to_csv(fs, url, csv_path)


def tables_to_csv(cont, fs_url, csv_path):
    """Export every entry of the service JSON's 'tables' list to a CSV."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('converting tables to csv files')
    for layer in cont['tables']:
        url = fs_url + '/' + str(layer['id'])
        fs = get_fs(url)
        layer_to_csv(fs, url, csv_path)


def get_fs(fs_url):
    """Fetch the pjson description of the service or layer at `fs_url`."""
    params = urllib.urlencode({'f': 'pjson'})
    return get_json(fs_url, params)


def get_max_record_count(feature_service):
    """Return the service's maxRecordCount (page size for downloads)."""
    return feature_service['maxRecordCount']


def get_total_record_count(feature_service_url):
    """Query the layer with returnCountOnly=true and return the total record count."""
    total_records_count_query = 'query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=&returnGeometry=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=true&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
    url = feature_service_url + '/'
    count = get_json_with_query(url, total_records_count_query)
    arcpy.AddMessage(' ')
    arcpy.AddMessage('There are ' + str(count['count']) + ' total records for this layer.')
    return count['count']


def layer_to_csv(layer, fs_url, csv_path):
    """Download one layer/table to '<name>_<date>.csv' inside `csv_path`.

    When the layer's total record count fits inside maxRecordCount the data
    is fetched in one request; otherwise it is paged with
    'objectid > objid1 and objectid <= objid2' windows of maxRecordCount
    records each, and the page DataFrames are concatenated before writing.
    Esri date columns (epoch ms, dtype int64) are converted to readable
    timestamps via convert_date.
    """
    fs_max = get_max_record_count(layer)
    fs_ttl = get_total_record_count(fs_url)
    layer_filename = layer['name'] + '_' + todays_date + '.csv'
    filename = os.path.join(csv_path, layer_filename)
    # Sliding objectid window for the paged branch.
    objid1 = 0
    objid2 = fs_max
    colNames = get_date_column_names(layer)
    dataframe = []  # collects one DataFrame per page
    url = fs_url + '/'
    if fs_ttl < fs_max:
        # Everything fits in a single query.
        default_query = 'query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
        arcpy.AddMessage(' ')
        arcpy.AddMessage(str(fs_ttl) + ' records to download...')
        arcpy.AddMessage(' ')
        fs = get_json_with_query(url, default_query)
        df = fs_to_dataframe(fs)
        # Date fields arrive as int64 epoch milliseconds; make them readable.
        for col in colNames:
            if df[col].dtype == np.int64:
                df[col] = df[col].apply(convert_date)
        dataframe_to_csv(df, filename)
        arcpy.AddMessage(' ')
        arcpy.AddMessage('layer to csv process complete')
        return
    else:
        # Page through the layer maxRecordCount records at a time.
        while fs_ttl > 0:
            arcpy.AddMessage(' ')
            arcpy.AddMessage(str(fs_ttl) + ' records to download...')
            arcpy.AddMessage(' ')
            objectid_query = 'query?where=objectid+>+' + str(objid1) + '+and+objectid+<%3D+' + str(
                objid2) + '&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
            fs = get_json_with_query(url, objectid_query)
            df = fs_to_dataframe(fs)
            for col in colNames:
                if df[col].dtype == np.int64:
                    df[col] = df[col].apply(convert_date)
            dataframe.append(df)
            # Advance the objectid window; unused `objectid`/`step` counters
            # from the original were dropped.
            objid1 += fs_max
            objid2 += fs_max
            fs_ttl -= fs_max
        df_merged = pd.concat(dataframe)
        df_merged.reset_index(drop=True, inplace=True)
        dataframe_to_csv(df_merged, filename)
        arcpy.AddMessage(' ')
        arcpy.AddMessage('layer to csv process complete')
        return


class Toolbox(object):
    def __init__(self):
        """Define the toolbox (the name of the toolbox is the name of the
        .pyt file)."""
        self.label = "rest_to_csv"
        self.alias = "resttocsv"
        # List of tool classes associated with this toolbox
        self.tools = [RESTtoCSV]


class RESTtoCSV(object):
    def __init__(self):
        """Define the tool (tool name is the name of the class)."""
        self.label = "RESTtoCSV"
        self.description = "By providing the URL to the REST endpoint of the Feature Service this script will create a csv file for all layers and tables that are within the Feature Service"
        self.canRunInBackground = False

    def getParameterInfo(self):
        """Define parameter definitions."""
        # Input Features parameter
        in_features = arcpy.Parameter(
            displayName="Feature Service URL",
            name="feature_service_url",
            datatype="String",
            parameterType="Required",
            direction="Input")
        # Output folder. NOTE(review): when published as a GP service this is
        # a server-side path; consider a derived output parameter so the
        # server can hand the CSV back through the jobs directory.
        save_location = arcpy.Parameter(
            displayName="Save CSV to...",
            name="csv_save_location",
            datatype="DEFolder",
            parameterType="Required",
            direction="Input")
        parameters = [in_features, save_location]
        return parameters

    def isLicensed(self):
        """Set whether tool is licensed to execute."""
        return True

    def updateParameters(self, parameters):
        """Modify the values and properties of parameters before internal
        validation is performed. This method is called whenever a parameter
        has been changed."""
        return

    def updateMessages(self, parameters):
        """Modify the messages created by internal validation for each tool
        parameter. This method is called after internal validation."""
        return

    def execute(self, parameters, messages):
        """The source code of the tool."""
        fs_url = parameters[0].valueAsText
        csv_path = parameters[1].valueAsText
        fs = get_fs(fs_url)
        # Service root JSON lists 'layers' and/or 'tables'; a URL that points
        # directly at a single layer returns JSON with an 'id' key instead.
        if 'layers' in fs:
            layers_to_csv(fs, fs_url, csv_path)
        if 'tables' in fs:
            tables_to_csv(fs, fs_url, csv_path)
        if 'id' in fs:
            layer_to_csv(fs, fs_url, csv_path)
        return
Oh, OK. So the output will never be provided directly to the user as a file they can choose where to save; instead it gets saved on the server in the data store, and the user can then grab it from there.
I agree with you, the ArcGIS Documentation on geoprocessing scripts and whatnot is way too sparse.
Thanks for taking the time to work through this with me. Most appreciated.
No problem. Just to clarify: if declared as an output parameter, the file goes to the configured ArcGIS Server jobs directory; if the path is hardcoded instead, you could write it into a data store location.
roger that.
thanks again.