Good Morning,
I have created a geoprocessing tool and deployed it to ArcGIS Server (Enterprise portal). The script does the following:
- asks the user to enter the URL for the rest endpoint of a feature service
- script takes that feature service, runs various processes on it in order to get the data into a pandas dataframe
- convert pandas dataframe to csv
The tricky part is getting that csv to the user.
Any help/guidance is greatly appreciated.
Here is the code for the script:
# -*- coding: utf-8 -*-
"""rest_to_csv.pyt -- ArcGIS Python Toolbox.

Given the URL of a Feature Service REST endpoint, downloads every layer
and table (paging by objectid when the record count exceeds the service's
maxRecordCount), loads the attributes into pandas DataFrames, and writes
one CSV per layer/table into a user-supplied folder.

NOTE: written for Python 2.7 (urllib2 / urllib.urlencode), which is what
ArcGIS Server 10.x geoprocessing services run under.
"""
import json
import os
import time
from datetime import datetime

import urllib
import urllib2

import numpy as np
import pandas as pd

import arcpy

# Date stamp appended to every generated CSV filename, e.g. '20240115'.
todays_date = datetime.date(datetime.now()).strftime("%Y%m%d")


def get_json(url, params):
    """POST url-encoded `params` to `url` and return the decoded JSON dict."""
    request = urllib2.Request(url, params)
    response = urllib2.urlopen(request)
    json_response = response.read()
    return json.loads(json_response.decode('utf-8'))


def get_json_with_query(url, query):
    """GET `url + query` (query string already encoded) and return the JSON dict."""
    request = urllib2.Request(url + query)
    response = urllib2.urlopen(request)
    json_response = response.read()
    return json.loads(json_response.decode('utf-8'))


def df_to_csv(df, filename):
    """Write `df` to `filename` as comma-separated UTF-8, without the index."""
    df.to_csv(filename, sep=',', index=False, encoding='utf-8')
    arcpy.AddMessage(' ')
    arcpy.AddMessage('csv creation complete')


def convert_floats_to_ints(df):
    """Cast every float64 column of `df` to int, in place, and return `df`.

    NOTE(review): currently unused by the tool; will raise if a float column
    contains NaN, since NaN cannot be cast to int.
    """
    float_cols = df.select_dtypes(['float64']).columns
    df[float_cols] = df.select_dtypes(['float64']).apply(lambda x: x.astype(int))
    return df


def get_date_column_names(fs):
    """Return the names of all esriFieldTypeDate fields in layer JSON `fs`."""
    return [item['name'] for item in fs['fields']
            if item['type'] == 'esriFieldTypeDate']


def convert_date(col_val):
    """Convert an Esri epoch-milliseconds value to 'YYYY-MM-DD HH:MM:SS' (UTC).

    Returns `col_val` unchanged when it is falsy (empty/0/None) or when the
    conversion fails, so unexpected values pass through rather than crash.
    """
    if not col_val:
        return col_val
    try:
        # Esri date fields are milliseconds since the Unix epoch.
        return time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(col_val / 1000.))
    except Exception:
        return col_val


def fs_to_dataframe(fs):
    """Build a DataFrame from the 'features' of an Esri JSON feature set.

    BUGFIX: the original `while count <= len(fs['features'])` wrapper ran the
    inner for-loop twice (after the first pass count == len, so the while
    condition was still true), appending every feature a second time at
    shifted keys; only drop_duplicates masked it. A single enumerate pass is
    the intended behavior.
    """
    data_dict = dict()
    for count, item in enumerate(fs['features']):
        data_dict[count] = item['attributes']
    df = pd.DataFrame.from_dict(data_dict, orient='index')
    df.fillna('', inplace=True)
    df.drop_duplicates(inplace=True)
    return df


def dataframe_to_csv(df, layer_filename):
    """Announce and write `df` to `layer_filename` via df_to_csv."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('creating ' + layer_filename)
    df_to_csv(df, layer_filename)


def layers_to_csv(cont, fs_url, csv_path):
    """Export every entry of the service JSON's 'layers' list to a CSV."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('converting layers to csv files')
    for layer in cont['layers']:
        url = fs_url + '/' + str(layer['id'])
        fs = get_fs(url)
        layer_to_csv(fs, url, csv_path)


def tables_to_csv(cont, fs_url, csv_path):
    """Export every entry of the service JSON's 'tables' list to a CSV."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('converting tables to csv files')
    for layer in cont['tables']:
        url = fs_url + '/' + str(layer['id'])
        fs = get_fs(url)
        layer_to_csv(fs, url, csv_path)


def get_fs(fs_url):
    """Fetch the pjson description of the service or layer at `fs_url`."""
    params = urllib.urlencode({'f': 'pjson'})
    return get_json(fs_url, params)


def get_max_record_count(feature_service):
    """Return the service's maxRecordCount (page size for downloads)."""
    return feature_service['maxRecordCount']


def get_total_record_count(feature_service_url):
    """Query the layer with returnCountOnly=true and return the total record count."""
    total_records_count_query = 'query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=&returnGeometry=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=true&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
    url = feature_service_url + '/'
    count = get_json_with_query(url, total_records_count_query)
    arcpy.AddMessage(' ')
    arcpy.AddMessage('There are ' + str(count['count']) + ' total records for this layer.')
    return count['count']


def layer_to_csv(layer, fs_url, csv_path):
    """Download one layer/table to '<name>_<date>.csv' inside `csv_path`.

    When the layer's total record count fits inside maxRecordCount the data
    is fetched in one request; otherwise it is paged with
    'objectid > objid1 and objectid <= objid2' windows of maxRecordCount
    records each, and the page DataFrames are concatenated before writing.
    Esri date columns (epoch ms, dtype int64) are converted to readable
    timestamps via convert_date.
    """
    fs_max = get_max_record_count(layer)
    fs_ttl = get_total_record_count(fs_url)
    layer_filename = layer['name'] + '_' + todays_date + '.csv'
    filename = os.path.join(csv_path, layer_filename)
    # Sliding objectid window for the paged branch.
    objid1 = 0
    objid2 = fs_max
    colNames = get_date_column_names(layer)
    dataframe = []  # collects one DataFrame per page
    url = fs_url + '/'
    if fs_ttl < fs_max:
        # Everything fits in a single query.
        default_query = 'query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
        arcpy.AddMessage(' ')
        arcpy.AddMessage(str(fs_ttl) + ' records to download...')
        arcpy.AddMessage(' ')
        fs = get_json_with_query(url, default_query)
        df = fs_to_dataframe(fs)
        # Date fields arrive as int64 epoch milliseconds; make them readable.
        for col in colNames:
            if df[col].dtype == np.int64:
                df[col] = df[col].apply(convert_date)
        dataframe_to_csv(df, filename)
        arcpy.AddMessage(' ')
        arcpy.AddMessage('layer to csv process complete')
        return
    else:
        # Page through the layer maxRecordCount records at a time.
        while fs_ttl > 0:
            arcpy.AddMessage(' ')
            arcpy.AddMessage(str(fs_ttl) + ' records to download...')
            arcpy.AddMessage(' ')
            objectid_query = 'query?where=objectid+>+' + str(objid1) + '+and+objectid+<%3D+' + str(
                objid2) + '&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
            fs = get_json_with_query(url, objectid_query)
            df = fs_to_dataframe(fs)
            for col in colNames:
                if df[col].dtype == np.int64:
                    df[col] = df[col].apply(convert_date)
            dataframe.append(df)
            # Advance the objectid window; unused `objectid`/`step` counters
            # from the original were dropped.
            objid1 += fs_max
            objid2 += fs_max
            fs_ttl -= fs_max
        df_merged = pd.concat(dataframe)
        df_merged.reset_index(drop=True, inplace=True)
        dataframe_to_csv(df_merged, filename)
        arcpy.AddMessage(' ')
        arcpy.AddMessage('layer to csv process complete')
        return


class Toolbox(object):
    def __init__(self):
        """Define the toolbox (the name of the toolbox is the name of the
        .pyt file)."""
        self.label = "rest_to_csv"
        self.alias = "resttocsv"
        # List of tool classes associated with this toolbox
        self.tools = [RESTtoCSV]


class RESTtoCSV(object):
    def __init__(self):
        """Define the tool (tool name is the name of the class)."""
        self.label = "RESTtoCSV"
        self.description = "By providing the URL to the REST endpoint of the Feature Service this script will create a csv file for all layers and tables that are within the Feature Service"
        self.canRunInBackground = False

    def getParameterInfo(self):
        """Define parameter definitions."""
        # Input Features parameter
        in_features = arcpy.Parameter(
            displayName="Feature Service URL",
            name="feature_service_url",
            datatype="String",
            parameterType="Required",
            direction="Input")
        # Output folder. NOTE(review): when published as a GP service this is
        # a server-side path; consider a derived output parameter so the
        # server can hand the CSV back through the jobs directory.
        save_location = arcpy.Parameter(
            displayName="Save CSV to...",
            name="csv_save_location",
            datatype="DEFolder",
            parameterType="Required",
            direction="Input")
        parameters = [in_features, save_location]
        return parameters

    def isLicensed(self):
        """Set whether tool is licensed to execute."""
        return True

    def updateParameters(self, parameters):
        """Modify the values and properties of parameters before internal
        validation is performed. This method is called whenever a parameter
        has been changed."""
        return

    def updateMessages(self, parameters):
        """Modify the messages created by internal validation for each tool
        parameter. This method is called after internal validation."""
        return

    def execute(self, parameters, messages):
        """The source code of the tool."""
        fs_url = parameters[0].valueAsText
        csv_path = parameters[1].valueAsText
        fs = get_fs(fs_url)
        # Service root JSON lists 'layers' and/or 'tables'; a URL that points
        # directly at a single layer returns JSON with an 'id' key instead.
        if 'layers' in fs:
            layers_to_csv(fs, fs_url, csv_path)
        if 'tables' in fs:
            tables_to_csv(fs, fs_url, csv_path)
        if 'id' in fs:
            layer_to_csv(fs, fs_url, csv_path)
        return
Oh, OK. So the output will never be provided directly to the user as a file they can choose where to save; instead it gets saved on the server in the data store, and the user can then grab it from there.
I agree with you, the ArcGIS Documentation on geoprocessing scripts and whatnot is way too sparse.
Thanks for taking the time to work through this with me. Most appreciated.
No problem. Just to clarify: if declared as an output parameter, the file goes to the configured ArcGIS Server jobs directory; if the path is hardcoded instead, you could write it into a data store location.
roger that.
thanks again.