Answered

How can I get the output of my geoprocessing tool (a CSV file) to the user?

Question asked by markjones_tps on Aug 13, 2020
Latest reply on Aug 13, 2020 by markjones_tps

Good Morning,

 

I have created a geoprocessing tool and deployed it to ArcGIS Server (Enterprise portal). The script does the following:

- asks the user to enter the URL for the rest endpoint of a feature service

- script takes that feature service, runs various processes on it in order to get the data into a pandas dataframe

- convert pandas dataframe to csv

 

The tricky part is getting that csv to the user.

 

Any help/guidance is greatly appreciated.

 

Here is the code for the script:

import json
import urllib2
import urllib
import os
from datetime import datetime
import time
import pandas as pd
import numpy as np
import arcpy

# Today's date as a YYYYMMDD stamp; used to suffix the output csv filenames.
todays_date = datetime.now().strftime("%Y%m%d")


def get_json(url, params):
    """POST *params* to *url* and return the decoded JSON response as a dict."""
    response = urllib2.urlopen(urllib2.Request(url, params))
    payload = response.read()
    return json.loads(payload.decode('utf-8'))


def get_json_with_query(url, query):
    """GET *url* with *query* appended and return the decoded JSON as a dict."""
    response = urllib2.urlopen(urllib2.Request(url + query))
    payload = response.read()
    return json.loads(payload.decode('utf-8'))


def df_to_csv(df, filename):
    """Write *df* to *filename* as a comma-separated, utf-8 csv without the index."""
    df.to_csv(filename, sep=',', index=False, encoding='utf-8')
    arcpy.AddMessage(' ')
    arcpy.AddMessage('csv creation complete')


def convert_floats_to_ints(df):
    """Cast every float64 column of *df* to int (truncating) and return *df*.

    The cast is applied in place on the passed DataFrame.
    """
    float_cols = df.select_dtypes(['float64']).columns
    df[float_cols] = df[float_cols].apply(lambda col: col.astype(int))
    return df


def get_date_column_names(fs):
    """Return the names of all esriFieldTypeDate fields in the layer description *fs*."""
    return [field['name']
            for field in fs['fields']
            if field['type'] == 'esriFieldTypeDate']


def convert_date(col_val):
    """Convert an esri epoch-milliseconds value to 'YYYY-MM-DD HH:MM:SS' (UTC).

    Falsy values (None, 0, '') and values that cannot be converted are
    returned unchanged.
    """
    if not col_val:
        return col_val
    try:
        return time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(col_val / 1000.))
    except Exception:
        # Non-numeric input: hand the value back untouched.
        return col_val


def fs_to_dataframe(fs):
    """Build a pandas DataFrame from the 'features' of an esri JSON feature set.

    Each feature's 'attributes' dict becomes one row. Missing values are
    replaced with '' and fully-duplicated rows are dropped.

    Bug fixed: the original wrapped the for loop in
    ``while count <= len(fs['features'])``, which (a) appended every feature
    TWICE — masked only because drop_duplicates later removed the copies —
    and (b) never terminated when the feature list was empty, since ``count``
    then stayed at 0 forever. A single pass with enumerate fixes both.
    """
    rows = {i: feature['attributes'] for i, feature in enumerate(fs['features'])}

    df = pd.DataFrame.from_dict(rows, orient='index')

    df.fillna('', inplace=True)
    df.drop_duplicates(inplace=True)

    return df


def dataframe_to_csv(df, layer_filename):
    """Announce the target filename, then write *df* out via df_to_csv."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('creating ' + layer_filename)
    df_to_csv(df, layer_filename)


def layers_to_csv(cont, fs_url, csv_path):
    """Export each layer listed in the service description *cont* to its own csv."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('converting layers to csv files')

    for lyr in cont['layers']:
        layer_url = fs_url + '/' + str(lyr['id'])
        layer_desc = get_fs(layer_url)
        layer_to_csv(layer_desc, layer_url, csv_path)


def tables_to_csv(cont, fs_url, csv_path):
    """Export each table listed in the service description *cont* to its own csv."""
    arcpy.AddMessage(' ')
    arcpy.AddMessage('converting tables to csv files')

    for tbl in cont['tables']:
        table_url = fs_url + '/' + str(tbl['id'])
        table_desc = get_fs(table_url)
        layer_to_csv(table_desc, table_url, csv_path)


def get_fs(fs_url):
    """Fetch the pjson description of the service or layer at *fs_url*."""
    return get_json(fs_url, urllib.urlencode({'f': 'pjson'}))


def get_max_record_count(feature_service):
    """Return the server-enforced per-query record limit for this layer."""
    return feature_service['maxRecordCount']


def get_total_record_count(feature_service_url):
    """Ask the layer for its total record count (returnCountOnly) and report it."""
    total_records_count_query = 'query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=&returnGeometry=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=true&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
    count = get_json_with_query(feature_service_url + '/', total_records_count_query)
    arcpy.AddMessage(' ')
    arcpy.AddMessage('There are ' + str(count['count']) + ' total records for this layer.')
    return count['count']

def layer_to_csv(layer, fs_url, csv_path):
    """Download every record of one layer/table and write it to a dated csv.

    layer    -- pjson description dict of the layer (name, fields, maxRecordCount).
    fs_url   -- REST url of the layer; '/query?...' is appended for requests.
    csv_path -- folder the csv file is written into.
    """
    fs_max = get_max_record_count(layer)    # server's per-query page-size limit
    fs_ttl = get_total_record_count(fs_url)  # total records reported by the layer
    # Output file: <layer name>_<YYYYMMDD>.csv inside csv_path.
    layer_filename = layer['name'] + '_' + todays_date + '.csv'
    filename = os.path.join(csv_path, layer_filename)
    objectid = fs_max  # NOTE(review): incremented in the loop below but never read
    objid1 = 0         # exclusive lower bound of the current objectid page window
    objid2 = fs_max    # inclusive upper bound of the current objectid page window
    colNames = get_date_column_names(layer)  # date fields to convert from epoch ms
    dataframe = []     # accumulates one DataFrame per downloaded page
    url = fs_url + '/'
    step = 1           # NOTE(review): incremented in the loop below but never read

    if fs_ttl < fs_max:
        # Everything fits in a single request — pull all records at once.
        # (When fs_ttl == fs_max the paging branch below handles it in one page.)
        default_query = 'query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'
        arcpy.AddMessage(' ')
        arcpy.AddMessage(str(fs_ttl) + ' records to download...')
        arcpy.AddMessage(' ')
        fs = get_json_with_query(url, default_query)
        df = fs_to_dataframe(fs)

        # Convert date columns still holding raw epoch-millisecond ints.
        # (Columns that picked up '' fills become object dtype and are skipped.)
        for col in colNames:
            if df[col].dtype == np.int64:
                df[col] = df[col].apply(convert_date)

        dataframe_to_csv(df, filename)

        arcpy.AddMessage(' ')

        arcpy.AddMessage('layer to csv process complete')

        return

    else:

        # Page through the layer in objectid windows of width fs_max.
        # NOTE(review): this assumes the id field is literally named 'objectid'
        # and that ids are reasonably dense starting near 1 — a layer with a
        # differently-named id field or sparse ids would miss records; confirm.
        while fs_ttl > 0:
            arcpy.AddMessage(' ')
            arcpy.AddMessage(str(fs_ttl) + ' records to download...')
            arcpy.AddMessage(' ')

            # where objectid > objid1 AND objectid <= objid2 ('%3D' is '=').
            objectid_query = 'query?where=objectid+>+' + str(objid1) + '+and+objectid+<%3D+' + str(
                objid2) + '&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&having=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=xyFootprint&resultOffset=&resultRecordCount=&returnTrueCurves=false&returnExceededLimitFeatures=false&quantizationParameters=&returnCentroid=false&sqlFormat=none&resultType=&f=pjson'

            fs = get_json_with_query(url, objectid_query)
            df = fs_to_dataframe(fs)

            # Same date conversion as the single-request branch above.
            for col in colNames:
                if df[col].dtype == np.int64:
                    df[col] = df[col].apply(convert_date)

            dataframe.append(df)

            # Slide the objectid window forward and count down the remainder.
            objectid += fs_max
            objid1 += fs_max
            objid2 += fs_max
            fs_ttl -= fs_max
            step += 1

        # Stitch all pages into one frame with a fresh 0..n-1 index.
        df_merged = pd.concat(dataframe)
        df_merged.reset_index(drop=True, inplace=True)

        dataframe_to_csv(df_merged, filename)

        arcpy.AddMessage(' ')

        arcpy.AddMessage('layer to csv process complete')

        return


class Toolbox(object):
    """Python toolbox definition; the toolbox name comes from the .pyt filename."""

    def __init__(self):
        """Define the toolbox label, alias and the tools it exposes."""
        self.label = "rest_to_csv"
        self.alias = "resttocsv"
        # Tool classes published by this toolbox.
        self.tools = [RESTtoCSV]


class RESTtoCSV(object):
    """Geoprocessing tool: export every layer and table of a feature service to csv."""

    def __init__(self):
        """Define the tool (tool name is the name of the class)."""
        self.label = "RESTtoCSV"
        self.description = "By providing the URL to the REST endpoint of the Feature Service this script will create a csv file for all layers and tables that are within the Feature Service"
        self.canRunInBackground = False

    def getParameterInfo(self):
        """Return the tool parameters: the service url and the output folder."""
        url_param = arcpy.Parameter(
            displayName="Feature Service URL",
            name="feature_service_url",
            datatype="String",
            parameterType="Required",
            direction="Input")

        folder_param = arcpy.Parameter(
            displayName="Save CSV to...",
            name="csv_save_location",
            datatype="DEFolder",
            parameterType="Required",
            direction="Input")

        return [url_param, folder_param]

    def isLicensed(self):
        """The tool is always licensed to execute."""
        return True

    def updateParameters(self, parameters):
        """No dynamic parameter behaviour is needed."""
        return

    def updateMessages(self, parameters):
        """No custom validation messages are needed."""
        return

    def execute(self, parameters, messages):
        """Fetch the service description and export its layers and tables."""
        fs_url = parameters[0].valueAsText
        csv_path = parameters[1].valueAsText

        fs = get_fs(fs_url)

        # A service root lists 'layers' and/or 'tables'; a url pointing at a
        # single layer presumably carries an 'id' key instead — TODO confirm.
        if 'layers' in fs:
            layers_to_csv(fs, fs_url, csv_path)
        if 'tables' in fs:
            tables_to_csv(fs, fs_url, csv_path)
        if 'id' in fs:
            layer_to_csv(fs, fs_url, csv_path)

        return

Outcomes