Python Script Running Slower

GIS_geek · ‎02-07-2024

Hello,

I updated ArcGIS Pro from 2.9.10 to 3.0.6, thus updating my Python environment from 3.7.11 to 3.9.11. My organization runs a daily script that downloads attachments from ArcGIS Online. The script ran smooth and quick with Python 3.7 (10-15 minutes) but now with 3.9 it takes longer (90 minutes). Any idea why this might be happening? The script is very similar to the one on this guide.

import arcgis
from arcgis.gis import GIS
import os, re, csv
import shutil
import pandas as pd
import datetime
from pandas import ExcelWriter
import xlsxwriter
import arcpy


# Varialbles
agolURL = "https://irwd.maps.arcgis.com"
username = ""
password = ""
survey_item_id = "survey"
c_Directory = "O:/Water Resources/Conservation/Water Efficiency Tool/Spot_N_Stop/datasource"
directory = "Survey123"
save_path = "O:/Water Resources/Conservation/Water Efficiency Tool/Spot_N_Stop/datasource/Survey123"
keep_org_item = False
store_csv_w_attachments = False

# Login credential variable
# **Using personal AGOL credentials to log in**
gis = GIS(agolURL, username, password)
survey_item_id = gis.content.get(survey_item_id)

# Create Downloads directory
print('Creating directory.')
arcpy.CreateFolder_management(c_Directory, directory)
downloads_path = os.path.join(c_Directory, directory)
# Overwrite Downloads directory if it exist
if os.path.exists(downloads_path):
    shutil.rmtree(downloads_path)
    os.mkdir(downloads_path)
print("Directory '% s' created." % directory)


# Survey data exported to excel.
print("Exporting survey data to excel.")
rel_fs = survey_item_id.related_items('Survey2Service', 'forward')[0]
item_excel = rel_fs.export(title=survey_item_id.title, export_format='Excel')
item_excel.download(save_path=save_path)
if not bool (keep_org_item):
    item_excel.delete(force=True)


# Survey attachments
print("Exporting attachments.")
layer = rel_fs.layers + rel_fs.tables
for i in layer:
    if i.properties.hasAttachments == True:
        feature_layer_folder = os.path.join(save_path, '{}_attachments'.format\
        (re.sub(r'[^A-Za-z0-9]+', '', i.properties.name)))
        # Overwrite survey_attachments folder if it exists.
        if os.path.exists(feature_layer_folder):
            shutil.rmtree(feature_layer_folder)
        os.mkdir(feature_layer_folder)
        if bool(store_csv_w_attachments):
            path = os.path.join(feature_layer_folder, "{}_attachments.csv".format(i.properties.name))
        elif not bool(store_csv_w_attachments):
            path = os.path.join(save_path, "{}_attachments.csv".format(i.properties.name))
        csv_fields = ['ObjectID', 'AttachmentPath']
        with open(path, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(csv_fields)

            feature_object_ids = i.query(where="1=1", return_ids_only=True, order_by_fields='objectid ASC')
            for j in range(len(feature_object_ids['objectIds'])):
                current_oid = feature_object_ids['objectIds'][j]
                current_oid_attachments = i.attachments.get_list(oid=current_oid)

                if len(current_oid_attachments) > 0:
                    for k in range(len(current_oid_attachments)):
                        attachment_id = current_oid_attachments[k]['id']
                        current_attachment_path = i.attachments.download(oid=current_oid, attachment_id=attachment_id, save_path=feature_layer_folder)
                        csvwriter.writerow([current_oid, os.path.join('{}_attachments'.format(re.sub(r'[^A-Za-z0-9]+', '', i.properties.name)), os.path.split(current_attachment_path[0])[1])])


# Joining the two excel files
print("Joining excel tables.")
testSurvey = pd.read_excel('O:/Water Resources/Conservation/Water Efficiency Tool/Spot_N_Stop/datasource/Survey123/Log-a-Leak.xlsx')
surveyAttach = pd.read_csv('O:/Water Resources/Conservation/Water Efficiency Tool/Spot_N_Stop/datasource/Survey123/survey_attachments.csv')
fileJoin = testSurvey[['ObjectID', 'GlobalID', 'CreationDate', 'Creator', 'EditDate', 'Editor', 'Name (optional)', 'Phone number (optional)', 'Email (optional)', 'Leak type', 'Date and time when the issue occurred' ,'x', 'y']].merge(surveyAttach[['ObjectID', 'AttachmentPath']], on = 'ObjectID', how = 'left')


# Creating new file
print('Creating new file from joins.')
#surveyResults = 'C:/Downloads/Survey123/Results_Join.xlsx'
#fileJoin.to_excel(surveyResults, index = False)
surveyResults = pd.ExcelWriter('O:/Water Resources/Conservation/Water Efficiency Tool/Spot_N_Stop/datasource/Survey123/Results_Join.xlsx', engine='xlsxwriter')
fileJoin.to_excel(surveyResults, sheet_name='Survey123' ,index = False)
surveyResults.save()


# Adding hyperlink
print("Adding hyperlink.")
df = pd.read_excel(surveyResults)
#df['AttachmentPath'] = '=HYPERLINK("O:\\Water Resources\\Conservation\\Water Efficiency Tool\\Spot_N_Stop\\datasource\\Survey123\\' + df["AttachmentPath"] + '")'
df['AttachmentPath'] = 'O:\\Water Resources\\Conservation\\Water Efficiency Tool\\Spot_N_Stop\\datasource\\Survey123\\' + df["AttachmentPath"]
df.to_excel('O:/Water Resources/Conservation/Water Efficiency Tool/Spot_N_Stop/datasource/Survey123/SurveyData.xlsx', index=False)


# Deleting files
print('Deleting files.')
deleteSurvey = 'O:/Water Resources/Conservation/Water Efficiency Tool/Spot_N_Stop/datasource/Survey123/Log-a-Leak.xlsx'
deleteAttachments = 'O:/Water Resources/Conservation/Water Efficiency Tool/Spot_N_Stop/datasource/Survey123/survey_attachments.csv'
deleteJoinedResults = 'O:/Water Resources/Conservation/Water Efficiency Tool/Spot_N_Stop/datasource/Survey123/Results_Join.xlsx'
deleteFiles = (deleteSurvey, deleteAttachments, deleteJoinedResults)

for file in deleteFiles:
    if os.path.exists(file):
        os.remove(file)
        print('Removing ' + file.title())
    else:
        print(file.title() + ' does not exist.')


print('Process complete!')

BobBooth1 · ‎02-08-2024

It might be useful to know what section slowed down. You could add timing/print statements to the different sections to check how long they run.

GIS_geek · ‎02-08-2024

Hi @BobBooth1,

It slows down at the "Exporting attachments" section. I checked the folder the attachments get saved to and they do save but at a very slow rate. After about an hour and a half I canceled the process and used a machine using Python 3.7.11. I will do some research and look for the code to add timing print statements. Thank you!