Why does this code take longer to execute over time?

GerryGabrisch · ‎01-07-2022

This python script selects groups of data on specific days, processes those data, and selects another group of data to process. The code executes quickly when first executed. As the code chugs along it gets slower and slower. There are no nested cursors, no nested loops - just selections and data processing. Why is code execution getting slower over time?

Any tips or tricks for preventing this type of slowdown is appreciated.

import sys
import traceback
import os
import shutil
import arcpy
import shapefile


try:
    arcpy.env.overwriteOutput = True
    #The input shapefile...
    in_fc = r"C:\GIS\telemetrytags\projected_data\telemetryUTM.shp"
    
    #An empty directory to store the geometric average outputs
    out_dir = r'C:\GIS\telemetrytags\individual_fish_individual_days'
    
    #For the first cursor only these fields are needed...
    fields = ['TagID', 'Date'] 
    
    #A list to hold all of the tag ids...
    tag_ids = []
    #A list to hold the survey dates
    ping_dates = []
        
    #Iterate the shapefile and get a list of all unique dates and a list of all unique tags.
    with arcpy.da.SearchCursor(in_fc, fields) as cursor:
        for row in cursor:
            if row[0] not in tag_ids:
                tag_ids.append(row[0])
            if row[1] not in ping_dates:  
                ping_dates.append(row[1])
    #Do you need to delete row now with python 3?  Doing so did not speed up the code which lags over time...    
    del row
   
    #This is the field list for populating the rest of the attribute table of the mean center files...
    fields = ['Origin','Stock','Sex','FL','Destinatio', 'Mortality_']

    #Build the query to select individual tags on individual days...
    for tag in tag_ids:
        for ping in ping_dates:
            #Build the query...
            query =  '\"TagID\" = ' + str(int(tag)) + ' And \"Date\" = timestamp ' +'\'' + str(ping).split(' ')[0] +'\''
            print(query)
            
            #Select all those tags on that particular day...
            this_fish, ping_count = arcpy.management.SelectLayerByAttribute(in_fc, selection_type="NEW_SELECTION", where_clause=query)
            print('ping return count = ', ping_count)
            
            #If a fish was not found on a particular day then ignore that selection...
            if ping_count == 0:
                pass
            
            #If there is only one ping then just rename the original file and move it to the out_dir.  
            if int(ping_count) == 1:
                out_fc = out_dir + "\\"  + str(int(tag)) + '_' + str(ping).split(' ')[0] + '.shp'
                arcpy.CopyFeatures_management(this_fish, out_fc)
                #Clean up that table for the final merging of shapefiles...
                arcpy.DeleteField_management(out_fc, ['MEAS','Pos_ft','CumPos_ft','Pos_RM','CumPos_RM','Time','Channel','Antenna','Power','Latitude','Longitude','NEAR_FID','NEAR_DIST','NEAR_X','NEAR_Y','Data_Inclu'])          
            
            #More that one ping on a given day needs to be averaged...
            if int(ping_count) > 1:
                #Build the output file and path for the averaged data...
                out_fc = out_dir + "\\"  + str(int(tag)) + '_' + str(ping).split(' ')[0] + '.shp'
                date = str(ping).split(' ')[0]
                #Create the mean center file based on the geospatial locations...
                arcpy.stats.MeanCenter(this_fish, out_fc, Weight_Field='Power')
                
                #The mean center operation output does not preserve attributes.  Create them now...
                arcpy.AddField_management(out_fc, 'TagID', "SHORT")
                arcpy.AddField_management(out_fc, 'Date_', "TEXT", field_length='12')
                arcpy.AddField_management(out_fc, 'Origin', "TEXT", field_length='3')
                arcpy.AddField_management(out_fc, 'Stock', "TEXT", field_length='20')
                arcpy.AddField_management(out_fc, 'Sex', "TEXT", field_length='1')
                arcpy.AddField_management(out_fc, 'FL', "SHORT")
                arcpy.AddField_management(out_fc, 'Destinatio', "TEXT", field_length='10')
                arcpy.AddField_management(out_fc, 'Mortality_', "TEXT", field_length='20')
                
                #These values are all the same between records, get the values to populate the out_fc...
                with arcpy.da.SearchCursor(this_fish, fields) as cursor:
                    for row in cursor:
                        Origin = row[0]
                        Stock = row[1]
                        Sex = row[2]
                        FL = int(row[3])
                        Destinatio = row[4]
                        Mortality_ = row[5] 
                    del row
               
                arcpy.CalculateField_management(out_fc, 'TagID', int(tag),  "PYTHON3")
                arcpy.CalculateField_management(out_fc, 'Date_', "'"+ date +"'",  "PYTHON3")
                arcpy.CalculateField_management(out_fc, 'Origin', "'"+Origin+"'",  "PYTHON3")
                arcpy.CalculateField_management(out_fc, 'Stock', "'"+Stock+"'",  "PYTHON3")
                arcpy.CalculateField_management(out_fc, 'Sex', "'"+Sex+"'",  "PYTHON3")
                arcpy.CalculateField_management(out_fc, 'FL', FL,  "PYTHON3")
                arcpy.CalculateField_management(out_fc, 'Destinatio', "'"+Destinatio+"'",  "PYTHON3")
                arcpy.CalculateField_management(out_fc, 'Mortality_', "'"+Mortality_+"'",  "PYTHON3")

except Exception:
    tb = sys.exc_info()[2]
    tbinfo = traceback.format_tb(tb)[0]
    pymsg = "PYTHON ERRORS:\nTraceback info:\n" + tbinfo + "\nError Info:\n" + str(sys.exc_info()[1])
    msgs = "ArcPy ERRORS:\n" + arcpy.GetMessages(2) + "\n"
    arcpy.AddError(pymsg)
    arcpy.AddError(msgs)

Luke_Pinner · ‎01-07-2022

@GerryGabrisch wrote:
... As far as code execution goes I am running this as a stand-alone Python script in WingIDE. ArcGIS Pro is not actively running.

Yes, you already stated that. Doesn't matter, geoprocessing operations are logged to feature class metadata and an external file in scripts, turn them both off.

arcpy.SetLogMetadata(False)
arcpy.SetLogHistory(False)

View solution in original post

Luke_Pinner · ‎01-07-2022

Some ideas in this thread. In particular,

try using MakeFeatureLayer and pass that to Select Layer By Attribute instead of a feature class (or use MakeFeatureLayer with a where clause and then use GetCount)
turn off geoprocessing logging and history

GerryGabrisch · ‎01-07-2022

Thanks, I am running the code in a Python IDE and not in a Python window in ArcGIS Pro. I am assuming there is no geoprocessing logging happening. Is this correct or is geoprocessing logging a basic function in arcpy?

GerryGabrisch · ‎01-07-2022

Should I delete the feature layer for each selection after it has executed the code for that selection or just keep reassigning the variable each time the code makes a new feature layer?

Luke_Pinner · ‎01-07-2022

Geoprocessing operations are logged to feature class metadata and an external file in scripts (%AppData%\Roaming\Esri\ArcGISPro\ArcToolbox\History), turn them both off.

GerryGabrisch · ‎01-07-2022

Turning off geoprocessing logging is always a good idea to keep your CITRIX profile small. As far as code execution goes I am running this as a stand-alone Python script in WingIDE. ArcGIS Pro is not actively running.

Luke_Pinner · ‎01-07-2022

@GerryGabrisch wrote:
... As far as code execution goes I am running this as a stand-alone Python script in WingIDE. ArcGIS Pro is not actively running.

Yes, you already stated that. Doesn't matter, geoprocessing operations are logged to feature class metadata and an external file in scripts, turn them both off.

arcpy.SetLogMetadata(False)
arcpy.SetLogHistory(False)

GerryGabrisch · ‎01-08-2022

That's what I was looking for, thanks.

GerryGabrisch · ‎01-08-2022

Yes, adding the make feature layer and turning off the logging vastly improved code preformance.

Anonymous User · ‎01-10-2022

I know this is already solved, but thought I'd offer another way of achieving this that may (or may not) speed things up some more. This uses a template Featureclass instead of adding the fields individually and uses cursors to insert the data instead of calculating each field individually. I'm not sure if the stats. will write to the memory workspace so this process may be DOA.

# Create dictionary with values that are created within the code (not fc)
master_dict = {'TagID': tag,
              'Date_': date}

# Create the output FC from a template FC
out_fc = arcpy.CreateFeatureclass_management('out_path', 'out_name', 'POINT', 'Your template FC')

# output the stats to a memory fc
meanCenter = arcpy.stats.MeanCenter(this_fish, 'memory\\tmpfc', Weight_Field='Power')

# list of fields from the meanCenter. You can filter out unwanted fields here 
mcFields = [f.name for f in arcpy.ListFields(meanCenter)]

# Create dictionary of values from the meanCenter featureclass
with arcpy.da.SearchCursor(meanCenter, mcFields) as sCur:
    for row in sCur:
        temp_dict = {k: v for k, v in zip(sCur.fields, row)}

# Merge dictionary from the featureclass to the main dictionary
master_dict.update(temp_dict)

# Prints a list of field name and value for QC
# row_values = [(k,v) for k,v in master_dict.items()]

# List of field names from the keys for QC
flds = [k for k in master_dict.keys()]

# use insert cursor to insert the values from the dictionary into the out_fc
with arcpy.da.InsertCursor(out_fc, flds) as iCur:
    iCur.insertRow([v for k, v in master_dict.items()])

# if you wanted to use an unpdate cursor:
# use update cursor to insert the values from the dictionary into the out_fc
# with arcpy.da.UpdateCursor(fc, flds) as uCur:
#     for row in uCur:
#         for fld in flds:
#             # uses the fld/key to get index and get value from the dictionary.
#             row[flds.index(fld)] = master_dict.get(fld)
#         uCur.updateRow(row)