Hi,
I have a script I am running in PyCharm and runs through a couple 'while' loops. The goal is to run through a series of polygons in a feature class (the first while loop), then for each polygon, start another while loop that adds up line segments until a desired amount is achieved while adding some attributes to a list. The list probably doesn't hold more than 50 or so items for each loop through, and a file is written, then it is cleared to begin the next loop (therefore, I don't believe the list is getting 'too big' and slowing things down). I noticed the more polygons I run, the slower and slower it runs as it progresses. I started logging the run times for each loop and it verified this. For instance, the first time I ran 10 polygons and it took over 6 hours. Then I decided to run 20 polygons (thinking it would take approx 12 hours, and it took 58 hours to finish! I reduced to 5 polygons and it took 1.5 hours. When I looked at the log of run times for each loop it started out taking 0.1267 seconds for each secondary loop of the first polygon, and by the time it got to polygon 5 it was taking 0.7157 seconds for the secondary loop. With each progression getting slower and slower. I noticed when I was doing the big runs of 20 polygons, the last polygon took like 9 hours to complete (whereas the first one takes a couple minutes). I'm at a loss for what is happening. I need to eventually be able to run 500 polygons with 50-100 secondary while loops and sitting at my computer running 5 at a time seems not a good use of time. And just for some computer specs, I run windows with 6 cores with i7 processor, I watch my CPU, which hovers around 50% (I usually have multiple running at once, though I've also tested with just one and it's the same issue), My memory stays around 30% and I have plenty of disk space. Any ideas on what my issue is or how to speed things up?
import arcpy
from numpy import random
from time import ctime
from time import perf_counter
start_time = ctime()
start = perf_counter()
print("program started at: " + start_time)
# Set environments / workspace. Workspace should point to a gdb that has all the effort point feature classes
arcpy.env.workspace = r"C:\Users\bhodge\Projects\Monuments_SpeciesDiversity\SimulRuns\Scratch_1.gdb"
arcpy.env.overwriteOutput = True
# this is the file of the clipped lines
fc = r"C:\Users\bhodge\Projects\Monuments_SpeciesDiversity\SimulRuns\INPUTS_Ex1.gdb\ClippedLines_1to100_half"
# this is the output location for the final output species sightings table
# finalOutput = r"C:\Users\bhodge\Projects\Monuments_SpeciesDiversity\FinalOutputs.gdb"
finalOutput = r"C:\Users\bhodge\Projects\Monuments_SpeciesDiversity\SimulRuns\OUTPUTS_Ex1.gdb"
sr = arcpy.Describe(fc).spatialReference
sightingsFC = r"C:\Users\bhodge\Projects\Monuments_SpeciesDiversity\SimulRuns\INPUTS_Ex1.gdb\All_Sightings"
#### RENAME THIS SOMETHING NEW EACH RUN ######################
finalSightingsOutput = arcpy.CreateFeatureclass_management(finalOutput, "FinalSightings_P36_40_Run_50", "POINT", sightingsFC, spatial_reference=sr)
arcpy.AddField_management(finalSightingsOutput, "PolyID", "LONG")
arcpy.AddField_management(finalSightingsOutput, "Run", "LONG")
# iterate through to run on each polyID (change this number to reflect the number of polyID number).
i = 36
while i <= 40:
print("starting polygon " + str(i))
# Create list to keep unique speciesID's
# speciesList = []
selectedPoly = arcpy.SelectLayerByAttribute_management(fc, "NEW_SELECTION", "PolyID = " + str(i), None)
print("polyid " +str(i) + " selected")
sort_fields = [["FILEID", "ASCENDING"], ["EVENTNO_1", "ASCENDING"]]
fc_Sorted = arcpy.Sort_management(selectedPoly, "fc_sort", sort_fields)
# Create the search cursor
print("sorted")
# find number of records for that poly (count) and convert to int, this will be used to set a range for a random start point
count = arcpy.GetCount_management(fc_Sorted)
count_int = int(count[0])
# create a loop to run through the random trackline grab a number of times
run = 1
while run <= 50:
start_run = perf_counter()
speciesList = []
# create random number between 0 and count, call it rn (random number)
rn = random.randint(1, count_int)
# print("random number: " + str(rn))
# j is starting length and is added to up to desired amount
j = 0
with arcpy.da.SearchCursor(fc_Sorted, ['SightingID_1', 'SightingID_2', 'Length_KM']) as scur:
for rownum, row in enumerate(scur, start=1):
if rownum >= rn:
if j < 3358.649:
sighting1 = row[0]
sighting2 = row[1]
length = row[2]
# print("trackline to add is" + str(length))
j = j + length
# print("new trackline amount is" + str(j))
if j < 3358.649 and ((sighting1 is not None) and (sighting1 not in speciesList)):
speciesList.append(sighting1)
# print("added " + str(sighting1) + " to list")
else:
pass
if j < 3358.649 and ((sighting2 is not None) and (sighting2 not in speciesList)):
# add sighting id to table
speciesList.append(sighting2)
# print("added " + str(sighting2) + " to list")
else:
pass
else:
pass
else:
pass
scur.reset()
for row in scur:
if j < 3358.649:
sighting1 = row[0]
sighting2 = row[1]
length = row[2]
# print("trackline to add is" + str(length))
j = j + length
# print("new trackline amount is" + str(j))
if j < 3358.649 and ((sighting1 is not None) and (sighting1 not in speciesList)):
speciesList.append(sighting1)
# print("added " + str(sighting1) + " to list")
else:
pass
if j < 3358.649 and ((sighting2 is not None) and (sighting2 not in speciesList)):
# add sighting id to table
speciesList.append(sighting2)
# print("added " + str(sighting2) + " to list")
else:
pass
else:
pass
del scur
# print(speciesList)
# see how many are in list
LenList = len(speciesList)
# if 0 then skip
if LenList > 0:
if LenList == 1:
# remove the comma at end
origStrList = str(tuple(speciesList))
strList = origStrList.replace(',','')
else:
# just convert normally
strList = str(tuple(speciesList))
sightingsInList = arcpy.SelectLayerByAttribute_management(sightingsFC, "NEW_SELECTION", "SightingID IN " + strList)
# make sure sightings are actually in polygon
#### UPDATE this with path to final output Polygons ####
polygons = r"C:\Users\bhodge\Projects\Monuments_SpeciesDiversity\SimulRuns\INPUTS_Ex1.gdb\Polys_500_half"
selectedPolyPOLY = arcpy.SelectLayerByAttribute_management(polygons, "NEW_SELECTION", "PolyID = " + str(i), None)
finalSightingsSelection = arcpy.SelectLayerByLocation_management(sightingsInList, "INTERSECT", selectedPolyPOLY, "", "SUBSET_SELECTION")
# Add the selected features to a temporary fc and add the polyID
arcpy.MakeFeatureLayer_management(finalSightingsSelection, "temp_lyr")
arcpy.AddField_management("temp_lyr", "PolyID", "LONG")
arcpy.CalculateField_management("temp_lyr", "PolyID", str(i))
arcpy.AddField_management("temp_lyr", "Run", "LONG")
arcpy.CalculateField_management("temp_lyr", "Run", str(run))
# append those features to a new, final output containing all sighints for all polys
arcpy.Append_management("temp_lyr", finalSightingsOutput)
print("added poly: " + str(i) + " run: " + str(run) + " sightings to final output file")
end_run = perf_counter()
run_time_min = ((end_run - start_run) / 60)
run_time_hour = ((end_run - start_run) / 36000)
print("time to execute run Min: " + str(run_time_min) + " Hours: " + str(run_time_hour))
run = run + 1
i = i + 1
print("added all species id's")
print("Done")
end_time = ctime()
print("program ended at: " + end_time)
end = perf_counter()
execution_time = ((end - start) / 3600)
print("execution time: " + str(execution_time) + " hours")