I have two questions about my code below. For the insertcursor portion it took 45 minutes to insert 542 rows into a table (not a featureclass table). That seems way too long for a small numbers of rows. Then a "StopIteration: iteration not started" error was raised during the first update in updatecursor portion. Advice on improving performance is requested and why was the error raised if I have 542 rows to update in the featureclass?
The overall purpose of this is to take portions of a valve inspection from sourceFC and it split data into targetTable and targetFC
import arcpy, os, time import Utils import MM logFileName = "C:/GISData/Utilities/Logs/ValveInspections.log" mmapp = MM.InitializeLicense() previousAUMode = MM.DisableAUs() workspace = 'Database Connections/OSAuth.sde' sourceFC = 'Database Connections/OSAuth.sde/Inspection/giValveInspection' targetTable = 'Database Connections/OSAuth.sde/GVALVEINSPECTION' targetFC = 'Database Connections/OSAuth.sde/Gas/gValve' fieldsToInsert = 'FACILITYID','REPAIRSMADE','CONDITIONFOUND','CONDITIONLEFT','MAINTENANCENOTES','TIMEWORKED','LUBRICATEDVALVE','WORKEDBY','INSPECTOR','INSPECTIOID','INSPECTIONDATE','INSPECTIONSTATUS','PAINTED','VACUUMED','TAGGED','WORKEDBY','FLAGGED','MARKER','INSECTICIDE','CGIBEG','CGIEND','MAPCORRECT','EXERCISED','GASVALVEOID' fieldsToUpdate = 'FACILITYID','VALVEBOX','INSPECTIONDATE','INSPECTIONSTATUS','TAGGED','FLAGGED','MARKER' logFile = Utils.Log("<PROCESS name='ValveInspections.py'><TIMESTAMP position='start'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP>\n", None, logFileName) print("PROCESS ValveInspections.py start " + time.strftime("%Y%m%d.%H%M%S")) try: Utils.Log("<COMMENT>Starting edit operation</COMMENT>\n", logFile) print("Starting edit operation") edit = arcpy.da.Editor(workspace) edit.startEditing(True,True) edit.startOperation() except Exception as err: raise err print err.message Utils.Log("<ERROR>Failed to start edit operation</ERROR>\n", logFile) try: Utils.Log("<COMMENT>Searching sourceFC to insert rows into targetTable<TIMESTAMP position='start'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Searching sourceFC to insert rows into targetTable start " + str(time.strftime("%Y%m%d.%H%M%S"))) ic = arcpy.da.InsertCursor(targetTable, fieldsToInsert) with arcpy.da.SearchCursor(sourceFC, fieldsToInsert, sql_clause=(None, 'ORDER BY INSPECTIONDATE')) as sc: #with arcpy.da.InsertCursor(targetTable, fieldsToInsert) as ic: for scRow in sc: arcpy.AddMessage("Inserting inspection for valve number " + (scRow[0]) + " with inspection date " + str((scRow[10]))) print("Inserting inspection for valve number " + (scRow[0]) + " with inspection date " + str((scRow[10]))) Utils.Log("<ACTION>Inserting inspection for " + (scRow[0]) + " with inspection date " + str((scRow[10])) + "</ACTION>\n", logFile) ic.insertRow((scRow)) Utils.Log("<COMMENT>Stopped searching sourceFC to insert rows into targetTable<TIMESTAMP position='end'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Stopped searching sourceFC to insert rows into targetTable end " + str(time.strftime("%Y%m%d.%H%M%S"))) Utils.Log("<COMMENT>Searching sourceFC to update rows in targetFC<TIMESTAMP position='start'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Searching sourceFC to update rows in targetFC start " + str(time.strftime("%Y%m%d.%H%M%S"))) with arcpy.da.SearchCursor(sourceFC, fieldsToUpdate, sql_clause=(None, 'ORDER BY INSPECTIONDATE')) as sc2: for sc2Row in sc2: valveNumber = (sc2Row[0]) #valveBox = (sc2Row[1]) uc = arcpy.da.UpdateCursor(targetFC, fieldsToUpdate, where_clause="FACILITYID = '" + (sc2Row[0]) + "'") arcpy.AddMessage("Updating gGasValve " + (sc2Row[0])) #+ " ValveBox to " + (sc2Row[1])) print("Updating gGasValve " + (sc2Row[0])) #+ " ValveBox to " + (sc2Row[1])) Utils.Log("<ACTION>Updating gGasValve " + (sc2Row[0]) + "</ACTION>\n", logFile) try: uc.updateRow((sc2Row)) except Exception as err: raise err print err.message Utils.Log("<ERROR>Failed updating gGasValve FacilityID " + valveNumber + "</ERROR>\n", logFile) Utils.Log("<COMMENT>Stopped searching sourceFC to update rows in targetFC<TIMESTAMP position='end'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Stopped searching sourceFC to update rows in targetFC end " + str(time.strftime("%Y%m%d.%H%M%S"))) except Exception as err: raise err print err.message finally: edit.stopOperation() edit.stopEditing(True) mmapp.Shutdown del mmapp Utils.Log("<PROCESS name='ValveInspections.py'><TIMESTAMP position='end'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP>\n", logFile) print("ValveInspections.py end '>" + str(time.strftime("%Y%m%d.%H%M%S")))
All source data should be read into a dictionary and then the insert cursor and update cursor should be populated from that. Never do embedded cursors that iterate through where clauses or use cursors that include the OrderBy clause. Totally a waste of time. Add the sort field as the first part of a tuple dictionary key and sort the dictionary keys. Performance will be incredibly fast. The untested code below should work and be done in under 1 minute for the cursor portions.
The update cursor error occurred because you were trying to update the search cursor row, not the update cursor row. InsertRow and updateRow do not have the same syntax.
See my blog about Turbo Charging Data Manipulation with Python Cursors and Dictionaries.
import arcpy, os, time import Utils import MM logFileName = "C:/GISData/Utilities/Logs/ValveInspections.log" mmapp = MM.InitializeLicense() previousAUMode = MM.DisableAUs() workspace = 'Database Connections/OSAuth.sde' sourceFC = 'Database Connections/OSAuth.sde/Inspection/giValveInspection' targetTable = 'Database Connections/OSAuth.sde/GVALVEINSPECTION' targetFC = 'Database Connections/OSAuth.sde/Gas/gValve' fieldsToInsert = ['FACILITYID','REPAIRSMADE','CONDITIONFOUND','CONDITIONLEFT','MAINTENANCENOTES','TIMEWORKED','LUBRICATEDVALVE','WORKEDBY','INSPECTOR','INSPECTIOID','INSPECTIONDATE','INSPECTIONSTATUS','PAINTED','VACUUMED','TAGGED','WORKEDBY','FLAGGED','MARKER','INSECTICIDE','CGIBEG','CGIEND','MAPCORRECT','EXERCISED','GASVALVEOID'] fieldsToUpdate = ['FACILITYID','VALVEBOX','INSPECTIONDATE','INSPECTIONSTATUS','TAGGED','FLAGGED','MARKER'] logFile = Utils.Log("<PROCESS name='ValveInspections.py'><TIMESTAMP position='start'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP>\n", None, logFileName) print("PROCESS ValveInspections.py start " + time.strftime("%Y%m%d.%H%M%S")) try: Utils.Log("<COMMENT>Starting edit operation</COMMENT>\n", logFile) print("Starting edit operation") edit = arcpy.da.Editor(workspace) edit.startEditing(False,True) edit.startOperation() except Exception as err: raise err print err.message Utils.Log("<ERROR>Failed to start edit operation</ERROR>\n", logFile) try: Utils.Log("<COMMENT>Create dictionary from sourceFC to insert rows into targetTable start<TIMESTAMP position='start'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Create dictionary from sourceFC to insert rows into targetTable start " + str(time.strftime("%Y%m%d.%H%M%S"))) valueDict = {(r[10],r[0]):(r[1:]) for r in arcpy.da.SearchCursor(sourceFC, fieldsToInsert)} ic = arcpy.da.InsertCursor(targetTable, fieldsToInsert) for keyValue in sorted(valueDict.keys()): scRow = list(valueDict[keyValue]) scRow.insert(0,keyValue[1]) arcpy.AddMessage("Inserting inspection for valve number " + (scRow[0]) + " with inspection date " + str((scRow[10]))) print("Inserting inspection for valve number " + (scRow[0]) + " with inspection date " + str((scRow[10]))) Utils.Log("<ACTION>Inserting inspection for " + (scRow[0]) + " with inspection date " + str((scRow[10])) + "</ACTION>\n", logFile) ic.insertRow(scRow) del ic edit.stopOperation() Utils.Log("<COMMENT>Stopped searching sourceFC to insert rows into targetTable<TIMESTAMP position='end'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Stopped searching sourceFC to insert rows into targetTable end " + str(time.strftime("%Y%m%d.%H%M%S"))) except Exception as err: raise err print err.message Utils.Log("<ERROR>Failed Insert cursor operation<ERROR>\n", logFile) try: edit.startOperation() Utils.Log("<COMMENT>Create dictionary from sourceFC to update rows in targetFC<TIMESTAMP position='start'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Create dictionary from sourceFC to update rows in targetFC start " + str(time.strftime("%Y%m%d.%H%M%S"))) valueDict = {r[0]:(r[1:]) for r in arcpy.da.SearchCursor(sourceFC, fieldsToUpdate)} uc = arcpy.da.UpdateCursor(targetFC, fieldsToUpdate) for updateRow in uc: keyValue = updateRow[0] if keyValue in valueDict: for n in range (1,len(fieldsToUpdate)): updateRow= valueDict[keyValue][n-1] arcpy.AddMessage("Updating gGasValve " + (keyValue) #+ " ValveBox to " + (sc2Row[1])) print("Updating gGasValve " + (keyValue) #+ " ValveBox to " + (sc2Row[1])) Utils.Log("<ACTION>Updating gGasValve " + (keyValue) + "</ACTION>\n", logFile) try: uc.updateRow(updateRow) except Exception as err: raise err print err.message Utils.Log("<ERROR>Failed updating gGasValve FacilityID " + valveNumber + "</ERROR>\n", logFile) del uc Utils.Log("<COMMENT>Stopped searching sourceFC to update rows in targetFC<TIMESTAMP position='end'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Stopped searching sourceFC to update rows in targetFC end " + str(time.strftime("%Y%m%d.%H%M%S"))) except Exception as err: raise err print err.message finally: edit.stopOperation() edit.stopEditing(True) mmapp.Shutdown del mmapp Utils.Log("<PROCESS name='ValveInspections.py'><TIMESTAMP position='end'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP>\n", logFile) print("ValveInspections.py end '>" + str(time.strftime("%Y%m%d.%H%M%S")))
Thank you Richard Fairhurst It now fails at line 36, while waiting for your response I will read more about dictionary.
Traceback info:
File "C:\GISData\Utilities\ValveInspections.py", line 36, in <module>
scRow.insert(0,keyValue(1))
Error Info:
'tuple' object is not callable
Line 36 is fixed. I should have used brackets, not parentheses, to access the item out of the tuple. As i said, it is untested code and little details like that get me nearly every time.
Great, its running now. I'll report back on the performance. Thank you. I'll need to do more reading on using brackets vs parentheses so I can better understand what you provided.
A note on performance. The majority of time in the loop should be used by the AddMessage, print, and log file processing, not the actual reading of the dictionary, or the insertion or update of rows by the cursor. To really see what the performance difference is from eliminating the embedded cursor and OrderBy processing of the cursor and how fast da cursors really are, the AddMessage, print and log statements should be commented out within the two cursor loops.
Some good news and some bad news... The updatecursor completed in 22 seconds. The insertcursor still took 45 minutes. I even removed the utils.log and arcpy.addmessage calls in an attempt to isolate the processing time.
First I have edited the code to stop and start an edit operation before moving from the insert cursor to the update cursor. Also, I had to add statements to delete the cursors. You should stop Python entirely and reboot to clear all of the InsertCursors your code runs may have left open.
What version of Desktop are you using. Do you have attribute indexes on the Insert fc? Is this Oracle, SQL Server or what? Are the tables versioned or unversioned? I have seen very slow sde performance before, but it usually is something about the fc or the editor session, not the da insertCursor directly. It only happens with sde. A file geodatabase would be done in under a minute (I did 121K inserts into a fgdb fc in 19 minutes even with the print statements left in the insert loop when I tested my code. Without the print statements the code finished 121K inserts in about 13 seconds). Get the difference in time for running just the ic.insertRow(scRow) command. Your print statements should have done that. Also, try changing the edit.startEditing(True,True) to edit.startEditing(False,True) or edit.startEditing(False,False). You do not need the undo stack since you don't include cancel operation logic.
Thanks for the help Richard Fairhurst. Although your code did not solve my performance issue the discussion was helpful. I discovered that the iteration error I mentioned in my original post was due to the lack of a FOR loop following the declaration of the updateCursor (uc), now added at line 62 below.
I reverted back to my original code using a WITH statement as that will release the cursors automatically (according to the documentation), thus negating the need for extra code to delete it during processing. I appreciate the education of using a dictionary but I don't see an improvement with performance while using it here.
To answer your last questions, we are using SQL Server on a versioned enterprise database. And using edit.startEditing(Boolean,Boolean) combinations did not affect the insertCursor performance. The documentation indicates some overrides depending on the DB environment.
My script will run once a year at the end of our inspection cycle, so I can live with the 45 minutes processing time, I have other items that need attention now. I will mark your answer to give you credit for the assistance.
My final version is posted below in case others may find it useful. It was slightly changed so I can post it here without sharing some details of our system, so formatting may be off. But the bulk of it is here for others to consume.
import arcpy, os, time, sys, traceback import Utils import MM logFileName = "C:/GISData/Utilities/Logs/ValveInspections.log" mmapp = MM.InitializeLicense() previousAUMode = MM.DisableAUs() workspace = 'Database Connections/OSAuth.sde' sourceFC = 'Database Connections/OSAuth.sde/Inspection/gValveInspection' targetTable = 'Database Connections/OSAuth.sde/GVALVEINSPECTION' targetFC = 'Database Connections/OSAuth.sde/Gas/gGasValve' fieldsToInsert = ['FACILITYID','REPAIRSMADE','CONDITIONFOUND','CONDITIONLEFT','MAINTENANCENOTES','TIMEWORKED','LUBRICATEDVALVE','WORKEDBY','INSPECTOR','INSPECTIONID','INSPECTIONDATE','INSPECTIONSTATUS','PAINTED','VACUUMED','TAGGED','WORKEDBY','FLAGGED','MARKER','INSECTICIDE','CGIBEGIN','CGIEND','MAPCORRECT','EXERCISED','GASVALVEOID'] fieldsToUpdate = ['FACILITYID','VALVEBOX','INSPECTIONDATE','INSPECTIONSTATUS','TAGGED','FLAGGED','MARKER'] logFile = Utils.Log("<PROCESS name='ValveInspections.py'><TIMESTAMP position='start'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP>\n", None, logFileName) print("PROCESS ValveInspections.py start " + time.strftime("%Y%m%d.%H%M%S")) try: Utils.Log("<COMMENT>Starting edit operation</COMMENT>\n", logFile) print("Starting edit operation") edit = arcpy.da.Editor(workspace) edit.startEditing(False,True) edit.startOperation() except Exception as err: # Get the traceback object tb = sys.exc_info()[2] tbinfo = traceback.format_tb(tb)[0] # Concatenate information together concerning the error into a message string pymsg = "PYTHON ERRORS:\nTraceback info:\n" + tbinfo + "\nError Info:\n" + str(sys.exc_info()[1]) msgs = "ArcPy ERRORS:\n" + arcpy.GetMessages(2) + "\n" print pymsg print msgs Utils.Log("<ERROR>Failed to start edit operation</ERROR>\n", logFile) try: Utils.Log("<COMMENT>Searching sourceFC to insert rows into targetTable<TIMESTAMP position='start'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Searching sourceFC to insert rows into targetTable start " + str(time.strftime("%Y%m%d.%H%M%S"))) #ic = arcpy.da.InsertCursor(targetTable, fieldsToInsert) with arcpy.da.SearchCursor(sourceFC, fieldsToInsert, sql_clause=(None, 'ORDER BY INSPECTIONDATE')) as sc: with arcpy.da.InsertCursor(targetTable, fieldsToInsert) as ic: for scRow in sc: arcpy.AddMessage("Inserting inspection for valve number " + (scRow[0]) + " with inspection date " + str((scRow[10]))) print("Inserting inspection for valve number " + (scRow[0]) + " with inspection date " + str((scRow[10]))) Utils.Log("<ACTION>Inserting inspection for " + (scRow[0]) + " with inspection date " + str((scRow[10])) + "</ACTION>\n", logFile) ic.insertRow((scRow)) Utils.Log("<COMMENT>Stopped searching sourceFC to insert rows into targetTable<TIMESTAMP position='end'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Stopped searching sourceFC to insert rows into targetTable end " + str(time.strftime("%Y%m%d.%H%M%S"))) Utils.Log("<COMMENT>Searching sourceFC to update rows in targetFC<TIMESTAMP position='start'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Searching sourceFC to update rows in targetFC start " + str(time.strftime("%Y%m%d.%H%M%S"))) with arcpy.da.SearchCursor(sourceFC, fieldsToUpdate, sql_clause=(None, 'ORDER BY INSPECTIONDATE')) as sc2: for sc2Row in sc2: valveNumber = (sc2Row[0]) arcpy.AddMessage("Updating gGasValve " + (sc2Row[0])) print("Updating gGasValve " + (sc2Row[0])) Utils.Log("<ACTION>Updating gGasValve " + (sc2Row[0]) + "</ACTION>\n", logFile) try: uc = arcpy.da.UpdateCursor(targetFC, fieldsToUpdate, where_clause="FACILITYID = '" + (sc2Row[0]) + "'") for ucRow in uc: uc.updateRow(sc2Row) except Exception as err: # Get the traceback object tb = sys.exc_info()[2] tbinfo = traceback.format_tb(tb)[0] # Concatenate information together concerning the error into a message string pymsg = "PYTHON ERRORS:\nTraceback info:\n" + tbinfo + "\nError Info:\n" + str(sys.exc_info()[1]) msgs = "ArcPy ERRORS:\n" + arcpy.GetMessages(2) + "\n" print pymsg print msgs Utils.Log("<ERROR>Failed updating gGasValve FacilityID " + valveNumber + "</ERROR>\n", logFile) Utils.Log("<COMMENT>Stopped searching sourceFC to update rows in targetFC<TIMESTAMP position='end'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP></COMMENT>\n", logFile) print("Stopped searching sourceFC to update rows in targetFC end " + str(time.strftime("%Y%m%d.%H%M%S"))) except Exception as err: # Get the traceback object tb = sys.exc_info()[2] tbinfo = traceback.format_tb(tb)[0] # Concatenate information together concerning the error into a message string pymsg = "PYTHON ERRORS:\nTraceback info:\n" + tbinfo + "\nError Info:\n" + str(sys.exc_info()[1]) msgs = "ArcPy ERRORS:\n" + arcpy.GetMessages(2) + "\n" print pymsg print msgs finally: edit.stopOperation() edit.stopEditing(True) mmapp.Shutdown del mmapp Utils.Log("<PROCESS name='ValveInspections.py'><TIMESTAMP position='end'>" + time.strftime("%Y%m%d.%H%M%S") + "</TIMESTAMP>\n", logFile) print("ValveInspections.py end " + str(time.strftime("%Y%m%d.%H%M%S")))