# Import system modules
import arcpy
from arcpy import env
import string
import random
import time

### Configuration Section ###

# data source path (full path)
# raw string: in a normal literal the "\t" of "C:\test.gdb" is read as a TAB
workspacePath = r"C:\test.gdb"

# input layers
inputLayers = []

# intersection layer
intersectionLayerName = "base_intersection"

### Utility Functions ###

# function to add a new layer
def addLayer(layerName):
    """Register layerName in inputLayers and return its config dict.

    The returned dict holds the layer name and an (initially empty) list
    of stat outputs that the output* helpers append to.
    """
    tmpLayer = {
        "LayerName": layerName,
        "Stats": []
    }
    inputLayers.append(tmpLayer)
    return tmpLayer

# Stat Calculation Types
CAL_COUNT = 0
CAL_COUNT_PER_TYPE = 1
CAL_FIELD_SUM = 2
CAL_FIELD_SUM_PER_TYPE = 3

# function to add one stat output for CAL_COUNT calculation
# NOTE: must provide a count-able field, such as 'OBJECTID' as faster to count than enumerating features
def outputCount(layer, outputName, baseFieldName):
    """Append a CAL_COUNT stat on baseFieldName to layer and return it."""
    tmpOutput = {
        "OutputName": outputName,
        "Type": CAL_COUNT,
        "BaseField": baseFieldName
    }
    layer['Stats'].append(tmpOutput)
    return tmpOutput

# function to add one stat output for CAL_COUNT_PER_TYPE calculation
def outputCountPerCase(layer, outputName, baseFieldName, caseFieldName):
    """Append a CAL_COUNT_PER_TYPE stat (count grouped by caseFieldName)."""
    tmpOutput = {
        "OutputName": outputName,
        "Type": CAL_COUNT_PER_TYPE,
        "BaseField": baseFieldName,
        "CaseField": caseFieldName
    }
    layer['Stats'].append(tmpOutput)
    return tmpOutput

# function to add one stat output for CAL_FIELD_SUM calculation
def outputSum(layer, outputName, baseFieldName):
    """Append a CAL_FIELD_SUM stat on baseFieldName to layer and return it."""
    tmpOutput = {
        "OutputName": outputName,
        "Type": CAL_FIELD_SUM,
        "BaseField": baseFieldName
    }
    layer['Stats'].append(tmpOutput)
    return tmpOutput

# function to add one stat output for CAL_FIELD_SUM_PER_TYPE calculation
def outputSumPerCase(layer, outputName, baseFieldName, caseFieldName):
    """Append a CAL_FIELD_SUM_PER_TYPE stat (sum grouped by caseFieldName)."""
    tmpOutput = {
        "OutputName": outputName,
        "Type": CAL_FIELD_SUM_PER_TYPE,
        "BaseField": baseFieldName,
        "CaseField": caseFieldName
    }
    layer['Stats'].append(tmpOutput)
    return tmpOutput

# function to generate an unique string
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of `size` characters drawn from `chars`.

    With the default alphabet the result may start with a digit.
    """
    return ''.join(random.choice(chars) for _ in range(size))

# function to delete a temp file
def deleteLayer(layerName):
    """Delete layerName from the workspace if it exists; report either way."""
    if arcpy.Exists(layerName):
        arcpy.Delete_management(layerName)
        print('deleted ' + layerName)
    else:
        print(layerName + ' not exist')

### Feed Data ###

newLayer = addLayer("point")

# Stat: Count
outputCount(newLayer, "Count", "OBJECTID")

# Stat: Count Per Case
outputCountPerCase(newLayer, "Count Per Case", "OBJECTID", "CaseField")

# Stat: Sum
outputSum(newLayer, "Sum", "SumField")

# Stat: Sum Per Case
outputSumPerCase(newLayer, "Sum Per Case", "SumField", "CaseField")

### Single Layer Processing (intersection analysis, and Stats calculation) ###

def processLayer(layer):
    """Intersect one layer with the base layer and run its configured stats.

    layer is a dict built by addLayer: "LayerName" is the dataset to
    intersect with intersectionLayerName, "Stats" is the list of stat
    outputs to compute on the intersection result. All temporary datasets
    are deleted again, even when a geoprocessing tool raises.
    """
    tmpLayerName = layer["LayerName"]
    print("start processing layer: " + tmpLayerName)

    tmpStats = layer["Stats"]

    #layer to intersect
    tmpInputLayers = [tmpLayerName, intersectionLayerName]

    #generate an unique temp file for intersection results
    tmpOutputLayerName = tmpLayerName + "_output_" + id_generator()

    try:
        #do the Intersection
        arcpy.Intersect_analysis(tmpInputLayers, tmpOutputLayerName, "ALL", "", "INPUT")

        #process the outputs (the original looped over an undefined name)
        for tmpOutput in tmpStats:
            tmpOPType = tmpOutput["Type"]
            baseField = tmpOutput["BaseField"]

            if tmpOPType in (CAL_COUNT, CAL_COUNT_PER_TYPE):
                label, statType = "count", "COUNT"
            elif tmpOPType in (CAL_FIELD_SUM, CAL_FIELD_SUM_PER_TYPE):
                label, statType = "sum", "SUM"
            else:
                print("unknown stat type, skipped: " + str(tmpOPType))
                continue

            # only the *_PER_TYPE stats carry a case (group-by) field
            perCase = tmpOPType in (CAL_COUNT_PER_TYPE, CAL_FIELD_SUM_PER_TYPE)
            caseField = tmpOutput["CaseField"] if perCase else ""

            message = label + " of field " + baseField
            if perCase:
                message += " by case field " + caseField
            print(message)

            #generate an unique temp file for stat output
            # geodatabase table names must begin with a letter, and
            # id_generator() may return a leading digit, hence the prefix
            tmpOPFileName = "stat_" + id_generator()

            try:
                #Stats Calculation
                statsFields = [[baseField, statType]]
                arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, caseField)
                print("finished")

                #TODO: export to CSV
            finally:
                #delete temp output stat file
                deleteLayer(tmpOPFileName)
    finally:
        #delete temp intersection_result layer file
        deleteLayer(tmpOutputLayerName)

### Mass Processing (of all layers) ###
try:
    # Set the workspace (to avoid having to type in the full path to the data every time)
    env.workspace = workspacePath

    # Process all layers
    for layer in inputLayers:
        processLayer(layer)

except arcpy.ExecuteError:
    # A geoprocessing tool failed: print the tool's own error messages
    print(arcpy.GetMessages(2))
except Exception:
    # Any other error: print the full traceback, which carries the real
    # line number (tb_lineno of the outermost frame only points at this try)
    import traceback
    print(traceback.format_exc())
#process the outputs
# Excerpt of the stats loop inside processLayer: tmpStats and
# tmpOutputLayerName are the locals defined earlier in that function.
# (The original iterated over "tmpOutputs", which is never defined.)
for tmpOutput in tmpStats:
    tmpOPType = tmpOutput["Type"]
    baseField = tmpOutput["BaseField"]

    if tmpOPType in (CAL_COUNT, CAL_COUNT_PER_TYPE):
        label, statType = "count", "COUNT"
    elif tmpOPType in (CAL_FIELD_SUM, CAL_FIELD_SUM_PER_TYPE):
        label, statType = "sum", "SUM"
    else:
        print("unknown stat type, skipped: " + str(tmpOPType))
        continue

    # only the *_PER_TYPE stats carry a case (group-by) field
    perCase = tmpOPType in (CAL_COUNT_PER_TYPE, CAL_FIELD_SUM_PER_TYPE)
    caseField = tmpOutput["CaseField"] if perCase else ""

    message = label + " of field " + baseField
    if perCase:
        message += " by case field " + caseField
    print(message)

    #generate an unique temp file for stat output
    # geodatabase table names must begin with a letter, and
    # id_generator() may return a leading digit, hence the prefix
    tmpOPFileName = "stat_" + id_generator()

    #Stats Calculation
    statsFields = [[baseField, statType]]
    arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, caseField)
    print("finished")

    #TODO: export to CSV
The script is rather simple. Given a list of layers (point, polyline, polygon), it intersects each of them with the same base layer; then, for each intersection result layer, it computes some simple summary stats: Count, Sum of one field, Count per Case (given a case field), and Sum per Case (given sum_field and case_field).
The logic is to iterate over the layers, intersect each one with the base_intersection_layer, and then calculate each stat.
When I run it, Python very often crashes during the stat calculation, right after finishing the intersection of the first layer. But sometimes it crashes while calculating the second stat, and sometimes the third or the last.
here is the script:# Import system modules import arcpy from arcpy import env import string import random import time ### Configuration Section ### # data source path (full path) workspacePath = "C:\test.gdb" # input layers inputLayers = [] # intersection layer intersectionLayerName = "base_intersection" ### Utility Functions ### # function to add a new layer def addLayer(layerName): tmpLayer = { "LayerName": layerName, "Stats": [] } inputLayers.append(tmpLayer) return tmpLayer # Stat Calculation Types CAL_COUNT = 0 CAL_COUNT_PER_TYPE = 1 CAL_FIELD_SUM = 2 CAL_FIELD_SUM_PER_TYPE = 3 # function to add one stat output for CAL_COUNT calculation # NOTE: must provide a count-able field, such as 'OBJECTID' as faster to count than enumerating features def outputCount(layer, outputName, baseFieldName): tmpOutput = { "OutputName": outputName, "Type": CAL_COUNT, "BaseField": baseFieldName } layer['Stats'].append(tmpOutput) return tmpOutput # function to add one stat output for CAL_COUNT_PER_TYPE calculation def outputCountPerCase(layer, outputName, baseFieldName, caseFieldName): tmpOutput = { "OutputName": outputName, "Type": CAL_COUNT_PER_TYPE, "BaseField": baseFieldName, "CaseField": caseFieldName } layer['Stats'].append(tmpOutput) return tmpOutput # function to add one stat output for CAL_FIELD_SUM calculation def outputSum(layer, outputName, baseFieldName): tmpOutput = { "OutputName": outputName, "Type": CAL_FIELD_SUM, "BaseField": baseFieldName } layer['Stats'].append(tmpOutput) return tmpOutput # function to add one stat output for CAL_FIELD_SUM_PER_TYPE calculation def outputSumPerCase(layer, outputName, baseFieldName, caseFieldName): tmpOutput = { "OutputName": outputName, "Type": CAL_FIELD_SUM_PER_TYPE, "BaseField": baseFieldName, "CaseField": caseFieldName } layer['Stats'].append(tmpOutput) return tmpOutput # function to generate an unique string def id_generator(size=6, chars=string.ascii_uppercase + string.digits): return ''.join(random.choice(chars) for x in 
range(size)) # function to delete a temp file def deleteLayer(layerName): if arcpy.Exists(layerName): arcpy.Delete_management(layerName) print 'deleted ' + layerName else: print layerName + ' not exist' ### Feed Data ### newLayer = addLayer("point") # Stat: Count outputCount(newLayer, "Count", "OBJECTID") # Stat: Count Per Case outputCountPerCase(newLayer, "Count Per Case", "OBJECTID", "CaseField") # Stat: Sum outputSum(newLayer, "Sum", "SumField") # Stat: Sum Per Case outputSumPerCase(newLayer, "Sum Per Case", "SumField", "CaseField") ### Single Layer Processing (intersection analysis, and Stats calculation) ### def processLayer(layer): tmpLayerName = layer["LayerName"] print "start processing layer: " + tmpLayerName tmpStats = layer["Stats"] #layer to intersect tmpInputLayers = [tmpLayerName, intersectionLayerName] #generate an unique temp file for intersection results tmpOutputLayerName = tmpLayerName + "_output_" + id_generator() #do the Intersection arcpy.Intersect_analysis(tmpInputLayers, tmpOutputLayerName, "ALL", "", "INPUT") #process the outputs for tmpOutput in tmpOutputs: tmpOPFullName = tmpOutput["OutputName"] #generate an unique temp file for stat output tmpOPFileName = id_generator() tmpOPType = tmpOutput["Type"] #Stats Calculation if tmpOPType == CAL_COUNT: baseField = tmpOutput["BaseField"] print "count of field " + baseField statsFields = [[baseField, "COUNT"]] arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, "") elif tmpOPType == CAL_COUNT_PER_TYPE: baseField = tmpOutput["BaseField"] caseField = tmpOutput["CaseField"] print "count of field " + baseField + " by case field " + caseField statsFields = [[baseField, "COUNT"]] arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, caseField) elif tmpOPType == CAL_FIELD_SUM: baseField = tmpOutput["BaseField"] print "sum of field " + baseField statsFields = [[baseField, "SUM"]] arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, "") elif 
tmpOPType == CAL_FIELD_SUM_PER_TYPE: baseField = tmpOutput["BaseField"] caseField = tmpOutput["CaseField"] print "sum of field " + baseField + " by case field " + caseField statsFields = [[baseField, "SUM"]] arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, caseField) print "finished" #TODO: export to CSV #delete temp output stat file deleteLayer(tmpOPFileName) #delete temp intersection_result layer file deleteLayer(tmpOutputLayerName) ### Mass Processing (of all layers) ### try: # Set the workspace (to avoid having to type in the full path to the data every time) env.workspace = workspacePath # Process all layers for layer in inputLayers: processLayer(layer) except Exception, e: # If an error occurred, print line number and error message import traceback, sys tb = sys.exc_info()[2] print "Line %i" % tb.tb_lineno print e.message
I did some searching for similar issues about Python crashing. It seems some cases are caused by a memory leak in ArcPy. I am not sure whether this applies to my case too, but I would appreciate it if anyone could advise some optimization tips to get around this pain.
Thanks a lot!