Python crash: arcpy standalone script for intersection analysis and summary stats

SimonLeo · ‎11-10-2013

the script is rather simple. Given a list of layers (point, polyline, polygon), intersect each of them with one same layer, then for each intersection result layer, do some simple summary stats including Count, Sum of one field, Count per Case (given a Case Field), and Sum per Case (given sum_field and case_field).

the logic is to iterate each layer to intersect with the base_intersection_layer, then calculate each stat.

when run it, very often Python crashes during the stat calculation after finishing the intersection of first layer. but sometimes when calculate the second Stat, sometimes third or the last.

here is the script:

# Import system modules
import arcpy
from arcpy import env
import string
import random
import time

### 
 Configuration Section
###
# data source path (full path)
workspacePath = "C:\test.gdb"
# input layers
inputLayers = []
# intersection layer
intersectionLayerName = "base_intersection"

###
 Utility Functions
###
# function to add a new layer
def addLayer(layerName):
  tmpLayer = {
    "LayerName": layerName,
    "Stats": []
    }

  inputLayers.append(tmpLayer)

  return tmpLayer

# Stat Calculation Types
CAL_COUNT = 0
CAL_COUNT_PER_TYPE = 1
CAL_FIELD_SUM = 2
CAL_FIELD_SUM_PER_TYPE = 3

# function to add one stat output for CAL_COUNT calculation
# NOTE: must provide a count-able field, such as 'OBJECTID' as faster to count than enumerating features
def outputCount(layer, outputName, baseFieldName):
  tmpOutput = {
    "OutputName": outputName,
    "Type": CAL_COUNT,
    "BaseField": baseFieldName
    }
  layer['Stats'].append(tmpOutput)

  return tmpOutput

# function to add one stat output for CAL_COUNT_PER_TYPE calculation
def outputCountPerCase(layer, outputName, baseFieldName, caseFieldName):
  tmpOutput = {
    "OutputName": outputName,
    "Type": CAL_COUNT_PER_TYPE,
    "BaseField": baseFieldName,
    "CaseField": caseFieldName
    }
  layer['Stats'].append(tmpOutput)

  return tmpOutput

# function to add one stat output for CAL_FIELD_SUM calculation
def outputSum(layer, outputName, baseFieldName):
  tmpOutput = {
    "OutputName": outputName,
    "Type": CAL_FIELD_SUM,
    "BaseField": baseFieldName
    }
  layer['Stats'].append(tmpOutput)

  return tmpOutput

# function to add one stat output for CAL_FIELD_SUM_PER_TYPE calculation
def outputSumPerCase(layer, outputName, baseFieldName, caseFieldName):
  tmpOutput = {
    "OutputName": outputName,
    "Type": CAL_FIELD_SUM_PER_TYPE,
    "BaseField": baseFieldName,
    "CaseField": caseFieldName
    }
  layer['Stats'].append(tmpOutput)

  return tmpOutput

# function to generate an unique string
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
  return ''.join(random.choice(chars) for x in range(size))

# function to delete a temp file
def deleteLayer(layerName):
  if arcpy.Exists(layerName):
    arcpy.Delete_management(layerName)
    print 'deleted ' + layerName
  else:
    print layerName + ' not exist'

### 
 Feed Data
###
newLayer = addLayer("point")
# Stat: Count
outputCount(newLayer, "Count", "OBJECTID")
# Stat: Count Per Case
outputCountPerCase(newLayer, "Count Per Case", "OBJECTID", "CaseField")
# Stat: Sum
outputSum(newLayer, "Sum", "SumField")
# Stat: Sum Per Case
outputSumPerCase(newLayer, "Sum Per Case", "SumField", "CaseField")

### 
 Single Layer Processing (intersection analysis, and Stats calculation)
###
def processLayer(layer):
  tmpLayerName = layer["LayerName"]
  print "start processing layer: " + tmpLayerName
  tmpStats = layer["Stats"]

  #layer to intersect
  tmpInputLayers = [tmpLayerName, intersectionLayerName]
  
  #generate an unique temp file for intersection results
  tmpOutputLayerName = tmpLayerName + "_output_" + id_generator()

  #do the Intersection
  arcpy.Intersect_analysis(tmpInputLayers, tmpOutputLayerName, "ALL", "", "INPUT")

  #process the outputs
  for tmpOutput in tmpOutputs:
  
    tmpOPFullName = tmpOutput["OutputName"]
 #generate an unique temp file for stat output
    tmpOPFileName = id_generator()
    tmpOPType = tmpOutput["Type"]

    #Stats Calculation
    if tmpOPType == CAL_COUNT:
      baseField = tmpOutput["BaseField"]
      print "count of field " + baseField
      statsFields = [[baseField, "COUNT"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, "")
   
    elif tmpOPType == CAL_COUNT_PER_TYPE:
      baseField = tmpOutput["BaseField"]
      caseField = tmpOutput["CaseField"] 
      print "count of field " + baseField + " by case field " + caseField
      statsFields = [[baseField, "COUNT"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, caseField)
   
    elif tmpOPType == CAL_FIELD_SUM:
      baseField = tmpOutput["BaseField"] 
      print "sum of field " + baseField
      statsFields = [[baseField, "SUM"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, "")
   
    elif tmpOPType == CAL_FIELD_SUM_PER_TYPE:
      baseField = tmpOutput["BaseField"]
      caseField = tmpOutput["CaseField"] 
      print "sum of field " + baseField + " by case field " + caseField
      statsFields = [[baseField, "SUM"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, caseField)
   
    print "finished"

    #TODO: export to CSV
 
    #delete temp output stat file
    deleteLayer(tmpOPFileName)

  #delete temp intersection_result layer file
  deleteLayer(tmpOutputLayerName)

### 
 Mass Processing (of all layers)
###
try:
    # Set the workspace (to avoid having to type in the full path to the data every time)
    env.workspace = workspacePath
    
    # Process all layers
    for layer in inputLayers:
      processLayer(layer)
 
except Exception, e:
    # If an error occurred, print line number and error message
    import traceback, sys
    tb = sys.exc_info()[2]
    print "Line %i" % tb.tb_lineno
    print e.message

I did some searchings of similar issue about python crash. Seems some cases are because of memory leak in ArcPy. I am not sure whether this applies to my case too. But would appreciate if anyone could advise some optimization tips to get around this pain.

thanks a lot!

SimonLeo · ‎11-11-2013

it seems moving following code into a separate function solves the problem.

#process the outputs
  for tmpOutput in tmpOutputs:
  
    tmpOPFullName = tmpOutput["OutputName"]
 #generate an unique temp file for stat output
    tmpOPFileName = id_generator()
    tmpOPType = tmpOutput["Type"]

    #Stats Calculation
    if tmpOPType == CAL_COUNT:
      baseField = tmpOutput["BaseField"]
      print "count of field " + baseField
      statsFields = [[baseField, "COUNT"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, "")
   
    elif tmpOPType == CAL_COUNT_PER_TYPE:
      baseField = tmpOutput["BaseField"]
      caseField = tmpOutput["CaseField"] 
      print "count of field " + baseField + " by case field " + caseField
      statsFields = [[baseField, "COUNT"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, caseField)
   
    elif tmpOPType == CAL_FIELD_SUM:
      baseField = tmpOutput["BaseField"] 
      print "sum of field " + baseField
      statsFields = [[baseField, "SUM"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, "")
   
    elif tmpOPType == CAL_FIELD_SUM_PER_TYPE:
      baseField = tmpOutput["BaseField"]
      caseField = tmpOutput["CaseField"] 
      print "sum of field " + baseField + " by case field " + caseField
      statsFields = [[baseField, "SUM"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, caseField)
   
    print "finished"

    #TODO: export to CSV

the script is rather simple. Given a list of layers (point, polyline, polygon), intersect each of them with one same layer, then for each intersection result layer, do some simple summary stats including Count, Sum of one field, Count per Case (given a Case Field), and Sum per Case (given sum_field and case_field).

the logic is to iterate each layer to intersect with the base_intersection_layer, then calculate each stat.

when run it, very often Python crashes during the stat calculation after finishing the intersection of first layer. but sometimes when calculate the second Stat, sometimes third or the last.

here is the script:

# Import system modules
import arcpy
from arcpy import env
import string
import random
import time

### 
 Configuration Section
###
# data source path (full path)
workspacePath = "C:\test.gdb"
# input layers
inputLayers = []
# intersection layer
intersectionLayerName = "base_intersection"

###
 Utility Functions
###
# function to add a new layer
def addLayer(layerName):
  tmpLayer = {
    "LayerName": layerName,
    "Stats": []
    }

  inputLayers.append(tmpLayer)

  return tmpLayer

# Stat Calculation Types
CAL_COUNT = 0
CAL_COUNT_PER_TYPE = 1
CAL_FIELD_SUM = 2
CAL_FIELD_SUM_PER_TYPE = 3

# function to add one stat output for CAL_COUNT calculation
# NOTE: must provide a count-able field, such as 'OBJECTID' as faster to count than enumerating features
def outputCount(layer, outputName, baseFieldName):
  tmpOutput = {
    "OutputName": outputName,
    "Type": CAL_COUNT,
    "BaseField": baseFieldName
    }
  layer['Stats'].append(tmpOutput)

  return tmpOutput

# function to add one stat output for CAL_COUNT_PER_TYPE calculation
def outputCountPerCase(layer, outputName, baseFieldName, caseFieldName):
  tmpOutput = {
    "OutputName": outputName,
    "Type": CAL_COUNT_PER_TYPE,
    "BaseField": baseFieldName,
    "CaseField": caseFieldName
    }
  layer['Stats'].append(tmpOutput)

  return tmpOutput

# function to add one stat output for CAL_FIELD_SUM calculation
def outputSum(layer, outputName, baseFieldName):
  tmpOutput = {
    "OutputName": outputName,
    "Type": CAL_FIELD_SUM,
    "BaseField": baseFieldName
    }
  layer['Stats'].append(tmpOutput)

  return tmpOutput

# function to add one stat output for CAL_FIELD_SUM_PER_TYPE calculation
def outputSumPerCase(layer, outputName, baseFieldName, caseFieldName):
  tmpOutput = {
    "OutputName": outputName,
    "Type": CAL_FIELD_SUM_PER_TYPE,
    "BaseField": baseFieldName,
    "CaseField": caseFieldName
    }
  layer['Stats'].append(tmpOutput)

  return tmpOutput

# function to generate an unique string
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
  return ''.join(random.choice(chars) for x in range(size))

# function to delete a temp file
def deleteLayer(layerName):
  if arcpy.Exists(layerName):
    arcpy.Delete_management(layerName)
    print 'deleted ' + layerName
  else:
    print layerName + ' not exist'

### 
 Feed Data
###
newLayer = addLayer("point")
# Stat: Count
outputCount(newLayer, "Count", "OBJECTID")
# Stat: Count Per Case
outputCountPerCase(newLayer, "Count Per Case", "OBJECTID", "CaseField")
# Stat: Sum
outputSum(newLayer, "Sum", "SumField")
# Stat: Sum Per Case
outputSumPerCase(newLayer, "Sum Per Case", "SumField", "CaseField")

### 
 Single Layer Processing (intersection analysis, and Stats calculation)
###
def processLayer(layer):
  tmpLayerName = layer["LayerName"]
  print "start processing layer: " + tmpLayerName
  tmpStats = layer["Stats"]

  #layer to intersect
  tmpInputLayers = [tmpLayerName, intersectionLayerName]
  
  #generate an unique temp file for intersection results
  tmpOutputLayerName = tmpLayerName + "_output_" + id_generator()

  #do the Intersection
  arcpy.Intersect_analysis(tmpInputLayers, tmpOutputLayerName, "ALL", "", "INPUT")

  #process the outputs
  for tmpOutput in tmpOutputs:
  
    tmpOPFullName = tmpOutput["OutputName"]
 #generate an unique temp file for stat output
    tmpOPFileName = id_generator()
    tmpOPType = tmpOutput["Type"]

    #Stats Calculation
    if tmpOPType == CAL_COUNT:
      baseField = tmpOutput["BaseField"]
      print "count of field " + baseField
      statsFields = [[baseField, "COUNT"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, "")
   
    elif tmpOPType == CAL_COUNT_PER_TYPE:
      baseField = tmpOutput["BaseField"]
      caseField = tmpOutput["CaseField"] 
      print "count of field " + baseField + " by case field " + caseField
      statsFields = [[baseField, "COUNT"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, caseField)
   
    elif tmpOPType == CAL_FIELD_SUM:
      baseField = tmpOutput["BaseField"] 
      print "sum of field " + baseField
      statsFields = [[baseField, "SUM"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, "")
   
    elif tmpOPType == CAL_FIELD_SUM_PER_TYPE:
      baseField = tmpOutput["BaseField"]
      caseField = tmpOutput["CaseField"] 
      print "sum of field " + baseField + " by case field " + caseField
      statsFields = [[baseField, "SUM"]]
      arcpy.Statistics_analysis(tmpOutputLayerName, tmpOPFileName, statsFields, caseField)
   
    print "finished"

    #TODO: export to CSV
 
    #delete temp output stat file
    deleteLayer(tmpOPFileName)

  #delete temp intersection_result layer file
  deleteLayer(tmpOutputLayerName)

### 
 Mass Processing (of all layers)
###
try:
    # Set the workspace (to avoid having to type in the full path to the data every time)
    env.workspace = workspacePath
    
    # Process all layers
    for layer in inputLayers:
      processLayer(layer)
 
except Exception, e:
    # If an error occurred, print line number and error message
    import traceback, sys
    tb = sys.exc_info()[2]
    print "Line %i" % tb.tb_lineno
    print e.message

I did some searchings of similar issue about python crash. Seems some cases are because of memory leak in ArcPy. I am not sure whether this applies to my case too. But would appreciate if anyone could advise some optimization tips to get around this pain.

thanks a lot!