Script created by Chris Snyder; modified by C. Denninger # Script resulted from ESRI Forum posts dated 04FEB2010. # This script is used in a script tool within ArcGIS and calculates #..the median value of one entire field and posts that single median #...value to every row in one other entire field. import sys, arcgisscripting gp = arcgisscripting.create(9.3) inputTable = gp.GetParameterAsText(0) readField = gp.GetParameterAsText(1) #this needs to be a numeric field writeField = gp.GetParameterAsText(2) #this needs to be a double or float valueList = [] searchRows = gp.searchcursor(inputTable) searchRow = searchRows.next() while searchRow: searchRowValue = searchRow.getvalue(readField) if searchRowValue == None: pass #don't add a null value to the list! else: valueList.append(searchRowValue) searchRow = searchRows.next() del searchRow del searchRows valueList.sort() listLength = len(valueList) if listLength == 0: print "Every value was null! Exiting script..."; sys.exit() elif listLength % 2 == 0: #even, so get the mean of the 2 center values medianValue = (valueList[listLength / 2] + valueList[listLength / 2 - 1]) / 2.0 else: #odd, so it's easy! medianValue = valueList[listLength / 2] updateRows = gp.updatecursor(inputTable) updateRow = updateRows.next() while updateRow: updateRow.setvalue(writeField, medianValue) updateRows.UpdateRow(updateRow) updateRow = updateRows.next() del updateRow del updateRows
import arcpy, sys, os inputTable = arcpy.GetParameterAsText(0) readField = arcpy.GetParameterAsText(1) writeField = arcpy.GetParameterAsText(2) valueList = [] searchRows = arcpy.da.SearchCursor(inputTable) searchRow = searchRows.next() while searchRow: searchRowValue = searchRow.getValue(readField) if searchRowValue == None: pass #don't add a null value to the list! else: valueList.append(searchRowValue) searchRow = searchRows.next() del seachRow del searchRows valueList.sort listLength = len(valueList) if listLength == 0: print "Every value was null! Exiting script..."; sys.exit() elif listLength % 2 == 0: #even, so get the mean of the 2 center values medianValue = (valueList[listLength / 2] + valueList[listLength / 2 - 1]) / 2.0 else: medianValue = valueList[listLength / 2] updateRows = arcpy.da.UpdateCursor(inputTable) updateRow = updateRows.next() while updateRow: updateRow.setvalue(writeField, medianValue) updateRows.UpdateRow(updateRow) updateRow = updateRows.next() del updateRow del updateRows
Solved! Go to Solution.
valueList.sort listLength = len(valueList) if listLength == 0: print "Every value was null! Exiting script..."; sys.exit() elif listLength % 2 == 0: #even, so get the mean of the 2 center values medianValue = (valueList[listLength / 2] + valueList[listLength / 2 - 1]) / 2.0 else: medianValue = valueList[listLength / 2]
medianValue = numpy.median(valueList)
if numpy.isnan(medianValue): print "Every value was null! Exiting script..." sys.exit()
with arcpy.da.SearchCursor(inputTable, [readField]) as searchRows: for searchRow in searchRows: if searchRow[0] != None: valueList.append(searchRow[0]) del searchRow del searchRows
with arcpy.da.UpdateCursor(inputTable, [writeField]) as updateRows: for row in updateRows: row[0] = medianValue updateRows.updateRow(row) del row del updateRows
 
					
				
		
'''
Written By Caleb Mackey
4/17/2013
Calculates Median Statistics
'''
import arcpy, os, sys, traceback
# env settings
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False
def GetMedian(in_list):
    sorted_list = sorted(in_list)
    median = int(round(len(sorted_list) / 2))
    if len(sorted_list)%2==0:
        med_val = float(sorted_list[median-1]
                        + sorted_list[median]) / 2
    else:
        med_val = sorted_list[median]
    return med_val
def GetMedianValues(source_fc, new_table, case_field, value_field):
    
    ''' Generates a table with Median Values, summarized by case_field. If the
        goal is to get the median for the entire table, use a case field that has
        the same value for all records.
        source_fc - input feature class to compute median statistics for
        new_table - output table
        case_field - similar to dissolve field, computes stats based on unique values in this field
        value_field - field that contains the actual values for statistics; must be numeric
    '''
    
    # Get unique value list for query
    print 'starting cursor'
    with arcpy.da.SearchCursor(source_fc, [case_field]) as rows:
        un_vals = list(set(r[0] for r in rows))
    lyr = arcpy.MakeFeatureLayer_management(source_fc,'source_layer')
    values = {}
    # Get Median UseValue for each station name
    for st in un_vals:
        query = '"{0}" = \'{1}\''.format(case_field, st)
        arcpy.SelectLayerByAttribute_management(lyr, 'NEW_SELECTION', query)
        use_vals = []
        with arcpy.da.SearchCursor(lyr, [value_field]) as rows:
            for row in rows:
                if row[0] != None:
                    use_vals.append(row[0])
        if len(use_vals) > 0:
            median = GetMedian(use_vals)
            values[st] = [median, len(use_vals)]
    # Create new Summary Statistics table with median
    #
    if arcpy.Exists(new_table):
        arcpy.Delete_management(new_table)
    arcpy.CreateTable_management(os.path.split(new_table)[0],os.path.basename(new_table))
    # Get field names and types
    for field in arcpy.ListFields(source_fc):
        if field.name in [case_field, value_field]:
            ftype = field.type
            name = field.name
            length = field.length
            pres = field.precision
            scale = field.scale
            if name == value_field:
                if new_table.endswith('.dbf'):
                    name = 'MED_' + value_field[:6]
                else:
                    name = 'MED_' + value_field
                value_field2 = name
            arcpy.AddField_management(new_table,name,ftype,pres,scale,length)
            
    # Add frequency field
    arcpy.AddField_management(new_table,'FREQUENCY','LONG')
    # Insert rows
    with arcpy.da.InsertCursor(new_table, [case_field, value_field2, 'FREQUENCY']) as rows:
        for k,v in sorted(values.iteritems()):
            rows.insertRow((k, v[0], v[1]))
            
    # report results
    print 'Created %s' %os.path.basename(new_table)
    arcpy.AddMessage('Created %s' %os.path.basename(new_table))
    # .dbf's are automatically given a 'Field1' field...Clean this up
    try:
        if new_table.endswith('.dbf'):
            arcpy.DeleteField_management(new_table, 'Field1')
    except:
        pass
    print 'Done'
if __name__ == '__main__':
##    # testing
##    source_fc = r'C:\Testing\Test.gdb\CSR_by_TWP'
####    new_table = r'C:\Testing\Test.gdb\Median_CSR' #gdb test
##    new_table = r'C:\Testing\Median_CSR.dbf'  #dbf test
##    case_field = 'NAME'
##    value_field = 'AVE_CSR'
    # Script tool params
    source_fc = arcpy.GetParameterAsText(0)
    new_table = arcpy.GetParameterAsText(1)
    case_field = arcpy.GetParameterAsText(2)
    value_field = arcpy.GetParameterAsText(3)
    GetMedianValues(source_fc, new_table, case_field, value_field)
    
Thanks for posting this, very helpful! I have updated it to Python 3.x, which is what ArcGIS Pro ships with:
'''
Written By Caleb Mackey
4/17/2013
Updated to Python 3.x (ArcGIS Pro 1.0) by Katja Krivoruchko
4/16/2015
Calculates Median Statistic
'''
import arcpy, os, sys, traceback
# env settings
arcpy.env.overwriteOutput = True
arcpy.env.qualifiedFieldNames = False
def GetMedian(in_list):
sorted_list = sorted(in_list)
median = int(round(len(sorted_list) / 2))
if len(sorted_list)%2==0:
med_val = float(sorted_list[median-1]
+ sorted_list[median]) / 2
else:
med_val = sorted_list[median]
return med_val
def GetMedianValues(source_fc, new_table, case_field, value_field):
''' Generates a table with Median Values, summarized by case_field. If the
goal is to get the median for the entire table, use a case field that has
the same value for all records.
source_fc - input feature class to compute median statistics for
new_table - output table
case_field - similar to dissolve field, computes stats based on unique values in this field
value_field - field that contains the actual values for statistics; must be numeric
'''
# Get unique value list for query
with arcpy.da.SearchCursor(source_fc, [case_field]) as rows:
un_vals = list(set(r[0] for r in rows))
lyr = arcpy.MakeFeatureLayer_management(source_fc,'source_layer')
values = {}
# Get Median UseValue for each station name
for st in un_vals:
query = '"{0}" = \'{1}\''.format(case_field, st)
arcpy.SelectLayerByAttribute_management(lyr, 'NEW_SELECTION', query)
use_vals = []
with arcpy.da.SearchCursor(lyr, [value_field]) as rows:
for row in rows:
if row[0] != None:
use_vals.append(row[0])
if len(use_vals) > 0:
median = GetMedian(use_vals)
values[st] = [median, len(use_vals)]
# Create new Summary Statistics table with median
#
if arcpy.Exists(new_table):
arcpy.Delete_management(new_table)
arcpy.CreateTable_management(os.path.split(new_table)[0],os.path.basename(new_table))
# Get field names and types
for field in arcpy.ListFields(source_fc):
if field.name in [case_field, value_field]:
ftype = field.type
name = field.name
length = field.length
pres = field.precision
scale = field.scale
if name == value_field:
if new_table.endswith('.dbf'):
name = 'MED_' + value_field[:6]
else:
name = 'MED_' + value_field
value_field2 = name
arcpy.AddField_management(new_table,name,ftype,pres,scale,length)
# Add frequency field
arcpy.AddField_management(new_table,'FREQUENCY','LONG')
# Insert rows
with arcpy.da.InsertCursor(new_table, [case_field, value_field2, 'FREQUENCY']) as rows:
for k,v in sorted(values.items()):
rows.insertRow((k, v[0], v[1]))
# report results
print ('Created %s' %os.path.basename(new_table) )
arcpy.AddMessage('Created %s' %os.path.basename(new_table))
# .dbf's are automatically given a 'Field1' field...Clean this up
try:
if new_table.endswith('.dbf'):
arcpy.DeleteField_management(new_table, 'Field1')
except:
pass
#print 'Done'
if __name__ == '__main__':
# Script tool params
source_fc = arcpy.GetParameterAsText(0)
new_table = arcpy.GetParameterAsText(1)
case_field = arcpy.GetParameterAsText(2)
value_field = arcpy.GetParameterAsText(3)
GetMedianValues(source_fc, new_table, case_field, value_field)
Thank you Caleb...worked like a charm! I naively thought that 'median' was just going to be a simple calculation function in the Field Calculator...thank god for your script!
 
					
				
		
No problem, glad to see this is still getting used!
valueList.sort listLength = len(valueList) if listLength == 0: print "Every value was null! Exiting script..."; sys.exit() elif listLength % 2 == 0: #even, so get the mean of the 2 center values medianValue = (valueList[listLength / 2] + valueList[listLength / 2 - 1]) / 2.0 else: medianValue = valueList[listLength / 2]
medianValue = numpy.median(valueList)
if numpy.isnan(medianValue): print "Every value was null! Exiting script..." sys.exit()
Once numeric values are obtained from a field, via whatever means, medians are relatively easy to determine using numpy even if the field contains NaN's
>>> import numpy as np >>> a = np.array([1,2,np.NaN,3.,4,np.NaN],dtype='float64') >>> np.median(a) 3.5 >>> np.nanmedian(a) 2.5 >>>
that's why I love numpy and panda
