need to calculate Zonal Statistics for a large number of polygons (400,000+++)
I have some experience with this...I have a large dataset of ~2 million polygons (many are small) that I do some zonal stats on to get the mean elevation, slope, etc. of each poly. I found that many of the polys are in fact too small to be represented by a single pixel (I use a 10m DEM), so of the 2 million polys, only 1.5 million of them make it into the raster layer (the rasterized version of the polygons). Since I want stats for all of the records (even the very small ones) I take the ones that didn't make it into the raster (aka the zones) and convert them to their constituent centroid points, Then I use the "Sample" tool on them. If it helps, here is an excerpt of some Python code that I use. Note the use of the .difference property of a Python set() object - a very handy function in this case.#Blah blah.....
#Sets the rasters to use
inDemGrd = root + "\\gis_layers\\dem10m"
slopePctGrd = root + "\\gis_layers\\dem10m_slp"
inSlpstabGrd = root + "\\gis_layers\\slpstab"
#Process: Define the LDO fc to use
overlayFC = root + r"\ldo_" + str(ldoIterationDate) + r"\ldo_database.gdb\ldo_database"
#Sets some more gp settings
dsc = gp.describe(inDemGrd)
gp.cellsize = inDemGrd
gp.extent = overlayFC
gp.snapraster = inDemGrd
#Process: Converts overlayFC to a grid and creates a vat
oidFieldName = gp.describe(overlayFC).oidfieldname
overlayGrd = "remsoftid"
gp.CalculateField_management(overlayFC, "REMSOFT_ID", "[" + oidFieldName + "]", "VB"); showGpMessage() #REMSOFT_ID acts as a temporary unique id since you can't rasterize on OBJECTID
gp.FeatureToRaster_conversion(overlayFC, "REMSOFT_ID", overlayGrd, gp.cellsize); showGpMessage()
if os.path.exists(gp.workspace + "\\" + overlayGrd + "\\vat.adf") == False:
gp.BuildRasterAttributeTable_management(overlayGrd, "OVERWRITE"); showGpMessage()
#Process: Clears the gp.extent
gp.extent = ""
#Process: Figures out what OBJECTIDs from overlayFC didn't make it into overlayGrd
message = "Compiling list of overlayFC OBJECTIDs..."; showPyMessage()
objectidSet = set()
searchRows = gp.searchcursor(overlayFC, "", "", oidFieldName, "")
searchRow = searchRows.next()
while searchRow:
objectidSet.add(searchRow.getvalue(oidFieldName))
searchRow = searchRows.next()
del searchRow
del searchRows
message = "Compiling list of pixel values..."; showPyMessage()
valueSet = set()
searchRows = gp.searchcursor(overlayGrd, "", "", "VALUE", "")
searchRow = searchRows.next()
while searchRow:
valueSet.add(searchRow.VALUE)
searchRow = searchRows.next()
del searchRow
del searchRows
diffSet = set.difference(objectidSet, valueSet)
oidList = []
for item in diffSet:
oidList.append(item)
oidList.sort()
selOidTbl = overlayFC[0:-len(overlayFC.split("\\")[-1]) - 1] + "\\sel_oid"
gp.CreateTable_management(overlayFC[0:-len(overlayFC.split("\\")[-1]) - 1], "sel_oid", "", ""); showGpMessage()
gp.AddField_management(selOidTbl, "SEL_OID", "LONG"); showGpMessage()
insertRows = gp.insertcursor(selOidTbl)
for oid in oidList:
insertRow = insertRows.newrow()
insertRow.SEL_OID = oid
insertRows.insertrow(insertRow)
del insertRow
del insertRows
del oidList
del diffSet
del objectidSet
del valueSet
#Process: Some of the REMSOFT_IDs don't get preserved in overlayGrd so lets make a point FC for them
overlayFL = "overlay_feature_layer"
gp.MakeFeatureLayer_management(overlayFC, overlayFL, "", "", ""); showGpMessage()
gp.SelectLayerByAttribute_management(overlayFL, "NEW_SELECTION", gp.describe(overlayFC).oidfieldname + " IN (SELECT SEL_OID FROM sel_oid)"); showGpMessage()
polyCentersFC = fgdbPath + "\\polygon_centers"
gp.FeatureToPoint_management(overlayFL, polyCentersFC, "CENTROID"); showGpMessage()
gp.SelectLayerByAttribute_management(overlayFL, "CLEAR_SELECTION", ""); showGpMessage()
gp.Delete_management(selOidTbl, ""); showGpMessage()
gp.CalculateField_management(overlayFC, "REMSOFT_ID", "-1", "VB"); showGpMessage()
#Process: Change the gp.extent
gp.extent = gp.describe(inDemGrd).extent
#Process: Create a eucliduan distance grid for the roads we care about
transFC = root + "\\gis_layers\\state_land_trans.gdb\\state_land_trans"
distanceRoadsFC = fgdbPath + "\\distance_roads"
gp.Select_analysis (transFC, distanceRoadsFC, "ROAD_STATUS_LBL in ('Active','Closed','Decommissioned','Orphaned','Planned','Unknown','') OR ROAD_STATUS_LBL IS NULL"); showGpMessage()
roadGrd = "roads"
gp.FeatureToRaster_conversion(distanceRoadsFC, "OBJECTID", roadGrd, gp.cellsize); showGpMessage()
roadDistGrd = "road_dist"
somaExp = "int(eucdistance(" + roadGrd + ") + .5)"
oldCellSize = gp.cellsize
gp.cellsize = "35" #WORKAROUND: Briefly make the cell size a bit larger so that the now-memory-intensive eucdistance function doesn't throw a memory allocation error (works with 35, bombs at 33.842092, 30, 34)
gp.SingleOutputMapAlgebra_sa(somaExp, roadDistGrd); showGpMessage()
gp.cellsize = oldCellSize
#Process: Does some zonal stat calculations
#For elevation
zonalElevMeanGrd = "zn_elev_mean"
gp.ZonalStatistics_sa(overlayGrd, "VALUE", inDemGrd, zonalElevMeanGrd, "MEAN", "NODATA"); showGpMessage()
#For slope
zonalSlpMeanGrd = "zn_slp_mean"
gp.ZonalStatistics_sa(overlayGrd, "VALUE", slopePctGrd, zonalSlpMeanGrd, "MEAN", "NODATA"); showGpMessage()
#For road distance
zonalRoadDistMeanGrd = "zn_rdis_mean"
gp.ZonalStatistics_sa(overlayGrd, "VALUE", roadDistGrd, zonalRoadDistMeanGrd, "MEAN", "NODATA"); showGpMessage()
#Process: Combines the overlayGrd, zonalElevMeanGrd, zonalSlpMeanGrd, and zonalRoadDistMeanGrd so we can report on the REMSOFTID (aka "value" field in overlayGrd)
combo1Grd = "combo1"
gp.Combine_sa(overlayGrd + ";" + zonalElevMeanGrd + ";" + zonalSlpMeanGrd + ";" + zonalRoadDistMeanGrd, combo1Grd); showGpMessage()
if os.path.exists(gp.workspace + "\\" + combo1Grd + "\\vat.adf") == False:
gp.BuildRasterAttributeTable_management(combo1Grd, "OVERWRITE"); showGpMessage()
#Process: Converts combo1Grd.vat to a real table that we can actually index
vatTV = "vat_table_view"
gp.MakeTableView_management(combo1Grd, vatTV, ""); showGpMessage()
combo1GridVatTbl = fgdbPath + "\\combo1_grid_vat"
gp.CopyRows_management(vatTV, combo1GridVatTbl, ""); showGpMessage()
gp.AddIndex_management(combo1GridVatTbl, "remsoftid", "remsoftid_index", "UNIQUE", "ASCENDING"); showGpMessage()
#Process: Uses the Sample tool to get at the REMSOFTIDs that didn't make it into overlayGrd
sample1Tbl = fgdbPath + "\\sample1"
gp.Sample_sa(inDemGrd + ";" + slopePctGrd + ";" + roadDistGrd, polyCentersFC, sample1Tbl, "NEAREST"); showGpMessage()
gp.AddField_management(sample1Tbl, "REMSOFTID", "LONG"); showGpMessage()
tableTV = "table_view"
gp.MakeTableView_management(sample1Tbl, tableTV, ""); showGpMessage()
gp.AddJoin_management(tableTV, "OBJECTID", polyCentersFC, "OBJECTID", "KEEP_COMMON"); showGpMessage()
gp.CalculateField_management(tableTV, "REMSOFTID", "[polygon_centers.REMSOFT_ID]", "VB"); showGpMessage()
gp.RemoveJoin_management(tableTV, "polygon_centers"); showGpMessage()
gp.AddIndex_management(sample1Tbl, "REMSOFTID", "remsoftid_index", "UNIQUE", "ASCENDING"); showGpMessage()
#Process: Clears the gp.extent
gp.extent = ""
#Process: Populates the DEM_ELV and DEM_SLP fields in overlayFC
#For values in combo1GridVatTbl
gp.MakeFeatureLayer_management(overlayFC, overlayFL, "", "", ""); showGpMessage()
gp.AddJoin_management(overlayFL, gp.describe(overlayFC).oidfieldname, combo1GridVatTbl, "REMSOFTID", "KEEP_COMMON"); showGpMessage()
gp.CalculateField_management(overlayFL, "DEM_ELV", "[combo1_grid_vat.ZN_ELEV_MEAN]", "VB"); showGpMessage()
gp.CalculateField_management(overlayFL, "DEM_SLP", "[combo1_grid_vat.ZN_SLP_MEAN]", "VB"); showGpMessage()
gp.CalculateField_management(overlayFL, "ROAD_DIST", "[combo1_grid_vat.ZN_RDIS_MEAN]", "VB"); showGpMessage()
gp.RemoveJoin_management(overlayFL, "combo1_grid_vat"); showGpMessage()
#For values in sample1Tbl
gp.MakeFeatureLayer_management(overlayFC, overlayFL, "", "", ""); showGpMessage()
gp.AddJoin_management(overlayFL, gp.describe(overlayFC).oidfieldname, sample1Tbl, "REMSOFTID", "KEEP_COMMON"); showGpMessage()
gp.CalculateField_management(overlayFL, "DEM_ELV", "[sample1.dem10m]", "VB"); showGpMessage()
gp.CalculateField_management(overlayFL, "DEM_SLP", "[sample1.dem10m_slp]", "VB"); showGpMessage()
gp.CalculateField_management(overlayFL, "ROAD_DIST", "[sample1.road_dist]", "VB"); showGpMessage()
gp.RemoveJoin_management(overlayFL, "sample1"); showGpMessage()
#Blah blah....