Hi,
I'm trying to complete a spatial join between US Postal Service zip codes and US Census ZCTAs. Zip codes and ZCTAs do not perfectly overlap, and many overlap quite a bit. I used the Spatial Join tool and the "Largest Overlap" relationship, and want an output that gives me the zip code and its ZCTA that overlaps it the most. But, when I run it, the consistent result is a mismatch between the Zip Codes and their ZCTAs (I figured this out by visually double checking). Instead, the tool aligns the zip code to a ZCTA that has a very small overlap with the zip code, rather than the zip code with the most overlap (see pics attached).
I've ensured the layers are in the same gdb, the projections are the same, and the geometries are solid. I've tried ESRI's suggested solve for a similar question (Intersect and then Summarize Within). Results remain misaligned. I've successfully used the Largest Overlap relationship for a separate project, but can't get it right for this one. Please help -- thank you!
Exact same problem for me! Polygons that have 100% overlap are joining correctly, but those that overlap multiple polygons are joining to the target polygon with the smallest - rather than largest - area of overlap. This appears to be a fairly common issue...anyone come up with an idea? Thanks!
I am experiencing this same issue. What I have found is that if both/all datasets are the same exact projected coordinate system, I seem to get the correct values. Of course I can't test every feature and it would seem prudent to ensure that the the projected coordinates system is an equal area projection (which I did also) to ensure areas can be accurately compared.
Also, I would add that Esri either need to clarify this in the documentation or enhance the tool code to ensure data with differing coordinates systems are accurately calculating and comparing areas.
I know I'm quite late, but for anyone else having this problem, I use this python toolbox as a workaround.
import arcpy
import os
from arcpy import env
from arcpy.sa import *
class Toolbox(object):
def __init__(self):
self.label = "Largest Overlap Toolbox"
self.alias = ""
self.tools = [SimpleSpatialJoinLargestOverlap]
class SimpleSpatialJoinLargestOverlap(object):
def __init__(self):
self.label = "Simple Spatial Join (Largest Overlap)"
self.description = "Creates a spatial join with the features that have the largest overlap"
self.canRunInBackground = False
def getParameterInfo(self):
#Define parameter definitions
out_work = arcpy.Parameter(
displayName="Output Workspace",
name="out_work",
datatype="DEWorkspace",
parameterType="Required",
direction="Input")
target_features = arcpy.Parameter( #0
displayName="Target Features",
name="target_features",
datatype="GPFeatureLayer",
parameterType="Required",
direction="Input")
join_features = arcpy.Parameter( #1
displayName="Join Features",
name="join_features",
datatype="GPFeatureLayer",
parameterType="Required",
direction="Input")
out_fc = arcpy.Parameter( #2
displayName="Output Feature Class",
name="out_fc",
datatype="GPFeatureLayer",
parameterType="Required",
direction="Output")
keep_all = arcpy.Parameter( # 3
displayName="Keep All",
name="keep_all",
datatype="GPBoolean",
parameterType="Optional",
direction="Input")
keep_all.value = True
spatial_rel= arcpy.Parameter( # 4
displayName = "largest_overlap",
name = "spatial_rel",
datatype = "GPString",
parameterType = "Optional",
direction = "Input")
spatial_rel.filter.type = "ValueList"
spatial_rel.filter.list = ['largest_overlap']
params = [target_features, #0
join_features, #1
out_fc, #2
keep_all, #3
spatial_rel] #4
return params
def isLicensed(self):
return True
def execute(self, parameters, messages):
arcpy.env.overwriteOutput = True
#Define parameters
target_features = parameters[0].valueAsText
join_features = parameters[1].valueAsText
out_fc = parameters[2].valueAsText
keep_all = parameters[3].valueAsText
spatial_rel = parameters[4].valueAsText
if spatial_rel == "largest_overlap":
# Calculate intersection between Target Feature and Join Features
intersect = arcpy.analysis.Intersect([target_features, join_features], "in_memory/intersect", "ONLY_FID")
# Find which Join Feature has the largest overlap with each Target Feature
# Need to know the Target Features shape type, to know to read the SHAPE_AREA oR SHAPE_LENGTH property
geom = "AREA" if arcpy.Describe(target_features).shapeType.lower() == "polygon" and arcpy.Describe(join_features).shapeType.lower() == "polygon" else "LENGTH"
ifields = arcpy.ListFields(intersect)
fields = [[field.name for field in ifields if field.name.startswith('FID')][0],
[field.name for field in ifields if field.name.startswith('FID')][1],
"SHAPE@{0}".format(geom)]
overlap_dict = {}
with arcpy.da.SearchCursor(intersect, fields) as scur:
for row in scur:
try:
if row[2] > overlap_dict[row[0]][1]:
overlap_dict[row[0]] = [row[1], row[2]]
except:
overlap_dict[row[0]] = [row[1], row[2]]
# Copy the target features and write the largest overlap join feature ID to each record
# Set up all fields from the target features + ORIG_FID
fieldmappings = arcpy.FieldMappings()
fieldmappings.addTable(target_features)
fieldmap = arcpy.FieldMap()
fieldmap.addInputField(target_features, arcpy.Describe(target_features).OIDFieldName)
fld = fieldmap.outputField
fld.type, fld.name, fld.aliasName = "LONG", "ORIG_FID", "ORIG_FID"
fieldmap.outputField = fld
fieldmappings.addFieldMap(fieldmap)
# Perform the copy
arcpy.conversion.FeatureClassToFeatureClass(target_features, os.path.dirname(out_fc), os.path.basename(out_fc), "", fieldmappings)
# Add a new field JOIN_FID to contain the fid of the join feature with the largest overlap
arcpy.management.AddField(out_fc, "JOIN_FID", "LONG")
# Calculate the JOIN_FID field
with arcpy.da.UpdateCursor(out_fc, ["ORIG_FID", "JOIN_FID"]) as ucur:
for row in ucur:
try:
row[1] = overlap_dict[row[0]][0]
ucur.updateRow(row)
except:
if not keep_all:
ucur.deleteRow()
# Join all attributes from the join features to the output
joinfields = [x.name for x in arcpy.ListFields(join_features) if not x.required]
arcpy.management.JoinField(out_fc, "JOIN_FID", join_features, arcpy.Describe(join_features).OIDFieldName, joinfields)
return