I need to identify clusters that contain a maximum of 15 features within a certain distance of each other, but I’m not sure how to do that. The Find Point Clusters tool in GeoAnalytics Desktop only allows you to set a minimum number of features per cluster, not a maximum. Even if I run the tool with a minimum of 15, how can I isolate only the clusters that contain exactly 15 features? Ideally I would need this in arcpy, but for testing I would like to know how to do this in Pro.
I don't suppose you have the spatial statistics extension
Spatially Constrained Multivariate Clustering (Spatial Statistics)—ArcGIS Pro | Documentation
The size of the clusters can be managed with the Cluster Size Constraints parameter. You can set minimum and maximum thresholds that each cluster must meet.
I forgot to mention that they have to be within a certain distance of each other.
not sure whether the spatial weights matrix inclusion (see the above link) would help.
If they have to be within a certain distance of one another, how do you intend to filter out points that don't meet the min/and/or/max distance criteria?
If points are too close to one another, do you intend to drop one or more? or take an average location? (eg aggregate)
If points are too far from on another, do you intend to assign them to the nearest cluster perhaps breaking you number limit.
Perhaps a visual of what you are trying to cluster might help. Also don't rule out that a manual approach may be the most expedient (good old H.I.), or at least as a first step in getting to a final solution.
If they have to be within a certain distance of one another, how do you intend to filter out points that don't meet the min/and/or/max distance criteria? - Filter them out yes or some how mark them, for example a numeric field with 0 or 1
If points are too close to one another, do you intend to drop one or more? or take an average location? (eg aggregate) - Yes
If points are too far from on another, do you intend to assign them to the nearest cluster perhaps breaking you number limit. - No, I am looking for exactly cluster of 15.
I have attached an image
I have the following code, which does mark the output feature class with 1, but it doesn't appear correct. The highlighted is what has 1 but you can see points that look like they should be part of the cluster
import arcpy
import pandas as pd
from collections import Counter
# --- INPUTS ---
input_points = "Centroids_2_10"
out_feature_class = r"C:\Temp\ClusterProject.gdb\PointClusters"
clusters_exact15 = r"C:\Temp\\ClusterProject.gdb\Clusters_Exactly15"
# --- CONSTANTS ---
search_radius = 800 # feet
target_cluster_size = 15
# Set overwrite output
arcpy.env.overwriteOutput = True
def find_clusters_of_exact_size():
"""
Find clusters of exactly 15 points within 800 feet using graph-based method.
This is the most reliable method for your specific requirement.
"""
print("="*70)
print("FINDING CLUSTERS OF EXACTLY 15 POINTS WITHIN 800 FEET")
print("="*70)
# Step 1: Create near table
print("\n1. Creating near table...")
near_table = r"memory\near_table"
arcpy.analysis.GenerateNearTable(
in_features=input_points,
near_features=input_points,
out_table=near_table,
search_radius=f"{search_radius} Feet", # Specify units
method="PLANAR",
closest="ALL",
closest_count=0 # All points within radius
)
print("2. Processing connections...")
# Read near table
near_data = []
with arcpy.da.SearchCursor(near_table, ['IN_FID', 'NEAR_FID', 'NEAR_DIST']) as cursor:
for row in cursor:
near_data.append(row)
# Build adjacency dictionary
adjacency = {}
for in_fid, near_fid, dist in near_data:
if in_fid != near_fid: # Skip self-connections
adjacency.setdefault(in_fid, set()).add(near_fid)
adjacency.setdefault(near_fid, set()).add(in_fid)
print("3. Finding connected components (clusters)...")
# Find connected components using BFS
visited = set()
all_clusters = []
for point_id in adjacency.keys():
if point_id not in visited:
# Start new cluster
cluster = set()
stack = [point_id]
while stack:
current = stack.pop()
if current not in visited:
visited.add(current)
cluster.add(current)
# Add all neighbors
for neighbor in adjacency.get(current, set()):
if neighbor not in visited:
stack.append(neighbor)
all_clusters.append(cluster)
# Find isolated points (not in adjacency at all)
all_point_ids = set()
with arcpy.da.SearchCursor(input_points, ["OID@"]) as cursor:
for row in cursor:
all_point_ids.add(row[0])
isolated_points = all_point_ids - visited
for point_id in isolated_points:
all_clusters.append({point_id})
print(f"\n4. Found {len(all_clusters)} total clusters")
# Count cluster sizes
cluster_sizes = [len(c) for c in all_clusters]
size_counter = Counter(cluster_sizes)
print("\nCluster size distribution:")
for size, count in sorted(size_counter.items()):
print(f" Size {size}: {count} clusters")
# Find clusters with exactly 15 points
exact_15_clusters = [c for c in all_clusters if len(c) == target_cluster_size]
print(f"\n5. Found {len(exact_15_clusters)} clusters with exactly {target_cluster_size} points!")
if not exact_15_clusters:
print("No clusters found with exactly 15 points.")
return
# Step 5: Assign cluster IDs
print("\n6. Assigning cluster IDs...")
# Create mapping from point OID to cluster ID
point_to_cluster = {}
point_to_exact_cluster = {}
# Assign IDs to all clusters
for i, cluster in enumerate(all_clusters):
cluster_id = i + 1
for point_id in cluster:
point_to_cluster[point_id] = cluster_id
# Assign special IDs to exact 15 clusters
for i, cluster in enumerate(exact_15_clusters):
exact_cluster_id = i + 1
for point_id in cluster:
point_to_exact_cluster[point_id] = exact_cluster_id
# Step 6: Create output feature classes
print("\n7. Creating output feature classes...")
# Create main output with all clusters
arcpy.CopyFeatures_management(input_points, out_feature_class)
# Add cluster fields
arcpy.AddField_management(out_feature_class, "Cluster_ID", "LONG")
arcpy.AddField_management(out_feature_class, "Exact15_ID", "LONG")
arcpy.AddField_management(out_feature_class, "Is_Exact15", "SHORT")
# Update with cluster assignments
with arcpy.da.UpdateCursor(out_feature_class, ["OID@", "Cluster_ID", "Exact15_ID", "Is_Exact15"]) as cursor:
for row in cursor:
point_id = row[0]
row[1] = point_to_cluster.get(point_id, -1) # -1 for isolated points
row[2] = point_to_exact_cluster.get(point_id, -1) # -1 if not in exact 15 cluster
row[3] = 1 if point_id in point_to_exact_cluster else 0
cursor.updateRow(row)
print(f" Created: {out_feature_class}")
# Create feature class with only exact 15 clusters
where_clause = "Is_Exact15 = 1"
temp_layer = "exact_clusters_layer"
arcpy.MakeFeatureLayer_management(out_feature_class, temp_layer, where_clause)
arcpy.CopyFeatures_management(temp_layer, clusters_exact15)
arcpy.Delete_management(temp_layer)
print(f" Created: {clusters_exact15}")
# Step 7: Summary statistics
print("\n" + "="*70)
print("SUMMARY RESULTS")
print("="*70)
total_points = len(all_point_ids)
isolated_count = len(isolated_points)
clustered_points = total_points - isolated_count
print(f"\nTotal points analyzed: {total_points}")
print(f"Points in clusters: {clustered_points}")
print(f"Isolated points (no neighbors within {search_radius} ft): {isolated_count}")
print(f"Total clusters found: {len(all_clusters)}")
print(f"Clusters with exactly {target_cluster_size} points: {len(exact_15_clusters)}")
# Detailed breakdown of exact 15 clusters
print(f"\nExact 15-point clusters (Cluster IDs):")
for i, cluster in enumerate(exact_15_clusters, 1):
print(f" Cluster {i}: {len(cluster)} points")
# Suggestions for next steps
print("\n" + "="*70)
print("NEXT STEPS")
print("="*70)
print("\n1. View 'Clusters_Exactly15' to see the 15-point clusters")
print("2. View 'PointClusters' to see all clusters (field: Cluster_ID)")
print("3. Isolated points have Cluster_ID = -1")
print("4. To adjust results, try:")
print(f" - Increase search radius > {search_radius} ft for larger clusters")
print(f" - Decrease search radius < {search_radius} ft for smaller clusters")
print(f" - Target different cluster size (not 15)")
# Clean up
arcpy.Delete_management(near_table)
return len(exact_15_clusters)
def try_alternative_methods():
"""Try alternative methods if graph-based doesn't work"""
print("\n" + "="*70)
print("ALTERNATIVE APPROACHES")
print("="*70)
print("\nIf the graph-based method doesn't work, try these:")
print("\n1. SIMPLE BUFFER AND INTERSECT:")
print(" - Buffer each point by 400 ft (half of 800)")
print(" - Find overlapping buffers")
print(" - Group points with overlapping buffers")
print("\n2. SPATIAL JOIN:")
print(" - Spatial join points to themselves within 800 ft")
print(" - Create adjacency matrix from join results")
print("\n3. MANUAL ITERATION:")
print(" - For each point, find all neighbors within 800 ft")
print(" - Build groups iteratively")
print("\n4. USE EXISTING RESULTS:")
print(" You already have 12 clusters of exactly 15 points!")
print(" These are in 'Clusters_Exactly15' feature class")
# Run the analysis
if __name__ == "__main__":
try:
result = find_clusters_of_exact_size()
if result == 0:
print("\nNo exact 15-point clusters found. Trying alternative approaches...")
try_alternative_methods()
else:
print(f"\nSUCCESS! Found {result} clusters of exactly 15 points.")
print("\nThe graph-based method is complete and reliable.")
except Exception as e:
print(f"\nERROR: {str(e)}")
print("\nTrying alternative approach...")
try_alternative_methods()
sorry, I can't see any easy way of getting any clustering algorithm to meet you distance and count restrictions given your point pattern
Ya, I wasn't sure if there was or not. It does seem like this should be a capability somehow.
The best I came with is something like this,
import arcpy
from collections import defaultdict, deque
# ------------------------------------------------
# ENVIRONMENT
# ------------------------------------------------
arcpy.env.overwriteOutput = True
arcpy.env.addOutputsToMap = False
# ------------------------------------------------
# INPUTS
# ------------------------------------------------
input_points = "Centroids_2_10"
output_fc = r"C:\Temp\ClusterProject.gdb\Groups_Of_15"
search_distance = "1320 Feet"
target_size = 15
group_field = "GROUP_ID"
# ------------------------------------------------
# 1. COPY INPUT
# ------------------------------------------------
print("1. Copying input points...")
arcpy.management.CopyFeatures(input_points, output_fc)
oid_field = arcpy.Describe(output_fc).OIDFieldName
# ------------------------------------------------
# 2. GENERATE NEAR TABLE (ALL PAIRS)
# ------------------------------------------------
print("2. Generating Near Table...")
near_table = "in_memory\\near_table"
arcpy.analysis.GenerateNearTable(
in_features=output_fc,
near_features=output_fc,
out_table=near_table,
search_radius=search_distance,
location="NO_LOCATION",
angle="NO_ANGLE",
closest="ALL",
closest_count="",
method="GEODESIC"
)
# ------------------------------------------------
# 3. BUILD ADJACENCY GRAPH
# ------------------------------------------------
print("3. Building adjacency graph...")
adj = defaultdict(set)
with arcpy.da.SearchCursor(near_table, ["IN_FID", "NEAR_FID"]) as cur:
for a, b in cur:
if a != b:
adj[a].add(b)
adj[b].add(a)
# ------------------------------------------------
# 4. FIND CONNECTED COMPONENTS
# ------------------------------------------------
print("4. Finding connected components...")
visited = set()
components = []
all_oids = [row[0] for row in arcpy.da.SearchCursor(output_fc, [oid_field])]
for oid in all_oids:
if oid in visited:
continue
queue = deque([oid])
visited.add(oid)
component = {oid}
while queue:
current = queue.popleft()
for neighbor in adj.get(current, []):
if neighbor not in visited:
visited.add(neighbor)
component.add(neighbor)
queue.append(neighbor)
components.append(component)
print(f" → Found {len(components)} connected components")
# ------------------------------------------------
# 5. SPLIT EACH COMPONENT INTO GROUPS OF 15
# ------------------------------------------------
print("5. Splitting components into groups of 15...")
groups = {}
group_id = 1
for comp in components:
remaining = set(comp)
while len(remaining) >= target_size:
seed = remaining.pop()
subgroup = [seed]
queue = deque([seed])
while queue and len(subgroup) < target_size:
current = queue.popleft()
for n in adj.get(current, []):
if n in remaining:
remaining.remove(n)
subgroup.append(n)
queue.append(n)
if len(subgroup) == target_size:
break
# Assign subgroup
for oid in subgroup:
groups[oid] = group_id
group_id += 1
print(f" → Created {group_id - 1} groups of {target_size}")
# ------------------------------------------------
# 6. WRITE GROUP FIELD
# ------------------------------------------------
print("6. Writing GROUP_ID field...")
field_names = [f.name for f in arcpy.ListFields(output_fc)]
if group_field not in field_names:
arcpy.management.AddField(output_fc, group_field, "LONG")
with arcpy.da.UpdateCursor(output_fc, [oid_field, group_field]) as cur:
for oid, _ in cur:
cur.updateRow((oid, groups.get(oid)))
# ------------------------------------------------
# 7. OUTPUT ONLY VALID GROUPS (OPTIONAL)
# ------------------------------------------------
print("7. Creating output feature class (groups of 15 only)...")
valid_fc = output_fc + "_Only15"
arcpy.management.MakeFeatureLayer(
output_fc,
"groups15_lyr",
f"{group_field} IS NOT NULL"
)
arcpy.management.CopyFeatures("groups15_lyr", valid_fc)
arcpy.management.Delete("groups15_lyr")
# ------------------------------------------------
# CLEANUP
# ------------------------------------------------
arcpy.management.Delete("in_memory")
print("\nDONE")