Find identical tool using different xy tolerance values

ThomasPlazek · ‎08-15-2023

Hi, I am trying to write a script that identifies points within a certain distance of each other. The point data is broken down into 6 types, and each type has a different xy tolerance value. I am trying to assign an xy tolerance to each type, then loop through the types and write the results to a separate output table for each one. The issue is that ArcGIS Pro is just putting all of the original points into every output table and assigning them all to the same FEAT_SEQ. Every output table is identical. Any help would be appreciated!

*note: for privacy reasons, I have redacted the output gdb location

----------------------------------------------------------------------------------------------------------------------------

import arcpy

# Run Find Identical once per location type, writing each result to its own
# uniquely named table in the output geodatabase.
output_workspace = r"GDB_location.gdb"

# XY tolerance per location type. The values are passed as bare numbers.
# NOTE(review): presumably a unitless tolerance is read in the units of the
# layer's coordinate system, so on a geographic system 100 would mean 100
# degrees and would match every point -- confirm and add a unit, e.g.
# "100 Meters".
location_domain = {
    "Depot": 100,
    "Station": 100,
    "Stop": 5,
    "Yard": 100,
    "Switch": 5,
    "Level_Crossing": 5,
}

try:
    for location_type, xy_tolerance in location_domain.items():
        output_feature_class = arcpy.CreateUniqueName(f"Identical_{location_type}", output_workspace)

        # NOTE(review): nothing here restricts 'Railway_POI_1' to the current
        # location_type, so every iteration is given the whole layer. Only
        # the tolerance differs between runs.
        arcpy.management.FindIdentical('Railway_POI_1', output_feature_class, ["shape"], xy_tolerance)

        print(f"Duplicated records for {location_type} have been identified and saved to {output_feature_class}.")
except arcpy.ExecuteError:
    # Report the geoprocessing messages instead of a bare traceback.
    print(arcpy.GetMessages())

ThomasPlazek · ‎11-01-2023

Thanks for your reply! As it turns out, the Find Identical tool has some limitations and does not allow you to use different xy tolerance values for different types within the same feature class. My solution was to break down each type into its own feature dataset and assign a global xy tolerance to each type. The SRID was set to WGS84 in this case, but it could be changed as needed.

import arcpy
import re

# Split the input point layer by type and run Find Identical on each type
# with its own XY tolerance. Each type is copied into its own feature dataset
# first, and the tolerance is applied through the geoprocessing environment.
arcpy.env.workspace = r"host_gdb_location"
input_feature_layer = "Railroad_POI_1"
field_name = "TYPE1"

# Collect the distinct, non-null values of the type field.
with arcpy.da.SearchCursor(input_feature_layer, field_name) as cursor:
    unique_values = {row[0] for row in cursor if row[0] is not None}

# XY tolerance per type, looked up by the sanitized type name.
xy_tolerances = {
    "Depot": "100 Meters",
    "Station": "100 Meters",
    "Stop": "5 Meters",
    "Yard": "100 Meters",
    "Switch": "5 Meters",
    "Level_Crossing": "5 Meters",
}

# The delimited field name is the same for every type, so build it once.
delimited_field = arcpy.AddFieldDelimiters(input_feature_layer, field_name)

try:
    # Sorted so the types are processed in the same order on every run.
    for value in sorted(unique_values):
        # Double embedded single quotes so the value stays a valid SQL literal.
        escaped_value = value.replace("'", "''")
        sql_expression = f"{delimited_field} = '{escaped_value}'"

        arcpy.SelectLayerByAttribute_management(input_feature_layer, "NEW_SELECTION", sql_expression)
        selection_count = int(arcpy.GetCount_management(input_feature_layer).getOutput(0))

        if selection_count == 0:
            print(f"No features of type '{value}' selected.")
            continue

        # Keep only characters that are safe in a geodatabase dataset name.
        sanitized_value = re.sub(r'[^a-zA-Z0-9_]', '', value)[:40]
        if not sanitized_value:
            sanitized_value = "Unknown"

        feature_dataset_name = f"FeatureDataset_{sanitized_value}"
        arcpy.CreateFeatureDataset_management(arcpy.env.workspace, feature_dataset_name, arcpy.SpatialReference(4326))

        feature_class_name = f"FeatureClass_{sanitized_value}"
        feature_class_path = arcpy.CreateFeatureclass_management(feature_dataset_name, feature_class_name, "POINT", "", "DISABLED", "DISABLED")

        # The layer carries the selection made above, so only the features of
        # the current type are copied.
        # NOTE(review): this writes to the feature class created on the line
        # above, so it presumably relies on overwriteOutput being enabled --
        # confirm.
        arcpy.management.CopyFeatures(input_feature_layer, feature_class_path)

        xy_tolerance = xy_tolerances.get(sanitized_value)
        try:
            if xy_tolerance:
                arcpy.env.XYTolerance = xy_tolerance

            find_identical_output = f"FindIdentical_{feature_class_name}"
            arcpy.FindIdentical_management(feature_class_path, find_identical_output, ["Shape"], output_record_option="ONLY_DUPLICATES")

            # NOTE(review): the start of this call was missing from the
            # posted script. It is reconstructed as Table To Table from the
            # arguments that survived (output workspace, output name, where
            # clause). The output table name is an assumption -- confirm.
            identical_output_table = f"Identical_{sanitized_value}"
            arcpy.TableToTable_conversion(find_identical_output, arcpy.env.workspace, identical_output_table, where_clause="FEAT_SEQ >= 1")
        finally:
            # Reset the tolerance even when a tool fails, so it cannot leak
            # into the next type.
            arcpy.env.XYTolerance = None

        print(f"{selection_count} features of type '{value}' processed.")
finally:
    # Leave the layer without a selection, whether or not the run succeeded.
    arcpy.SelectLayerByAttribute_management(input_feature_layer, "CLEAR_SELECTION")

View solution in original post

SamSzotkowski · ‎08-29-2023

As I understand, your "Railway_POI_1" layer has 6 different types of point features you're interested in. Any "Depot" type points should be considered identical if they are within 100 units of each other, for example. As output we want all "identical" points to collapse to a single point.

I'm not sure exactly what's causing your issue; you might get different results by using the output_record_option argument.

But here's also a different way you could approach the problem, a bit more "visual" and easy to wrap my head around each step. Could definitely be simplified but hopefully easy enough to follow:

For each item in your location_domain dictionary, select all features of that type and store the selection in a variable.
Run Pairwise Buffer on the selection, using the appropriate distance for each feature type. Store the buffer output in memory.
Run Pairwise Dissolve on the buffer polygons. Store this in memory as well.
For each dissolve polygon, you could either then grab the centroid as your output, or spatial join the dissolve polygons to the selected points and delete all but one point in each polygon.

Code example:

import arcpy

# For each location type: select its points, buffer and dissolve them so that
# nearby points merge into one polygon, then write one point per polygon.
in_points = "Railway_POI_1"
type_field = "location_type"
output_workspace = r"GDB_location.gdb"

# Distance within which two points of a type count as identical.
# NOTE(review): the values are bare numbers, so they are presumably read in
# the units of the input's coordinate system -- confirm.
location_domain = {
    "Depot": 100,
    "Station": 100,
    "Stop": 5,
    "Yard": 100,
    "Switch": 5,
    "Level_Crossing": 5,
}

# Scratch datasets, reused on every iteration.
buffer = "memory\\buffer"
dissolve = "memory\\dissolve"

for location_type, xy_tolerance in location_domain.items():
    # The type is a text value, so it must be quoted in the where clause.
    where = f"{type_field} = '{location_type}'"
    pt_selection = arcpy.management.SelectLayerByAttribute(
        in_points, "NEW_SELECTION", where
    )

    # Two buffers overlap when their centres are within twice the buffer
    # distance, so buffer by half the tolerance to merge points that are
    # within the full tolerance of each other.
    arcpy.analysis.PairwiseBuffer(pt_selection, buffer, xy_tolerance / 2)

    arcpy.analysis.PairwiseDissolve(buffer, dissolve, multi_part="SINGLE_PART")

    out_points = arcpy.CreateUniqueName(
        f"Identical_{location_type}", output_workspace
    )
    arcpy.management.FeatureToPoint(dissolve, out_points, "INSIDE")

    # Delete only the scratch data created here, not the whole memory
    # workspace, which may hold other datasets.
    for scratch in (buffer, dissolve):
        arcpy.management.Delete(scratch)

# Leave the layer without a selection once every type has been processed.
arcpy.management.SelectLayerByAttribute(in_points, "CLEAR_SELECTION")

ThomasPlazek · ‎11-01-2023

Thanks for your reply! As it turns out, the Find Identical tool has some limitations and does not allow you to use different xy tolerance values for different types within the same feature class. My solution was to break down each type into its own feature dataset and assign a global xy tolerance to each type. The SRID was set to WGS84 in this case, but it could be changed as needed.

import arcpy
import re

# Split the input point layer by type and run Find Identical on each type
# with its own XY tolerance. Each type is copied into its own feature dataset
# first, and the tolerance is applied through the geoprocessing environment.
arcpy.env.workspace = r"host_gdb_location"
input_feature_layer = "Railroad_POI_1"
field_name = "TYPE1"

# Collect the distinct, non-null values of the type field.
with arcpy.da.SearchCursor(input_feature_layer, field_name) as cursor:
    unique_values = {row[0] for row in cursor if row[0] is not None}

# XY tolerance per type, looked up by the sanitized type name.
xy_tolerances = {
    "Depot": "100 Meters",
    "Station": "100 Meters",
    "Stop": "5 Meters",
    "Yard": "100 Meters",
    "Switch": "5 Meters",
    "Level_Crossing": "5 Meters",
}

# The delimited field name is the same for every type, so build it once.
delimited_field = arcpy.AddFieldDelimiters(input_feature_layer, field_name)

try:
    # Sorted so the types are processed in the same order on every run.
    for value in sorted(unique_values):
        # Double embedded single quotes so the value stays a valid SQL literal.
        escaped_value = value.replace("'", "''")
        sql_expression = f"{delimited_field} = '{escaped_value}'"

        arcpy.SelectLayerByAttribute_management(input_feature_layer, "NEW_SELECTION", sql_expression)
        selection_count = int(arcpy.GetCount_management(input_feature_layer).getOutput(0))

        if selection_count == 0:
            print(f"No features of type '{value}' selected.")
            continue

        # Keep only characters that are safe in a geodatabase dataset name.
        sanitized_value = re.sub(r'[^a-zA-Z0-9_]', '', value)[:40]
        if not sanitized_value:
            sanitized_value = "Unknown"

        feature_dataset_name = f"FeatureDataset_{sanitized_value}"
        arcpy.CreateFeatureDataset_management(arcpy.env.workspace, feature_dataset_name, arcpy.SpatialReference(4326))

        feature_class_name = f"FeatureClass_{sanitized_value}"
        feature_class_path = arcpy.CreateFeatureclass_management(feature_dataset_name, feature_class_name, "POINT", "", "DISABLED", "DISABLED")

        # The layer carries the selection made above, so only the features of
        # the current type are copied.
        # NOTE(review): this writes to the feature class created on the line
        # above, so it presumably relies on overwriteOutput being enabled --
        # confirm.
        arcpy.management.CopyFeatures(input_feature_layer, feature_class_path)

        xy_tolerance = xy_tolerances.get(sanitized_value)
        try:
            if xy_tolerance:
                arcpy.env.XYTolerance = xy_tolerance

            find_identical_output = f"FindIdentical_{feature_class_name}"
            arcpy.FindIdentical_management(feature_class_path, find_identical_output, ["Shape"], output_record_option="ONLY_DUPLICATES")

            # NOTE(review): the start of this call was missing from the
            # posted script. It is reconstructed as Table To Table from the
            # arguments that survived (output workspace, output name, where
            # clause). The output table name is an assumption -- confirm.
            identical_output_table = f"Identical_{sanitized_value}"
            arcpy.TableToTable_conversion(find_identical_output, arcpy.env.workspace, identical_output_table, where_clause="FEAT_SEQ >= 1")
        finally:
            # Reset the tolerance even when a tool fails, so it cannot leak
            # into the next type.
            arcpy.env.XYTolerance = None

        print(f"{selection_count} features of type '{value}' processed.")
finally:
    # Leave the layer without a selection, whether or not the run succeeded.
    arcpy.SelectLayerByAttribute_management(input_feature_layer, "CLEAR_SELECTION")