Hi, I am working on trying to write a script to identify point data within a certain distance of each other. This point data is broken down into 6 types as well. These types have different xy tolerance values, so I am trying to assign the xy tolerance to each type, then loop through each of them and output the results into a separate table for each type. The issue is that ArcPro is just putting all of the original points into every output table and assigning them all to the same FEAT_SEQ. Every output table is identical. Any help would be appreciated!
*note: for privacy reasons, I have redacted the output gdb location
----------------------------------------------------------------------------------------------------------------------------
import arcpy
output_workspace = r"GDB_location.gdb"
location_domain = {
"Depot": 100,
"Station": 100,
"Stop": 5,
"Yard": 100,
"Switch": 5,
"Level_Crossing": 5,
}
try:
for location_type, xy_tolerance in location_domain.items():
output_feature_class = arcpy.CreateUniqueName(f"Identical_{location_type}", output_workspace)
arcpy.management.FindIdentical('Railway_POI_1', output_feature_class, ["shape"], location_domain[location_type])
print(f"Duplicated records for {location_type} have been identified and saved to {output_feature_class}.")
except arcpy.ExecuteError:
print(arcpy.GetMessages())
Solved! Go to Solution.
Thanks for your reply! As it turns out, the Find Identical tool has some limitations and does not allow you to use different xy tolerance values for different types within the same feature class. My solution was to break down each type into its own feature dataset and assign a global xy tolerance to each type. The SRID was set to WGS84 in this case, but it could be changed as needed.
import arcpy
import re
arcpy.env.workspace = r"host_gdb_location"
input_feature_layer = "Railroad_POI_1"
field_name = "TYPE1"
unique_values = set()
with arcpy.da.SearchCursor(input_feature_layer, field_name) as cursor:
for row in cursor:
if row[0] is not None: # Exclude null values
unique_values.add(row[0])
xy_tolerances = {
"Depot": "100 Meters",
"Station": "100 Meters" ,
"Stop": "5 Meters",
"Yard": "100 Meters",
"Switch": "5 Meters",
"Level_Crossing": "5 Meters"
}
for value in unique_values:
sql_expression = f"{arcpy.AddFieldDelimiters(input_feature_layer, field_name)} = '{value}'"
arcpy.SelectLayerByAttribute_management(input_feature_layer, "NEW_SELECTION", sql_expression)
selection_count = int(arcpy.GetCount_management(input_feature_layer).getOutput(0))
if selection_count > 0:
sanitized_value = re.sub(r'[^a-zA-Z0-9_]', '', value)[:40]
if not sanitized_value:
sanitized_value = "Unknown"
feature_dataset_name = f"FeatureDataset_{sanitized_value}"
arcpy.CreateFeatureDataset_management(arcpy.env.workspace, feature_dataset_name, arcpy.SpatialReference(4326))
feature_class_name = f"FeatureClass_{sanitized_value}"
feature_class_path = arcpy.CreateFeatureclass_management(feature_dataset_name, feature_class_name, "POINT", "", "DISABLED", "DISABLED")
arcpy.management.CopyFeatures(input_feature_layer, feature_class_path)
xy_tolerance = xy_tolerances.get(sanitized_value, None)
if xy_tolerance:
arcpy.env.XYTolerance = xy_tolerance
find_identical_output = f"FindIdentical_{feature_class_name}"
arcpy.FindIdentical_management(feature_class_path, find_identical_output, ["Shape"], output_record_option="ONLY_DUPLICATES" )
arcpy.env.workspace, identical_output_table, where_clause="FEAT_SEQ >= 1")
arcpy.env.XYTolerance = None
print(f"{selection_count} features of type '{value}' processed.")
else:
print(f"No features of type '{value}' selected.")
arcpy.SelectLayerByAttribute_management(input_feature_layer, "CLEAR_SELECTION")
As I understand, your "Railway_POI_1" layer has 6 different types of point features you're interested in. Any "Depot" type points should be considered identical if they are within 100 units of each other, for example. As output we want all "identical" points to collapse to a single point.
I'm not sure what's causing your issue exactly, maybe you might get different results using the output_record_option argument.
But here's also a different way you could approach the problem, a bit more "visual" and easy to wrap my head around each step. Could definitely be simplified but hopefully easy enough to follow:
Code example:
import arcpy
in_points = "Railway_POI_1"
type_field = "location_type"
output_workspace = r"GDB_location.gdb"
location_domain = {
"Depot": 100,
"Station": 100,
"Stop": 5,
"Yard": 100,
"Switch": 5,
"Level_Crossing": 5,
}
for location_type, xy_tolerance in location_domain.items():
where = f"{type_field} = {location_type}"
pt_selection = arcpy.management.SelectLayerByAttribute(
in_points, "NEW_SELECTION", where
)
buffer = "memory\\buffer"
arcpy.analysis.PairwiseBuffer(pt_selection, buffer, xy_tolerance)
dissolve = "memory\\dissolve"
arcpy.analysis.PairwiseDissolve(buffer, dissolve, multi_part="SINGLE_PART")
out_points = arcpy.CreateUniqueName(
f"Identical_{location_type}", output_workspace
)
arcpy.management.FeatureToPoint(dissolve, out_points, "INSIDE")
arcpy.management.Delete("memory")
Thanks for your reply! As it turns out, the Find Identical tool has some limitations and does not allow you to use different xy tolerance values for different types within the same feature class. My solution was to break down each type into its own feature dataset and assign a global xy tolerance to each type. The SRID was set to WGS84 in this case, but it could be changed as needed.
import arcpy
import re
arcpy.env.workspace = r"host_gdb_location"
input_feature_layer = "Railroad_POI_1"
field_name = "TYPE1"
unique_values = set()
with arcpy.da.SearchCursor(input_feature_layer, field_name) as cursor:
for row in cursor:
if row[0] is not None: # Exclude null values
unique_values.add(row[0])
xy_tolerances = {
"Depot": "100 Meters",
"Station": "100 Meters" ,
"Stop": "5 Meters",
"Yard": "100 Meters",
"Switch": "5 Meters",
"Level_Crossing": "5 Meters"
}
for value in unique_values:
sql_expression = f"{arcpy.AddFieldDelimiters(input_feature_layer, field_name)} = '{value}'"
arcpy.SelectLayerByAttribute_management(input_feature_layer, "NEW_SELECTION", sql_expression)
selection_count = int(arcpy.GetCount_management(input_feature_layer).getOutput(0))
if selection_count > 0:
sanitized_value = re.sub(r'[^a-zA-Z0-9_]', '', value)[:40]
if not sanitized_value:
sanitized_value = "Unknown"
feature_dataset_name = f"FeatureDataset_{sanitized_value}"
arcpy.CreateFeatureDataset_management(arcpy.env.workspace, feature_dataset_name, arcpy.SpatialReference(4326))
feature_class_name = f"FeatureClass_{sanitized_value}"
feature_class_path = arcpy.CreateFeatureclass_management(feature_dataset_name, feature_class_name, "POINT", "", "DISABLED", "DISABLED")
arcpy.management.CopyFeatures(input_feature_layer, feature_class_path)
xy_tolerance = xy_tolerances.get(sanitized_value, None)
if xy_tolerance:
arcpy.env.XYTolerance = xy_tolerance
find_identical_output = f"FindIdentical_{feature_class_name}"
arcpy.FindIdentical_management(feature_class_path, find_identical_output, ["Shape"], output_record_option="ONLY_DUPLICATES" )
arcpy.env.workspace, identical_output_table, where_clause="FEAT_SEQ >= 1")
arcpy.env.XYTolerance = None
print(f"{selection_count} features of type '{value}' processed.")
else:
print(f"No features of type '{value}' selected.")
arcpy.SelectLayerByAttribute_management(input_feature_layer, "CLEAR_SELECTION")