EDIT: Thanks to @RobertEisler for providing an example showing my transect_type = "PROPER" to be incorrect (the dotted black lines in all the pictures). As far as I am aware, the "BOUNDARY" option is still the optimal algorithm.
As @KimOllivier already noted, only checking vertex-to-vertex is not a sufficient algorithm to solve this problem.
Here is my approach. I am working under the assumption that the longest line will contain two vertices, but does not necessarily end at those two vertices. The user has the option to include the edges of shapes, or require the transect to be entirely internal [this is wrong, see EDIT at the top and replies down below] (red vs. dotted, respectively; sometimes these are the same). Multipart polygons only get one transect generated, so that's why a few shapes appear to be missing lines.
Shapes involving true curves have transects estimated using "densify" to generate actual vertices, so those are only close approximations. See the second screenshot, where the line should presumably be right up against the hole. Would be happy to hear suggestions on this.
These are test polygons, but on a real-world parcel dataset where the number of vertex-to-vertex combinations within a polygon can get into the hundreds of thousands, my version can take a while. Seemed like the bottleneck was converting every vertex coordinate combination to a Polyline object, so I would be very interested in optimizations if anyone has suggestions.
To summarize--script generates lines from all vertex-to-vertex pairs, extends those lines a bit (to cover cases like the castle shape at the top of the first below), clips them back to the polygon boundary, and then finds the longest in the list of lines within the given geometry.
EDIT: The "stopwatch as sw" module in this code is a custom time/message reporting module. Delete it or replace those with standard "print()" if anyone ends up using this.
Transect line outputs.
Longest transect through a feature composed of curved boundaries.
"""LONGEST TRANSECT"""
#---------------------------------------------------------------------------------------#
# IMPORTS
import collections
import itertools
import math
import os
import sys
import traceback
import uuid
import arcpy
import stopwatch as sw
#---------------------------------------------------------------------------------------#
# SETTINGS
arcpy.env.overwriteOutput = True
arcpy.env.workspace = **YOUR GEODATABASE PATH**
#---------------------------------------------------------------------------------------#
# INPUTS
polygons = "POLYGON"
polygon_id_fld = "POLY_GUID"
transects = "TRANSECT_BOUNDARY_TEST"
# Boundary = transect can be entire edge. Proper = transect must be witihin polygon.
transect_type = "BOUNDARY" # ["BOUNDARY", "PROPER"]
# Only save the longest line for each polygon?
discard_nonmax_transects = False
#---------------------------------------------------------------------------------------#
# FUNCTIONS
def create_geom_dict(poly_fc, poly_id_fld, generalize=False):
"""Read poly geoms to information dict.
returns {GUID: {POLYGON: <poly_obj>, POINTS: [<pnt_obj>, <pnt_obj>]}, ...}"""
poly_geom_dict = collections.defaultdict(dict)
with arcpy.da.SearchCursor(poly_fc, [poly_id_fld, "SHAPE@"]) as scurs:
# For each row in the FC table (could be single, multi, have holes, etc.).
for guid, geom in scurs:
poly_geom_dict[guid]["POLYGON"] = geom
# Curves to segments; generalize segments to speed process (?).
if geom.hasCurves:
geom = geom.generalize(1)
elif generalize:
geom = geom.generalize(0.5)
# For each part of the geometry:
for i, part in enumerate(geom, start=1):
print(f" Part {i}")
# For each point in this part of the geometry:
for j, point in enumerate(part, start=1):
if point:
poly_geom_dict[guid].setdefault("POINTS", []).append(point)
else:
print(f" Point {j:>2}--Null point. Start interior ring.")
stopwatch.stamptime(msg=f"READ {guid} GEOMETRY")
return poly_geom_dict
def line_within_polygon(line, polygon, relation):
"""If the line is outside the polygon, or of zero length, return False."""
if not line.within(polygon, relation=relation) or line.length == 0:
return False
else:
return True
#---------------------------------------------------------------------------------------#
# MAIN
stopwatch = sw.Stopwatch()
stopwatch.starttime(msg="PROCESS STARTING")
spat_ref = arcpy.da.Describe(polygons)["spatialReference"].factoryCode
poly_geom_dict = create_geom_dict(poly_fc=polygons, poly_id_fld=polygon_id_fld)
# Remove duplicate points; added due to closing geoms, maybe rings?
for guid in poly_geom_dict:
unique_coors = set((p.X, p.Y) for p in poly_geom_dict[guid]["POINTS"])
poly_geom_dict[guid]["POINTS"] = [arcpy.Point(*coor) for coor in unique_coors]
# Create transect FC. Add Fields.
trans_fc = arcpy.management.CreateFeatureclass(out_path=arcpy.env.workspace,
out_name=transects,
geometry_type="POLYLINE",
spatial_reference=spat_ref)
flds = [("POLY_GUID", "GUID"), ("LINE_GUID", "GUID"), ("LONGEST", "TEXT")]
for fld_name, fld_type in flds:
arcpy.management.AddField(in_table=trans_fc, field_name=fld_name,
field_type=fld_type, field_length=1)
stopwatch.stamptime(msg="CREATED TRANSECT FC\n")
# Walk through the dictionary making lines:
# These lists house intermediate geometries; review during testing.
test_multiparts = []
test_extensions = []
for guid, geom_dict in poly_geom_dict.items():
stopwatch.stamptime(msg=f"PROCESSING STARTED ON {guid}")
vert_pairs = len(list(itertools.combinations(iterable=geom_dict["POINTS"], r=2)))
print(f"VERTEX PAIRS TO PROCESS: {vert_pairs}")
line_geoms = []
for p1, p2 in itertools.combinations(iterable=geom_dict["POINTS"], r=2):
line = arcpy.Polyline(inputs=arcpy.Array([p1, p2]))
# WITHIN POLYGON
poly_geom = geom_dict["POLYGON"]
if not line_within_polygon(line, poly_geom, relation=transect_type):
continue
# EXTEND LINE
new_pnts = arcpy.Array()
# Extension distance is somewhat arbitrary. Extend line 1/4 perimeter
# of the Extent object. Seems to be enough (?).
extend_distance = poly_geom.extent.polygon.boundary().length / 4
angle = math.atan2(p2.Y - p1.Y, p2.X - p1.X)
new_pnts.add(arcpy.Point(p1.X - extend_distance * math.cos(angle),
p1.Y - extend_distance * math.sin(angle)))
new_pnts.add(arcpy.Point(p2.X + extend_distance * math.cos(angle),
p2.Y + extend_distance * math.sin(angle)))
# CLIP TO POLYGON
extended_line = arcpy.Polyline(new_pnts)
clipped_line = extended_line.intersect(poly_geom, 2)
test_extensions.append(extended_line)
# Get all points on line. >2 possible from edge intersections.
merged_array = arcpy.Array()
for part in clipped_line:
for point in part:
merged_array.add(point)
# Sort points along X, then Y. Since the line is straight, we don't
# need to do linear referencing or anything. Keep first/last only.
# Removes needless vertices and reorders points on line.
sort_arr = arcpy.Array(sorted(merged_array, key=lambda p: (p.X, p.Y)))
full_polyline = arcpy.Polyline(arcpy.Array([sort_arr[0], sort_arr[-1]]))
test_multiparts.append(full_polyline)
# If line extends over white space between multipart polygons.
# isclose() is used here for float precision issues.
if not math.isclose(full_polyline.length, clipped_line.length):
# Retain parts of extended lines that are still within polygons.
# Append each part as a separate line for accurate GDB lengths.
for edge_part in full_polyline.intersect(poly_geom, 2):
line_geoms.append(arcpy.Polyline(edge_part))
else:
# Otherwise, append the new line as-is.
line_geoms.append(full_polyline)
# Export intermediate geometries for testing review.
for geoms, fc_name in [(test_multiparts, "MULTIS"), (test_extensions, "EXTENSIONS")]:
arcpy.management.CopyFeatures(geoms, fc_name)
stopwatch.stamptime(msg=f"{guid} POLYLINES CREATED")
# Sort by length; option to retain only the max length lines, discard the rest.
line_geoms_by_length = sorted(line_geoms, key=lambda x: x.length, reverse=True)
stopwatch.stamptime(msg=f"{guid} TRANSECTS SORTED BY LENGTH")
if discard_nonmax_transects:
line_geoms_by_length = line_geoms_by_length[:1]
# Write lines for this polygon to the feature class. Create UUIDs; mark the longest.
flds = ["SHAPE@", "POLY_GUID", "LINE_GUID", "LONGEST"]
with arcpy.da.InsertCursor(trans_fc, flds) as icurs:
for i, line_geom in enumerate(line_geoms_by_length):
if i == 0:
icurs.insertRow([line_geom, guid, uuid.uuid4(), "X"])
else:
icurs.insertRow([line_geom, guid, uuid.uuid4(), None])
stopwatch.stamptime(msg=f"WROTE {guid} TRANSECTS\n")
# Remove records where the geometry is identical.
# This should only happen when an edge is extended through a polygon, and ends
# up being perfectly coincident with an internal line.
before_lines = int(arcpy.management.GetCount(transects)[0])
arcpy.management.DeleteIdentical(in_dataset=transects, fields="Shape")
after_lines = int(arcpy.management.GetCount(transects)[0])
stopwatch.stamptime(msg=f"{before_lines - after_lines} IDENTICAL LINE(S) REMOVED")
stopwatch.stamptime(msg="PROCESS COMPLETE")