#------------------------------------------------------------------------------- # Name: LongestLine.py # Purpose: find the longest inside line across a polygon # Limitations: only single part polygons, no donuts # Author: kimo # # Created: 29/04/2014 # 30/04/2014 changed crossing test to each features # exclude triangles (use trigonometry for this degenerate case) # longest line only tested between vertices may need to densify first # # Copyright: (c) kimo 2014 # Licence: Creative Commons 3.0 New Zealand #------------------------------------------------------------------------------- import arcpy import sys import itertools import datetime gdb = sys.argv[1] fcPoly = 'forest' fcOut = 'fan' arcpy.env.workspace = gdb arcpy.env.overwriteOutput = True begintime = datetime.datetime.now() desc = arcpy.Describe(fcPoly) sr = desc.spatialReference arcpy.env.outputCoordinateSystem = sr if arcpy.Exists(fcOut): arcpy.management.Delete(fcOut) arcpy.management.CreateFeatureclass(gdb,fcOut,"POLYLINE",spatial_reference=sr) arcpy.management.AddField(fcOut,"ORIGID","LONG") icur = arcpy.da.InsertCursor(fcOut,['ORIGID','SHAPE@']) with arcpy.da.SearchCursor(fcPoly,["OBJECTID","SHAPE@"]) as cur: for row in cur: gPoly = row[1] id = row[0] print "Begin id {} parts {} points {} type {}".format(id,gPoly.partCount,gPoly.pointCount,gPoly.type) if gPoly.partCount == 1 and gPoly.pointCount > 4: # exclude triangles starttime = datetime.datetime.now() lstV = [] for part in gPoly: for v in part: lstV.append(v) oCount = 0 pCount = 0 rCount = 0 maxlength = -1 # use itertools to get unique combinations of pairs # leave in lines coincident with boundary segments # because they will always be less than max length if > triangle for vPair in itertools.combinations(lstV[:-1],2): # exclude last duplicated poly vertex aLine = arcpy.Polyline(arcpy.Array(vPair),sr) # dont even bother doing spatial test if already shorter for speed if aLine.length > maxlength : if gPoly.contains(aLine): # not lines crossing edges mLine = aLine maxlength = aLine.length pCount+=1 else : oCount+=1 else: rCount+=1 print "Max swapped {} Overlap rejects {} Already shorter {}".format(pCount,oCount,rCount) rec = (id,mLine) icur.insertRow(rec) msg = "{} {} time {}".format(id,maxlength,datetime.datetime.now() - starttime) print msg arcpy.AddMessage(msg) del icur print "done",datetime.datetime.now() - begintime
EDIT: Thanks to @RobertEisler for providing an example showing my transect_type = "PROPER" to be incorrect (the dotted black lines in all the pictures). As far as I am aware, the "BOUNDARY" option is still the optimal algorithm.
As @KimOllivier already noted, only checking vertex-to-vertex is not a sufficient algorithm to solve this problem.
Here is my approach. I am working under the assumption that the longest line will contain two vertices, but does not necessarily end at those two vertices. The user has the option to include the edges of shapes, or require the transect to be entirely internal [this is wrong, see EDIT at the top and replies down below] (red vs. dotted, respectively; sometimes these are the same). Multipart polygons only get one transect generated, so that's why a few shapes appear to be missing lines.
Shapes involving true curves have transects estimated using "densify" to generate actual vertices, so those are only close approximations. See the second screenshot, where the line should presumably be right up against the hole. Would be happy to hear suggestions on this.
These are test polygons, but on a real-world parcel dataset where the number of vertex-to-vertex combinations within a polygon can get into the hundreds of thousands, my version can take a while. Seemed like the bottleneck was converting every vertex coordinate combination to a Polyline object, so I would be very interested in optimizations if anyone has suggestions.
To summarize--script generates lines from all vertex-to-vertex pairs, extends those lines a bit (to cover cases like the castle shape at the top of the first below), clips them back to the polygon boundary, and then finds the longest in the list of lines within the given geometry.
EDIT: The "stopwatch as sw" module in this code is a custom time/message reporting module. Delete it or replace those with standard "print()" if anyone ends up using this.
Transect line outputs.
Longest transect through a feature composed of curved boundaries.
"""LONGEST TRANSECT"""
#---------------------------------------------------------------------------------------#
# IMPORTS
import collections
import itertools
import math
import os
import sys
import traceback
import uuid
import arcpy
import stopwatch as sw
#---------------------------------------------------------------------------------------#
# SETTINGS
arcpy.env.overwriteOutput = True
arcpy.env.workspace = **YOUR GEODATABASE PATH**
#---------------------------------------------------------------------------------------#
# INPUTS
polygons = "POLYGON"
polygon_id_fld = "POLY_GUID"
transects = "TRANSECT_BOUNDARY_TEST"
# Boundary = transect can be entire edge. Proper = transect must be witihin polygon.
transect_type = "BOUNDARY" # ["BOUNDARY", "PROPER"]
# Only save the longest line for each polygon?
discard_nonmax_transects = False
#---------------------------------------------------------------------------------------#
# FUNCTIONS
def create_geom_dict(poly_fc, poly_id_fld, generalize=False):
"""Read poly geoms to information dict.
returns {GUID: {POLYGON: <poly_obj>, POINTS: [<pnt_obj>, <pnt_obj>]}, ...}"""
poly_geom_dict = collections.defaultdict(dict)
with arcpy.da.SearchCursor(poly_fc, [poly_id_fld, "SHAPE@"]) as scurs:
# For each row in the FC table (could be single, multi, have holes, etc.).
for guid, geom in scurs:
poly_geom_dict[guid]["POLYGON"] = geom
# Curves to segments; generalize segments to speed process (?).
if geom.hasCurves:
geom = geom.generalize(1)
elif generalize:
geom = geom.generalize(0.5)
# For each part of the geometry:
for i, part in enumerate(geom, start=1):
print(f" Part {i}")
# For each point in this part of the geometry:
for j, point in enumerate(part, start=1):
if point:
poly_geom_dict[guid].setdefault("POINTS", []).append(point)
else:
print(f" Point {j:>2}--Null point. Start interior ring.")
stopwatch.stamptime(msg=f"READ {guid} GEOMETRY")
return poly_geom_dict
def line_within_polygon(line, polygon, relation):
"""If the line is outside the polygon, or of zero length, return False."""
if not line.within(polygon, relation=relation) or line.length == 0:
return False
else:
return True
#---------------------------------------------------------------------------------------#
# MAIN
stopwatch = sw.Stopwatch()
stopwatch.starttime(msg="PROCESS STARTING")
spat_ref = arcpy.da.Describe(polygons)["spatialReference"].factoryCode
poly_geom_dict = create_geom_dict(poly_fc=polygons, poly_id_fld=polygon_id_fld)
# Remove duplicate points; added due to closing geoms, maybe rings?
for guid in poly_geom_dict:
unique_coors = set((p.X, p.Y) for p in poly_geom_dict[guid]["POINTS"])
poly_geom_dict[guid]["POINTS"] = [arcpy.Point(*coor) for coor in unique_coors]
# Create transect FC. Add Fields.
trans_fc = arcpy.management.CreateFeatureclass(out_path=arcpy.env.workspace,
out_name=transects,
geometry_type="POLYLINE",
spatial_reference=spat_ref)
flds = [("POLY_GUID", "GUID"), ("LINE_GUID", "GUID"), ("LONGEST", "TEXT")]
for fld_name, fld_type in flds:
arcpy.management.AddField(in_table=trans_fc, field_name=fld_name,
field_type=fld_type, field_length=1)
stopwatch.stamptime(msg="CREATED TRANSECT FC\n")
# Walk through the dictionary making lines:
# These lists house intermediate geometries; review during testing.
test_multiparts = []
test_extensions = []
for guid, geom_dict in poly_geom_dict.items():
stopwatch.stamptime(msg=f"PROCESSING STARTED ON {guid}")
vert_pairs = len(list(itertools.combinations(iterable=geom_dict["POINTS"], r=2)))
print(f"VERTEX PAIRS TO PROCESS: {vert_pairs}")
line_geoms = []
for p1, p2 in itertools.combinations(iterable=geom_dict["POINTS"], r=2):
line = arcpy.Polyline(inputs=arcpy.Array([p1, p2]))
# WITHIN POLYGON
poly_geom = geom_dict["POLYGON"]
if not line_within_polygon(line, poly_geom, relation=transect_type):
continue
# EXTEND LINE
new_pnts = arcpy.Array()
# Extension distance is somewhat arbitrary. Extend line 1/4 perimeter
# of the Extent object. Seems to be enough (?).
extend_distance = poly_geom.extent.polygon.boundary().length / 4
angle = math.atan2(p2.Y - p1.Y, p2.X - p1.X)
new_pnts.add(arcpy.Point(p1.X - extend_distance * math.cos(angle),
p1.Y - extend_distance * math.sin(angle)))
new_pnts.add(arcpy.Point(p2.X + extend_distance * math.cos(angle),
p2.Y + extend_distance * math.sin(angle)))
# CLIP TO POLYGON
extended_line = arcpy.Polyline(new_pnts)
clipped_line = extended_line.intersect(poly_geom, 2)
test_extensions.append(extended_line)
# Get all points on line. >2 possible from edge intersections.
merged_array = arcpy.Array()
for part in clipped_line:
for point in part:
merged_array.add(point)
# Sort points along X, then Y. Since the line is straight, we don't
# need to do linear referencing or anything. Keep first/last only.
# Removes needless vertices and reorders points on line.
sort_arr = arcpy.Array(sorted(merged_array, key=lambda p: (p.X, p.Y)))
full_polyline = arcpy.Polyline(arcpy.Array([sort_arr[0], sort_arr[-1]]))
test_multiparts.append(full_polyline)
# If line extends over white space between multipart polygons.
# isclose() is used here for float precision issues.
if not math.isclose(full_polyline.length, clipped_line.length):
# Retain parts of extended lines that are still within polygons.
# Append each part as a separate line for accurate GDB lengths.
for edge_part in full_polyline.intersect(poly_geom, 2):
line_geoms.append(arcpy.Polyline(edge_part))
else:
# Otherwise, append the new line as-is.
line_geoms.append(full_polyline)
# Export intermediate geometries for testing review.
for geoms, fc_name in [(test_multiparts, "MULTIS"), (test_extensions, "EXTENSIONS")]:
arcpy.management.CopyFeatures(geoms, fc_name)
stopwatch.stamptime(msg=f"{guid} POLYLINES CREATED")
# Sort by length; option to retain only the max length lines, discard the rest.
line_geoms_by_length = sorted(line_geoms, key=lambda x: x.length, reverse=True)
stopwatch.stamptime(msg=f"{guid} TRANSECTS SORTED BY LENGTH")
if discard_nonmax_transects:
line_geoms_by_length = line_geoms_by_length[:1]
# Write lines for this polygon to the feature class. Create UUIDs; mark the longest.
flds = ["SHAPE@", "POLY_GUID", "LINE_GUID", "LONGEST"]
with arcpy.da.InsertCursor(trans_fc, flds) as icurs:
for i, line_geom in enumerate(line_geoms_by_length):
if i == 0:
icurs.insertRow([line_geom, guid, uuid.uuid4(), "X"])
else:
icurs.insertRow([line_geom, guid, uuid.uuid4(), None])
stopwatch.stamptime(msg=f"WROTE {guid} TRANSECTS\n")
# Remove records where the geometry is identical.
# This should only happen when an edge is extended through a polygon, and ends
# up being perfectly coincident with an internal line.
before_lines = int(arcpy.management.GetCount(transects)[0])
arcpy.management.DeleteIdentical(in_dataset=transects, fields="Shape")
after_lines = int(arcpy.management.GetCount(transects)[0])
stopwatch.stamptime(msg=f"{before_lines - after_lines} IDENTICAL LINE(S) REMOVED")
stopwatch.stamptime(msg="PROCESS COMPLETE")
I've put together a model that generates an approximation of the longest line contained within a polygon. It's quite similar to @VinceE 's solution but done through modelbuilder.
It iterates through a feature class and goes through each polygon with a unique ID and then generates points along the boundaries of the polygon at a specified distance, generates a table with all possible combinations of points and the distance between each pair, it then deletes the duplicates that are created as you end up with a row for a to b and one for b to a for each combination of points, from these points it generates lines between each pair, it then clips all the lines to the original polygon and selects the one with the maximum length.
Longest line model
See step by step below for how to set up the different parameters in the model.
Iterate Feature Selection (Optional)
Using this means the model will run through the same process for each polygon within a feature class that has a unique ID from the field you have selected. The model can also be run with just a feature class as input but it will ignore individual polygons within the feature class and will generate a single longest line from the whole feature. E.g I have been using this to find the longest line across each island in a cluster of islands. If using the iterator it will generate a line for each island, if just using feature class which has multiple islands stored in it I will end up with a single line across the longest island.
Generate points
Input feature is your polygon of interest. Set distance at which points should be placed.
Generate near
Input features and Near features are the same layer, the points that were generated in the previous step. Search radius should be large enough to encompass your whole polygon.
Delete Identical
Input is the table generated in the previous step.
Deletes half the records as each combination of points is calculated in the near table in one direction and then the other e.g. Y to X is one row and X to Y is a new row which is identical but in the opposite direction.
XY to line
Input is the table with duplicates removed from the previous step. Make sure to set spatial reference to the same as your map.
Pairwise clip
Clips lines generated in previous step to the original polygon. Input features is lines from previous step, Clip features is the polygon you started with.
Select
This selects the longest line from the set of lines you have generated and outputs as a feature class. The expression below has the format Field = (SELECT MAX(Field) FROM (Table)).
Curious about this approach--any chance you would be able to attach a copy of your toolbox here?