Pythonic Code Structure

PeterWilson · ‎02-24-2016

I'm busy rewriting my hydrological model python scripts into functions that are modular and found that my original python script runs twice as fast as my updated python script. The only reason I can find is that the original scripts outputs are saved in memory before being passed onto the next function. My new python script is writing and reading each output from disk. I might be incorrect in my understanding. If anyone can give me guidance on improving my new python script in both pythonic structure and performance.

Original Python Script

'''
Created on May 20, 2015


@author: PeterW
'''
# import system modules and site packages
import os
import time
import arcpy
import ArcHydroTools


# check out Spatial Analyst Extension
arcpy.CheckOutExtension("Spatial")


# set environment settings
arcpy.env.overwriteOutput = True


# set input and output arguments
raw = r"E:\Python\Temp\Model04\DEM\raw"
rasWs = r"E:\Python\Temp\Model04\Layers04"
outWs = r"E:\Python\Temp\Model04\Model04.gdb"


# Processing time
def hms_string(sec_elapsed):
    h = int(sec_elapsed / (60 * 60))
    m = int(sec_elapsed % (60 * 60) / 60)
    s = sec_elapsed % 60
    return "{}h:{:>02}m:{:>05.2f}s".format(h, m, s)


start_time1 = time.time()


# archydro variables
fill_sinks = os.path.join(rasWs, "fil")
flow_dir = os.path.join(rasWs, "fdr")
flow_acc = os.path.join(rasWs, "fac")
streams = os.path.join(rasWs, "str")
stream_seg = os.path.join(rasWs, "strlnk")
catchment_grid = os.path.join(rasWs, "cat")
catchment_poly = os.path.join(outWs, "Layers","Catchment")
drainage_line = os.path.join(outWs, "Layers", "DrainageLine")
adj_catch = os.path.join(outWs, "Layers", "AdjointCatchment")


try:
    # calculate the fill sinks
    arcpy.AddMessage("Processing Fill Sinks")
    ArcHydroTools.FillSinks(raw, fill_sinks)
    
    # calculate the flow direction
    arcpy.AddMessage("Processing Flow Direction")
    ArcHydroTools.FlowDirection(fill_sinks, flow_dir)
        
    # calculate the flow accumulation
    arcpy.AddMessage("Processing Flow Accumulation")
    ArcHydroTools.FlowAccumulation(flow_dir, flow_acc)
    
    # calculate the maximum flow accumulation
    arcpy.AddMessage("Processing Flow Accumulation Maximum")
    maxcellsResult = arcpy.GetRasterProperties_management(flow_acc, "MAXIMUM")
    maxcells = maxcellsResult.getOutput(0)
    print maxcells
    
    # calculate the stream threshold number of cells
    arcpy.AddMessage("Processing Stream Threshold")
    stream_threshold_numcells = (int(maxcells)*0.125/100)
    print stream_threshold_numcells
    
    # calculate the stream definition
    arcpy.AddMessage("Processing Stream Definition")
    ArcHydroTools.StreamDefinition(flow_acc, stream_threshold_numcells, streams)
    
    # calculate the stream segmentation
    arcpy.AddMessage("Processing Stream Segmentation")
    ArcHydroTools.StreamSegmentation(streams, flow_dir, stream_seg)
    
    # calculate the catchment grid delineation
    arcpy.AddMessage("Processing Catchment Grid Delineation")
    ArcHydroTools.CatchmentGridDelineation(flow_dir, stream_seg, catchment_grid)
    
    # calculate the catchment polygons from the catchment grid
    arcpy.AddMessage("Processing Catchment Polygons")
    ArcHydroTools.CatchmentPolyProcessing(catchment_grid, catchment_poly)
    
    # calculate the drainage lines from the stream segmentation grid
    arcpy.AddMessage("Processing DrainageLines")
    ArcHydroTools.DrainageLineProcessing(stream_seg, flow_dir, drainage_line)
    
    # calculate the adjoint catchment polygons
    arcpy.AddMessage("Processing Ajdoint Catchments")
    ArcHydroTools.AdjointCatchment(drainage_line, catchment_poly, adj_catch)
    
    arcpy.AddMessage("Completed Processing archydro Main Model")


except:
    print(arcpy.GetMessages(2))
    pass


# Determine the time take to process hydrological characteristics
end_time1 = time.time()
print ("It took {} to process hydrological characteristics".format(hms_string(end_time1 - start_time1)))


arcpy.CheckInExtension("Spatial")

New Python Script

'''
Created on Feb 24, 2016


@author: PeterW
'''
# import system modules and site packages
import time
from pathlib import Path
import arcpy
import ArcHydroTools


# check out extension
arcpy.CheckOutExtension("Spatial")


# set environment settings
arcpy.env.overwriteOutput = True


# set input and output arguments
dem = r"E:\Python\Temp\Model04\DEM\raw"
raster_workspace = r"E:\Python\Temp\Model04\Layers04"
fgdb = Path(r"E:\Python\Temp\Model04\Model04.gdb")


# Processing time
def hms_string(sec_elapsed):
    h = int(sec_elapsed / (60 * 60))
    m = int(sec_elapsed % (60 * 60) / 60)
    s = sec_elapsed % 60
    return "{}h:{:>02}m:{:>05.2f}s".format(h, m, s)


start_time1 = time.time()


# generate fill sinks grid
def  fill_sinks(dem, raster_workspace):
    fill_sinks = "{0}\\{1}".format(raster_workspace, "fil")
    arcpy.AddMessage("Processing Fill Sinks")
    ArcHydroTools.FillSinks(dem, fill_sinks)

fill_sinks(dem, raster_workspace)


# generate flow direction grid
def flow_direction(raster_workspace):
    fill_sinks = "{0}\\{1}".format(raster_workspace, "fil")
    flow_direction = "{0}\\{1}".format(raster_workspace, "fdr")
    arcpy.AddMessage("Processing Flow Direction")
    ArcHydroTools.FlowDirection(fill_sinks, flow_direction)
  
flow_direction(raster_workspace)


# generate flow accumulation grid
def flow_accumulation(raster_workspace):
    flow_direction = "{0}\\{1}".format(raster_workspace, "fdr")
    flow_accumulation = "{0}\\{1}".format(raster_workspace, "fac")
    arcpy.AddMessage("Processing Flow Accumulation")
    ArcHydroTools.FlowAccumulation(flow_direction, flow_accumulation)


flow_accumulation(raster_workspace)


# calculate stream threshold based on 0.5% of maximum flow accumulation
def stream_threshold(raster_workspace):
    flow_accumulation = "{0}\\{1}".format(raster_workspace, "fac")   
    arcpy.AddMessage("Processing Flow Stream Threshold")
    maxcellsResult = arcpy.GetRasterProperties_management(flow_accumulation, "MAXIMUM")
    maxcells = maxcellsResult.getOutput(0)
    arcpy.AddMessage("{} Maximum Cells".format(maxcells))
    stream_threshold_numcells = (int(maxcells)*0.5/100)
    arcpy.AddMessage("{} Stream Threshold".format(stream_threshold_numcells))
    return stream_threshold_numcells


numcells = stream_threshold(raster_workspace)


# generate the stream definition grid
def stream_definition(raster_workspace):
    flow_accumulation = "{0}\\{1}".format(raster_workspace, "fac")
    stream = "{0}\\{1}".format(raster_workspace, "str")
    arcpy.AddMessage("Processing Stream Definition")
    ArcHydroTools.StreamDefinition(flow_accumulation, numcells, stream)


stream_definition(raster_workspace)


# generate the stream segmentation grid
def stream_segmentation(raster_workspace):
    stream = "{0}\\{1}".format(raster_workspace, "str")
    flow_direction = "{0}\\{1}".format(raster_workspace, "fac")
    stream_link = "{0}\\{1}".format(raster_workspace, "strlnk")
    arcpy.AddMessage("Processing Stream Segmentation")
    ArcHydroTools.StreamSegmentation(stream, flow_direction, stream_link)


stream_segmentation(raster_workspace)


# calculate the catchment grid delineation
def catchment_grid(raster_workspace):
    flow_direction = "{0}\\{1}".format(raster_workspace, "fdr")
    stream_link = "{0}\\{1}".format(raster_workspace, "strlnk")
    catchment_grid = "{0}\\{1}".format(raster_workspace, "cat")
    arcpy.AddMessage("Processing Catchment Grid Delineation")
    ArcHydroTools.CatchmentGridDelineation(flow_direction, stream_link, catchment_grid)


catchment_grid(raster_workspace)


# calculate the catchment polygons from the catchment grid
def catchment_polygon(raster_workspace, fgdb):
    catchment_grid = "{0}\\{1}".format(raster_workspace, "cat")
    catchment_polygon = "{0}\\{1}".format(Path(fgdb, "Layers"), "Catchment")
    arcpy.AddMessage("Processing Catchment Polygon")
    ArcHydroTools.CatchmentPolyProcessing(catchment_grid, catchment_polygon)


catchment_polygon(raster_workspace, fgdb)


# calculate the drainage lines from the stream segmentation grid
def drainage_line(raster_workspace, fgdb):
    stream_link = "{0}\\{1}".format(raster_workspace, "strlnk")
    flow_direction = "{0}\\{1}".format(raster_workspace, "fdr")
    drainage_line = "{0}\\{1}".format(Path(fgdb, "Layers"), "DrainageLine")
    arcpy.AddMessage("Processing DrainageLine")
    ArcHydroTools.DrainageLineProcessing(stream_link, flow_direction, drainage_line)


drainage_line(raster_workspace, fgdb)


# calculate the adjoint catchment polygons
def adjoint_catchment(fgdb):
    drainage_line = "{0}\\{1}".format(Path(fgdb, "Layers"), "DrainageLine")
    catchment_polygon = "{0}\\{1}".format(Path(fgdb, "Layers"), "Catchment")
    adjoint_catchment = "{0}\\{1}".format(Path(fgdb, "Layers"), "AdjointCatchment")
    arcpy.AddMessage("Processing Adjoint Catchment")
    ArcHydroTools.AdjointCatchment(drainage_line, catchment_polygon, adjoint_catchment)


adjoint_catchment(fgdb)


arcpy.CheckInExtension("Spatial") 


# Determine the time take to process hydrological characteristics
end_time1 = time.time()
print ("It took {} to process hydrological characteristics".format(hms_string(end_time1 - start_time1)))

JasonTipton · ‎02-25-2016

Group all of your methods at the top under imports instead of co-mingling functions and statements.
Define your variables once instead of multiple times. (flow_accumulation, flow_direction, etc) assuming they are always the same in every method.
- Line 98 (Script 2): It looks like you assigned flow accumulation "fac" to the flow_direction variable
Don't pass RasterWorkspace into every function. Declare it once and use it inside each method.
You don't really need methods.
- After you do the previous, the way it is currently written, you really just need print statements in between each statement letting the user know what is going on. Cutting back on recreating the same variable over and over will cut a lot of code out.

These won't improve performance*, but will make it more readable, less code to manage, and less code to mess up.

*It may insignificantly improve by nanoseconds by not having to re-create the same variable over and over again!

View solution in original post

DanPatterson_Retired · ‎02-24-2016

try timing the individual components to see where your bottleneck is. This decorator needs your functions to return something... even if it is nothing. Your functions don't return anything

from functools import wraps
#------- decorator -----------------------
def delta_time(func):
    """simple timing decorator function"""
    @wraps(func)
    def wrapper(*args, **kwargs):
        t0 = time.time()                # start time
        result = func(*args, **kwargs)  # ... run the function ...
        t1 = time.time()                # end time
        print("Results for... {}".format(func.__name__))
        print("  time taken ...{:12.9f} sec.".format(t1-t0))
        #print("\n {}".format(result))  # uncomment to print within wrapper
        return result                  # return the result of the function
    return wrapper

useage... just put the @delta_time line above functions that you want to time

@delta_time  
def alpha(input=0):  
    """useless function 1 """  
    return input

PeterWilson · ‎02-25-2016

Hi Dan

Thanks for the following, I'll have a look at the following this evening and get back to you if I get stuck, if that fine with you.

LukeWebb · ‎02-25-2016

Totally unrelated to your problem, but I noticed you are using string processing to build file paths. There is a built in python module that handles this for you, and works across operating systems etc if this is ever a concern.

In simple terms, it joins parameters, adding a slash in between.

fill_sinks = "{0}\\{1}".format(raster_workspace, "fil")

Becomes:

fill_sinks = os.path.join(raster_workspace, "fil")

You can also do as many parameters as you like to build longer paths.

fill_sinks = os.path.join(raster_workspace, "fil", "output", "test.shp")

Returns: 'E:\Python\Temp\Model04\Layers04\fil\output\test.shp'

PeterWilson · ‎02-25-2016

Hi Luke

If you look at my original Python Script I used os.path.join and decided to move to PathLib Path. The unfortunate thing is that all ArcPy scripts require paths to be string and not objects hence using strings to create the paths from the Path objects.

DanPatterson_Retired · ‎02-25-2016

Peter, I suspect most are still using python 2.7.x. For those that are interested, pathlib became available in python 3.4 more documentation is given here

11.1. pathlib — Object-oriented filesystem paths — Python 3.5.1 documentation

JasonTipton · ‎02-25-2016

Yeah.... I'm still on 2.7.x. What's the problem with os.path.join at 3.4? Not having 3.7 installed, but just looking through the documentation and playing around with a 3.x shell online, it seems that os.path.join returns a string object as it always has. It looks like the new pathlib is a way to store paths as objects instead of as strings so instead of having to do os.path.xyz(), you now may be able to do path.xyz.

Where I'm confused is how this affects the script. At the end of the day, Peter Wilson is just formatting the path object back to a string. I don't see why os.path.join wouldn't have worked here (not that it's better than using the pathlib). Also it looks path has its own ways to concatenate paths.

I went ahead and installed pathlib into 2.7. It looks like you can do this:

# constructor
p = Path('a', 'b', 'c')
print(p)
# append paths to end with the "/"
p = p / 'd'
print(p)
print(str(p))

and it will print

WindowsPath('a/b/c)
WindowsPath('a/b/c/d')
a\b\c

Either way

os.path.join
str(Path)      # and 
"{}".format(Path)

should all get you to the same end goal: A string representing a path.

DanPatterson_Retired · ‎02-25-2016

Jason

It is an alternate... that is all, the initial question was why Peter was doing it... the answer is because he is using python 3.4 and for his environment, he must prefer it.

I haven't completed my tests to see how it handles escape characters should one forget to use raw notation (there is a current thread on paths and path constructs and Darren posted a poll). One can also use glob as an alternate to os.

I think pathlib may be an attempt to further unify file handling under multiple platforms and environments. Further testing might reveal advantages that I haven't come across yet.

ADDENDUM

Filenames and file paths in Python

for addition quasi-related materials

JasonTipton · ‎02-25-2016

I misread his comment. I thought that he had moved from os.path.join to Path because he was having issues with os.path.join() and arcpy. I now see that he moved *in favor* of Path but that arcpy isn't expecting a Path object but a string.

From a python perspective the Path object is an improvement because you are now dealing with a smarter object than a dumb string. The string doesn't know it is a path so you have to call some function to get the file name or directory or to append a subdirectory to the string. From looking at the methods, it looks like you can even create a directory or check if a path exists or not, all from the object itself without having to call another function.

As for escape characters, it looks like it doesn't handle them unless you handle them: same as a string or os.path.join(). At the end of the day, I think you're still better off letting python add your slashes and just pass in your directory names in a list and let python make your path for you, whether that be os.path.join(a,b,c,d,e,...) or Path(a,b,c) / d / e / ...

# WRONG -- Don't use \t or \a
p = Path("dog\turtle\ant")
p
>> WindowsPath('dog\turtle\x07nt')
print(p)
>> dog    urtlent

# RIGHT
p = Path("dog", "turtle", "ant")
p
>> WindowsPath('dog/turtle/ant')
print(p)
>> dog\turtle\ant

# If you need to append, you are using the / operator, not the literal character '/'
p / 'cat' / 'mouse
>> WindowsPath('dog/turtle/ant/cat/mouse')

DanPatterson_Retired · ‎02-25-2016

Jason...

Good, this will be useful to know for people working in ArcGIS Pro which is using python 3.4.x. ArcMap 10.4 is sticking with the python 2.7.x version.

So as people ... (transition from/stick with )... (ArcMap/ArcGIS Pro) things with become ..(clearer/more confusing).