Raster generalization functions

DanPatterson · ‎06-08-2022

Basic functions

These are required for the aggregation since they produce the sliding and block views of the input array.

import numpy as np
from numpy.lib.stride_tricks import as_strided

# ----------------------------------------------------------------------
#
# (1) stride .... code section from arraytools.tools
def stride(a, win=(3, 3), stepby=(1, 1)):
    """Provide a 2D sliding/moving view of an array.
    There is no edge correction for outputs. Use the `_pad_` function first.

    See arraytools.stride for full details.
    ----------------------------------------------------------
    """
    err = """Array shape, window and/or step size error.
    Use win=(3,) with stepby=(1,) for 1D array
    or win=(3,3) with stepby=(1,1) for 2D array
    or win=(1,3,3) with stepby=(1,1,1) for 3D
    ----    a.ndim != len(win) != len(stepby) ----
    """
    assert (a.ndim == len(win)) and (len(win) == len(stepby)), err
    shape = np.array(a.shape)  # array shape (r, c) or (d, r, c)
    win_shp = np.array(win)    # window      (3, 3) or (1, 3, 3)
    ss = np.array(stepby)      # step by     (1, 1) or (1, 1, 1)
    newshape = tuple(((shape - win_shp) // ss) + 1) + tuple(win_shp)
    newstrides = tuple(np.array(a.strides) * ss) + a.strides
    a_s = as_strided(a, shape=newshape, strides=newstrides, subok=True).squeeze()
    return a_s

# (2) block .... code section from arraytools.tools
#
def block(a, win=(3, 3)):
    """Calls stride with step_by equal to win size.
    No padding of the array, so this works best when win size is divisible
    in both directions

    Note:
        See block_arr if you want padding
    """
    a_b = stride(a, win=win, stepby=win)
    return a_b

Now a function that allows you to aggregate on a variety of statistical parameters, such as:

maximum, mean, median, minimum, range, sum and central value

def aggreg_(a, win=(3,3), agg_type='mode'):
    """Aggregate an array using a specified window size and an aggregation
    type

    Parameters
    ----------
    a : array
        2D array to perform the aggregation
    win : tuple/list
        the shape of the window to construct the blocks
    agg_type : string aggregation type
        max, mean, median, min, mode, range, sum, central

    >>> a = np.array([[nan, nan, nan,  0.,  0.,  0.],
                      [nan, nan, nan,  0.,  0.,  0.],
                      [nan, nan, nan,  0.,  0.,  0.],
                      [ 1.,  1.,  1.,  2.,  2.,  2.],
                      [ 1.,  1.,  1.,  2.,  2.,  2.],
                      [ 1.,  1.,  1.,  2.,  2.,  2.]])
    >>> aggreg_(a, win(3,3), agg_type='mode')
    array([[nan,  0.],
           [ 1.,  2.]])
    """
    blocks = block(a, win=win)  # -- requires `block` from above
    out = []
    for bl in blocks:
        b_arr = []
        for b in bl:
            if agg_type == 'mode':
                uni = np.unique(b).tolist()
                cnts = [len(np.nonzero(b==u)[0]) for u in uni]
                idx = cnts.index(max(cnts))
                b_arr.append(uni[idx])
            elif agg_type == 'max':
                b_arr.append(np.nanmax(b))
            elif agg_type == 'mean':
                b_arr.append(np.nanmean(b))
            elif agg_type == 'median':
                b_arr.append(np.nanmedian(b))
            elif agg_type == 'min':
                b_arr.append(np.nanmin(b))
            elif agg_type == 'range':
                b_arr.append((np.nanmax(b) - np.nanmin(b)))
            elif agg_type == 'sum':
                b_arr.append(np.nansum(b))
            elif agg_type == 'central':
                b_arr.append(b[b.shape[0]//2, b.shape[1]//2])
            else:
                tweet("\naggregation type not found {}".format(agg_type))
                b_arr.append(b[b.shape[0]//2, b.shape[1]//2])
        out.append(b_arr)
    out = np.array(out)
    return out

Now take an array of a particular size (40 x 40 to keep it manageable for display)

arr  # -- a 40 x 40 array from 0 to 1600 displaying 5x5 blocks in the corners 
array([[   0,    1,    2,    3,    4, ...,   35,   36,   37,   38,   39],
       [  40,   41,   42,   43,   44, ...,   75,   76,   77,   78,   79],
       [  80,   81,   82,   83,   84, ...,  115,  116,  117,  118,  119],
       [ 120,  121,  122,  123,  124, ...,  155,  156,  157,  158,  159],
       [ 160,  161,  162,  163,  164, ...,  195,  196,  197,  198,  199],
       ...,
       [1400, 1401, 1402, 1403, 1404, ..., 1435, 1436, 1437, 1438, 1439],
       [1440, 1441, 1442, 1443, 1444, ..., 1475, 1476, 1477, 1478, 1479],
       [1480, 1481, 1482, 1483, 1484, ..., 1515, 1516, 1517, 1518, 1519],
       [1520, 1521, 1522, 1523, 1524, ..., 1555, 1556, 1557, 1558, 1559],
       [1560, 1561, 1562, 1563, 1564, ..., 1595, 1596, 1597, 1598, 1599]])

Now some values for the 5x5 blocks

a0 = aggreg_(arr, win=(5,5), agg_type='min')
a1 = aggreg_(arr, win=(5,5), agg_type='max')
a2 = aggreg_(arr, win=(5,5), agg_type='median')

a0  # -- minimum by block 
array([[   0,    5,   10,   15,   20,   25,   30,   35],
       [ 200,  205,  210,  215,  220,  225,  230,  235],
       [ 400,  405,  410,  415,  420,  425,  430,  435],
       [ 600,  605,  610,  615,  620,  625,  630,  635],
       [ 800,  805,  810,  815,  820,  825,  830,  835],
       [1000, 1005, 1010, 1015, 1020, 1025, 1030, 1035],
       [1200, 1205, 1210, 1215, 1220, 1225, 1230, 1235],
       [1400, 1405, 1410, 1415, 1420, 1425, 1430, 1435]])

a1  # -- maximum by block 
array([[ 164,  169,  174,  179,  184,  189,  194,  199],
       [ 364,  369,  374,  379,  384,  389,  394,  399],
       [ 564,  569,  574,  579,  584,  589,  594,  599],
       [ 764,  769,  774,  779,  784,  789,  794,  799],
       [ 964,  969,  974,  979,  984,  989,  994,  999],
       [1164, 1169, 1174, 1179, 1184, 1189, 1194, 1199],
       [1364, 1369, 1374, 1379, 1384, 1389, 1394, 1399],
       [1564, 1569, 1574, 1579, 1584, 1589, 1594, 1599]])

a2  # -- central value by block 
array([[  82.,   87.,   92.,   97.,  102.,  107.,  112.,  117.],
       [ 282.,  287.,  292.,  297.,  302.,  307.,  312.,  317.],
       [ 482.,  487.,  492.,  497.,  502.,  507.,  512.,  517.],
       [ 682.,  687.,  692.,  697.,  702.,  707.,  712.,  717.],
       [ 882.,  887.,  892.,  897.,  902.,  907.,  912.,  917.],
       [1082., 1087., 1092., 1097., 1102., 1107., 1112., 1117.],
       [1282., 1287., 1292., 1297., 1302., 1307., 1312., 1317.],
       [1482., 1487., 1492., 1497., 1502., 1507., 1512., 1517.]])

Lots of other options, I will let the user explore some of the options.

You can of course get arrays back and forth between ArcGIS Pro using arcpy functions.

from arcpy import env
from arcpy.da import Describe
from arcgisscripting import NumPyArrayToRaster, RasterToNumPyArray
env.overwriteOutput = True

# -- raster to array
r_in = # some path to a raster
desc = Describe(r_in)
cell_sze = desc['meanCellWidth']
LL = desc['extent'].lowerLeft
nodata = desc['noDataValue']
arr = RasterToNumPyArray(r_in)

# -- results back out
r_out = # eg r"C:\Temp\SomeRaster.tif" ... output path dto raster
cell_sze =  # your number here
nodata = np.nan # or some other value
r0 = NumPyArrayToRaster(out, lower_left_corner=LL,
                        x_cell_size=cell_sze,
                        value_to_nodata=nodata)
r0.save(r_out)

Of course the above can be run on a single input or a raster can be tiled first or chunks read from disk depending on the raster size.

That's all for now.