These are required for the aggregation since they produce the sliding and block views of the input array.
import numpy as np
from numpy.lib.stride_tricks import as_strided
# ----------------------------------------------------------------------
#
# (1) stride .... code section from arraytools.tools
def stride(a, win=(3, 3), stepby=(1, 1)):
"""Provide a 2D sliding/moving view of an array.
There is no edge correction for outputs. Use the `_pad_` function first.
See arraytools.stride for full details.
----------------------------------------------------------
"""
err = """Array shape, window and/or step size error.
Use win=(3,) with stepby=(1,) for 1D array
or win=(3,3) with stepby=(1,1) for 2D array
or win=(1,3,3) with stepby=(1,1,1) for 3D
---- a.ndim != len(win) != len(stepby) ----
"""
assert (a.ndim == len(win)) and (len(win) == len(stepby)), err
shape = np.array(a.shape) # array shape (r, c) or (d, r, c)
win_shp = np.array(win) # window (3, 3) or (1, 3, 3)
ss = np.array(stepby) # step by (1, 1) or (1, 1, 1)
newshape = tuple(((shape - win_shp) // ss) + 1) + tuple(win_shp)
newstrides = tuple(np.array(a.strides) * ss) + a.strides
a_s = as_strided(a, shape=newshape, strides=newstrides, subok=True).squeeze()
return a_s
# (2) block .... code section from arraytools.tools
#
def block(a, win=(3, 3)):
"""Calls stride with step_by equal to win size.
No padding of the array, so this works best when win size is divisible
in both directions
Note:
See block_arr if you want padding
"""
a_b = stride(a, win=win, stepby=win)
return a_b
Now a function that allows you to aggregate on a variety of statistical parameters, such as:
maximum, mean, median, minimum, range, sum and central value
def aggreg_(a, win=(3,3), agg_type='mode'):
"""Aggregate an array using a specified window size and an aggregation
type
Parameters
----------
a : array
2D array to perform the aggregation
win : tuple/list
the shape of the window to construct the blocks
agg_type : string aggregation type
max, mean, median, min, mode, range, sum, central
>>> a = np.array([[nan, nan, nan, 0., 0., 0.],
[nan, nan, nan, 0., 0., 0.],
[nan, nan, nan, 0., 0., 0.],
[ 1., 1., 1., 2., 2., 2.],
[ 1., 1., 1., 2., 2., 2.],
[ 1., 1., 1., 2., 2., 2.]])
>>> aggreg_(a, win(3,3), agg_type='mode')
array([[nan, 0.],
[ 1., 2.]])
"""
blocks = block(a, win=win) # -- requires `block` from above
out = []
for bl in blocks:
b_arr = []
for b in bl:
if agg_type == 'mode':
uni = np.unique(b).tolist()
cnts = [len(np.nonzero(b==u)[0]) for u in uni]
idx = cnts.index(max(cnts))
b_arr.append(uni[idx])
elif agg_type == 'max':
b_arr.append(np.nanmax(b))
elif agg_type == 'mean':
b_arr.append(np.nanmean(b))
elif agg_type == 'median':
b_arr.append(np.nanmedian(b))
elif agg_type == 'min':
b_arr.append(np.nanmin(b))
elif agg_type == 'range':
b_arr.append((np.nanmax(b) - np.nanmin(b)))
elif agg_type == 'sum':
b_arr.append(np.nansum(b))
elif agg_type == 'central':
b_arr.append(b[b.shape[0]//2, b.shape[1]//2])
else:
tweet("\naggregation type not found {}".format(agg_type))
b_arr.append(b[b.shape[0]//2, b.shape[1]//2])
out.append(b_arr)
out = np.array(out)
return out
Now take an array of a particular size (40 x 40 to keep it manageable for display)
arr # -- a 40 x 40 array from 0 to 1600 displaying 5x5 blocks in the corners
array([[ 0, 1, 2, 3, 4, ..., 35, 36, 37, 38, 39],
[ 40, 41, 42, 43, 44, ..., 75, 76, 77, 78, 79],
[ 80, 81, 82, 83, 84, ..., 115, 116, 117, 118, 119],
[ 120, 121, 122, 123, 124, ..., 155, 156, 157, 158, 159],
[ 160, 161, 162, 163, 164, ..., 195, 196, 197, 198, 199],
...,
[1400, 1401, 1402, 1403, 1404, ..., 1435, 1436, 1437, 1438, 1439],
[1440, 1441, 1442, 1443, 1444, ..., 1475, 1476, 1477, 1478, 1479],
[1480, 1481, 1482, 1483, 1484, ..., 1515, 1516, 1517, 1518, 1519],
[1520, 1521, 1522, 1523, 1524, ..., 1555, 1556, 1557, 1558, 1559],
[1560, 1561, 1562, 1563, 1564, ..., 1595, 1596, 1597, 1598, 1599]])
Now some values for the 5x5 blocks
a0 = aggreg_(arr, win=(5,5), agg_type='min')
a1 = aggreg_(arr, win=(5,5), agg_type='max')
a2 = aggreg_(arr, win=(5,5), agg_type='median')
a0 # -- minimum by block
array([[ 0, 5, 10, 15, 20, 25, 30, 35],
[ 200, 205, 210, 215, 220, 225, 230, 235],
[ 400, 405, 410, 415, 420, 425, 430, 435],
[ 600, 605, 610, 615, 620, 625, 630, 635],
[ 800, 805, 810, 815, 820, 825, 830, 835],
[1000, 1005, 1010, 1015, 1020, 1025, 1030, 1035],
[1200, 1205, 1210, 1215, 1220, 1225, 1230, 1235],
[1400, 1405, 1410, 1415, 1420, 1425, 1430, 1435]])
a1 # -- maximum by block
array([[ 164, 169, 174, 179, 184, 189, 194, 199],
[ 364, 369, 374, 379, 384, 389, 394, 399],
[ 564, 569, 574, 579, 584, 589, 594, 599],
[ 764, 769, 774, 779, 784, 789, 794, 799],
[ 964, 969, 974, 979, 984, 989, 994, 999],
[1164, 1169, 1174, 1179, 1184, 1189, 1194, 1199],
[1364, 1369, 1374, 1379, 1384, 1389, 1394, 1399],
[1564, 1569, 1574, 1579, 1584, 1589, 1594, 1599]])
a2 # -- central value by block
array([[ 82., 87., 92., 97., 102., 107., 112., 117.],
[ 282., 287., 292., 297., 302., 307., 312., 317.],
[ 482., 487., 492., 497., 502., 507., 512., 517.],
[ 682., 687., 692., 697., 702., 707., 712., 717.],
[ 882., 887., 892., 897., 902., 907., 912., 917.],
[1082., 1087., 1092., 1097., 1102., 1107., 1112., 1117.],
[1282., 1287., 1292., 1297., 1302., 1307., 1312., 1317.],
[1482., 1487., 1492., 1497., 1502., 1507., 1512., 1517.]])
Lots of other options, I will let the user explore some of the options.
You can of course get arrays back and forth between ArcGIS Pro using arcpy functions.
from arcpy import env
from arcpy.da import Describe
from arcgisscripting import NumPyArrayToRaster, RasterToNumPyArray
env.overwriteOutput = True
# -- raster to array
r_in = # some path to a raster
desc = Describe(r_in)
cell_sze = desc['meanCellWidth']
LL = desc['extent'].lowerLeft
nodata = desc['noDataValue']
arr = RasterToNumPyArray(r_in)
# -- results back out
r_out = # eg r"C:\Temp\SomeRaster.tif" ... output path dto raster
cell_sze = # your number here
nodata = np.nan # or some other value
r0 = NumPyArrayToRaster(out, lower_left_corner=LL,
x_cell_size=cell_sze,
value_to_nodata=nodata)
r0.save(r_out)
Of course the above can be run on a single input or a raster can be tiled first or chunks read from disk depending on the raster size.
That's all for now.