Dan_Patterson

Rolling Statistics for grid data... an alternative

Blog Post created by Dan_Patterson Champion on Nov 14, 2017

Rolling stats...  The 3x3 moving/sliding/rolling window.. This kernel type will focus on returning statistics like your FocalStatistics functions... but without needing the Spatial Analyst extension...

 

They are used ....

 

First the results.

a      # ---- A grid/raster/array ----

array([[5, 1, 2, 0, 5, 4, 7, 3, 2, 2],
       [2, 5, 9, 9, 6, 9, 2, 1, 3, 0],
       [7, 2, 7, 4, 4, 4, 3, 7, 7, 7],
       [3, 9, 6, 8, 0, 1, 8, 5, 0, 7],
       [8, 4, 6, 8, 1, 9, 6, 9, 5, 5],
       [3, 9, 9, 5, 3, 1, 5, 5, 2, 3],
       [0, 5, 2, 0, 9, 4, 7, 9, 9, 0],
       [2, 5, 5, 4, 3, 7, 7, 8, 3, 5],
       [5, 2, 4, 5, 3, 6, 5, 2, 7, 2],
       [4, 6, 4, 9, 8, 6, 8, 6, 2, 0]])


a_s = stride(a, r_c=(3,3))  # ---- create an array of rolling windows 3x3 ----

# ---- Now calculate all the statistics --------------------------------------

rolling_stats(a_s, no_null=False, prn=True)
Min...
[[1 0 0 0 2 1 1 0]
[2 2 0 0 0 1 0 0]
[2 2 0 0 0 1 0 0]
[3 4 0 0 0 1 0 0]
[0 0 0 0 1 1 2 0]
[0 0 0 0 1 1 2 0]
[0 0 0 0 3 2 2 0]
[2 2 3 3 3 2 2 0]]
Max...
[[9 9 9 9 9 9 7 7]
[9 9 9 9 9 9 8 7]
[9 9 8 9 9 9 9 9]
[9 9 9 9 9 9 9 9]
[9 9 9 9 9 9 9 9]
[9 9 9 9 9 9 9 9]
[5 5 9 9 9 9 9 9]
[6 9 9 9 8 8 8 8]]
Mean...
[[ 4.44  4.33  5.11  5.    4.89  4.44  3.89  3.56]
[ 5.56  6.56  5.89  5.    4.11  4.44  4.    4.11]
[ 5.78  6.    4.89  4.33  4.    5.78  5.56  5.78]
[ 6.33  7.11  5.11  4.    3.78  5.44  5.    4.56]
[ 5.11  5.33  4.78  4.44  5.    6.11  6.33  5.22]
[ 4.44  4.89  4.44  4.    5.11  5.89  6.11  4.89]
[ 3.33  3.56  3.89  4.56  5.67  6.11  6.33  5.  ]
[ 4.11  4.89  5.    5.67  5.89  6.11  5.33  3.89]]
Med...
[[ 5.  4.  5.  4.  4.  4.  3.  3.]
[ 6.  7.  6.  4.  4.  4.  3.  5.]
[ 6.  6.  6.  4.  4.  6.  6.  7.]
[ 6.  8.  6.  3.  3.  5.  5.  5.]
[ 5.  5.  5.  4.  5.  6.  6.  5.]
[ 5.  5.  4.  4.  5.  7.  7.  5.]
[ 4.  4.  4.  4.  6.  7.  7.  5.]
[ 4.  5.  4.  6.  6.  6.  6.  3.]]
Sum...
[[40 39 46 45 44 40 35 32]
[50 59 53 45 37 40 36 37]
[52 54 44 39 36 52 50 52]
[57 64 46 36 34 49 45 41]
[46 48 43 40 45 55 57 47]
[40 44 40 36 46 53 55 44]
[30 32 35 41 51 55 57 45]
[37 44 45 51 53 55 48 35]]
Std...
[[ 2.67  3.2   2.85  2.62  2.02  2.5   2.28  2.59]
[ 2.59  2.36  2.73  3.09  2.88  2.83  2.71  2.96]
[ 2.2   2.16  2.73  3.16  2.98  2.62  2.59  2.39]
[ 2.4   1.79  3.    3.37  3.15  2.83  2.58  2.5 ]
[ 3.    2.91  3.26  3.34  2.87  2.56  2.26  3.08]
[ 2.91  2.73  2.83  2.62  2.42  2.28  2.38  3.03]
[ 1.76  1.71  2.33  2.45  1.94  2.02  2.36  3.2 ]
[ 1.29  1.79  2.    2.    1.79  1.73  2.31  2.56]]
Var...
[[  7.14  10.22   8.1    6.89   4.1    6.25   5.21   6.69]
[  6.69   5.58   7.43   9.56   8.32   8.02   7.33   8.77]
[  4.84   4.67   7.43  10.     8.89   6.84   6.69   5.73]
[  5.78   3.21   8.99  11.33   9.95   8.02   6.67   6.25]
[  8.99   8.44  10.62  11.14   8.22   6.54   5.11   9.51]
[  8.47   7.43   8.02   6.89   5.88   5.21   5.65   9.21]
[  3.11   2.91   5.43   6.02   3.78   4.1    5.56  10.22]
[  1.65   3.21   4.     4.     3.21   2.99   5.33   6.54]]
Range

# ---- If you have nodata values, they can be accommodated ------------------

 

Now the code functions

 

import numpy as np
from numpy.lib.stride_tricks import as_strided

def _check(a, r_c, subok=False):
    """Performs the array checks necessary for stride and block.
    : a   - Array or list.
    : r_c - tuple/list/array of rows x cols.
    : subok - from numpy 1.12 added, keep for now
    :Returns:
    :------
    :Attempts will be made to ...
    :  produce a shape at least (1*c).  For a scalar, the
    :  minimum shape will be (1*r) for 1D array or (1*c) for 2D
    :  array if r<c.  Be aware
    """

    if isinstance(r_c, (int, float)):
        r_c = (1, int(r_c))
    r, c = r_c
    if a.ndim == 1:
        a = np.atleast_2d(a)
    r, c = r_c = (min(r, a.shape[0]), min(c, a.shape[1]))
    a = np.array(a, copy=False, subok=subok)
    return a, r, c, tuple(r_c)


def _pad(a, nan_edge=False):
    """Pad a sliding array to allow for stats"""
    if nan_edge:
        a = np.pad(a, pad_width=(1, 2), mode="constant",
                   constant_values=(np.NaN, np.NaN))
    else:
        a = np.pad(a, pad_width=(1, 1), mode="reflect")
    return a


def stride(a, r_c=(3, 3)):
    """Provide a 2D sliding/moving view of an array.
    :  There is no edge correction for outputs.
    :
    :Requires:
    :--------
    : _check(a, r_c) ... Runs the checks on the inputs.
    : a - array or list, usually a 2D array.  Assumes rows is >=1,
    :     it is corrected as is the number of columns.
    : r_c - tuple/list/array of rows x cols.  Attempts  to
    :     produce a shape at least (1*c).  For a scalar, the
    :     minimum shape will be (1*r) for 1D array or 2D
    :     array if r<c.  Be aware
    """

    a, r, c, r_c = _check(a, r_c)
    shape = (a.shape[0] - r + 1, a.shape[1] - c + 1) + r_c
    strides = a.strides * 2
    a_s = (as_strided(a, shape=shape, strides=strides)).squeeze()
    return a_s


# ----------------------------------------------------------------------
# (18) rolling stats .... code section
def rolling_stats(a, no_null=True, prn=True):
    """Statistics on the last two dimensions of an array.
    :Requires
    :--------
    : a - 2D array  **Note, use 'stride' above to obtain rolling stats
    : no_null - boolean, whether to use masked values (nan) or not.
    : prn - boolean, to print the results or return the values.
    :
    :Returns
    :-------
    : The results return an array of 4 dimensions representing the original
    : array size and block size
    : eg.  original = 6x6 array   block = 3x3
    :      breaking the array into 4 chunks
    """

    a = np.asarray(a)
    a = np.atleast_2d(a)
    ax = None
    if a.ndim > 1:
        ax = tuple(np.arange(len(a.shape))[-2:])
    if no_null:
        a_min = a.min(axis=ax)
        a_max = a.max(axis=ax)
        a_mean = a.mean(axis=ax)
        a_med = np.median(a, axis=ax)
        a_sum = a.sum(axis=ax)
        a_std = a.std(axis=ax)
        a_var = a.var(axis=ax)
        a_ptp = a_max - a_min
    else:
        a_min = np.nanmin(a, axis=(ax))
        a_max = np.nanmax(a, axis=(ax))
        a_mean = np.nanmean(a, axis=(ax))
        a_med = np.nanmedian(a, axis=(ax))
        a_sum = np.nansum(a, axis=(ax))
        a_std = np.nanstd(a, axis=(ax))
        a_var = np.nanvar(a, axis=(ax))
        a_ptp = a_max - a_min
    if prn:
        s = ['Min', 'Max', 'Mean', 'Med', 'Sum', 'Std', 'Var', 'Range']
        frmt = "...\n{}\n".join([i for i in s])
        v = [a_min, a_max, a_mean, a_med, a_sum, a_std, a_var, a_ptp]
        args = [indent(str(i), '... ') for i in v]
        print(frmt.format(*args))
    else:
        return a_min, a_max, a_mean, a_med, a_sum, a_std, a_var, a_ptp

Outcomes