It works... here is the script, showing how I created a floating-point array in the range 0-100.0 and reclassed it into 8 classes numbered 1 to 8.
# coding: utf-8
"""
Script: reclass_array_demo.py
Author: Dan.Patterson@carleton.ca
Modified: 2016-05-19
Purpose: For Rebecca... the array has no nodata values, to keep things simple
References:
RasterToNumPyArray and NumPyArrayToRaster can be used to convert to and
from raster formats of different types.
"""
import numpy as np
from textwrap import dedent
def arr_reclass(a, bins=None, new_bins=None, mask=False, mask_val=None):
    """Reclassify the values of an array into a set of class values.

    Each consecutive pair of values in `bins` defines one half-open class
    interval ``lower <= value < upper``.  Cells of `a` that fall inside an
    interval receive the matching value from `new_bins`; cells that fall in
    no interval (below the first limit, or equal to/above the last limit)
    are left as 0.

    Parameters
    ----------
    a : NumPy array
        Integer or floating point array to be reclassed.
    bins : list/array, optional
        Sequential lower limits of each class, plus one extra value at the
        end to close the upper range.  At least 2 values are required.
    new_bins : list/array, optional
        New value for each class, in the same order as the intervals.  When
        fewer than 2 values are given the classes are numbered 1, 2, 3, ...
        If it is shorter than the number of intervals, the remaining
        intervals are not reclassed (their cells stay 0).
    mask, mask_val : optional
        Intended for rasters with nodata/masked values.  They are accepted
        for interface compatibility but are NOT used by this function yet.

    Returns
    -------
    The reclassed array, with the same shape as `a` (no squeezing is done).
    If fewer than 2 `bins` are supplied, a message is printed and `a`
    itself is returned unchanged.
    """
    # None sentinels replace the former mutable ``[]`` defaults; callers
    # that pass ``[]`` explicitly get exactly the same behaviour as before.
    bins = [] if bins is None else bins
    new_bins = [] if new_bins is None else new_bins
    if len(bins) < 2:
        print("Bins = {} new = {} won't work".format(bins, new_bins))
        return a
    if len(new_bins) < 2:
        # n limits describe n - 1 intervals, so number them 1 .. n - 1
        new_bins = np.arange(1, len(bins))
    a_rc = np.zeros_like(a)
    for lower, upper, new_val in zip(bins[:-1], bins[1:], new_bins):
        in_class = (a >= lower) & (a < upper)
        # Adding (rather than assigning) keeps the original semantics:
        # intervals built from sequential limits never overlap, so each
        # cell receives at most one non-zero contribution.
        a_rc = a_rc + np.where(in_class, new_val, 0)
    return a_rc
if __name__ == "__main__":
    """For Rebecca Uncomment the #z and #np.save lines to create the array, then you can load it blah blah"""
    # Rows and columns of the demo array; only the commented-out
    # generation line below refers to them.
    r, c = 7000, 14000
    # One-time setup: uncomment the next two lines to build the random
    # test array and store it on disk, then re-comment them.
    #z = np.abs(100 * np.random.random_sample((r,c)) -100) # make an array with rand 0-100
    #np.save("f:/temp/z_7k_14k.npy",z)
    z = np.load("f:/temp/z_7k_14k.npy")
    # Class limits (the last value closes the upper range) and the new
    # value assigned to each of the 8 resulting classes.
    bins = [0, 5, 10, 15, 20, 25, 30, 60, 100]
    new_bins = list(range(1, 9))
    # Defined for completeness; they are not passed to arr_reclass here.
    mask, mask_val = False, None
    a_rc = arr_reclass(z, bins, new_bins)
    # Keep the reclassed result next to the input array.
    np.save("f:/temp/reclass_z_7k_14k.npy", a_rc)
So it works, but it took about 2 seconds to complete, so I may have to see if I can vectorize it some more to keep within my one-coffee-sip requirement.
And here are the results which are as expected for the classes given and the sampling framework
>>> a_rc
array([[ 7., 6., 8., ..., 8., 7., 7.],
[ 7., 7., 7., ..., 8., 7., 8.],
[ 7., 7., 8., ..., 8., 7., 7.],
...,
[ 8., 8., 7., ..., 8., 4., 6.],
[ 8., 7., 7., ..., 7., 7., 8.],
[ 2., 1., 8., ..., 8., 3., 8.]])
>>> a_rc.shape
(7000, 14000)
>>> np.histogram(a_rc,bins=[1, 2, 3, 4, 5, 6, 7, 8])
(array([ 4903001, 4898616, 4900343, 4899584, 4898414, 4901199, 68598843]), array([1, 2, 3, 4, 5, 6, 7, 8]))
>>>
array size on disk
