Python script to calculate NDVI anomalies
https://geonet.esri.com/people/xander_bakker
I´d like to automate all my workflow, from downloaded original raster datasets to anomaly calculator:
3 years data and folder structure can be downloaded here.
Question 1: What is the best option: to run separated scripts or create a unique code?
Question 2: How to translate the model builder to python script (Python script 2)?
Question 3: If the best option is to run separated, how to make it in a logical sequence (script 1 -> script 2 -> script 3)
Summary workflow:
Python scripts 1 and 3 are working. The scrip 2 (Pre-processing) is a model in a toolbox.
Python script 1: written by myself, maybe need to be improved!
#Rename original files
import arcpy, datetime, glob, os
arcpy.env.overwriteOutput = True
arcpy.env.workspace=r'O:\Modis250\Originais'
infolder='O:\Modis250\Originals'
outfolder='O:\Modis250\NDVI_10dias'
list1=arcpy.ListRasters("*.tif")
list1.sort()
for raster in list1:
source_path = os.path.join(infolder, raster)
oldFilename=raster
dd = int(oldFilename[15:18])
if dd < 11: dd = 1
elif dd < 21: dd = 2
elif dd < 31: dd = 3
elif dd < 41: dd = 4
elif dd < 51: dd = 5
elif dd < 61: dd = 6
elif dd < 71: dd = 7
elif dd < 81: dd = 8
elif dd < 91: dd = 9
elif dd < 101: dd = 10
elif dd < 111: dd = 11
elif dd < 121: dd = 12
elif dd < 131: dd = 13
elif dd < 141: dd = 14
elif dd < 151: dd = 15
elif dd < 161: dd = 16
elif dd < 171: dd = 17
elif dd < 181: dd = 18
elif dd < 191: dd = 19
elif dd < 201: dd = 20
elif dd < 211: dd = 21
elif dd < 221: dd = 22
elif dd < 231: dd = 23
elif dd < 241: dd = 24
elif dd < 251: dd = 25
elif dd < 261: dd = 26
elif dd < 271: dd = 27
elif dd < 281: dd = 28
elif dd < 291: dd = 29
elif dd < 301: dd = 30
elif dd < 311: dd = 31
elif dd < 321: dd = 32
elif dd < 331: dd = 33
elif dd < 341: dd = 34
elif dd < 351: dd = 35
else: dd = 36
dd = str(dd)
newFilename="ndvi_" + oldFilename[11:15] + "_dec_" + dd + ".tif"
destination_path=os.path.join(outfolder, newFilename)
os.rename(source_path, destination_path)
Model Builder -> Python script 2
Python script 3: Python script to calculate NDVI anomalies
def main():
import arcpy
import os
arcpy.env.overwriteOutput = True
# Check out SA license
arcpy.CheckOutExtension("spatial")
# settings
ws_norm = r'D:\Modis250\NDVI_10dias_norm'
ws_mean = r'D:\Modis250\NDVI_10dias_mean'
ws_anom = r'D:\Modis250\NDVI_10dias_anom'
# create a list of all raster in norm folder
arcpy.env.workspace = ws_norm
lst_ndvi_ras = arcpy.ListRasters()
lst_ndvi_ras.sort()
print ("lst_ndvi_ras: {}".format(lst_ndvi_ras))
# get list of years
decades = GetListOfDecades(lst_ndvi_ras)
decades.sort()
print ("decades: {}".format(decades))
# loop through each year
for decade in decades:
# Get rasters for given year
lst_ndvi_decade = GetListOfRasterForGivenDecade(lst_ndvi_ras, decade)
print("lst_ndvi_decade: {}".format(lst_ndvi_decade))
# calculate mean raster for decade
lst_ndvi_decade_paths = [os.path.join(ws_norm, r) for r in lst_ndvi_decade]
print("lst_ndvi_decade_paths: {}".format(lst_ndvi_decade_paths))
mean_ras = arcpy.sa.CellStatistics(lst_ndvi_decade_paths, "MEAN", "DATA")
# store mean ras for decade
# mean_dec_d_.tif
out_name_mean = "mean_dec_{}.tif".format(decade) ###
out_name_path_mean = os.path.join(ws_mean, out_name_mean)
print ("out_name_path_mean: {}".format(out_name_path_mean))
mean_ras.save(out_name_path_mean)
# loop through each raster in decade
for ndvi_ras in lst_ndvi_decade:
# calculate anom for each raster in decade
# anom_2002_dec_1.tif = norm_ndvi_2002_dec_1.tif - mean_dec_1_.tif;
print ("ndvi_ras: {}".format(ndvi_ras))
# minus_ras = arcpy.sa.Minus(ndvi_ras, out_name_path_mean)
out_name_path_norm = os.path.join(ws_norm, ndvi_ras)
minus_ras = arcpy.Raster(out_name_path_mean) - arcpy.Raster(out_name_path_norm)
# store anom raster
out_name_anom = ndvi_ras.replace("norm_ndvi", "anom")
out_name_path_anom = os.path.join(ws_anom, out_name_anom)
minus_ras.save(out_name_path_anom)
print ("out_name_path_anom: {}".format(out_name_path_anom))
# return SA license
arcpy.CheckInExtension("spatial")
def GetListOfDecades(lst):
# norm_ndvi_yyyy_dec_d.tif
lst1 = list(set([r.split("_")[4] for r in lst]))
return [r.split('.')[0] for r in lst1]
def GetListOfRasterForGivenDecade(lst, decade):
lst1 = []
search = "_{}.tif".format(decade)
for r in lst:
if search in r.lower():
lst1.append(r)
return lst1
if __name__ == '__main__':
main()
Solved! Go to Solution.
Python routine to calculate MODIS NDVI anomaly:
modisndvi calculation ndvi
Summary workflow:
def main():
import arcpy
import os
import shutil
arcpy.env.overwriteOutput = True
ws_in = r'D:\Modis250\Originais'
ws_out = r'D:\Modis250\NDVI_10dias'
arcpy.env.workspace = ws_in
lst_ras = arcpy.ListRasters("*.tif")
for ras_name in sorted(lst_ras):
# MODIS.ndvi.YYYYjjj.yL6000.BOKU.tif
in_name_path = os.path.join(ws_in, ras_name)
y = ras_name[11:15]
j = int(ras_name[15:18])
d = GetDecadeFromJulian(int(y), j)
out_name = "ndvi_{}_dec_{}.tif".format(y, d)
out_name_path = os.path.join(ws_out, out_name)
#os.rename(in_name_path, out_name_path)
shutil.move(in_name_path, out_name_path)
print (out_name + " renamed")
def GetDecadeFromJulian(year, j):
import datetime
date = datetime.datetime(year-1, 12, 31) + datetime.timedelta(j)
m = (date.month - 1) * 3
if date.day <= 10:
d = 1
elif date.day <= 20:
d = 2
else:
d = 3
return m + d
if __name__ == '__main__':
main()
def main():
import arcpy
import os
arcpy.CheckOutExtension("spatial")
arcpy.env.overwriteOutput = True
# Set Geoprocessing environments
ws_scr = r'D:\Modis250\Temp' #Scratch workspace
ws_in = r'D:\Modis250\NDVI_10dias' #Input workspace
ws_out = r'D:\Modis250\NDVI_10dias_norm' #Output workspace
mask = r'D:\Modis250\SRTM\mde_sc_90.tif' #Raster used as mask
arcpy.env.scratchWorkspace = ws_scr
arcpy.env.workspace = ws_in
lst_ras = arcpy.ListRasters("*.tif")
lst_ras.sort()
# Pre-processing original rasters
for raster1 in lst_ras:
print ("Processing: " + raster1)
#outRaster1= Original raster projected from CGS-WGS1964 to CGS-SIRGAS-2000
outRaster1 = os.path.join(ws_scr, "rep_{}".format(raster1)) # Temporary file
arcpy.ProjectRaster_management(raster1, outRaster1,"GEOGCS['GCS_SIRGAS_2000',DATUM['D_SIRGAS_2000',SPHEROID['GRS_1980',6378137.0,298.257222101]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]", "NEAREST", "1.8000795216263E-03 1.80029836029412E-03", "'SAD_1969_To_WGS_1984_14 + SAD_1969_To_SIRGAS_2000_1'", "", "GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]]")
print ("rep_{}".format(raster1) + " projected")
#outRaster2 = outRaster1 masked by Area of Interest (AOI)
outRaster2 = arcpy.sa.ExtractByMask(outRaster1, mask) # Temporary file
outRaster2.save(os.path.join(ws_scr, "mask_{}".format(raster1)))
print ("mask_{}".format(raster1) + " masked")
#outRaster3 = Overwrited spurious data values from outRaster2
outRaster3 = arcpy.sa.Con(outRaster2 < 1, 0, arcpy.sa.Con(outRaster2 > 9998, 9999, outRaster2)) # Temporary file
outRaster3.save(os.path.join(ws_scr, "aju_{}".format(raster1)))
print ("aju_{}".format(raster1) + " ajusted")
#outRaster4 = Standardized outRaster3
#ras_mean = arcpy.GetRasterProperties_management(outRaster3, "MEAN")
#ras_std = arcpy.GetRasterProperties_management(outRaster3, "STD")
ras_mean = float(arcpy.GetRasterProperties_management(outRaster3, "MEAN").getOutput(0))
ras_std = float(arcpy.GetRasterProperties_management(outRaster3, "STD").getOutput(0))
outRaster4 = ((outRaster3-ras_mean)/ras_std) # Temporary file
outRaster4.save(os.path.join(ws_scr, "ras_std_{}".format(raster1)))
print ("ras_std_{}".format(raster1) + " standardized")
#outRaster = Final raster scaled to 0 -100
#ras_max = arcpy.GetRasterProperties_management(outRaster4, "MAXIMUM")
#ras_min = arcpy.GetRasterProperties_management(outRaster4, "MINIMUM")
ras_max = float(arcpy.GetRasterProperties_management(outRaster4, "MAXIMUM").getOutput(0))
ras_min = ras_min = float(arcpy.GetRasterProperties_management(outRaster4, "MINIMUM").getOutput(0))
outRaster = ((outRaster4-ras_min)/(ras_max-ras_min) * 100) # Final result
outRaster.save(os.path.join(ws_out, "norm_{}".format(raster1)))
print ("norm_{}".format(raster1) + " processed")
arcpy.CheckInExtension("spatial")
if __name__ == '__main__':
main()
def main():
import arcpy
import os
arcpy.env.overwriteOutput = True
# Check out SA license
arcpy.CheckOutExtension("spatial")
# settings
ws_norm = r'D:\Modis250\NDVI_10dias_norm'
ws_mean = r'D:\Modis250\NDVI_10dias_mean'
ws_anom = r'D:\Modis250\NDVI_10dias_anom'
## ws_norm = r'C:\GeoNet\Modis250\NDVI_10dias_norm'
## ws_mean = r'C:\GeoNet\Modis250\NDVI_10dias_mean'
## ws_anom = r'C:\GeoNet\Modis250\NDVI_10dias_anom'
# create a list of all raster in norm folder
arcpy.env.workspace = ws_norm
lst_ndvi_ras = arcpy.ListRasters()
#print ("Raster list: {}".format(lst_ndvi_ras))
# get list of years
decades = GetListOfDecades(lst_ndvi_ras)
decades.sort()
#print ("Decades list: {}".format(decades))
# loop through each year
for decade in decades:
# Get rasters for given year
lst_ndvi_decade = GetListOfRasterForGivenDecade(lst_ndvi_ras, decade)
print("Decade by year list: {}".format(lst_ndvi_decade))
# calculate mean raster for decade
lst_ndvi_decade_paths = [os.path.join(ws_norm, r) for r in lst_ndvi_decade]
#print("lst_ndvi_decade_paths: {}".format(lst_ndvi_decade_paths))
mean_ras = arcpy.sa.CellStatistics(lst_ndvi_decade_paths, "MEAN", "DATA")
# store mean ras for decade
# mean_dec_d_.tif
out_name_mean = "mean_dec_{}.tif".format(decade) ###
out_name_path_mean = os.path.join(ws_mean, out_name_mean)
print ("Mean raster: {}".format(out_name_mean))
mean_ras.save(out_name_path_mean)
# loop through each raster in decade
for ndvi_ras in lst_ndvi_decade:
# calculate anom for each raster in decade
# anom_2002_dec_1.tif = norm_ndvi_2002_dec_1.tif - mean_dec_1_.tif;
print ("Processing: {}".format(ndvi_ras))
# minus_ras = arcpy.sa.Minus(ndvi_ras, out_name_path_mean)
out_name_path_norm = os.path.join(ws_norm, ndvi_ras)
minus_ras = arcpy.Raster(out_name_path_norm) - arcpy.Raster(out_name_path_mean)
# store anom raster
out_name_anom = ndvi_ras.replace("norm_ndvi", "anom")
out_name_path_anom = os.path.join(ws_anom, out_name_anom)
minus_ras.save(out_name_path_anom)
print ("Anom: {}".format(out_name_path_anom))
# return SA license
arcpy.CheckInExtension("spatial")
def GetListOfDecades(lst):
# norm_ndvi_yyyy_dec_d.tif
lst1 = list(set([r.split("_")[4] for r in lst]))
return [r.split('.')[0] for r in lst1]
def GetListOfRasterForGivenDecade(lst, decade):
lst1 = []
search = "_{}.tif".format(decade)
for r in lst:
if search in r.lower():
lst1.append(r)
return lst1
if __name__ == '__main__':
main()
Batch file to run on prompt:
@ECHO OFF
REM Runs NDVI anomaly scripts
ECHO Backup original files
xcopy D:\Modis250\Originais\*.tif D:\Modis250\Originais_bkp /a /c /i /y /z
PAUSE
ECHO Selecting decade and renaming original files to "ndvi_YYYY_dec_dd.tif" standard.
C:\Python27\ArcGIS10.5\Python.exe D:\Modis250\Python\Script1.py
ECHO NDVI files renamed, hit enter to pre-processing data.
PAUSE
ECHO Projecting, removing spurious data, standardizing and scaling ndvi decade files to 0-100.
C:\Python27\ArcGIS10.5\Python.exe D:\Modis250\Python\Script2.py
ECHO NDVI pre-processed, hit enter to calculate anomalies.
PAUSE
ECHO Calculating decade anomalies.
C:\Python27\ArcGIS10.5\Python.exe D:\Modis250\Python\Script3.py
Del D:\Temp /s
Del D:\NDVI_10dias /s
ECHO Done.
PAUSE
Let's try and do this in parts.
Question 1: What is the best option: to run separated scripts or create a unique code?
You can work with a single script that does all 3 steps, but it might be better to break it up in parts(as you did). This way you can check the intermediate results and run parts separately and also reuse parts for other workflows.
Question 2: How to translate the model builder to python script (Python script 2)?
You can export the Model to a Python script, but that probably results in some really ugly code. It is better to rewrite the model into a new script. This also allows for some additional checks that might not be easy to do inside a model.
Question 3: If the best option is to run separated, how to make it in a logical sequence (script 1 -> script 2 -> script 3)
Depends. As I mentioned above, it may be easier to run parts separately and do some (manual) checks between the steps to be sure things go right. When you have tested thoroughly, you can call the scripts from a "master" script and run the entire process. Question: how often do you plan to run this script? Does the input data change?
I think that the first script could be rewritten like this (untested):
def main():
import arcpy
import os
arcpy.env.overwriteOutput = True
ws_in = 'O:\Modis250\Originals'
ws_out = 'O:\Modis250\NDVI_10dias'
arcpy.env.workspace = ws_in
lst_ras = arcpy.ListRasters("*.tif")
for ras_name in sorted(lst_ras):
in_name_path = os.path.join(ws_in, ras_name)
j = int(ras_name[15:18])
d = GetDecadeFromJulian(j)
out_name = "ndvi_{}_dec_{}.tif".format(j, d)
out_name_path = os.path.join(ws_out, out_name)
os.rename(in_name_path, out_name_path)
def GetDecadeFromJulian(j):
d = (j+1 - ((j-1) % 10)) / 10 + 1
return str(d) if d < 37 else '36'
if __name__ == '__main__':
main()
The script 1 worked, however the output name is wrong: The names should be "ndvi_yyyy_dec_d.tif" where "yyyy" is the year (from the input file name (MODIS.ndvi.YYYYjjj.yL6000.BOKU.tif ) and "d" the decade. Why the ".2" after the decade number in the outputs? If there are more then one input file for decade, de code shoud overwrite the first.
Question: how often do you plan to run this script? Does the input data change?
The first process will be runned one time for all the past years files (from 2000 to today). After that, the script will be runned every ten days period (decade). The new input (original file "MODIS.ndvi.YYYYjjj.yL6000.BOKU.tif") will be pre-processed and the result will be added to the list of standardized and scaled (0-100) raster datasets, to calculate the anomaly for the last decade in current year. It is a continous process.
As the name of the original file have the "YYYYjjj" format, is it possible to rename the files considering the leap years, according to the table below?
Decade | Date | julian day in common year | julian day in leap year |
1 | 01/01 to 01/10 | 10 | 10 |
2 | 01/11 to 01/20 | 20 | 20 |
3 | 01/21 to 01/31 | 31 | 31 |
4 | 02/01 to 02/10 | 41 | 41 |
5 | 02/11 to 02/20 | 51 | 51 |
6 | 02/21 to 02/28-29 | 59 | 60 |
7 | 03/01 to 03/10 | 69 | 70 |
8 | 03/11 to 03/20 | 79 | 80 |
9 | 03/21 to 03/31 | 90 | 91 |
10 | 04/01 to 04/10 | 100 | 101 |
11 | 04/11 to 04/20 | 110 | 111 |
12 | 04/21 to 04/30 | 120 | 121 |
13 | 05/01 to 05/10 | 130 | 131 |
14 | 05/11 to 05/20 | 140 | 141 |
15 | 05/21 to 05/31 | 151 | 152 |
16 | 06/01 to 06/10 | 161 | 162 |
17 | 06/11 to 06/20 | 171 | 172 |
18 | 06/21 to 06/30 | 181 | 182 |
19 | 07/01 to 07/10 | 191 | 192 |
20 | 07/11 to 07/20 | 201 | 202 |
21 | 07/21 to 07/31 | 212 | 213 |
22 | 08/01 to 08/10 | 222 | 223 |
23 | 08/11 to 08/20 | 232 | 233 |
24 | 08/21 to 08/31 | 243 | 244 |
25 | 09/01 to 09/10 | 253 | 254 |
26 | 09/11 to 09/20 | 263 | 264 |
27 | 09/21 to 09/30 | 273 | 274 |
28 | 10/01 to 10/10 | 283 | 284 |
29 | 10/11 to 10/20 | 293 | 294 |
30 | 10/21 to 10/31 | 304 | 305 |
31 | 11/01 to 11/10 | 314 | 315 |
32 | 11/11 to 11/20 | 324 | 325 |
33 | 11/21 to 11/30 | 334 | 335 |
34 | 12/01 to 12/10 | 344 | 345 |
35 | 12/11 to 12/20 | 354 | 355 |
36 | 12/21 to 12/31 | 365 | 366 |
This would require changing the function to get the decade like this:
def GetDecadeFromJulian(year, j):
import datetime
date = datetime.datetime(year, 1, 1) + datetime.timedelta(j)
m = (date.month - 1) * 3
if date.day <= 10:
d = 1
elif date.day <= 20:
d = 2
else:
d = 3
return m + d
Both year and Julian (j) should be provided as numeric value.
Error running at ArcGisPro:
Traceback (most recent call last):
File "<string>", line 40, in <module>
File "<string>", line 13, in main
TypeError: 'NoneType' object is not iterable
in code:
def main():
import arcpy
import os
arcpy.env.overwriteOutput = True
ws_in = r'D:\Modis250\Originals'
ws_out = r'D:\Modis250\NDVI_10dias'
arcpy.env.workspace = ws_in
lst_ras = arcpy.ListRasters("*.tif")
for ras_name in sorted(lst_ras):
in_name_path = os.path.join(ws_in, ras_name)
j = int(ras_name[15:18])
d = GetDecadeFromJulian(j)
out_name = "ndvi_{}_dec_{}.tif".format(j, d)
out_name_path = os.path.join(ws_out, out_name)
os.rename(in_name_path, out_name_path)
#def GetDecadeFromJulian(j):
#d = (j+1 - ((j-1) % 10)) / 10 + 1
#return str(d) if d < 37 else '36'
def GetDecadeFromJulian(year, j):
import datetime
date = datetime.datetime(year, 1, 1) + datetime.timedelta(j)
m = (date.month - 1) * 3
if date.day <= 10:
d = 1
elif date.day <= 20:
d = 2
else:
d = 3
return m + d
if __name__ == '__main__':
main()
It seems you are missing some code:
def main():
import arcpy
import os
arcpy.env.overwriteOutput = True
ws_in = 'O:\Modis250\Originals'
ws_out = 'O:\Modis250\NDVI_10dias'
arcpy.env.workspace = ws_in
lst_ras = arcpy.ListRasters("*.tif")
for ras_name in sorted(lst_ras):
# MODIS.ndvi.YYYYjjj.yL6000.BOKU.tif
in_name_path = os.path.join(ws_in, ras_name)
y = ras_name[11:15]
j = int(ras_name[15:18])
d = GetDecadeFromJulian(int(y), j)
out_name = "ndvi_{}_dec_{}.tif".format(y, d)
out_name_path = os.path.join(ws_out, out_name)
os.rename(in_name_path, out_name_path)
def GetDecadeFromJulian(year, j):
import datetime
date = datetime.datetime(year, 1, 1) + datetime.timedelta(j)
m = (date.month - 1) * 3
if date.day <= 10:
d = 1
elif date.day <= 20:
d = 2
else:
d = 3
return m + d
if __name__ == '__main__':
main()
Same error for your code:
Traceback (most recent call last):
File "<string>", line 37, in <module>
File "<string>", line 13, in main
TypeError: 'NoneType' object is not iterable
Problem was in line 7: Originals -> Originais
The script runns, however does´t overwrite the output files: If there are more than one tif per decade, the higher julian day should represent the decade.
Traceback (most recent call last):
File "<string>", line 37, in <module>
File "<string>", line 22, in main
FileExistsError: [WinError 183] Não é possível criar um arquivo já existente: 'D:\\Modis250\\Originais\\MODIS.ndvi.2002017.yL6000.BOKU.tif' -> 'D:\\Modis250\\NDVI_10dias\\ndvi_2002_dec_2.tif'
Does the code considered the two first files as belonging to decade two?
You're using "os.rename" and on Windows if the destination exists, it will throw an error.