Y.Morita

Loop processing gradually slow down with arcpy script

Discussion created by Y.Morita on Dec 7, 2017
Latest reply on Dec 11, 2017 by Dan_Patterson

I try to the process below.

  1. spatial search tile raster index polygon with feature
  2. rasterize feature
  3. combine rasterized feature and MODIS raster and export combine raster attribute table (loop for tiles) 
  4. table operation with pandas and arcpy

 

This process works well.However, process gradually slow down.

I try this process for 51 times and check time. All input features are same.

The result is below.Each time_1,time_2 time_3 is described below.

time_1:finish process 2(rasterize feature)

time_2:finish process 3(combine and export table)

time_3:finish process 4(table operation)

elapsed_time:finish 1 feature process(save output in gdb file) 

 

  • first time

INFO:root:land_00244 1/51 START
INFO:root:time_1:2.9405651092529297[sec]
INFO:root:time_2:3.73823618888855[sec]
INFO:root:time_2:4.30132269859314[sec]
INFO:root:time_2:4.817508697509766[sec]
INFO:root:time_2:5.239825248718262[sec]
INFO:root:time_3:9.50986933708191[sec]
INFO:root:elapsed_time:10.557835817337036[sec]

  • 51 time

INFO:root:land_00244_49 51/51 START
INFO:root:time_1:14.089014768600464[sec]
INFO:root:time_2:19.874730348587036[sec]
INFO:root:time_2:25.050605058670044[sec]
INFO:root:time_2:30.117022275924683[sec]
INFO:root:time_2:35.16779851913452[sec]
INFO:root:time_3:51.1801700592041[sec]
INFO:root:elapsed_time:53.90104866027832[sec]

 

Even though same feature, process time increased 5 times!

All processes slow down.Why?

 

My environment is below.

 OS:windows 10 pro

 ArcGIS:10.5.1

 ArcGIS pro:2.0

But, in windows 7 and ArcGIS(10.4.1 and pro 1.4), this slow down didn't occur.

 

Thanks in advance!

 

from __future__ import print_function, unicode_literals, absolute_import
try:
import urllib2 # Python 2
except ImportError:
import urllib.request as urllib2 # Python 3
# Import arcpy module
import pandas as pd
import arcpy
import sys
import os
import socket
import gc
import time
import numpy as np
import logging

# Set overwrite option
arcpy.env.overwriteOutput = True
# datapath
DATA_DIR = r"C:\Data"
WORK_DIR = r"C:\Work01"
MODIS = DATA_DIR + "\\" + "MODIS_2001.gdb"
INDEX_POLY = DATA_DIR + "\\" + "MODIS_index.gdb"
def proc_zonalStat(feature_lyr, feature_path, list_index):
   arcpy.env.workspace = "in_memory"
   arcpy.SelectLayerByLocation_management(list_index[0], "INTERSECT", feature_lyr)
   raslist_modis = []
   cursor = arcpy.da.SearchCursor(list_index[0], ["F_NAME"])
   for row in cursor:
      raslist_modis.append(MODIS + "\\" + row[0])
      del cursor
ras_feature = "in_memory"+ "\\ras_feature"
arcpy.env.outputCoordinateSystem = arcpy.SpatialReference(4326)
arcpy.PolygonToRaster_conversion(feature_lyr, "OBJECTID", ras_feature, cellsize=0.0041666667)
time_1 = time.time() - start_time
logging.info("time_1:{0}".format(time_1) + "[sec]")
count = 0
aggreFlg = True
for inValueRaster in raslist_modis:
   try:
      tableName = "MODIS_tbl" + str(count).zfill(3)
      outCombine = arcpy.sa.Combine([ras_feature, inValueRaster])
      arcpy.TableToTable_conversion(outCombine, scratchGdb, tableName)
      arcpy.Delete_management(outCombine)
      count += 1
      time_2 = time.time() - start_time
      logging.info("time_2:{0}".format(time_2) + "[sec]")
fieldName = "MODIS_254"
expression = "0"
arcpy.CalculateField_management(feature_lyr, fieldName, expression, "PYTHON_9.3")
arcpy.Delete_management(ras_feature)
tables = arcpy.ListTables("MODIS_tbl*")
list_df_MODIS = []
fields = ['ras_feature', 'MODIS_landCover_', 'COUNT']
for table in tables:
   # numpy array -> pandas dataframe
   arr = arcpy.da.TableToNumPyArray(table, fields)
   df = pd.DataFrame(arr, columns=fields)
   list_df_MODIS.append(df)
   del arr
   gc.collect()
del tables
gc.collect()
df_all = pd.concat(list_df_MODIS, ignore_index=True)
add = pd.DataFrame([[0,0,0],
[0,1,0],
[0,2,0],
[0,3,0],
[0,4,0],
[0,5,0],
[0,6,0],
[0,7,0],
[0,8,0],
[0,9,0],
[0,10,0],
[0,11,0],
[0,12,0],
[0,13,0],
[0,14,0],
[0,15,0],
[0,16,0],
[0,254,0]],
columns=['ras_feature', 'MODIS_landCover_', 'COUNT'])
df_all = df_all.append(add)
del add
grouped = df_all.groupby([df_all['ras_feature'], df_all['MODIS_landCover_']], as_index=False).sum()
arr_MODIS = grouped.pivot_table(index='ras_feature', columns=['MODIS_landCover_'],values=['MODIS_landCover_','COUNT'],fill_value=0)
arr_MODIS = arr_MODIS.reset_index().values
arr_MODIS = arr_MODIS.astype(np.float32)
struct_array = np.core.records.fromarrays(arr_MODIS.transpose(),
np.dtype([('Value', np.uint64),
('MODIS_0', np.float32),
('MODIS_1', np.float32),
('MODIS_2', np.float32),
('MODIS_3', np.float32),
('MODIS_4', np.float32),
('MODIS_5', np.float32),
('MODIS_6', np.float32),
('MODIS_7', np.float32),
('MODIS_8', np.float32),
('MODIS_9', np.float32),
('MODIS_10', np.float32),
('MODIS_11', np.float32),
('MODIS_12', np.float32),
('MODIS_13', np.float32),
('MODIS_14', np.float32),
('MODIS_15', np.float32),
('MODIS_16', np.float32),
('MODIS_254', np.float32)]))
tmp_table = tmp_proc_Gdb + "\\tmp_table"
arcpy.da.NumPyArrayToTable(struct_array, tmp_table)
del struct_array, grouped, arr_MODIS, df_all, list_df_MODIS
gc.collect()
tmp_table_view = tmp_proc_Gdb + "\\tmp_table_view"
arcpy.MakeTableView_management(tmp_table, tmp_table_view, "NOT Value = 0")
arcpy.AddJoin_management(feature_lyr, "OBJECTID", tmp_table_view, "Value")
feature_name = os.path.basename(feature_path)
for i in range(0,17):
   fieldName = feature_name + ".MODIS_" + str(i)
   expression = "!tmp_table.MODIS_" + str(i) + "!"
   arcpy.CalculateField_management(feature_lyr, fieldName, expression, "PYTHON_9.3")

fieldName = feature_name + ".MODIS_254"
expression = "!tmp_table.MODIS_254!"
arcpy.CalculateField_management(feature_lyr, fieldName, expression, "PYTHON_9.3")
arcpy.RemoveJoin_management(feature_lyr)
arcpy.Delete_management(tmp_table_view)
arcpy.Delete_management(tmp_table)
time_3 = time.time() - start_time
logging.info("time_3:{0}".format(time_3) + "[sec]")


# Set scratchWorkspace
arcpy.env.scratchWorkspace = "in_memory"
# Set local_outGDB
THREAD = 0
hostName = socket.gethostname()
local_outGdb_name = 'tmp_' + hostName + '_' + str(THREAD).zfill(2) + '.gdb'
scratchGdb = "in_memory"
local_outGdb = WORK_DIR + '\\' + local_outGdb_name
tmp_proc_Gdb = "in_memory"

logfile_name = WORK_DIR + "\\log_test.txt"
logging.basicConfig(filename=logfile_name,level=logging.DEBUG)
logging.info("************** MAIN PROCESS START **************")
arcpy.env.workspace = WORK_DIR
for gdb in arcpy.ListFiles("test.gdb"):
if not arcpy.Exists(WORK_DIR + '\\' + local_outGdb_name):
   arcpy.CreateFileGDB_management(WORK_DIR, local_outGdb_name)

if arcpy.Exists(WORK_DIR + '\\Res\\' + gdb):
   arcpy.env.workspace = WORK_DIR + '\\Res\\' + gdb
   features_end = arcpy.ListFeatureClasses()

   arcpy.env.workspace = WORK_DIR + '\\' + gdb
   features_tmp = arcpy.ListFeatureClasses()

   features = [ feature for feature in features_tmp if feature not in features_end ]
else:
   arcpy.env.workspace = WORK_DIR + '\\' + gdb
   features = arcpy.ListFeatureClasses()
totalNum = len(features)
counter = 0

index_MODIS_lyr = "in_memory" + "\\" + "idx_MODIS_lyr"
list_index = [index_MODIS_lyr]
arcpy.MakeFeatureLayer_management(INDEX_POLY + "\\MODIS_landCover_2001", index_MODIS_lyr)
for feature in features:
   try:
      start_time = time.time()
      arcpy.env.workspace = WORK_DIR + '\\' + gdb
      arcpy.env.parallelProcessingFactor = "100%"
      counter += 1
      logging.info(feature+" "+str(counter) + '/' + str(totalNum)+" START")
      tmp_feature = tmp_proc_Gdb + "\\tmp_feature"
      arcpy.CopyFeatures_management(feature, tmp_feature)
      feature_lyr = tmp_proc_Gdb + "\\feature_lyr"
      arcpy.MakeFeatureLayer_management(tmp_feature, feature_lyr)
      rows = arcpy.GetCount_management(feature_lyr)
      countFeature = int(rows.getOutput(0))
      del rows

      arcpy.AddField_management(feature_lyr, "MODIS_0" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_1" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_2" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_3" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_4" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_5" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_6" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_7" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_8" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_9" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_10" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_11" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_12" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_13" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_14" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_15" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_16" , "FLOAT")
      arcpy.AddField_management(feature_lyr, "MODIS_254" , "FLOAT")
      proc_zonalStat(feature_lyr, tmp_feature, list_index)
      out_feature = local_outGdb + "\\" + feature
      arcpy.CopyFeatures_management(tmp_feature, out_feature)
      arcpy.Delete_management(feature_lyr)
      arcpy.Delete_management(tmp_feature)
      elapsed_time = time.time() - start_time
      logging.info("elapsed_time:{0}".format(elapsed_time) + "[sec]")
   except arcpy.ExecuteError:
      logging.info(arcpy.GetMessages(2))
      arcpy.Delete_management("in_memory")
      arcpy.MakeFeatureLayer_management(INDEX_POLY + "\\MODIS_landCover_2001", index_MODIS_lyr)
outDir = WORK_DIR + "\\Res"
outGdbPath = outDir + "\\" + gdb
if not arcpy.Exists(outGdbPath):
   arcpy.CreateFileGDB_management(outDir, gdb)
arcpy.env.workspace = local_outGdb
features = arcpy.ListFeatureClasses()
arcpy.FeatureClassToGeodatabase_conversion(features, outGdbPath)
arcpy.Delete_management(local_outGdb)
logging.info("************** MAIN PROCESS E N D **************")

Outcomes