Efficient way to delete all table rows

12-16-2015 01:08 PM
SteveCole
Frequent Contributor

I'm not a Python expert, so I'm wondering if my script is already the most efficient way of doing this or if there's a better way. We have a table in SDE (SQL Server 2012 back end, FWIW) and a Windows scheduled task that pulls records from an Access database over to the SDE table every 15 minutes. Because the table is providing content to a web map, I cannot delete the table (at least, I don't think I can without affecting the web map). For this reason, I chose to develop a script that empties the table of all records and then inserts the updated records. Usually, we're talking about 2800-3400 records at any given time.

Digging through my script, I'm seeing that the initial step of deleting the existing records is taking *much* longer than the task of copying records from the Access database into the SDE world. I'm quite shocked by that; I suspected the reverse would be true. Here is my script:

import sys
import os
import linecache
import logging
import arcpy
import time
from datetime import datetime
from arcpy import env


file01 = r"\\pmc-floodwatch\DIADvisorDatabases\DvLive.mok" #This file must exist
file02 = r"\\pmc-floodwatch\DIADvisorDatabases\DvLive.mno" #This file must NOT exist
expression = '1=1' #SQL shorthand which selects all records
theTable = "SPW_GIS_PROD.SPW_GDBA.HYDROGRAPHY__tblGageData"


#Establish the error log file 
logger = logging.getLogger('errorLog')
hdlr = logging.FileHandler(r'\\python\errorLog.log')
logger.addHandler(hdlr)


# The tables within DIADvisor must not be accessed during its daily database maintenance.
# OneRain recommends checking for the existence and non-existence of two specific files.
# If both conditions are true, it is safe to proceed with connecting to the data within
# the dvLive Access database
if os.path.exists(file01) and not os.path.exists(file02):
        print "=================================================="
        print "Processing start time: " + str(datetime.now())
        print "==================================================" + "\n"
        
        env.workspace = r"C:\Users\spwscc\AppData\Roaming\ESRI\Desktop10.3\ArcCatalog\SPW_GDBA@SPW_GIS_PROD@SQL2012-SQL127.sde"
        try:
                # Set some local variables
                tempTableView = "gageTableView"


                # Execute MakeTableView
                arcpy.MakeTableView_management(theTable, tempTableView)


                # Execute SelectLayerByAttribute to select all records
                arcpy.SelectLayerByAttribute_management(tempTableView, "NEW_SELECTION", expression)


                print "     * Deleting existing records in SDE: " + str(datetime.now())
                timeDelStart = time.time()


                # Execute GetCount and if some records have been selected, then execute
                #  DeleteRows to delete the selected records.
                if int(arcpy.GetCount_management(tempTableView).getOutput(0)) > 0:
                        arcpy.DeleteRows_management(tempTableView)


                timeDelEnd = time.time()
                timeDelElapsed = timeDelEnd - timeDelStart
                print "          Time elapsed: " + str(timeDelElapsed) + " seconds" + "\n"
                
                # Now connect to the DIADvisor access database and import the most recent data
                # This requires the OLE DB connection previously established using ArcCatalog
                counter = 0


                print "     * Starting record transfer from DVLive: " + str(datetime.now())
                timeStartTransfer = time.time()
                
                accessRows = arcpy.SearchCursor(r"C:\Users\spwscc\AppData\Roaming\ESRI\Desktop10.3\ArcCatalog\jetConnectForDvLive.odc\last3days")
                curSde = arcpy.InsertCursor(theTable)
                # Loop through the results returned via the OLE DB connection
                for cRow in accessRows:
                    curSensorId = cRow.sensor_id
                    curEpoch = cRow.epoch
                    curData = cRow.data
                    curDataValue2 = cRow.dataValue2                                     
                    counter += 1


                    #Insert a new row into the SDE table with the current DIADvisor record's information
                    row = curSde.newRow()
                    row.SENSOR_ID = curSensorId
                    row.EPOCH = curEpoch
                    row.DATA = curData
                    row.dataValue2 = curDataValue2
                    curSde.insertRow(row)


                timeEndTransfer = time.time()
                timeTransferElapsed = timeEndTransfer - timeStartTransfer
                print "          Time Elapsed: " + str(timeTransferElapsed) + " seconds" + "\n"
                
                # We're done so perform some variable cleanup
                del row
                del accessRows
                del curSde
                del cRow
                print "=================================================="                
                print "Processing end time: " + str(datetime.now())
                print "==================================================" + "\n"                
                print "Number of record(s) in the DIADvisor database: " + str(counter) + "\n"
        except Exception as e:
                # If an error occurred, print line number and error message
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                theMessage = "\n" + 80*"#" + "\n" + 80*"#" + "\n"
                theMessage = theMessage + "DATE/TIME: " + str(datetime.now()) + ":" + "\n"
                theMessage = theMessage + "EXCEPTION: " + str(e) + "\n" + "\n"
                theMessage = theMessage + "CALLBACK TRACE: " + "\n"
                theMessage = theMessage + 20*" " + "File: " + str(exc_tb.tb_frame.f_code.co_filename) + "\n"
                theMessage = theMessage + 20*" " + "Line " + str(exc_tb.tb_lineno) + ": " + str(linecache.getline(exc_tb.tb_frame.f_code.co_filename, exc_tb.tb_lineno))
                theMessage = theMessage + 20*" " + "Exception Type: " + str(exc_type)
                print theMessage
                logger.error(theMessage)
else:
        sys.exit()

In terms of runtimes, here's a quick sampling:

==================================================

Processing start time: 2015-12-16 12:49:01.322000

==================================================

     * Deleting existing records in SDE: 2015-12-16 12:49:03.491000

          Time elapsed: 66.4879999161 seconds

     * Starting record transfer from DVLive: 2015-12-16 12:50:10.022000

          Time Elapsed: 4.12600016594 seconds

==================================================

Processing end time: 2015-12-16 12:50:14.157000

==================================================

Number of record(s) in the DIADvisor database: 3167

>>> ================================ RESTART ================================

>>>

==================================================

Processing start time: 2015-12-16 13:00:50.661000

==================================================

     * Deleting existing records in SDE: 2015-12-16 13:00:53.012000

          Time elapsed: 71.2430000305 seconds

     * Starting record transfer from DVLive: 2015-12-16 13:02:04.295000

          Time Elapsed: 4.01099991798 seconds

==================================================

Processing end time: 2015-12-16 13:02:08.314000

==================================================

Number of record(s) in the DIADvisor database: 3173

So- am I already clearing the contents of my SDE table in the most efficient way possible or is there a better way?

Thanks!

Steve


8 Replies
DarrenWiens2
MVP Honored Contributor

Not sure about deletion performance, but since you're at 10.3, you may want to use arcpy.da cursors which are much faster than old-school cursors.
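For what it's worth, here's a rough sketch of what the transfer loop in the posted script might look like with arcpy.da cursors (untested on my end; field names and paths are taken straight from Steve's script, so treat this as a starting point, not a drop-in replacement):

```python
import arcpy

# Paths and field names copied from the posted script
srcTable = r"C:\Users\spwscc\AppData\Roaming\ESRI\Desktop10.3\ArcCatalog\jetConnectForDvLive.odc\last3days"
theTable = "SPW_GIS_PROD.SPW_GDBA.HYDROGRAPHY__tblGageData"
srcFields = ["sensor_id", "epoch", "data", "dataValue2"]
dstFields = ["SENSOR_ID", "EPOCH", "DATA", "dataValue2"]

counter = 0
# da cursors return plain tuples in field order, so rows can be
# passed straight from the search cursor to the insert cursor
with arcpy.da.SearchCursor(srcTable, srcFields) as srcRows:
    with arcpy.da.InsertCursor(theTable, dstFields) as dstRows:
        for row in srcRows:
            dstRows.insertRow(row)
            counter += 1
```

The `with` blocks also take care of releasing the cursors, so the `del` cleanup at the end of the original script isn't needed.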

RickMomsen
New Contributor II

The data access module (.da) was introduced at 10.1.  When I first converted my scripts to use the da cursors and saw the performance improvement, I did cartwheels down the hallway.  I recommend everyone using 10.1+ do it.  (The da upgrade, not the cartwheels.)

JoshuaBixby
MVP Esteemed Contributor

Given the frequency and number of updates you are doing, am I correct in assuming these aren't versioned data in the enterprise geodatabase?  If true, i.e., not-versioned, then you will want to use the Truncate tool.

In the database world, deleting all of the records from a table and truncating a table are very different things.  Deleting records involves database transactions, which can allow recovery of deleted data if needed, hence why it doesn't go as quickly as new users think it should.  Truncating a table is usually much faster, but don't think about trying to recover the data.
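In your script, that would mean replacing the whole MakeTableView / SelectLayerByAttribute / GetCount / DeleteRows block with a single call, something like this (assuming the table really is not versioned):

```python
import arcpy

# Table name copied from the posted script
theTable = "SPW_GIS_PROD.SPW_GDBA.HYDROGRAPHY__tblGageData"

# Truncate drops all rows in one operation instead of deleting
# them row by row inside a transaction
arcpy.TruncateTable_management(theTable)
```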

SteveCole
Frequent Contributor

Holy smokes- we have a WINNER. Thanks Joshua Bixby

I've run it a couple of times and the times dropped from 60-70 seconds down to under a second. That's fabulous. You were right to assume that the table was not versioned. It contains stream gage values for the last 3 days so the contents are always changing with no need for preserving the older data (that's stored in the Access Database that the gage software uses anyways).

While we're all talking, is there a "better" way to handle the transfer of records from Access to SDE? The Query Table in Access has the same definition as the SDE table.
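For example, since the schemas match, I'm wondering if something like a single Append call could replace the cursor loop entirely; this is untested speculation on my part:

```python
import arcpy

# Paths copied from my script above
src = r"C:\Users\spwscc\AppData\Roaming\ESRI\Desktop10.3\ArcCatalog\jetConnectForDvLive.odc\last3days"
theTable = "SPW_GIS_PROD.SPW_GDBA.HYDROGRAPHY__tblGageData"

# NO_TEST skips field mapping checks, which should be safe here
# because the query table and the SDE table share the same definition
arcpy.Append_management(src, theTable, "NO_TEST")
```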

MichaelVolz
Esteemed Contributor

Joshua:

I've been using arcpy.DeleteFeatures_management in Python for years, thinking it was ESRI's fast truncate tool.  Do you have any idea how that tool compares to the arcpy.TruncateTable_management tool you mentioned?

SteveCole
Frequent Contributor

Michael Volz

As an FYI, same script I posted but just changed the one line to use the truncate command. Here are the results of a few runs:

==================================================

Processing start time: 2015-12-16 13:51:26.462000

==================================================

     * Deleting existing records in SDE: 2015-12-16 13:51:28.509000

          Time elapsed: 0.00799989700317 seconds

     * Starting record transfer from DVLive: 2015-12-16 13:51:28.527000

          Time Elapsed: 4.20300006866 seconds

==================================================

Processing end time: 2015-12-16 13:51:32.741000

==================================================

Number of record(s) in the DIADvisor database: 3218

>>> ================================ RESTART ================================

>>>

==================================================

Processing start time: 2015-12-16 13:58:36.772000

==================================================

     * Deleting existing records in SDE: 2015-12-16 13:58:38.863000

          Time elapsed: 0.0090000629425 seconds

     * Starting record transfer from DVLive: 2015-12-16 13:58:38.878000

          Time Elapsed: 3.69099998474 seconds

==================================================

Processing end time: 2015-12-16 13:58:42.640000

==================================================

Number of record(s) in the DIADvisor database: 3222

>>> ================================ RESTART ================================

>>>

==================================================

Processing start time: 2015-12-16 14:06:43.055000

==================================================

     * Deleting existing records in SDE: 2015-12-16 14:06:45.097000

          Time elapsed: 0.0090000629425 seconds

     * Starting record transfer from DVLive: 2015-12-16 14:06:45.122000

          Time Elapsed: 4.10700011253 seconds

==================================================

Processing end time: 2015-12-16 14:06:49.243000

==================================================

Number of record(s) in the DIADvisor database: 3228

MatthewRusso
New Contributor II
def delCursor(table):
     try:
          counter = 0
          # Walk the table with a da.UpdateCursor, deleting each row
          with arcpy.da.UpdateCursor(table, "*") as cursor:
               for row in cursor:
                    counter += 1
                    cursor.deleteRow()
     except:
          # If something goes wrong, release any workspace locks
          arcpy.ClearWorkspaceCache_management()


delCursor(table_name)
# This is how I clear a table... it does take a bit of time but found that it worked in SDE
DougBrowning
MVP Notable Contributor

OMG, thank you @JoshuaBixby. I looked for a long time and somehow just found this.

I need to create blank template files that look just like our SDE but in a GDB.  I had been using delete rows or delete features.  I have like 40 tables and FCs in there - some with 3 million records.  So delete rows would run forever or crash.

Truncate did the entire GDB in like 20 seconds!  Changed my life.  Thanks!

My workflow is - copy paste from SDE to a clean GDB.

Run this delete script, which will just clear out anything it finds.  Note I was testing here, so that's why there are two different loops.  I added the Describe check in case you're doing SDE and have views.  Then I added Compact or the GDB stays big.

outDB = r"C:\temp\TD\NewTemplate\Template.gdb"

arcpy.env.workspace = outDB
tableList = arcpy.ListTables()
fcList = arcpy.ListFeatureClasses()

# trying this
for f in fcList:
    print "   Deleting all records in FC " + f
    #arcpy.DeleteFeatures_management(f)
    arcpy.TruncateTable_management(f)

for f in tableList:
    print "   Deleting all records in table " + f
    #arcpy.DeleteRows_management(f)
    arcpy.TruncateTable_management(f)

allList = fcList + tableList

##print "Starting DB " + outDB
##
##for f in tableList:
##    desc = arcpy.Describe(f)
##    print desc
##    if desc.hasOID:
##        print "   Deleting all records in table " + f
##
##        arcpy.DeleteRows_management(f)
##    else:
##        print "      Table or FC is a View so skipping..."

print "Completed all Deletes"
print "Running Compact"
arcpy.Compact_management(outDB)
print "All Done"

 

Again, thanks!