Hi all, I have the following script running on roughly 15 GB of shapefiles, with the purpose of importing them into a geodatabase. I had previously tried this by finding all of the shapefiles first and passing them to "Feature Class to Feature Class" as one block; however, that overloaded the tool. This version uses a loop to import them into the geodatabase one by one. In this way the code is stable, and it has already successfully processed a few thousand shapefiles; however, I am convinced that the process is taking much longer than it should. For example, the code takes about 35-40 seconds just to check whether the feature class already exists in the geodatabase. This is far, far too long, and I suspect it has something to do with checking out the ArcInfo license each time an arcpy function is used. Does anyone have ideas for increasing the speed and efficiency? I was considering trying something like this, but the documentation says it is legacy. Thanks for the brainstorming, Matt. (I run this code through a normal Python console, not the one in Arc, for stability reasons.)
# Import system modules
import sys, string, os, fnmatch, arcpy
from arcpy import env
### Set user variables here, if no user-input desired: ###
searchPath = os.path.abspath("/path/to/data") #path to search
searchParam = "*.shp" #search parameter - wildcards OK
shapefileType = "Polyline" #shapefile type: Polyline, Polygon, Point, etc.
OutputDatabase = os.path.abspath("/path/to/output.gdb")
### End user variables ###
### Check and print the input parameters ###
if os.path.isdir(searchPath):
print "Search: " + searchPath + " OK"
else:
print searchPath + "\n is not a valid path!"
sys.exit(0)
print "Searching for " + searchParam
print "shapefileType = " + shapefileType
if arcpy.Exists(OutputDatabase): # verify that output database exists.
print "OutputDatabase = " + OutputDatabase + " OK..."
else:
print searchPath + "\n is not valid"
sys.exit(0)
### End variable check ###
env.workspace = OutputDatabase # set the current workspace as the target database.
env.overwriteOutput = True
# Search function
resultList = [] #The list for collecting results with
for root, dirs, files in os.walk(searchPath): # crawl through the search directory
for f in files: # for each file in the search directory
if fnmatch.fnmatch(f, searchParam): # check if the filename matches
shpDescr = arcpy.Describe(os.path.join(root, f)) # reads the shapefile
shpType = shpDescr.shapeType # reads the shapefile type
if str(shpType) == shapefileType: # check if the shapefile type matches
outFeatureClass = arcpy.ValidateTableName("cont"+f.replace(".shp",""), OutputDatabase) # generates a database-friendly fieldclass name
if arcpy.Exists(outFeatureClass): # check to see if already imported
print os.path.join(root, f) + " already imported as " + outFeatureClass
else:
print "Exporting " + f + " to " + outFeatureClass + "..."
try:
arcpy.FeatureClassToFeatureClass_conversion(os.path.join(root, f), OutputDatabase, outFeatureClass) #export to geodatabase
except: # error handling
print "Error with " + os.path.join(root, f)
for msg in range(0, arcpy.GetMessageCount()):
if arcpy.GetSeverity(msg) == 2:
arcpy.AddReturnMessage(msg)
tempString = "'" + os.path.join(root, f) + "'"
resultList.append(tempString)
resultList.append(";")
continue
# Write a report of the problem files, if any.
# BUG FIX: the original tested `if not resultList:`, which ran this branch
# only when there were NO errors (contradicting its own comment) and then
# crashed on .pop() of an empty list. The test is now `if resultList:`.
if resultList:  # only runs when some files failed to import
    resultList.pop()  # removes last list value (trailing semicolon)
    resultString = "\"" + "".join(resultList) + "\""  # convert list to a single quoted string
    outFilesMV = resultString.replace("\\", "\\\\")  # escape backslashes for downstream use
    # Outputs a list of problem files in the searchPath directory:
    outputFilename = os.path.join(searchPath, "ImportErrors.txt")
    fu = open(os.path.abspath(outputFilename), 'w')
    try:
        fu.write(outFilesMV)
    finally:
        fu.close()  # guaranteed close even if the write fails