Recently, we've been having issues getting our python script to copy our enterprise geodatabase to a new fresh file geodatabase. When it's copying one of the feature data sets to the local file geodatabase it generates an error saying a table already exists and skips over the entire data set which holds 90% of our data and onto the next feature data set. Does anyone have a working python script that exports an entire enterprise geodatabase?
import time, os, datetime, sys, logging, logging.handlers, shutil, traceback
import arcpy
########################## user defined functions ##############################
def getDatabaseItemCount(workspace):
log = logging.getLogger("script_log")
"""returns the item count in provided database"""
arcpy.env.workspace = workspace
feature_classes = []
log.info("Compiling a list of items in {0} and getting count.".format(workspace))
for dirpath, dirnames, filenames in arcpy.da.Walk(workspace,datatype="Any",type="Any"):
for filename in filenames:
feature_classes.append(os.path.join(dirpath, filename))
log.info("There are a total of {0} items in the database".format(len(feature_classes)))
return feature_classes, len(feature_classes)
def replicateDatabase(dbConnection, targetGDB):
log = logging.getLogger("script_log")
startTime = time.time()
if arcpy.Exists(dbConnection):
featSDE,cntSDE = getDatabaseItemCount(dbConnection)
log.info("Geodatabase being copied: %s -- Feature Count: %s" %(dbConnection, cntSDE))
if arcpy.Exists(targetGDB):
featGDB,cntGDB = getDatabaseItemCount(targetGDB)
log.info("Old Target Geodatabase: %s -- Feature Count: %s" %(targetGDB, cntGDB))
try:
shutil.rmtree(targetGDB)
log.info("Deleted Old %s" %(os.path.split(targetGDB)[-1]))
except Exception as e:
log.info(e)
GDB_Path, GDB_Name = os.path.split(targetGDB)
log.info("Now Creating New %s" %(GDB_Name))
arcpy.CreateFileGDB_management(GDB_Path, GDB_Name)
arcpy.env.workspace = dbConnection
try:
datasetList = [arcpy.Describe(a).name for a in arcpy.ListDatasets()]
except Exception, e:
datasetList = []
log.info(e)
try:
featureClasses = [arcpy.Describe(a).name for a in arcpy.ListFeatureClasses()]
except Exception, e:
featureClasses = []
log.info(e)
try:
tables = [arcpy.Describe(a).name for a in arcpy.ListTables()]
except Exception, e:
tables = []
log.info(e)
#Compiles a list of the previous three lists to iterate over
allDbData = datasetList + featureClasses + tables
for sourcePath in allDbData:
targetName = sourcePath.split('.')[-1]
targetPath = os.path.join(targetGDB, targetName)
if not arcpy.Exists(targetPath):
try:
log.info("Attempting to Copy %s to %s" %(targetName, targetPath))
arcpy.Copy_management(sourcePath, targetPath)
log.info("Finished copying %s to %s" %(targetName, targetPath))
except Exception as e:
log.info("Unable to copy %s to %s" %(targetName, targetPath))
log.info(e)
else:
log.info("%s already exists....skipping....." %(targetName))
featGDB,cntGDB = getDatabaseItemCount(targetGDB)
log.info("Completed replication of %s -- Feature Count: %s" %(dbConnection, cntGDB))
else:
log.info("{0} does not exist or is not supported! \
Please check the database path and try again.".format(dbConnection))
#####################################################################################
def formatTime(x):
minutes, seconds_rem = divmod(x, 60)
if minutes >= 60:
hours, minutes_rem = divmod(minutes, 60)
return "%02d:%02d:%02d" % (hours, minutes_rem, seconds_rem)
else:
minutes, seconds_rem = divmod(x, 60)
return "00:%02d:%02d" % (minutes, seconds_rem)
if __name__ == "__main__":
startTime = time.time()
now = datetime.datetime.now()
############################### user variables #################################
'''change these variables to the location of the database being copied, the target
database location and where you want the log to be stored'''
logPath = "C:/temp/Log"
databaseConnection = r"C:\Users\jnmiller\AppData\Roaming\ESRI\Desktop10.6\ArcCatalog\SDE.sde"
############################### logging items ###################################
# Make a global logging object.
logName = os.path.join(logPath,(now.strftime("%Y-%m-%d_%H-%M.log")))
log = logging.getLogger("script_log")
log.setLevel(logging.INFO)
h1 = logging.FileHandler(logName)
h2 = logging.StreamHandler()
f = logging.Formatter("[%(levelname)s] [%(asctime)s] [%(lineno)d] - %(message)s",'%m/%d/%Y %I:%M:%S %p')
h1.setFormatter(f)
h2.setFormatter(f)
h1.setLevel(logging.INFO)
h2.setLevel(logging.INFO)
log.addHandler(h1)
log.addHandler(h2)
log.info('Script: {0}'.format(os.path.basename(sys.argv[0])))
try:
########################## function calls ######################################
replicateDatabase(databaseConnection, targetGDB)
################################################################################
except Exception, e:
log.exception(e)
totalTime = formatTime((time.time() - startTime))
log.info('--------------------------------------------------')
log.info("Script Completed After: {0}".format(totalTime))
log.info('--------------------------------------------------')
Solved! Go to Solution.
This is an older thread but I use a different approach for exporting an enterprise DB to a GDB using replicas. It is fast and simple.
If the GDB already exists the script will sync the changes otherwise it creates a new copy. You can modify the items contained in the featureData list if you only want portions of the database exported. This does require a Standard (ArcEditor) or Advanced (ArcInfo) license.
Create Replica | ArcGIS Desktop
Synchronize Changes | ArcGIS Desktop
Working with geodatabase replicas | ArcGIS Desktop
#!/usr/bin/env python
import sys; sys.dont_write_bytecode = True
import arcpy
sde = r'Database Connections/sde@xxxxxx.sde'
gdb = r'\\server\path\GDBexport.gdb'
rName = r'SDE_GDB'
def buildReplica(folderPath,outputFile):
arcpy.CreateFileGDB_management(folderPath, outputFile, "CURRENT")
myWorkSpace = arcpy.env.workspace = sde
featureData = arcpy.ListDatasets()
arcpy.CreateReplica_management(featureData, 'ONE_WAY_REPLICA',gdb,rName,'FULL','CHILD_DATA_SENDER','USE_DEFAULTS','DO_NOT_REUSE','GET_RELATED', '','DO_NOT_USE_ARCHIVING')
def syncReplica():
arcpy.SynchronizeChanges_management(sde,rName,gdb,'FROM_GEODATABASE1_TO_2','IN_FAVOR_OF_GDB1','BY_OBJECT','')
def main():
try:
if os.path.exists(gdb):
syncReplica()
else:
buildReplica(r'\\server\path','GDBexport.gdb')
except Exception, e:
sys.exit("Error in processing File")
if __name__ == '__main__':
main()
I like your script better, but in a pinch, this will work (Pro):
import sys, string, os, arcpy, calendar, datetime, traceback,smtplib
import arcpy
from arcpy import env
from arcgis.gis import GIS
from subprocess import call
# Set the name of the feature service. This will control many variable in this script
NAME = "GRSM_EXOTICS"
# Define log setting
try:
d = datetime.datetime.now()
log = open("C:\\PYTHON_LOGS\LOG."+NAME+".txt","a")
log.write("----------------------------" + "\n")
log.write("----------------------------" + "\n")
log.write("Log: " + str(d) + "\n")
log.write("\n")
# Start process...
starttime = datetime.datetime.now()
log.write("Begin process:\n")
log.write(" Process started at " + str(starttime) + "\n")
log.write("\n")
### Start setting variables
# Mail Server Settings
SERVER = "1.2.3.4"
PORT = "25"
FROM = "BINKY_THE_CLOWN@clown.com"
MAILDOMAIN = '@clown.com'
# Data Steward getting the email. Needs to be their email address...without @nps.gov at the end
userList=["the_joker"]
# Define a seperate email list if fail, data steward won't know what error message means
userListFail = ["sasquatch"]
# get a list of usernames from the list of named tuples returned from ListUsers
userNames = [u for u in userList]
userNamesFail = [u for u in userListFail]
# take the userNames list and make email addresses by appending the appropriate suffix.
emailList = [name + MAILDOMAIN for name in userNames]
emailListFail = [name + MAILDOMAIN for name in userNamesFail]
TO = emailList
TOFail = emailListFail
# Grab date for the email
DATE = d
# Use X Drive for testing, once script is working, change to full UNC path IAW File and Folder naming protocol
# e.g. \\\\some_server\COMMON\RMS\Fisheries_Management\Angler_Creel\Data\Tabular\Archive\\
OutFolder = "\\\\some_server\COMMON\Veg\Exotic_Plants\EXOTICS_RESTON_BACKUPS\\"
# First letters need to indicate protocol needs to be same name as SDE Feature Class
# Don't forget underscore at end of feature class name!
OutName = NAME+"_"+time.strftime("%Y%m%d%H%M%S")+"_ARCHIVE.gdb"
GDB = OutFolder+"\\"+OutName
arcpy.env.configKeyword = "DEFAULTS"
arcpy.CreateFileGDB_management(OutFolder, OutName)
#This needs to be the full path to the SDE connection file, which must be in the same folder
# AFter testing and moving to final folder, needs to be UNC path
# e.g \\\\some_server\COMMON\RMS\Fisheries_Management\Angler_Creel\Data\Tabular\Archive\\RESTON BATS.sde
arcpy.env.workspace = "\\\\some_server\COMMON\Veg\Exotic_Plants\EXOTICS_RESTON_BACKUPS\\RESTON_EXOTICS.sde"
# Converts the SDE data, which will always start with [DBNAME].[SCHEMA]
arcpy.FeatureClassToFeatureClass_conversion("EXOTICS.DBO.GRSM_EXOTICS_SITE_POINT",GDB, "GRSM_EXOTICS_SITE_POINT", "", "", "DEFAULTS")
arcpy.FeatureClassToFeatureClass_conversion("EXOTICS.DBO.GRSM_EXOTICS_SITE_POLY",GDB, "GRSM_EXOTICS_SITE_POLY", "", "", "DEFAULTS")
arcpy.Compact_management(GDB)
# Write nothing to log if success.
endtime = datetime.datetime.now()
log.write(" Completed successfully in "
+ str(endtime - starttime) + "\n")
log.write("\n")
log.close()
#Define email message if success
SUBJECT = "Notification of Successful Archive of "+NAME
MSG = NAME + " was archived at "+ GDB +" at "+ str(DATE)
print (MSG)
print (emailList)
# Send an email notifying steward of successful archive
#MESSAGE = "\ From: %s To: %s Subject: %s %s" % (FROM, ", ".join(TO), SUBJECT, MSG)
MESSAGE = "Subject: %s\n\n%s" % (SUBJECT, MSG)
try:
try:
print("Connecting to Server...")
server = smtplib.SMTP(SERVER,PORT)
try:
print("Login...")
try:
print("Sending mail...")
server.sendmail(FROM, TO, MESSAGE)
except Exception as e:
print("Send Error Mail\n" + e.message)
except Exception as e:
print("Error Authentication Server: check the credentials \n" + e.message)
except Exception as e:
print("Error Connecting to Server : check the URL of the server and communications port ( 25 and ' the default ) \n" + e.message)
print("Quit.")
server.quit()
except Exception as e:
print (e.message)
# Something went wrong
except:
# Get the traceback object
tb = sys.exc_info()[2]
tbinfo = traceback.format_tb(tb)[0]
# Concatenate information together concerning
# the error into a message string
pymsg = "PYTHON ERRORS:\nTraceback info:\n" + tbinfo + "\nError Info:\n" + str(sys.exc_info()[1])
msgs = "ArcPy ERRORS:\n" + arcpy.GetMessages(2) + "\n"
# Return python error messages for use in
# script tool or Python Window
arcpy.AddError(pymsg)
arcpy.AddError(msgs)
# Print Python error messages for use in
# Python / Python Window
log.write("" + pymsg + "\n")
log.write("" + msgs + "")
log.close()
# Define email message if something went wrong
SUBJECT = "Notification of Un-Successful Archive of "+NAME
MSG = "Did Not archive "+NAME +" at "+ str(DATE)+ "; " +pymsg + "; " + msgs
print (MSG)
print (emailListFail)
# Send an email notifying steward of unsuccessful archive
#MESSAGE = "\ From: %s To: %s Subject: %s %s" % (FROM, ", ".join(TO), SUBJECT, MSG)
MESSAGE = "Subject: %s\n\n%s" % (SUBJECT, MSG)
try:
try:
print("Connecting to Server...")
server = smtplib.SMTP(SERVER,PORT)
try:
print("Login...")
try:
print("Sending mail...")
server.sendmail(FROM, TOFail, MESSAGE)
except Exception as e:
print("Send Error Mail\n" + e.message)
except Exception as e:
print("Error Authentication Server: check the credentials \n" + e.message)
except Exception as e:
print("Error Connecting to Server : check the URL of the server and communications port ( 25 and ' the default ) \n" + e.message)
print("Quit.")
server.quit()
except Exception as e:
print (e.message)
I prefer using arcpy.CopyFeatures_management() over arcpy.Copy_management() because the latter will fail when there is topology or other stuff that are only supported in geodatabase enterprise. Also, Copy_management() is copying all relationships so when you loop over a dataset, it can tries to copy a feature class many times. Overall, arcpy.CopyFeatures_management() is much faster. Finally, I dont know why, but arcpy.Copy_management() have duplicated some dataset in the output adding _1, _2 , _etc at the end and spreading the feature classes in them.
I wonder how arcpy.CopyFeatures_management() compares with arcpy.FeatureClassToFeatureClass_converseion() in terms of performance. Anyboy have compared both?
This is an older thread but I use a different approach for exporting an enterprise DB to a GDB using replicas. It is fast and simple.
If the GDB already exists the script will sync the changes otherwise it creates a new copy. You can modify the items contained in the featureData list if you only want portions of the database exported. This does require a Standard (ArcEditor) or Advanced (ArcInfo) license.
Create Replica | ArcGIS Desktop
Synchronize Changes | ArcGIS Desktop
Working with geodatabase replicas | ArcGIS Desktop
#!/usr/bin/env python
import sys; sys.dont_write_bytecode = True
import arcpy
sde = r'Database Connections/sde@xxxxxx.sde'
gdb = r'\\server\path\GDBexport.gdb'
rName = r'SDE_GDB'
def buildReplica(folderPath,outputFile):
arcpy.CreateFileGDB_management(folderPath, outputFile, "CURRENT")
myWorkSpace = arcpy.env.workspace = sde
featureData = arcpy.ListDatasets()
arcpy.CreateReplica_management(featureData, 'ONE_WAY_REPLICA',gdb,rName,'FULL','CHILD_DATA_SENDER','USE_DEFAULTS','DO_NOT_REUSE','GET_RELATED', '','DO_NOT_USE_ARCHIVING')
def syncReplica():
arcpy.SynchronizeChanges_management(sde,rName,gdb,'FROM_GEODATABASE1_TO_2','IN_FAVOR_OF_GDB1','BY_OBJECT','')
def main():
try:
if os.path.exists(gdb):
syncReplica()
else:
buildReplica(r'\\server\path','GDBexport.gdb')
except Exception, e:
sys.exit("Error in processing File")
if __name__ == '__main__':
main()
For anyone interested I just did a basic test of copying a single feature class from one file geodatabase to another and the average results for the same feat class were
Copy_management ~20 secs
CopyFeatures_management ~20 secs
FeatureClassToFeatureClass ~12secs