Using Python To Export Enterprise GDB to File GDB

3962
4
Jump to solution
08-01-2018 12:33 PM
JordanMiller4
Occasional Contributor III

Recently, we've been having issues getting our python script to copy our enterprise geodatabase to a new fresh file geodatabase. When it's copying one of the feature data sets to the local file geodatabase it generates an error saying a table already exists and skips over the entire data set which holds 90% of our data and onto the next feature data set. Does anyone have a working python script that exports an entire enterprise geodatabase?

import time, os, datetime, sys, logging, logging.handlers, shutil, traceback
import arcpy

########################## user defined functions ##############################

def getDatabaseItemCount(workspace):
    log = logging.getLogger("script_log")
    """returns the item count in provided database"""
    arcpy.env.workspace = workspace
    feature_classes = []
    log.info("Compiling a list of items in {0} and getting count.".format(workspace))
    for dirpath, dirnames, filenames in arcpy.da.Walk(workspace,datatype="Any",type="Any"):
        for filename in filenames:
            feature_classes.append(os.path.join(dirpath, filename))
    log.info("There are a total of {0} items in the database".format(len(feature_classes)))
    return feature_classes, len(feature_classes)

def replicateDatabase(dbConnection, targetGDB):
    log = logging.getLogger("script_log")
    startTime = time.time()

    if arcpy.Exists(dbConnection):
        featSDE,cntSDE = getDatabaseItemCount(dbConnection)
        log.info("Geodatabase being copied: %s -- Feature Count: %s" %(dbConnection, cntSDE))
        if arcpy.Exists(targetGDB):
            featGDB,cntGDB = getDatabaseItemCount(targetGDB)
            log.info("Old Target Geodatabase: %s -- Feature Count: %s" %(targetGDB, cntGDB))
            try:
                shutil.rmtree(targetGDB)
                log.info("Deleted Old %s" %(os.path.split(targetGDB)[-1]))
            except Exception as e:
                log.info(e)

        GDB_Path, GDB_Name = os.path.split(targetGDB)
        log.info("Now Creating New %s" %(GDB_Name))
        arcpy.CreateFileGDB_management(GDB_Path, GDB_Name)

        arcpy.env.workspace = dbConnection

        try:
            datasetList = [arcpy.Describe(a).name for a in arcpy.ListDatasets()]
        except Exception, e:
            datasetList = []
            log.info(e)
        try:
            featureClasses = [arcpy.Describe(a).name for a in arcpy.ListFeatureClasses()]
        except Exception, e:
            featureClasses = []
            log.info(e)
        try:
            tables = [arcpy.Describe(a).name for a in arcpy.ListTables()]
        except Exception, e:
            tables = []
            log.info(e)

        #Compiles a list of the previous three lists to iterate over
        allDbData = datasetList + featureClasses + tables

        for sourcePath in allDbData:
            targetName = sourcePath.split('.')[-1]
            targetPath = os.path.join(targetGDB, targetName)
           
            if not arcpy.Exists(targetPath):
                try:
                    log.info("Attempting to Copy %s to %s" %(targetName, targetPath))
                    arcpy.Copy_management(sourcePath, targetPath)
                    log.info("Finished copying %s to %s" %(targetName, targetPath))
                except Exception as e:
                    log.info("Unable to copy %s to %s" %(targetName, targetPath))
                    log.info(e)
            else:
                log.info("%s already exists....skipping....." %(targetName))

        featGDB,cntGDB = getDatabaseItemCount(targetGDB)
        log.info("Completed replication of %s -- Feature Count: %s" %(dbConnection, cntGDB))

    else:
        log.info("{0} does not exist or is not supported! \
        Please check the database path and try again.".format(dbConnection))

#####################################################################################

def formatTime(x):
    minutes, seconds_rem = divmod(x, 60)
    if minutes >= 60:
        hours, minutes_rem = divmod(minutes, 60)
        return "%02d:%02d:%02d" % (hours, minutes_rem, seconds_rem)
    else:
        minutes, seconds_rem = divmod(x, 60)
        return "00:%02d:%02d" % (minutes, seconds_rem)

if __name__ == "__main__":
    startTime = time.time()
    now = datetime.datetime.now()

    ############################### user variables #################################
    '''change these variables to the location of the database being copied, the target
    database location and where you want the log to be stored'''


    logPath = "C:/temp/Log"
    databaseConnection = r"C:\Users\jnmiller\AppData\Roaming\ESRI\Desktop10.6\ArcCatalog\SDE.sde"

    ############################### logging items ###################################
    # Make a global logging object.
    logName = os.path.join(logPath,(now.strftime("%Y-%m-%d_%H-%M.log")))

    log = logging.getLogger("script_log")
    log.setLevel(logging.INFO)

    h1 = logging.FileHandler(logName)
    h2 = logging.StreamHandler()

    f = logging.Formatter("[%(levelname)s] [%(asctime)s] [%(lineno)d] - %(message)s",'%m/%d/%Y %I:%M:%S %p')

    h1.setFormatter(f)
    h2.setFormatter(f)

    h1.setLevel(logging.INFO)
    h2.setLevel(logging.INFO)

    log.addHandler(h1)
    log.addHandler(h2)

    log.info('Script: {0}'.format(os.path.basename(sys.argv[0])))

    try:
        ########################## function calls ######################################

        replicateDatabase(databaseConnection, targetGDB)

        ################################################################################
    except Exception, e:
        log.exception(e)

    totalTime = formatTime((time.time() - startTime))
    log.info('--------------------------------------------------')
    log.info("Script Completed After: {0}".format(totalTime))
    log.info('--------------------------------------------------')
‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍
1 Solution

Accepted Solutions
LanceCole
MVP Regular Contributor

Maxime Demers,‌

This is an older thread but I use a different approach for exporting an enterprise DB to a GDB using replicas.  It is fast and simple. 

If the GDB already exists the script will sync the changes otherwise it creates a new copy.  You can modify the items contained in the featureData list if you only want portions of the database exported.  This does require a Standard (ArcEditor) or Advanced (ArcInfo) license.

Create Replica | ArcGIS Desktop

Synchronize Changes | ArcGIS Desktop

Working with geodatabase replicas | ArcGIS Desktop

#!/usr/bin/env python
import sys; sys.dont_write_bytecode = True
import arcpy

sde = r'Database Connections/sde@xxxxxx.sde'
gdb = r'\\server\path\GDBexport.gdb'
rName = r'SDE_GDB'

def buildReplica(folderPath,outputFile):
arcpy.CreateFileGDB_management(folderPath, outputFile, "CURRENT")
myWorkSpace = arcpy.env.workspace = sde
featureData = arcpy.ListDatasets()
arcpy.CreateReplica_management(featureData, 'ONE_WAY_REPLICA',gdb,rName,'FULL','CHILD_DATA_SENDER','USE_DEFAULTS','DO_NOT_REUSE','GET_RELATED', '','DO_NOT_USE_ARCHIVING')

def syncReplica():
arcpy.SynchronizeChanges_management(sde,rName,gdb,'FROM_GEODATABASE1_TO_2','IN_FAVOR_OF_GDB1','BY_OBJECT','')

def main():
try:
if os.path.exists(gdb):
syncReplica()
else:
buildReplica(r'\\server\path','GDBexport.gdb')
except Exception, e:
sys.exit("Error in processing File")

if __name__ == '__main__':
main()‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍

View solution in original post

4 Replies
ThomasColson
MVP Frequent Contributor

I like your script better, but in a pinch, this will work (Pro): 

import sys, string, os, arcpy, calendar, datetime, traceback,smtplib
import arcpy
from arcpy import env
from arcgis.gis import GIS
from subprocess import call

# Set the name of the feature service. This will control many variable in this script
NAME = "GRSM_EXOTICS"
# Define log setting
try:
    d = datetime.datetime.now()
    log = open("C:\\PYTHON_LOGS\LOG."+NAME+".txt","a")
    log.write("----------------------------" + "\n")
    log.write("----------------------------" + "\n")
    log.write("Log: " + str(d) + "\n")
    log.write("\n")
# Start process...
    starttime = datetime.datetime.now()
    log.write("Begin process:\n")
    log.write("     Process started at " + str(starttime) + "\n")
    log.write("\n")
     
    ### Start setting variables
    # Mail Server Settings
    SERVER = "1.2.3.4"
    PORT = "25"
    FROM = "BINKY_THE_CLOWN@clown.com"
    MAILDOMAIN = '@clown.com'
    # Data Steward getting the email. Needs to be their email address...without @nps.gov at the end
    userList=["the_joker"]
    # Define a seperate email list if fail, data steward won't know what error message means
    userListFail = ["sasquatch"]
    # get a list of usernames from the list of named tuples returned from ListUsers
    userNames = [u for u in userList]
    userNamesFail = [u for u in userListFail]
    # take the userNames list and make email addresses by appending the appropriate suffix.
    emailList = [name +  MAILDOMAIN for name in userNames]
    emailListFail = [name +  MAILDOMAIN for name in userNamesFail]
    TO = emailList
    TOFail = emailListFail
    # Grab date for the email
    DATE = d
  
    # Use X Drive for testing, once script is working, change to full UNC path IAW File and Folder naming protocol
    # e.g. \\\\some_server\COMMON\RMS\Fisheries_Management\Angler_Creel\Data\Tabular\Archive\\
    OutFolder = "\\\\some_server\COMMON\Veg\Exotic_Plants\EXOTICS_RESTON_BACKUPS\\"
    # First letters need to indicate protocol needs to be same name as SDE Feature Class
    # Don't forget underscore at end of feature class name!
    OutName = NAME+"_"+time.strftime("%Y%m%d%H%M%S")+"_ARCHIVE.gdb"
    GDB = OutFolder+"\\"+OutName
    arcpy.env.configKeyword = "DEFAULTS"
    arcpy.CreateFileGDB_management(OutFolder, OutName)
    #This needs to be the full path to the SDE connection file, which must be in the same folder
    # AFter testing and moving to final folder, needs to be UNC path
    # e.g \\\\some_server\COMMON\RMS\Fisheries_Management\Angler_Creel\Data\Tabular\Archive\\RESTON BATS.sde
    arcpy.env.workspace = "\\\\some_server\COMMON\Veg\Exotic_Plants\EXOTICS_RESTON_BACKUPS\\RESTON_EXOTICS.sde"
    # Converts the SDE data, which will always start with [DBNAME].[SCHEMA]
    arcpy.FeatureClassToFeatureClass_conversion("EXOTICS.DBO.GRSM_EXOTICS_SITE_POINT",GDB, "GRSM_EXOTICS_SITE_POINT", "", "", "DEFAULTS")
    arcpy.FeatureClassToFeatureClass_conversion("EXOTICS.DBO.GRSM_EXOTICS_SITE_POLY",GDB, "GRSM_EXOTICS_SITE_POLY", "", "", "DEFAULTS")
    arcpy.Compact_management(GDB)

     
    # Write nothing to log if success.
    endtime = datetime.datetime.now()
    log.write("     Completed successfully in "
           + str(endtime - starttime) + "\n")
    log.write("\n")
    log.close()
    #Define email message if success
    SUBJECT = "Notification of Successful Archive of "+NAME
    MSG = NAME + " was archived at  "+ GDB +" at "+ str(DATE)
    print (MSG)
    print (emailList)

    # Send an email notifying steward of successful archive
    #MESSAGE = "\ From: %s To: %s Subject: %s %s" % (FROM, ", ".join(TO), SUBJECT, MSG)
    MESSAGE = "Subject: %s\n\n%s" % (SUBJECT, MSG)
    try:
            try:
                print("Connecting to Server...")
                server = smtplib.SMTP(SERVER,PORT)
                try:
                    print("Login...")
                    try:
                        print("Sending mail...")
                        server.sendmail(FROM, TO, MESSAGE)
                    except Exception as e:
                        print("Send Error Mail\n" + e.message)
                except Exception as e:
                    print("Error Authentication Server: check the credentials \n" + e.message)
            except Exception as e:
                print("Error Connecting to Server : check the URL of the server and communications port ( 25 and ' the default ) \n" + e.message)
    
            print("Quit.")
            server.quit()
    
    except Exception as e:
            print (e.message)
# Something went wrong
except:
   
     # Get the traceback object
    tb = sys.exc_info()[2]
    tbinfo = traceback.format_tb(tb)[0]
     # Concatenate information together concerning
     # the error into a message string
    pymsg = "PYTHON ERRORS:\nTraceback info:\n" + tbinfo + "\nError Info:\n" + str(sys.exc_info()[1])
    msgs = "ArcPy ERRORS:\n" + arcpy.GetMessages(2) + "\n"
    # Return python error messages for use in
    # script tool or Python Window
    arcpy.AddError(pymsg)
    arcpy.AddError(msgs)
    # Print Python error messages for use in
    # Python / Python Window
    log.write("" + pymsg + "\n")
    log.write("" + msgs + "")
    log.close()
    # Define email message if something went wrong
    SUBJECT = "Notification of Un-Successful Archive of "+NAME
    MSG = "Did Not archive "+NAME +" at "+ str(DATE)+ "; " +pymsg + "; " + msgs
    print (MSG)
    print (emailListFail)

    # Send an email notifying steward of unsuccessful archive
    #MESSAGE = "\ From: %s To: %s Subject: %s %s" % (FROM, ", ".join(TO), SUBJECT, MSG)
    MESSAGE = "Subject: %s\n\n%s" % (SUBJECT, MSG)
    try:
            try:
                print("Connecting to Server...")
                server = smtplib.SMTP(SERVER,PORT)
                try:
                    print("Login...")
                    try:
                        print("Sending mail...")
                        server.sendmail(FROM, TOFail, MESSAGE)
                    except Exception as e:
                        print("Send Error Mail\n" + e.message)
                except Exception as e:
                    print("Error Authentication Server: check the credentials \n" + e.message)
            except Exception as e:
                print("Error Connecting to Server : check the URL of the server and communications port ( 25 and ' the default ) \n" + e.message)
    
            print("Quit.")
            server.quit()
    
    except Exception as e:
            print (e.message)




0 Kudos
MaximeDemers
Occasional Contributor III

I prefer using arcpy.CopyFeatures_management() over arcpy.Copy_management() because the latter will fail when there is topology or other stuff that are only supported in geodatabase enterprise. Also, Copy_management() is copying all relationships so when you loop over a dataset, it can tries to copy a feature class many times. Overall, arcpy.CopyFeatures_management() is much faster. Finally, I dont know why, but arcpy.Copy_management() have duplicated some dataset in the output adding _1, _2 , _etc at the end and spreading the feature classes in them.

I wonder how arcpy.CopyFeatures_management()  compares with arcpy.FeatureClassToFeatureClass_converseion() in terms of performance. Anyboy have compared both?

0 Kudos
LanceCole
MVP Regular Contributor

Maxime Demers,‌

This is an older thread but I use a different approach for exporting an enterprise DB to a GDB using replicas.  It is fast and simple. 

If the GDB already exists the script will sync the changes otherwise it creates a new copy.  You can modify the items contained in the featureData list if you only want portions of the database exported.  This does require a Standard (ArcEditor) or Advanced (ArcInfo) license.

Create Replica | ArcGIS Desktop

Synchronize Changes | ArcGIS Desktop

Working with geodatabase replicas | ArcGIS Desktop

#!/usr/bin/env python
import sys; sys.dont_write_bytecode = True
import arcpy

sde = r'Database Connections/sde@xxxxxx.sde'
gdb = r'\\server\path\GDBexport.gdb'
rName = r'SDE_GDB'

def buildReplica(folderPath,outputFile):
arcpy.CreateFileGDB_management(folderPath, outputFile, "CURRENT")
myWorkSpace = arcpy.env.workspace = sde
featureData = arcpy.ListDatasets()
arcpy.CreateReplica_management(featureData, 'ONE_WAY_REPLICA',gdb,rName,'FULL','CHILD_DATA_SENDER','USE_DEFAULTS','DO_NOT_REUSE','GET_RELATED', '','DO_NOT_USE_ARCHIVING')

def syncReplica():
arcpy.SynchronizeChanges_management(sde,rName,gdb,'FROM_GEODATABASE1_TO_2','IN_FAVOR_OF_GDB1','BY_OBJECT','')

def main():
try:
if os.path.exists(gdb):
syncReplica()
else:
buildReplica(r'\\server\path','GDBexport.gdb')
except Exception, e:
sys.exit("Error in processing File")

if __name__ == '__main__':
main()‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍‍

View solution in original post

JackGreen2
New Contributor

For anyone interested I just did a basic test of copying a single feature class from one file geodatabase to another and the average results for the same feat class were

Copy_management ~20 secs

CopyFeatures_management ~20 secs

FeatureClassToFeatureClass ~12secs 

0 Kudos