arcpy.da.Walk cannot iterate past a corrupted DWG

3054
1
Jump to solution
08-06-2014 10:23 AM
jasonramsey1
New Contributor II

Hello. I am relatively new to Python. I am trying to iterate through all DWG polylines in a directory with multiple (hundreds of) subdirectories. I would like the code to skip any DWG files that it cannot open and continue iterating. The following code works fine until it comes across a DWG file that is possibly corrupted. At that point Python stalls and I cannot get it to throw an exception. I have isolated the problem to arcpy.da.Walk. I have tried multiple exception types. I am not sure how the onerror function call works in arcpy.da.Walk (or even whether this is an avenue worth exploring). I am beyond frustrated; I have done hours of research and cannot get anything to work. Any help would be greatly appreciated.

 

-Jason

 

import arcpy
import os
import sys
from arcpy import env
import pdb


def walk_error(error):
    """Report an error raised while arcpy.da.Walk lists a directory.

    Passed as the ``onerror`` callback so listing failures are printed
    instead of being dropped silently (which is what ``onerror=None`` does).
    ``error`` is expected to be an OSError instance -- confirm against the
    arcpy.da.Walk documentation for your ArcGIS release.
    """
    print("Walk error %s on %s: %s" % (error.errno, error.filename, error.strerror))


WorkspaceRoot = "W:/GIS_Scratch"
env.workspace = WorkspaceRoot

# List workspaces in root directory.
# `or []` guards against the list function handing back None instead of a list.
workspaces = arcpy.ListWorkspaces("*", "Folder") or []
for workspace in workspaces:
    env.workspace = workspace
    print(workspace)

    # Find workspaces named Drawings
    workspaces1 = arcpy.ListWorkspaces("*rawing*", "Folder") or []
    print(workspaces1)

    for workspaceNext in workspaces1:
        arcpy.env.workspace = workspaceNext
        print(workspaceNext)

        # Uncomment to start stepping through the code:
        # pdb.set_trace()

        # Walk through directory to find CAD polylines (Note: "CadDrawing"
        # datatype does not return any polylines, so had to use "FeatureClass")
        try:
            for dirpath, dirnames, filenames in arcpy.da.Walk(
                    workspaceNext, topdown=True, onerror=walk_error,
                    datatype='FeatureClass', type='Polyline'):
                print(dirpath)
                for filename in filenames:
                    # Describe the polyline feature class itself, not its
                    # parent DWG, and leave env.workspace alone: a bare
                    # feature-class name is not a workspace.
                    featureTest = os.path.join(dirpath, filename)
                    try:
                        desc = arcpy.Describe(featureTest)
                        print(filename)
                        print(featureTest)
                        print(desc.extent.XMin)
                    except arcpy.ExecuteError:
                        arcpy.AddError(arcpy.GetMessages())
                    except Exception as e:
                        # Skip this feature class but keep the real cause visible.
                        arcpy.AddError("Non-tool error occurred")
                        print(e)
        except OSError as e:
            print(e.errno)
            print(e.filename)
            print(e.strerror)
        except Exception as e:
            print("An error has occurred")
            print(e)
        # NOTE(review): a DWG that makes arcpy stall (rather than raise) cannot
        # be skipped with try/except; that case needs the work moved into a
        # separate process that can be abandoned.
0 Kudos
1 Solution

Accepted Solutions
jasonramsey1
New Contributor II

Here is the Python script that uses multiprocessing to solve my problem. It doesn't use arcpy.da.Walk, although arcpy.da.Walk could be incorporated into the solution. Hopefully this helps somebody.

import glob,os, arcpy, multiprocessing, csv, sys

# Destination CSV file; GetExtent opens it in append mode and adds one row
# per CAD feature class it describes.
outputCSV=r'W:\GIS_Scratch\CADFileExtents.csv'

#iterate directory to find all DWGs in Drawings Folder

def MultiProcessDWGs(workspaceEnviron):
    """Find every ``*rawing*/FINAL`` folder under the root and process its DWGs.

    workspaceEnviron -- root directory; folders matching
    ``<root>/*/*rawing*/FINAL/`` are printed and then walked recursively,
    and ``main`` is called once for each directory found.

    ``main`` is only dispatched when this module is the entry script, so a
    process that merely imports the module does not start pools of its own.
    """
    arcpy.env.workspace = workspaceEnviron

    # The guard cannot change while the function runs; evaluate it once
    # instead of once per walked directory.
    is_entry_script = __name__ == '__main__'

    for final_dir in glob.glob(workspaceEnviron+"/*/*rawing*/FINAL/"):
        print(final_dir)
        if not is_entry_script:
            # Walking would have no effect here, so skip the disk traversal.
            continue
        for dirname, _dirnames, _filenames in os.walk(final_dir):
            main(dirname)

                       

def main(workspace):
    """Describe the Polyline feature class of every DWG in one folder.

    workspace -- directory to search; it becomes ``arcpy.env.workspace``.

    Each ``*.dwg`` dataset found is turned into a ``<workspace>/<dwg>/Polyline``
    path and handed to ``GetExtent`` through a ``multiprocessing.Pool``.
    Returns None.
    """
    arcpy.env.workspace = workspace

    dataset1 = arcpy.ListDatasets("*.dwg", "Feature")
    if not dataset1:
        # Covers both an empty list and a None result: nothing to describe,
        # so do not pay for starting worker processes.
        return

    ds_list = [os.path.join(workspace, dataset, "Polyline") for dataset in dataset1]

    pool = multiprocessing.Pool()
    try:
        pool.map(GetExtent, ds_list)
    finally:
        # Shut the workers down even if a worker raised, so they are not
        # left running once the error propagates to the caller.
        pool.close()
        pool.join()

def GetExtent(ds):
    """Append the extent and spatial reference of one feature class to outputCSV.

    ds -- path of the feature class to describe; ``main`` passes paths whose
    last component is ``Polyline`` inside a DWG.

    Writes one row: DWG path, XMin, YMin, XMax, YMax, spatial reference name.
    Any error raised by ``arcpy.Describe`` propagates to the caller.
    """
    desc = arcpy.Describe(ds)
    extent = desc.extent

    # Report the drawing rather than the feature class: drop only a trailing
    # "Polyline" component. A plain text replace would also rewrite folders
    # such as "Polylines" earlier in the path, and depends on the separator.
    parent, leaf = os.path.split(ds)
    dwg_path = parent if leaf == 'Polyline' else ds

    # 'ab' is the mode the Python 2 csv module expects for appending.
    # NOTE(review): several pool workers append to this file at the same
    # time -- confirm rows cannot interleave on your platform.
    with open(outputCSV, 'ab') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow([
            dwg_path,
            extent.XMin,
            extent.YMin,
            extent.XMax,
            extent.YMax,
            desc.spatialReference.name,
        ])

# Run only when executed as a script. With multiprocessing on Windows each
# worker re-imports this module, and an unguarded call would repeat the
# directory scan and the print in every worker.
if __name__ == '__main__':
    MultiProcessDWGs(r"C:\CAD\CAD_FileDirectory")

    # Raw string: same text as before, without relying on "\C" being left
    # alone as an unrecognised escape sequence.
    print(r"C:\CAD\CAD_FileDirectory")

View solution in original post

0 Kudos
1 Reply
jasonramsey1
New Contributor II

Here is the Python script that uses multiprocessing to solve my problem. It doesn't use arcpy.da.Walk, although arcpy.da.Walk could be incorporated into the solution. Hopefully this helps somebody.

import glob,os, arcpy, multiprocessing, csv, sys

# Destination CSV file; GetExtent opens it in append mode and adds one row
# per CAD feature class it describes.
outputCSV=r'W:\GIS_Scratch\CADFileExtents.csv'

#iterate directory to find all DWGs in Drawings Folder

def MultiProcessDWGs(workspaceEnviron):
    """Find every ``*rawing*/FINAL`` folder under the root and process its DWGs.

    workspaceEnviron -- root directory; folders matching
    ``<root>/*/*rawing*/FINAL/`` are printed and then walked recursively,
    and ``main`` is called once for each directory found.

    ``main`` is only dispatched when this module is the entry script, so a
    process that merely imports the module does not start pools of its own.
    """
    arcpy.env.workspace = workspaceEnviron

    # The guard cannot change while the function runs; evaluate it once
    # instead of once per walked directory.
    is_entry_script = __name__ == '__main__'

    for final_dir in glob.glob(workspaceEnviron+"/*/*rawing*/FINAL/"):
        print(final_dir)
        if not is_entry_script:
            # Walking would have no effect here, so skip the disk traversal.
            continue
        for dirname, _dirnames, _filenames in os.walk(final_dir):
            main(dirname)

                       

def main(workspace):
    """Describe the Polyline feature class of every DWG in one folder.

    workspace -- directory to search; it becomes ``arcpy.env.workspace``.

    Each ``*.dwg`` dataset found is turned into a ``<workspace>/<dwg>/Polyline``
    path and handed to ``GetExtent`` through a ``multiprocessing.Pool``.
    Returns None.
    """
    arcpy.env.workspace = workspace

    dataset1 = arcpy.ListDatasets("*.dwg", "Feature")
    if not dataset1:
        # Covers both an empty list and a None result: nothing to describe,
        # so do not pay for starting worker processes.
        return

    ds_list = [os.path.join(workspace, dataset, "Polyline") for dataset in dataset1]

    pool = multiprocessing.Pool()
    try:
        pool.map(GetExtent, ds_list)
    finally:
        # Shut the workers down even if a worker raised, so they are not
        # left running once the error propagates to the caller.
        pool.close()
        pool.join()

def GetExtent(ds):
    """Append the extent and spatial reference of one feature class to outputCSV.

    ds -- path of the feature class to describe; ``main`` passes paths whose
    last component is ``Polyline`` inside a DWG.

    Writes one row: DWG path, XMin, YMin, XMax, YMax, spatial reference name.
    Any error raised by ``arcpy.Describe`` propagates to the caller.
    """
    desc = arcpy.Describe(ds)
    extent = desc.extent

    # Report the drawing rather than the feature class: drop only a trailing
    # "Polyline" component. A plain text replace would also rewrite folders
    # such as "Polylines" earlier in the path, and depends on the separator.
    parent, leaf = os.path.split(ds)
    dwg_path = parent if leaf == 'Polyline' else ds

    # 'ab' is the mode the Python 2 csv module expects for appending.
    # NOTE(review): several pool workers append to this file at the same
    # time -- confirm rows cannot interleave on your platform.
    with open(outputCSV, 'ab') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow([
            dwg_path,
            extent.XMin,
            extent.YMin,
            extent.XMax,
            extent.YMax,
            desc.spatialReference.name,
        ])

# Run only when executed as a script. With multiprocessing on Windows each
# worker re-imports this module, and an unguarded call would repeat the
# directory scan and the print in every worker.
if __name__ == '__main__':
    MultiProcessDWGs(r"C:\CAD\CAD_FileDirectory")

    # Raw string: same text as before, without relying on "\C" being left
    # alone as an unrecognised escape sequence.
    print(r"C:\CAD\CAD_FileDirectory")

0 Kudos