Select to view content in your preferred language

Parsing and Writing out Metadata to a file

696
1
09-22-2010 02:16 PM
RandyKreuziger
Frequent Contributor
We don't have the GeoPortal Toolkit, and Metadata Explorer no longer works.  My users would like a document or something that lists all the feature classes in a geodatabase by the primary keyword.  Has anyone written a program to read through all the FCs in a GDB and write out specific metadata elements?

Or a Python program that can parse a directory of XML files and do the same thing would be great.
0 Kudos
1 Reply
KimOllivier
Honored Contributor
Load the ElementTree module. This is very helpful to parse XML files of metadata.
Here is an example where I read in an old metadata file, fiddle with the dates and rewrite it.
 # LoadMetadata.py
# with altered dates for current month
# using element tree
# create original Metadata with same name as layer
# run this to alter to name_et.xml
# reload into filegeodatabase
# Note there is no tool to unload metadata
# 15 March 2010

import arcgisscripting,sys,os
import elementtree.ElementTree as ET
import sys,os,datetime
# Python 2 print statements with no arguments: each one writes an empty line,
# so two blank lines are emitted before any other output of the script.
print
print

def alter(xmlfile,edDate,publishDate,createDate) :
    """
    Rewrite the dates in a metadata XML file and save a copy with an _et suffix.

    xmlfile     -- path of the metadata XML file for a featureclass or table
    edDate      -- text stored in the first resEdDate element (edition date)
    publishDate -- text stored in the second refDate element, when there is one
    createDate  -- text stored in the first refDate element

    The first lineage element, if present, is emptied and its text set to
    "Cleared" (this drops the processing logs).  The source file is left
    untouched; the result is written beside it as <name>_et<ext> and that
    path is returned.

    Raises IndexError when the file has no resEdDate or no refDate element.
    """
    print(xmlfile)
    tree = ET.parse(xmlfile)

    # Edition Date
    list(tree.iter("resEdDate"))[0].text = edDate

    # Reference dates: collect them once, in document order.
    # note there may be two of these dates (DateTypCd 001 and 002)
    refDates = list(tree.iter("refDate"))
    # Reference Date 001 (Creation)
    refDates[0].text = createDate
    # Reference Date 002 (Publication)
    if len(refDates) > 1 :
        refDates[1].text = publishDate
    else :
        print("skipping publication date %s" % xmlfile)

    # clear out lineage if it exists; an explicit check is used so that
    # unrelated errors are not silently swallowed
    lineages = list(tree.iter("lineage"))
    if lineages :
        lin = lineages[0]
        lin.clear()
        lin.text = "Cleared"
    else :
        gp.AddMessage("skipping clear lineage")

    # Insert the suffix before the extension only; replacing every "." would
    # also mangle dots in directory names or in the layer name itself.
    base, ext = os.path.splitext(xmlfile)
    outfile = base + "_et" + ext
    tree.write(outfile)
    return outfile

# ---------------------- main ----------------------

# Create the geoprocessor first so that every message call below can use it.
gp = arcgisscripting.create(9.3)

# Optional script arguments: publish date, then create date (YYYYMMDD strings).
# A value of '#' is treated the same as a missing argument.
args = sys.argv[1:]
publishDate = None
createDate = None
if args and args[0] != '#' :
    publishDate = args[0]
if len(args) > 1 and args[1] != '#' :
    createDate = args[1]

if publishDate is None :
    # Default: first Saturday of the current month.
    # weekday() is Monday=0 .. Saturday=5 .. Sunday=6; the modulo keeps the
    # offset in 0..6 so a month that starts on a Sunday moves forward six
    # days instead of one day back into the previous month.
    firstOfMonth = datetime.datetime.now().replace(day=1)
    firstSat = firstOfMonth + datetime.timedelta((5 - firstOfMonth.weekday()) % 7)
    publishDate = firstSat.strftime("%Y%m%d")
else :
    gp.AddMessage("%s %s" % (publishDate, type(publishDate)))
if createDate is None :
    createDate = publishDate

# override
# NOTE(review): these hard-coded values replace whatever was supplied or
# computed above; remove them to make the arguments/defaults take effect.
publishDate = '20100804'
createDate = '20100710'

ws = "e:/crs/corax.gdb"
metasrc = "e:/crs/metadata"

edDate = str(datetime.datetime.now().date()).replace("-","")
# NOTE(review): hard-coded edit date replaces today's date computed above.
edDate = '20100914'
gp.AddWarning(edDate+" edit date")
gp.AddWarning(publishDate+" publish date")
gp.AddWarning(createDate+" create date")

gp.Workspace = ws
os.chdir(metasrc)
print("")
print(ws)
print(metasrc)
print("")

# Feature classes: rewrite <name>.xml from the metadata folder and import the
# altered copy back onto the feature class of the same name.
for fc in gp.ListFeatureClasses("*") or [] :
    fcxml = metasrc+"/"+fc+".xml"
    if os.path.exists(fcxml) :
        etxml = alter(fcxml,edDate,publishDate,createDate)
        gp.MetadataImporter_conversion (etxml,fc)
        print("%s updated" % fc)
    else :
        # report the file that was looked for (etxml is not set on this path)
        print("%s not found" % fcxml)

# Tables: same procedure, additionally reported through the geoprocessor.
for tab in gp.ListTables("*") or [] :
    tabxml = metasrc+"/"+tab+".xml"
    if os.path.exists(tabxml) :
        etxml = alter(tabxml,edDate,publishDate,createDate)
        gp.MetadataImporter_conversion (etxml,tab)
        print("%s updated" % tab)
        gp.AddMessage(tab+" updated")
    else :
        # report the file that was looked for (etxml is not set on this path)
        print("%s not found" % tabxml)
        gp.AddError(tabxml+" not found")
0 Kudos