Parsing and Writing out Metadata to a file

09-22-2010 02:16 PM
We don't have the GeoPortal Toolkit, and Metadata Explorer no longer works.  My users would like a document or something that lists all the feature classes in a Geodatabase by their primary keyword.  Has anyone written a program to read through all the FCs in a GDB and write out specific metadata elements?

Alternatively, a Python program that can parse a directory of XML files and do the same thing would be great.
Load the ElementTree module. This is very helpful to parse XML files of metadata.
Here is an example where I read in an old metadata file, fiddle with the dates and rewrite it.
# with altered dates for current month
# using element tree
# create original Metadata with same name as layer
# run this to alter to name_et.xml
# reload into filegeodatabase
# Note there is no tool to unload metadata
# 15 March 2010

import arcgisscripting,sys,os
import elementtree.ElementTree as ET
import sys,os,datetime

def alter(xmlfile, edDate, publishDate, createDate):
    """Rewrite the dates in a metadata XML file and save it with an _et suffix.

    Reads the metadata XML of a feature class or table, replaces the
    edition, creation and publication dates, resets the lineage text and
    writes the result next to the input as <name>_et<ext>.

    xmlfile     -- path of the metadata XML file to read
    edDate      -- new text for the first resEdDate element
    publishDate -- new text for the second refDate element, when present
    createDate  -- new text for the first refDate element

    Returns the path of the file that was written.
    Raises IndexError when the file has no resEdDate or no refDate element.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(xmlfile)
    tree = ET.parse(xmlfile)

    # Edition date: only the first resEdDate element is updated.
    list(tree.iter("resEdDate"))[0].text = edDate

    # Reference dates. There may be two of them (DateTypCd 001 and 002).
    # NOTE(review): assumes the first refDate in document order is the
    # creation date and the second the publication date -- confirm.
    ref_dates = list(tree.iter("refDate"))
    ref_dates[0].text = createDate
    if len(ref_dates) > 1:
        ref_dates[1].text = publishDate
    else:
        print("skipping publication date " + xmlfile)

    # Reset the lineage text if the element exists.
    # NOTE(review): only the element's own text is replaced; any child
    # elements under lineage are left in place.
    lineage = list(tree.iter("lineage"))
    if lineage:
        lineage[0].text = "Cleared"
    else:
        gp.AddMessage("skipping clear lineage")

    # Insert the suffix before the extension only; a plain
    # replace(".", "_et.") would also rewrite dots elsewhere in the path.
    base, ext = os.path.splitext(xmlfile)
    outfile = base + "_et" + ext

    # Persist the edited tree so the returned name refers to a real file.
    tree.write(outfile)
    return outfile

# ---------------------- main ----------------------

def first_saturday(today):
    """Return the date of the first Saturday in the month containing today.

    today -- a datetime.date (or datetime.datetime) inside the wanted month
    """
    first = today.replace(day=1)
    # weekday() is 0 for Monday .. 5 for Saturday. The modulo keeps the
    # offset in 0..6, so a month starting on a Sunday moves forward six
    # days instead of stepping back into the previous month.
    return first + datetime.timedelta((5 - first.weekday()) % 7)


# Dates come from the command line as: publishDate createDate.
# A createDate of '#' means "same as publishDate". When either argument
# is missing, both default to the first Saturday of the current month.
try:
    publishDate = sys.argv[1]
    createDate = sys.argv[2]
    if createDate == '#':
        createDate = publishDate
except IndexError:
    today = datetime.date.today()
    firstSat = first_saturday(today)
    publishDate = firstSat.strftime("%Y%m%d")
    createDate = publishDate

# override
# NOTE(review): these hard-coded values replace whatever publishDate and
# createDate held before this point; remove them to use the command-line
# or computed dates instead.
publishDate = '20100804'
createDate = '20100710'

# Geoprocessor object for ArcGIS 9.3.
gp = arcgisscripting.create(9.3)

# Edit date defaults to today in YYYYMMDD form.
edDate = datetime.date.today().strftime("%Y%m%d")
# override
# NOTE(review): hard-coded edit date replaces the value computed just above.
edDate = '20100914'
gp.AddWarning(edDate + " edit date")
gp.AddWarning(publishDate + " publish date")
gp.AddWarning(createDate + " create date")

# Geodatabase whose items get new metadata, and the folder that holds one
# <name>.xml metadata file per feature class or table.
ws = "e:/crs/corax.gdb"
metasrc = "e:/crs/metadata"
gp.Workspace = ws

# Single-argument print(...) behaves the same on Python 2 and 3.
print(ws)
print(metasrc)

def reload_metadata(names, report_to_gp=False):
    """Rewrite and re-import the metadata of each named geodatabase item.

    For every name, looks for <metasrc>/<name>.xml. When the file exists
    it is passed to alter() and the file name alter() returns is imported
    onto the item with MetadataImporter_conversion; otherwise the missing
    path is reported.

    names        -- feature class or table names in the current workspace
    report_to_gp -- when true, also send each outcome to the geoprocessor
                    (AddMessage on success, AddError when the file is
                    missing) in addition to printing it
    """
    # NOTE(review): "or []" guards against a list call returning None,
    # which presumably happens for an invalid workspace -- confirm.
    for name in names or []:
        srcxml = metasrc + "/" + name + ".xml"
        if not os.path.exists(srcxml):
            # Report the path that was actually checked; the altered file
            # name is never assigned on this branch.
            print(srcxml + " not found")
            if report_to_gp:
                gp.AddError(srcxml + " not found")
            continue
        etxml = alter(srcxml, edDate, publishDate, createDate)
        gp.MetadataImporter_conversion(etxml, name)
        print(name + " updated")
        if report_to_gp:
            gp.AddMessage(name + " updated")


# Feature classes: outcomes are printed only.
lstFC = gp.ListFeatureClasses("*")
reload_metadata(lstFC)

# Tables: outcomes are printed and also sent to the geoprocessor.
lstTab = gp.ListTables("*")
reload_metadata(lstTab, report_to_gp=True)
