Calculate multiple feature class fields from stand alone table

ChristopherKesler · ‎06-01-2011

Greetings,

I have two feature classes and four tables stored in a file based geodatabase that periodically need attributes recalculated based on data stored in and Excel spreadsheet. Any ideas on how to establish a Join then Calculate the attributes based on columns in Excel using Python?
The Joins would all be 1 to 1.
The calculations are simple, ie.
OilField_Locations.FIELD_ID = !OilFields$.FIELD_ID! (calculate fc field = column in Excel)
but I have around 250 fields to calculate and do not have any desire to go through them manually.

Thanks!

KimOllivier · ‎06-02-2011

1. Load the spreadsheet into a table.
If you dump the spreadsheet to a .CSV file first and define a Schema.ini then it will load nicely as a formatted table using the CopyTable_management tool. Schema.ini is a default Windows function.
Note that a spreadsheet is not a database and is not supported to be reliably loaded with a satisfactory schema because columns are not defined, only formatted. Default widths are 255 chars for example.

2. If you have to update multiple sheets from one table, create a dictionary using the key value and the attribute.

3. Run an updateCursor on each target table and push the new values in using the dictionary as a lookup.

Using the join tools are a bit slow.

ChristopherKesler · ‎06-03-2011

Thank you for the direction. Creatings CSV files from the six tables in my Excel workbook may be an acceptable alternative. I am still looking for a more automated approach. I established the Joins manually in an ArcMap document for now and will focus on the field calculations.

KimOllivier · ‎06-05-2011

If you want a more automated approach, you can use the dispatch com interface that we used to use for ArcGIS9. This works well to get direct access to a spreadsheet and each of the sheets.
This is well explained in "Python Programming on Win 32" by Hammond and Robinson.

o = win32com.client.Dispatch("Excel.Application")
o.Visible = 1
o.Workbooks.Add() # for office 97 �?? 95 a bit different!
o.Cells(1,1).Value = "Hello"

To make accessing Excel a bit easier, Mark Hammond suggested a small module to get the data which I called exceldemos.py:

# Excel module from Mark Hammond
# Python Programming on Win 32
import win32com.client
import win32com.client.dynamic
from pywintypes import UnicodeType, TimeType

import pprint
import os
import time
import string


class easyExcel:
    """A utility to make it easier to get at Excel.  Remembering
    to save the data is your problem, as is  error handling.
    Operates on one workbook at a time."""
    
    def __init__(self, filename=None):
        self.xlApp = win32com.client.dynamic.Dispatch('Excel.Application')
        if filename:
            self.filename = filename
            self.xlBook = self.xlApp.Workbooks.Open(filename)
        else:
            self.xlBook = self.xlApp.Workbooks.Add()
            self.filename = ''  
    
    def save(self, newfilename=None):
        if newfilename:
            self.filename = newfilename
            self.xlBook.SaveAs(newfilename)
        else:
            self.xlBook.Save()

    def close(self):
        self.xlBook.Close(SaveChanges=0)
        del self.xlApp
   
    def show(self):
        self.xlApp.Visible = 1
        
    def hide(self):
        self.xlApp.Visible = 0
        
    def getBooks(self):
        return self.xlApp.Worksheets

    def getName(self,sheet):
        return self.xlBook.Worksheets(sheet).Name
#
#    now for the helper methods
#
    def getCell(self, sheet, row, col):
        "Get value of one cell"
        sht = self.xlBook.Worksheets(sheet)
        return sht.Cells(row, col).Value
    
    def setCell(self, sheet, row, col, value):
        "set value of one cell"
        sht = self.xlBook.Worksheets(sheet)
        sht.Cells(row, col).Value = value
    
    def getRange(self, sheet, row1, col1, row2, col2):
        "return a 2d array (i.e. tuple of tuples)"
        sht = self.xlBook.Worksheets(sheet)
        return sht.Range(sht.Cells(row1, col1), sht.Cells(row2, col2)).Value
    
    def setRange(self, sheet, leftCol, topRow, data):
        """insert a 2d array starting at given location. 
        Works out the size needed for itself"""
        
        bottomRow = topRow + len(data) - 1
        rightCol = leftCol + len(data[0]) - 1
        sht = self.xlBook.Worksheets(sheet)
        sht.Range(
            sht.Cells(topRow, leftCol), 
            sht.Cells(bottomRow, rightCol)
            ).Value = data

    def getContiguousRange(self, sheet, row, col):
        """Tracks down and across from top left cell until it
        encounters blank cells; returns the non-blank range.
        Looks at first row and column; blanks at bottom or right
        are OK and return None witin the array"""
        
        sht = self.xlBook.Worksheets(sheet)
        
        # find the bottom row
        bottom = row
        while sht.Cells(bottom + 1, col).Value not in [None, '']:
            bottom = bottom + 1
        
        # right column
        right = col
        while sht.Cells(row, right + 1).Value not in [None, '']:
            right = right + 1
        
        return sht.Range(sht.Cells(row, col), sht.Cells(bottom, right)).Value

    def getHeaderRange(self, sheet, row, col):
        """Tracks  across from left cell until it
        encounters blank cells; returns the non-blank range.
        Looks at first row and column; blanks at bottom or right
        are OK and return None within the array"""
        
        sht = self.xlBook.Worksheets(sheet)
        
        # find the bottom row
        # bottom = row
        # while sht.Cells(bottom + 1, col).Value not in [None, '']:
        #     bottom = bottom + 1
        
        # right column
        right = col
        while sht.Cells(row, right + 1).Value not in [None, '']:
            right = right + 1
        
        return sht.Range(sht.Cells(row, col), sht.Cells(row, right)).Value    
 
    def fixStringsAndDates(self, aMatrix):
        # converts all unicode strings and times
        newmatrix = []
        for row in aMatrix:
            newrow = []
            for cell in row:
                if type(cell) is UnicodeType:
                    newrow.append(str(cell))
                elif type(cell) is TimeType:
                    newrow.append(int(cell))
                else:
                    newrow.append(cell)
            newmatrix.append(tuple(newrow))
        return newmatrix
    
        
def test():
    # puts things in a new sheet which it does not save
    spr = easyExcel()
    spr.show()
    
    input = 'hello'
    spr.setCell('Sheet1',1,4, input)
    output = spr.getCell('Sheet1',1,4)
    print output
    assert input == output, 'setCell/getCell failed'
    
    input = []
    for i in range(10):
        row = []
        for j in range(4):
            row.append(str('(%d,%d)'% (j, i)))
        input.append(tuple(row))
    
    spr.setRange('Sheet1',2,2,input)
    
    output = spr.getRange('Sheet1',2,2,11,5)
    # get rid of unicode strings
    output = spr.fixStringsAndDates(output)
    assert input == output, 'setRange/getRange test failed'
    
    #get a contiguous range
    output2 = spr.getContiguousRange('Sheet1',2,2)
    dimensions = (len(output2), len(output2[0]))
    assert dimensions == (10, 4), 'getContiguousRange failed'
    
    print 'passed!'
    

# if __name__ == '__main__' :
#     test()

Then to get each sheet you can create a Python loop something line this:
The spreadsheet had data split into several tables, with headers. I needed to transpose the data and calculate some cross sections from a centreline.
In my case I exported to CSV files and loaded them using another obsolete tool called CreateFeaturesFromText, but you could write the data directly into a table using a cursor.

# load reachs and crosssections from a spreadsheet
# getwater3.py
# get channel centreline and number nodes uniquely
# dropping off duplicates
# calculate bearing of nodes in geographic degrees
# Kimo
# 6 October 2005
# upgrade to average angles for middle points of a reach
# 7 October 2005
# 10 October 2005
# offset one more row to allow for max water level calc
# does not get all the other water levels, just bottom and max
# better to load database of levels for each section for joining later
# so that we can create a surface for each time step from same cross sections
# ref first field for sorting and joining
# pad chainage
# removed max row again
# but too late, have to have it.
# 
# import win32com.client
import win32com.client.dynamic
import os,sys
from math import *

# sys.path.append("d:\\project\\python")
import exceldemos
 
homedir = "e:/toolbox/scratch"
os.chdir(homedir)

xlApp = win32com.client.dynamic.Dispatch('Excel.Application')
filename = "e:/example/GISTransfer.xls"

spr = exceldemos.easyExcel(filename)

f1 = open("channel.txt","w")     # polyline generate file
f2 = open("channel_att.txt","w") # polyline attributes
f3 = open("channel_node.txt","w")# xsection nodes, can load as XY events
f4 = open("channel_skip.txt","w")# rejects - duplicate nodes
f5 = open("channel_xsec.txt","w")# xsection ref,x,y,theta for joining to xsections

# there are multiple worksheets
# so loop over all to combine into single output
max = 0
lstRef = []
try :
    for b in spr.getBooks() :
        max += 1
        print "workbook: ",b.Name
    print "Workbooks found: ",max
    row = 0
    # output file headers
    print >> f1,'Polyline'   # for Toolbox loader
    print >> f2,'rid,reach'
    print >> f3,'ref,nodeid,x,y,z,w,theta,rid,reach,chain'
    print >> f4,'rec,x,y,reach,chain,z,w'
    # no header for f5 xsec
    # print >> f5,'ref,x,y,theta,maxwater'
    rec = 1
    id = 0
    rn = 0
    # skip first sheet
    for sheetindex in range(2,5) :
        
        # get number of columns
        header = spr.getHeaderRange(sheetindex,3,1)
        columns = len(header[0])
        print "Length of header",columns
        
        print "Beginning :",spr.getName(sheetindex)
        aHeader = spr.getRange(sheetindex,1,1,6,columns)
        # transpose rows and columns
        headerpivot =  [[r[col] for r in aHeader] for col in range(len(aHeader[0]))]
        # initialize counters
        rname = ""
        rx = 0
        ry = 0
        i = 0
        rej = 0
        colcount = len(headerpivot[1:])
        print "Columns",colcount
        for (a,b,c,d,e,w) in headerpivot[1:] :   
            # print rec,a,b,c,d,e,w
            # a X coordinate
            # b Y coordinate
# ..... snip to fit forum text limit .....
    print >> f1,"END"
    print "records :",rec,"nodes",id,"reaches",rn,"rejects",rej
    f1.close()
    f2.close()
    f3.close()
    f4.close()
    f5.close()
    spr.close()
    
    
    del xlApp
except :
    print "cleanup close"
    f1.close()
    f2.close()
    f3.close()
    f4.close()
    f5.close()
    spr.close()
    
    del xlApp

The spreadsheet is a bit large to include, I hope this is enough of a clue to encourage you to buy the book, which is now available in digital format.
http://oreilly.com/catalog/9781565926219