At ArcGIS 10.2.1 there are new python modules included called XLRD and XLWT which are excellent modules for reading spreadsheets and extracting the data. It can handle multiple sheets and can pick out any cell or range of cells. Don't bother trying to use the new Excel to Table tool unless the data is already in a suitable format with valid field names etc.Here is an example of reading the 4th sheet that was not formatted for a database, no header field names and lots of comments all over the sheet. I needed three columns in the middle of the sheet.By the way watch for new uses of Sqlite with spatial extensions as Spatialite and the OSGEO recent standard http://www.opengeospatial.org/standards/geopackage#-------------------------------------------------------------------------------
# Name: loadXLS
# read Table 4 from 68 spreadsheets to get Unoccupied Dwellings
# Purpose: read Stats spreadsheets into a Sqlite database that cannot
# be read by FME, ArcGIS or anything other than Excel
# saves writing out to CSV first
# Author: kimo
#
# Created: 08/05/2014
# Copyright: (c) kimo 2014
# Licence: Creative Commons
#-------------------------------------------------------------------------------
import xlrd # installed with ArcGIS 10.2 by default
import sys,os
import re
import glob
import sqlite3
import arcpy # only needed for final message
pat = re.compile("^[0-9]{6}") # pattern of AU code in AU name in Column 0 ("A")
dType = {0:'Empty', 1:'Text', 2:'Number', 3:'Date', 4:'Boolean', 5:'Error', 6:'Blank'}
def extract_tla(filename):
'''extract Unoccupied Dwellings'''
workbook = xlrd.open_workbook(filename)
worksheet = workbook.sheet_by_name('Table 4')
num_rows = worksheet.nrows
num_cells = worksheet.ncols
rec = 0
for curr_row in range(num_rows):
curr_col = 0
cellAn = worksheet.cell_value(curr_row, curr_col)
cell_type0 = worksheet.cell_type(curr_row, curr_col)
if cell_type0 == 1 and pat.match(cellAn):
a = str(worksheet.cell_value(curr_row,curr_col))
val = [a[0:6],a[7:]]
for curr_col in range(1,4):
count = worksheet.cell_value(curr_row,curr_col)
if count == '-':
n = 0
else :
n = int(count)
val.append(n)
rec+=1
#print val
insert_ud_row(udTable,val)
# print "Records found",rec
# fields au2013,auname,ud2001,ud2006,ud2013
return rec
def create_ud_tab(conn,udTable):
# (re-)create a table
c = conn.cursor()
c.execute('''DROP TABLE IF EXISTS '''+ udTable)
c.execute('''CREATE TABLE '''+udTable+''' (
au text NOT NULL,
auname text ,
ud2001 integer,
ud2006 integer,
ud2013 integer )''')
conn.commit()
c.close()
return
def insert_ud_row(outTab,row):
'''insert a row
requires a sqlite database to be connected
and a cursor to be open
'''
try:
cmd = "INSERT INTO " + outTab + " VALUES (" + (",?"*(len(row)))[1:] + ")"
# implied transaction already opened
c.execute(cmd,row)
except Exception,msg:
print msg
print row
print cmd
conn.rollback()
conn.close()
sys.exit()
# -------------------------------------------------------------------------
if __name__ == '__main__':
try:
source_folder = sys.arg[1]
except:
source_folder = 'C:/data/census2013/source'
os.chdir(source_folder)
conn = sqlite3.connect('C:/data/census2013/dwelling.sqlite')
udTable = 'DwellingEmptyAU'
create_ud_tab(conn,udTable)
# Open a cursor and insert records from all the spreadsheets
c = conn.cursor()
print "sqlite database successfully opened"
trec = 0
lstXLS = glob.glob('*.xls')
for x in lstXLS:
rec = extract_tla(x)
trec+=rec
print rec,x
conn.commit()
c.close()
conn.close()
msg = '{} Total records inserted into {}'.format(trec,udTable)
print msg
arcpy.AddMessage(msg)