Fixed-Length Text File to Table using Python

AdamCrateau1 · ‎03-04-2014

Hello,

I'm having trouble with schema issues when converting a fixed length text file to a gdb table. My workflow is:

1. convert fixed-length to comma-separated using python's csv module
2. create table from csv file using TableToTable_conversion

The original fixed-length file looks like this:

R75190  083W05AA03300  00000003268   INDIAN WELLS            LP  S 302GOLF COURSE ESTATES                     00004600000.00R45 00006200000010116000016316000013951011152002       92401000 1010000000000000000000200201402RITCHIE,ADAM S &                RITCHIE,TRACEY L                3268 INDIAN WELLS LP S                                          SALEM           OR9730222700025BS R581 SSAL.A    00002637.72
R75191  083W05AA03400  00000003264   INDIAN WELLS            LP  S 302GOLF COURSE ESTATES                     00004700000.14R41 00006200000009804000016004000013761011152002       92401000 1010001480000826200300200201340JAMES-BELDEN,DEEDE M            3264 INDIAN WELLS LP S                                                                          SALEM           OR9730221880475WD R581 SSAL.A    00002601.80
R75192  083W05AA03500  00000003258   INDIAN WELLS            LP  S 302GOLF COURSE ESTATES                     00004800000.15R41Z00005730000009363000015093000016652010071997       92401000 1010001299000312199900199701398DUUS,CATHERINE M                3258 INDIAN WELL LOOP S                                                                         SALEM           OR9730215820524WD R581 SSAL.A    00003039.98
R75193  083W05AA03600  00000003254   INDIAN WELLS            LP  S 302GOLF COURSE ESTATES                     00004900000.15R41Z00005730000009363000015093000016652010071997       92401000 1010002180000713200900199701398WALLACE,CONSTANCE M             3254 INDIAN WELLS LP S                                                                          SALEM           OR9730230850148WD R581 SSAL.A    00003039.98

My TXT to CSV code looks like this:

import csv, os, datetime, arcpy

workfolder = arcpy.GetParameterAsText(0)
inputfile = arcpy.GetParameterAsText(1)
outfile = arcpy.GetParameterAsText(2)

schemafile = workfolder + '\\schema.ini'
outputfile = workfolder + '\\' + outfile

if os.path.exists(schemafile):
    os.remove(schemafile)
if os.path.exists(outputfile):
    os.remove(outputfile)

#---------------------------------------------
def validate(date_text):
    date_text = date_text.zfill(8)
    year = date_text[4:]
    month = date_text[0:2]
    day = date_text[2:4]
    date_sorted = year+month+day
    try:
        return datetime.datetime.strptime(date_sorted,'%Y%m%d')
    except ValueError:
        return ""
#----------------------------------------------


with open(outputfile, 'wb') as outcsvfile:
    filewriter = csv.writer(outcsvfile,quoting=csv.QUOTE_MINIMAL)

    # Add header row
    filewriter.writerow(['PropID','Taxlot','StNumInit','StNumSx','StName','StType','StDirPx','StDirSx','Zip3','SubName','Block',
            'Lot','Acres','StatClass','RMVLand','RMVImp','RMVTotal','AssdValue','LastApprDt','Zoning','CodeArea','MultCode','PropClass',
            'SalePrice','SaleDateTxt','SaleCode','YearConst','LivingArea','OwnerName','OwnerAdd1','OwnerAdd2','OwnerAdd3','OwnerCity',
            'OwnerState','OwnerZip','DeedRef','MaintCode','NbrhdCode','TaxBalance','SaleDate'])

    # Iterate through file, assign variables and write to CSV file
    with open(inputfile, 'r') as TSGinputfile:
        for line in TSGinputfile.readlines():
            PropID = line[0:8]
            Taxlot = line[8:23]
            StNumInit = line[23:34]
            StNumSx = line[34:37]
            StName = line[37:61]
            StType = line[61:63]
            StDirPx = line[63:65]
            StDirSx = line[65:67]
            Zip3 = line[67:70]
            SubName = line[70:110]
            Block = line[110:113]
            Lot = line[113:116]
            Acres = line[116:124]
            StatClass = line[124:128]
            RMVLand = line[128:137]
            RMVImp = line[137:146]
            RMVTotal = line[146:155]
            AssdValue = line[155:164]
            LastApprDt = line[164:172]
            Zoning = line[172:179]
            CodeArea = line[179:187]
            MultCode = line[187:188]
            PropClass = line[188:191]
            SalePrice = line[191:200]
            SaleDateTxt = line[200:208]
            SaleCode = line[208:210]
            YearConst = line[210:214]
            LivingArea = line[214:219]
            OwnerName = line[219:251]
            OwnerAdd1 = line[251:283]
            OwnerAdd2 = line[283:315]
            OwnerAdd3 = line[315:347]
            OwnerCity = line[347:363]
            OwnerState = line[363:365]
            OwnerZip = line[365:370]
            DeedRef = line[370:381]
            MaintCode = line[381:386]
            NbrhdCode = line[386:396]
            TaxBalance = line[396:407]
            SaleDate = validate(SaleDateTxt)

            if Taxlot <> '':
                filewriter.writerow([PropID,Taxlot,StNumInit,StNumSx,StName,StType,StDirPx,StDirSx,Zip3,SubName,Block,Lot,Acres,
                    StatClass,RMVLand,RMVImp,RMVTotal,AssdValue,LastApprDt,Zoning,CodeArea,MultCode,PropClass,SalePrice,SaleDateTxt,
                    SaleCode,YearConst,LivingArea,OwnerName,OwnerAdd1,OwnerAdd2,OwnerAdd3,OwnerCity,OwnerState,OwnerZip,DeedRef,
                    MaintCode,NbrhdCode,TaxBalance,SaleDate])

My CSV to Table code looks like this:

import arcpy, os

input_csv = arcpy.GetParameterAsText(0)
gdbfolder = arcpy.GetParameterAsText(1)
table_name = arcpy.GetParameterAsText(2)

arcpy.env.workspace = gdbfolder

out_table = os.path.join(gdbfolder,table_name)

if os.path.exists('schemafile.ini'):
    os.remove('schemafile.ini')

arcpy.env.overwriteOutput = True

arcpy.TableToTable_conversion(input_csv,gdbfolder,table_name,"#",
    """PropID "PropID" true true false 8 Text 0 0 ,First,#,input_csv,PropID,-1,-1;
    Taxlot "Taxlot" true true false 15 Text 0 0 ,First,#,input_csv,Taxlot,-1,-1;
    StNumInit "StNumInit" true true false 11 Text 0 0 ,First,#,input_csv,StNumInit,-1,-1;
    StNumSx "StNumSx" true true false 3 Text 0 0 ,First,#,input_csv,StNumSx,-1,-1;
    StName "StName" true true false 24 Text 0 0 ,First,#,input_csv,StName,-1,-1;
    StType "StType" true true false 2 Text 0 0 ,First,#,input_csv,StType,-1,-1;
    StDirPx "StDirPx" true true false 2 Text 0 0 ,First,#,input_csv,StDirPx,-1,-1;
    StDirSx "StDirSx" true true false 2 Text 0 0 ,First,#,input_csv,StDirSx,-1,-1;
    Zip3 "Zip3" true true false 3 Text 0 0 ,First,#,input_csv,Zip3,-1,-1;
    SubName "SubName" true true false 40 Text 0 0 ,First,#,input_csv,SubName,-1,-1;
    Block "Block" true true false 3 Text 0 0 ,First,#,input_csv,Block,-1,-1;
    Lot "Lot" true true false 3 Text 0 0 ,First,#,input_csv,Lot,-1,-1;
    Acres "Acres" true true false 8 Double 0 0 ,First,#,input_csv,Acres,-1,-1;
    StatClass "StatClass" true true false 4 Text 0 0 ,First,#,input_csv,StatClass,-1,-1;
    RMVLand "RMVLand" true true false 4 Long 0 0 ,First,#,input_csv,RMVLand,-1,-1;
    RMVImp "RMVImp" true true false 4 Long 0 0 ,First,#,input_csv,RMVImp,-1,-1;
    RMVTotal "RMVTotal" true true false 4 Long 0 0 ,First,#,input_csv,RMVTotal,-1,-1;
    AssdValue "AssdValue" true true false 4 Long 0 0 ,First,#,input_csv,AssdValue,-1,-1;
    LastApprDt "LastApprDt" true true false 8 Text 0 0 ,First,#,input_csv,LastApprDt,-1,-1;
    Zoning "Zoning" true true false 7 Text 0 0 ,First,#,input_csv,Zoning,-1,-1;
    CodeArea "CodeArea" true true false 8 Text 0 0 ,First,#,input_csv,CodeArea,-1,-1;
    MultCode "MultCode" true true false 1 Text 0 0 ,First,#,input_csv,MultCode,-1,-1;
    PropClass "PropClass" true true false 3 Text 0 0 ,First,#,input_csv,PropClass,-1,-1;
    SalePrice "SalePrice" true true false 4 Long 0 0 ,First,#,input_csv,SalePrice,-1,-1;
    SaleDateTxt "SaleDateTxt" true true false 8 Text 0 0 ,First,#,input_csv,SaleDateTxt,-1,-1;
    SaleCode "SaleCode" true true false 2 Text 0 0 ,First,#,input_csv,SaleCode,-1,-1;
    YearConst "YearConst" true true false 4 Text 0 0 ,First,#,input_csv,YearConst,-1,-1;
    LivingArea "LivingArea" true true false 5 Text 0 0 ,First,#,input_csv,LivingArea,-1,-1;
    OwnerName "OwnerName" true true false 32 Text 0 0 ,First,#,input_csv,OwnerName,-1,-1;
    OwnerAdd1 "OwnerAdd1" true true false 32 Text 0 0 ,First,#,input_csv,OwnerAdd1,-1,-1;
    OwnerAdd2 "OwnerAdd2" true true false 32 Text 0 0 ,First,#,input_csv,OwnerAdd2,-1,-1;
    OwnerAdd3 "OwnerAdd3" true true false 32 Text 0 0 ,First,#,input_csv,OwnerAdd3,-1,-1;
    OwnerCity "OwnerCity" true true false 16 Text 0 0 ,First,#,input_csv,OwnerCity,-1,-1;
    OwnerState "OwnerState" true true false 2 Text 0 0 ,First,#,input_csv,OwnerState,-1,-1;
    OwnerZip "OwnerZip" true true false 5 Text 0 0 ,First,#,input_csv,OwnerZip,-1,-1;
    DeedRef "DeedRef" true true false 11 Text 0 0 ,First,#,input_csv,DeedRef,-1,-1;
    MaintCode "MaintCode" true true false 5 Text 0 0 ,First,#,input_csv,MaintCode,-1,-1;
    NbrhdCode "NbrhdCode" true true false 10 Text 0 0 ,First,#,input_csv,NbrhdCode,-1,-1;
    TaxBalance "TaxBalance" true true false 8 Double 0 0 ,First,#,input_csv,TaxBalance,-1,-1;
    SaleDate "SaleDate" true true false 8 Date 0 0 ,First,#,input_csv,SaleDate,-1,-1""","#")

The TableToTable tool is not honoring the field mapping I include in the code. For example, OwnerZip is a Long Integer in my output table, and all the text fields are length = 255.

One solution I haven't tried yet is to read the input file one line at a time and insert it into a table using an insertCursor. This would bypass the need for a csv file.

Any suggestions? Many Thanks, Adam

WilliamCraft · ‎03-04-2014

How about converting your newly-created CSV file from step 1 into a GDB table using the Copy Rows GP tool under Data Management? For a CSV file input to this tool, the first row of the CSV file will be used as the field names for the output. Keep in mind that those field names cannot contain spaces or special characters. Alternatively, you could generate an empty table with the field names and data types you care about and then add the rows from your CSV into the empty table using the Append GP tool.

AdamCrateau1 · ‎03-05-2014

Thanks William, I tried the Copy Rows tool and had some of the same problems. I also tried working with the schema.ini file a little, but didn't have any success.

I decided to try using the da.insertCursor and bypass the CSV file. It works. I'm curious to know if anyone out there has a better method for getting these types of files into a gdb table. I'll be working with other files similar to this one, and would prefer not to define the column definitions by hand. I don't have MS Access but am thinking this maybe the best option. Excel won't work because the number of rows is > 65000.

Here is the code that worked for me:

import arcpy

input_file = arcpy.GetParameterAsText(0)
geodatabase = arcpy.GetParameterAsText(1)
table_name = arcpy.GetParameterAsText(2)
table_template = arcpy.GetParameterAsText(3)


arcpy.env.workspace = geodatabase
arcpy.env.overwriteOutput = True

arcpy.CreateTable_management(geodatabase,table_name,table_template)


with open(input_file, 'r') as TSGinputfile:
    lines = TSGinputfile.readlines()

fields = ('TAXLOT','SNAM','BLK','LOTNO','ACRES','PROPID','STNUM','ADDRANGE','STREET','SDIRPX','STNAM','STYP','SDIRSX',
            'SITUSCITY','SITUSZIP','OWNERNAME','OWNERADD','OWNERADD1','OWNERADD2','OWNERADD3','OWNERCITY','OWNERSTATE',
            'OWNERZIP','DEEDREF','STCL','LANDVAL','IMPVAL','TOTALVAL','ASSDVAL','ZONING','CODEAREA','PROPCLASS','SALEPRICE',
            'SALEDATETXT','SALECODE','YEARCONST','LIVINGAREA','MAINTENANCE','NBRHDCODE','StNumInit','StNumSx','Zip3')
            
with arcpy.da.InsertCursor(table_name,fields) as cursor:
    for line in lines:
        PROPID = line[0:8].strip()
        TAXLOT = line[8:23].strip()
        StNumInit = line[23:34]
        StNumSx = line[34:37].strip()
        STNAM = line[37:61].strip()
        STYP = line[61:63].strip()
        SDIRPX = line[63:65].strip()
        SDIRSX = line[65:67].strip()
        Zip3 = line[67:70].strip()
        SNAM = line[70:110].strip()
        BLK = line[110:113].strip()
        LOTNO = line[113:116].strip()
        ACRES = line[116:124].strip()
        STCL = line[124:128].strip()
        LANDVAL = int(line[128:137].strip())
        IMPVAL = int(line[137:146].strip())
        TOTALVAL = int(line[146:155].strip())
        ASSDVAL = int(line[155:164].strip())
        #LastApprDt = line[164:172]
        ZONING = line[172:179].strip()
        CODEAREA = line[179:187]
        #MultCode = line[187:188]
        PROPCLASS = line[188:191].strip()
        SALEPRICE = int(line[191:200].strip())
        SALEDATETXT = line[200:208].strip()
        SALECODE = line[208:210].strip()
        YEARCONST = int(line[210:214].strip())
        LIVINGAREA = int(line[214:219].strip())
        OWNERNAME = line[219:251].strip()
        OWNERADD1 = line[251:283].strip()
        OWNERADD2 = line[283:315].strip()
        OWNERADD3 = line[315:347].strip()
        OWNERCITY = line[347:363].strip()
        OWNERSTATE = line[363:365].strip()
        OWNERZIP = line[365:370].strip()
        DEEDREF = line[370:381].strip()
        MAINTENANCE = line[381:386].strip()
        NBRHDCODE = line[386:396].strip()
        #CurLeviedTax = line[396:407]
        #SALEDATE = validate(SALEDATETXT)
        STNUM = 0
        ADDRANGE = 0
        STREET = ''
        SITUSCITY = ''
        SITUSZIP = ''
        OWNERADD = ''

        cursor.insertRow((TAXLOT,SNAM,BLK,LOTNO,ACRES,PROPID,STNUM,ADDRANGE,STREET,SDIRPX,STNAM,STYP,SDIRSX,SITUSCITY,SITUSZIP,
                OWNERNAME,OWNERADD,OWNERADD1,OWNERADD2,OWNERADD3,OWNERCITY,OWNERSTATE,OWNERZIP,DEEDREF,STCL,LANDVAL,IMPVAL,
                TOTALVAL,ASSDVAL,ZONING,CODEAREA,PROPCLASS,SALEPRICE,SALEDATETXT,SALECODE,YEARCONST,LIVINGAREA,MAINTENANCE,
                NBRHDCODE,StNumInit,StNumSx,Zip3))