problem with nested while loops

AmelieDavis · ‎10-20-2011

Hi!
I'm trying to write a python script that will look for points that are within a certain distance of each other and delete one of the pair at random.
I've taken a stab at this and the program runs but it doesn't get all the points. I think it may be a problem with the nested while loops. I put the script below and tried to comment it as much as possible.
Can someone please give me advice on how to fix this?
Any help is greatly appreciated.
Amelie

# ---------------------------------------------------------------------------
# IterativeNearAnalysis.py
# Created on: thu oct 19 2011 by Amelie Davis
# for a shapefile in a folder
# if points are identical in terms of lat long, removes one point at random
# checks the distance between all points in that same shapefile is less than a specified distance
# deletes at random one of the two 'offending points'
# creates a new shapefile with only the points that are wanted
# checks the output
# final output is shapefile with only points beyond the specified distance
# ---------------------------------------------------------------------------

# Import system modules
import arcpy
from arcpy import env
import os

# Purpose of this script body: thin a point shapefile so that no two points
# are closer than a given distance, removing one point of each close pair.

# Input shapefile (script tool parameter 0)
inputfile = arcpy.GetParameterAsText(0)
# default inputfile = "d:/data/leap540/52spp_withid_1000m/BCN_Birds_P_top52spamecro.shp"

# Distance to look for points (script tool parameter 1).
# dist2 is the bare number as text; dist appends the unit for the near tool.
dist2 = arcpy.GetParameterAsText(1)
dist = dist2 + " Meters"
# default dist2 = 500

# Output files
# Shapefile from which points located at the exact same spot are removed
# (script tool parameter 2)
out1 = arcpy.GetParameterAsText(2)
# default out1 = inputfile[:-4] + "_sort.shp"
# Table listing point pairs that are within the specified distance.
# The [:-4] slices drop the last four characters (e.g. ".shp" / ".dbf").
out2 = out1[:-4] + dist2 + "_near.dbf"
# out2 table sorted by the RANDOM2 field
out3 = out2[:-4] + "_dist.dbf"
# out4 is only used here to derive the name of the check table, out5
out4 = inputfile[:-4] + "_f.dbf"
out5 = out4[:-4] + "_check.dbf"

# Add a RANDOM field to the input and fill it from a VB code block that
# evaluates Int((max-min+1)*Rnd+min) with min=0 and max=99999.
# NOTE(review): the field is declared SHORT but the code block can produce
# values up to 99999 - confirm the field can hold that range.
arcpy.AddField_management(inputfile,"RANDOM","SHORT","5","#","#","#","NULLABLE","NON_REQUIRED","#")
codeblock1= "dim max, min\nmax=99999\nmin=0\nx=(Int((max-min+1)*Rnd+min))"
arcpy.CalculateField_management(inputfile,"RANDOM","x","VB",codeblock1)

# Delete records that are identical in latitude and longitude.
# Sorting by RANDOM first is meant to make the surviving record a random one.
arcpy.Sort_management(inputfile,out1,"RANDOM ASCENDING","UR")
fields = ["LATITUDE","LONGITUDE"]
arcpy.DeleteIdentical_management(out1,fields,"#","0")

# Generate the list of point pairs that are within 'dist' of each other,
# give every pair row a random number (RANDOM2), and sort the table by it.
arcpy.GenerateNearTable_analysis(out1,out1,out2,dist,"NO_LOCATION","NO_ANGLE","ALL","0")
arcpy.AddField_management(out2,"RANDOM2","SHORT","5","#","#","#","NULLABLE","NON_REQUIRED","#")
arcpy.CalculateField_management(out2,"RANDOM2","x","VB",codeblock1)
arcpy.Sort_management(out2,out3,"RANDOM2 ASCENDING","UR")
# Drop rows whose NEAR_DIST duplicates another row's.
# NOTE(review): presumably intended to keep only one of the A->B / B->A rows
# of each pair; unrelated pairs with an equal distance would also collapse
# into one row - confirm.
arcpy.DeleteIdentical_management(out3,"NEAR_DIST","#","0")

# Walk the near table; the field named in i_fid identifies the point to drop.
rows = arcpy.SearchCursor(out3)
i_fid = "NEAR_FID"

# The loops rely on cursor.next() returning a false value at the end.
row = rows.next()
while row:
    i_fid1 = row.getValue(i_fid)
    arcpy.AddMessage("FID of row to delete is "+ str(i_fid1))
    # A fresh update cursor scans out1 from the top for every near-table row.
    rows2 = arcpy.UpdateCursor(out1)
    row2 = rows2.next()
    while row2:
        i_fid2 = row2.getValue("FID")
        arcpy.AddMessage("FID of row we are on is "+str(i_fid2))
        # NOTE(review): this matches on the live FID of out1 while rows are
        # being deleted from out1; the author's follow-up post reports that
        # the FIDs change after a deletion, so later matches hit the wrong row.
        if i_fid2 == i_fid1:
            rows2.deleteRow(row2)
            arcpy.AddMessage("deleting row")
            row2 = rows2.next()
        else:
            row2 = rows2.next()
            arcpy.AddMessage("not deleting anything")
    row = rows.next()
arcpy.AddMessage("out of loop")

# NOTE(review): row2 is only bound inside the outer loop, so 'del row2' raises
# NameError when out3 has no rows. The cursor objects rows/rows2 are not deleted.
del row
del row2

# Re-run the near analysis on the thinned shapefile as a check.
arcpy.GenerateNearTable_analysis(out1,out1,out5,dist,"NO_LOCATION","NO_ANGLE","ALL","0")
# TODO: this table should be empty; check that in code and report an error if it is not.

# TODO: delete out2 and out3

AmelieDavis · ‎10-21-2011

Never mind: I was matching on FID and then deleting rows, so the FIDs changed and I was deleting the wrong row after the first iteration.
This works like I want it to:

# ---------------------------------------------------------------------------
# IterativeNearAnalysis.py
# Created on: thu oct 19 2011 by Amelie Davis
# Takes all shapefiles in a folder
# if points are identical in terms of lat long, removes one point at random
# checks the distance between all points in that same shapefile is less than a specified distance
# deletes at random one of the two 'offending points'
# creates a new shapefile with only the points that are wanted
# checks the output
# final output is a .dbf of the attribute table for those points
# ---------------------------------------------------------------------------

# Import system modules
import arcpy
from arcpy import env
import os

# Purpose of this script body: thin a point shapefile so that no two points
# are closer than a given distance, removing one point of each close pair.

# Input shapefile (script tool parameter 0)
inputfile = arcpy.GetParameterAsText(0)
# default inputfile = "d:/data/leap540/52spp_withid_1000m/BCN_Birds_P_top52spamecro.shp"

# Distance to look for points (script tool parameter 1).
# dist2 is the bare number as text; dist appends the unit for the near tool.
dist2 = arcpy.GetParameterAsText(1)
dist = dist2 + " Meters"
# default dist2 = 500

# Output files
# Shapefile from which points located at the exact same spot are removed
# (script tool parameter 2)
out1 = arcpy.GetParameterAsText(2)
# default out1 = inputfile[:-4] + "_sort.shp"
# Table listing point pairs that are within the specified distance.
# The [:-4] slices drop the last four characters (e.g. ".shp" / ".dbf").
out2 = out1[:-4] + dist2 + "_near.dbf"
# out2 table sorted by the RANDOM2 field
out3 = out2[:-4] + "_dist.dbf"
# out4 is only used here to derive the name of the check table, out5
out4 = inputfile[:-4] + "_f.dbf"
out5 = out4[:-4] + "_check.dbf"

# NOTE(review): the field-creation calls are commented out in this version,
# yet RANDOM is sorted on and FID_COPY is read below - presumably both fields
# already exist on inputfile from an earlier run; confirm.
##arcpy.AddField_management(inputfile,"RANDOM","SHORT","5","#","#","#","NULLABLE","NON_REQUIRED","#")
# VB code block evaluating Int((max-min+1)*Rnd+min) with min=0 and max=99999;
# still needed for the RANDOM2 field further down.
codeblock1= "dim max, min\nmax=99999\nmin=0\nx=(Int((max-min+1)*Rnd+min))"
##arcpy.CalculateField_management(inputfile,"RANDOM","x","VB",codeblock1)
##arcpy.AddField_management(inputfile,"FID_COPY","SHORT","6","#","#","#","NULLABLE","NON_REQUIRED","#")
##arcpy.CalculateField_management(inputfile,"FID_COPY","[FID]","VB","#")

# Delete records that are identical in latitude and longitude.
# Sorting by RANDOM first is meant to make the surviving record a random one.
arcpy.Sort_management(inputfile,out1,"RANDOM ASCENDING","UR")
fields = ["LATITUDE","LONGITUDE"]
arcpy.DeleteIdentical_management(out1,fields,"#","0")

# Generate the list of point pairs that are within 'dist' of each other,
# give every pair row a random number (RANDOM2), and sort the table by it.
arcpy.GenerateNearTable_analysis(out1,out1,out2,dist,"NO_LOCATION","NO_ANGLE","ALL","0")
arcpy.AddField_management(out2,"RANDOM2","SHORT","5","#","#","#","NULLABLE","NON_REQUIRED","#")
arcpy.CalculateField_management(out2,"RANDOM2","x","VB",codeblock1)
arcpy.Sort_management(out2,out3,"RANDOM2 ASCENDING","UR")
# Drop rows whose NEAR_DIST duplicates another row's.
# NOTE(review): presumably intended to keep only one of the A->B / B->A rows
# of each pair; unrelated pairs with an equal distance would also collapse
# into one row - confirm.
arcpy.DeleteIdentical_management(out3,"NEAR_DIST","#","0")

# Walk the near table; the field named in i_fid identifies the point to drop.
rows = arcpy.SearchCursor(out3)
i_fid = "IN_FID"

# The loops rely on cursor.next() returning a false value at the end.
row = rows.next()
while row:
    i_fid1 = row.getValue(i_fid)
    arcpy.AddMessage("FID of row to delete is "+ str(i_fid1))
    # A fresh update cursor scans out1 from the top for every near-table row.
    rows2 = arcpy.UpdateCursor(out1)
    row2 = rows2.next()
    while row2:
        # Match on the stored FID_COPY attribute instead of the live FID.
        # NOTE(review): IN_FID comes from a near table built on out1, while
        # FID_COPY was (in the commented-out code) copied from inputfile's FID
        # before sorting - these presumably number the points differently; confirm.
        i_fid2 = row2.getValue("FID_COPY")
        # L_ID is only read for the log message below.
        cute = row2.getValue("L_ID")
        arcpy.AddMessage("FID of row we are on is "+str(i_fid2))
        if i_fid2 == i_fid1:
            rows2.deleteRow(row2)
            arcpy.AddMessage("deleting row"+ " and original loc is " + str(cute))
            row2 = rows2.next()
        else:
            row2 = rows2.next()
            arcpy.AddMessage("not deleting anything")
    row = rows.next()
arcpy.AddMessage("out of loop")

# NOTE(review): row2 is only bound inside the outer loop, so 'del row2' raises
# NameError when out3 has no rows. The cursor objects rows/rows2 are not deleted.
del row
del row2

# Re-run the near analysis on the thinned shapefile as a check.
arcpy.GenerateNearTable_analysis(out1,out1,out5,dist,"NO_LOCATION","NO_ANGLE","ALL","0")

# TODO: delete out2 and out3

AmelieDavis · ‎10-21-2011

No, that still doesn't work.
It's still a problem with the loops: the second while loop does not start at the beginning of the table after it finds a match and deletes that row.
How do I do that?

# ---------------------------------------------------------------------------
# IterativeNearAnalysis.py
# Created on: thu oct 19 2011 by Amelie Davis
# Takes all shapefiles in a folder
# if points are identical in terms of lat long, removes one point at random
# checks the distance between all points in that same shapefile is less than a specified distance
# deletes at random one of the two 'offending points'
# creates a new shapefile with only the points that are wanted
# checks the output
# final output is a .dbf of the attribute table for those points
# ---------------------------------------------------------------------------

# Import system modules
import arcpy
from arcpy import env
import os

# Purpose of this script body: thin a point shapefile so that no two points
# are closer than a given distance, removing one point of each close pair.

# Input shapefile (script tool parameter 0)
inputfile = arcpy.GetParameterAsText(0)
# default inputfile = "d:/data/leap540/52spp_withid_1000m/BCN_Birds_P_top52spamecro.shp"

# Distance to look for points (script tool parameter 1).
# dist2 is the bare number as text; dist appends the unit for the near tool.
dist2 = arcpy.GetParameterAsText(1)
dist = dist2 + " Meters"
# default dist2 = 500

# Output files
# Shapefile from which points located at the exact same spot are removed
# (script tool parameter 2)
out1 = arcpy.GetParameterAsText(2)
# default out1 = inputfile[:-4] + "_sort.shp"
# Table listing point pairs that are within the specified distance.
# The [:-4] slices drop the last four characters (e.g. ".shp" / ".dbf").
out2 = out1[:-4] + dist2 + "_near.dbf"
# out2 table sorted by the RANDOM2 field
out3 = out2[:-4] + "_dist.dbf"
# out4 is only used here to derive the name of the check table, out5
out4 = inputfile[:-4] + "_f.dbf"
out5 = out4[:-4] + "_check.dbf"

# Add a RANDOM field to the input and fill it from a VB code block that
# evaluates Int((max-min+1)*Rnd+min) with min=0 and max=99999.
arcpy.AddField_management(inputfile,"RANDOM","SHORT","5","#","#","#","NULLABLE","NON_REQUIRED","#")
codeblock1= "dim max, min\nmax=99999\nmin=0\nx=(Int((max-min+1)*Rnd+min))"
arcpy.CalculateField_management(inputfile,"RANDOM","x","VB",codeblock1)
# Store each input feature's FID in a regular attribute, FID_COPY.
arcpy.AddField_management(inputfile,"FID_COPY","SHORT","6","#","#","#","NULLABLE","NON_REQUIRED","#")
arcpy.CalculateField_management(inputfile,"FID_COPY","[FID]","VB","#")

# Delete records that are identical in latitude and longitude.
# Sorting by RANDOM first is meant to make the surviving record a random one.
arcpy.Sort_management(inputfile,out1,"RANDOM ASCENDING","UR")
fields = ["LATITUDE","LONGITUDE"]
arcpy.DeleteIdentical_management(out1,fields,"#","0")

# Generate the list of point pairs that are within 'dist' of each other,
# give every pair row a random number (RANDOM2), and sort the table by it.
arcpy.GenerateNearTable_analysis(out1,out1,out2,dist,"NO_LOCATION","NO_ANGLE","ALL","0")
arcpy.AddField_management(out2,"RANDOM2","SHORT","5","#","#","#","NULLABLE","NON_REQUIRED","#")
arcpy.CalculateField_management(out2,"RANDOM2","x","VB",codeblock1)
arcpy.Sort_management(out2,out3,"RANDOM2 ASCENDING","UR")
# Drop rows whose NEAR_DIST duplicates another row's.
# NOTE(review): presumably intended to keep only one of the A->B / B->A rows
# of each pair; unrelated pairs with an equal distance would also collapse
# into one row - confirm.
arcpy.DeleteIdentical_management(out3,"NEAR_DIST","#","0")

# Walk the near table; the field named in i_fid identifies the point to drop.
rows = arcpy.SearchCursor(out3)
i_fid = "IN_FID"

#a = arcpy.GetCount_management(out1)
# The loops rely on cursor.next() returning a false value at the end.
row = rows.next()
while row:
    i_fid1 = row.getValue(i_fid)
    arcpy.AddMessage("FID of row to delete is "+ str(i_fid1))
    # A fresh update cursor scans out1 from the top for every near-table row.
    rows2 = arcpy.UpdateCursor(out1)
    row2 = rows2.next()

    while row2:
        # NOTE(review): IN_FID comes from a near table built on out1, while
        # FID_COPY was copied from inputfile's FID before the sort - these
        # presumably number the points differently; confirm.
        i_fid2 = row2.getValue("FID_COPY")
        # L_ID is only read for the log message below.
        cute = row2.getValue("L_ID")
        #arcpy.AddMessage("FID of row we are on is "+str(i_fid2))
        if i_fid2 == i_fid1:
            rows2.deleteRow(row2)
            arcpy.AddMessage("deleting row"+ " and original loc is " + str(cute))
            row2 = rows2.next()
            # New in this version: rewind the update cursor after a deletion.
            # NOTE(review): row2 was already fetched on the line above, so the
            # inner loop continues with that row before the rewound cursor is
            # read again - confirm this is the intended order.
            rows2.reset()
        else:
            row2 = rows2.next()
            arcpy.AddMessage("not deleting anything")
    row = rows.next()
#arcpy.AddMessage("out of loop")

# NOTE(review): row2 is only bound inside the outer loop, so 'del row2' raises
# NameError when out3 has no rows. The cursor objects rows/rows2 are not deleted.
del row
del row2

# Re-run the near analysis on the thinned shapefile as a check.
arcpy.GenerateNearTable_analysis(out1,out1,out5,dist,"NO_LOCATION","NO_ANGLE","ALL","0")

# TODO: delete out2 and out3

DarrenWiens2 · ‎10-21-2011

Since your problem is regarding loops, indentation is important -> use the [CODE] tags above the posting text box, and you'll be more likely to get a response.

AmelieDavis · ‎10-21-2011

Oops, thanks - I didn't know about that.

It's not deleting the right row...

# ---------------------------------------------------------------------------
# IterativeNearAnalysis.py
# Created on: thu oct 19 2011  by Amelie Davis
# Takes all shapefiles in a folder
# if points are identical in terms of lat long, removes one point at random
# checks the distance between all points in that same shapefile is less than a specified distance
# deletes at random one of the two 'offending points'
# creates a new shapefile with only the points that are wanted
# checks the output
# final output is a .dbf of the attribute table for those points
# ---------------------------------------------------------------------------

# Import system modules
import arcpy
from arcpy import env
import os

# Purpose of this script body: thin a point shapefile so that no two points
# are closer than a given distance, removing one point of each close pair.

# Input shapefile (script tool parameter 0)
inputfile = arcpy.GetParameterAsText(0)
# default inputfile = "d:/data/leap540/52spp_withid_1000m/BCN_Birds_P_top52spamecro.shp"

# Distance to look for points (script tool parameter 1).
# dist2 is the bare number as text; dist appends the unit for the near tool.
dist2 = arcpy.GetParameterAsText(1)
dist = dist2 + " Meters"
# default dist2 = 500

# Output files
# Shapefile from which points located at the exact same spot are removed
# (script tool parameter 2)
out1 = arcpy.GetParameterAsText(2)
# default out1 = inputfile[:-4] + "_sort.shp"
# Table listing point pairs that are within the specified distance.
# The [:-4] slices drop the last four characters (e.g. ".shp" / ".dbf").
out2 = out1[:-4] + dist2 + "_near.dbf"
# out2 table sorted by the RANDOM2 field
out3 = out2[:-4] + "_dist.dbf"
# out4 is only used here to derive the name of the check table, out5
out4 = inputfile[:-4] + "_f.dbf"
out5 = out4[:-4] + "_check.dbf"

# Add a RANDOM field to the input and fill it from a VB code block that
# evaluates Int((max-min+1)*Rnd+min) with min=0 and max=99999.
arcpy.AddField_management(inputfile,"RANDOM","SHORT","5","#","#","#","NULLABLE","NON_REQUIRED","#")
codeblock1= "dim max, min\nmax=99999\nmin=0\nx=(Int((max-min+1)*Rnd+min))"
arcpy.CalculateField_management(inputfile,"RANDOM","x","VB",codeblock1)
# Store each input feature's FID in a regular attribute, FID_COPY.
arcpy.AddField_management(inputfile,"FID_COPY","SHORT","6","#","#","#","NULLABLE","NON_REQUIRED","#")
arcpy.CalculateField_management(inputfile,"FID_COPY","[FID]","VB","#")

# Delete records that are identical in latitude and longitude.
# Sorting by RANDOM first is meant to make the surviving record a random one.
arcpy.Sort_management(inputfile,out1,"RANDOM ASCENDING","UR")
fields = ["LATITUDE","LONGITUDE"]
arcpy.DeleteIdentical_management(out1,fields,"#","0")

# Generate the list of point pairs that are within 'dist' of each other,
# give every pair row a random number (RANDOM2), and sort the table by it.
arcpy.GenerateNearTable_analysis(out1,out1,out2,dist,"NO_LOCATION","NO_ANGLE","ALL","0")
arcpy.AddField_management(out2,"RANDOM2","SHORT","5","#","#","#","NULLABLE","NON_REQUIRED","#")
arcpy.CalculateField_management(out2,"RANDOM2","x","VB",codeblock1)
arcpy.Sort_management(out2,out3,"RANDOM2 ASCENDING","UR")
# Drop rows whose NEAR_DIST duplicates another row's.
# NOTE(review): presumably intended to keep only one of the A->B / B->A rows
# of each pair; unrelated pairs with an equal distance would also collapse
# into one row - confirm.
arcpy.DeleteIdentical_management(out3,"NEAR_DIST","#","0")


# Walk the near table; the field named in i_fid identifies the point to drop.
rows = arcpy.SearchCursor(out3)
i_fid = "IN_FID"

#a = arcpy.GetCount_management(out1)
# The loops rely on cursor.next() returning a false value at the end.
row = rows.next()
while row:
    i_fid1 = row.getValue(i_fid)
    arcpy.AddMessage("FID of row to delete is "+ str(i_fid1))
    # A fresh update cursor scans out1 from the top for every near-table row.
    rows2 = arcpy.UpdateCursor(out1)
    row2 = rows2.next()
    while row2:
        # NOTE(review): IN_FID comes from a near table built on out1, while
        # FID_COPY was copied from inputfile's FID before the sort - these
        # presumably number the points differently; confirm.
        i_fid2 = row2.getValue("FID_COPY")
        # L_ID is only read for the log message below.
        cute = row2.getValue("L_ID")
        #arcpy.AddMessage("FID of row we are on is "+str(i_fid2))
        if i_fid2 == i_fid1:
            rows2.deleteRow(row2)
            arcpy.AddMessage("deleting row"+ " and original loc is " + str(cute))
            row2 = rows2.next()
        else:
            row2 = rows2.next()
            #arcpy.AddMessage("not deleting anything")
        # In this version reset() runs on every pass of the inner loop, not
        # only after a deletion.
        # NOTE(review): rewinding after each fetched row presumably keeps the
        # cursor from ever advancing through the table normally - confirm; the
        # author reports that the wrong row gets deleted.
        rows2.reset()
    row = rows.next()
#arcpy.AddMessage("out of loop")

# NOTE(review): row2 is only bound inside the outer loop, so 'del row2' raises
# NameError when out3 has no rows. The cursor objects rows/rows2 are not deleted.
del row
del row2

# Re-run the near analysis on the thinned shapefile as a check.
arcpy.GenerateNearTable_analysis(out1,out1,out5,dist,"NO_LOCATION","NO_ANGLE","ALL","0")


# TODO: delete out2 and out3

AmelieDavis · ‎10-21-2011

Maybe I should explain what I'm trying to do better:

I want to
1) loop through all rows in a table called tableA
2) for each row in tableA get the value inside one field called fid_N
3) loop through a different table (tableB) and find the row where the value of a different field (fid_copy) matches fid_N, i.e. I need fid_N = fid_copy
4) delete that row from tableB (the row where the two values were equal)
5) move on to the next row in tableA and then search through every single row in tableB again
6) reiterate for all rows in tableA

Could someone just provide me with the architecture to do this, please? I think I'm not implementing my while loops correctly (or maybe I should be using for loops)...

AmelieDavis · ‎10-21-2011

Fixed it - problem was not with my while loops NOR with the delete row function but rather with my logic and incorrect use of FIDs...
Here is the correct code:

# ---------------------------------------------------------------------------
# IterativeNearAnalysis.py
# Created on: thu oct 19 2011  by Amelie Davis
# Takes all shapefiles in a folder
# if points are identical in terms of lat long, removes one point at random
# checks the distance between all points in that same shapefile is less than a specified distance
# deletes at random one of the two 'offending points'
# creates a new shapefile with only the points that are wanted
# checks the output
# final output is a .dbf of the attribute table for those points
# ---------------------------------------------------------------------------

# Import system modules
import arcpy
from arcpy import env
import os
from os import path

# Purpose of this section: read the tool parameters, copy the input, remove
# coincident points, and build the randomly ordered table of close point pairs.

# Input shapefile (script tool parameter 0)
inputfile = arcpy.GetParameterAsText(0)
# default inputfile = "d:/data/leap540/52spp_withid_1000m/BCN_Birds_P_top52spamecro.shp"

# Distance to look for points (script tool parameter 1).
# dist2 is the bare number as text; dist appends the unit for the near tool.
dist2 = arcpy.GetParameterAsText(1)
dist = dist2 + " Meters"
# default dist2 = 500

# Workspace (script tool parameter 2); the relative output names below are
# created inside it.
wksp = arcpy.GetParameterAsText(2)
arcpy.AddMessage("Workspace is "+ wksp)
# Set the workspace
arcpy.env.workspace = wksp
# Only fileName is used below; dirName is not referenced again in this section.
(dirName, fileName) = os.path.split(inputfile)

# Output files
# Name of the working copy of the input. The [:-4] slices used for all the
# derived names drop the last four characters (e.g. ".shp" / ".dbf").
out1 = fileName[:-4]+"_copy.shp"

# Make a copy of inputfile and name it out1 ("#" = no where clause), so the
# fields added below go on the copy rather than on the input.
arcpy.Select_analysis(inputfile,out1,"#")

# Names of output tables and shapefiles
# out1_a: randomly sorted copy from which coincident points are removed
out1_a = out1[:-4]+"_sort.shp"
# out2: near table of point pairs; out3: out2 sorted by RANDOM2
out2 = out1_a[:-4] + dist2 + "_near.dbf"
out3 = out2[:-4] + "_sort2.dbf"
# Final output (script tool parameter 3)
out4 = arcpy.GetParameterAsText(3)
# Check output: near table computed on the final output
out5 = out3[:-4] + "_check.dbf"

# Add a RANDOM field to the copy and fill it from a VB code block that
# evaluates Int((max-min+1)*Rnd+min) with min=0 and max=99999.
# NOTE(review): the field is declared SHORT but the code block can produce
# values up to 99999 - confirm the field can hold that range.
arcpy.AddField_management(out1,"RANDOM","SHORT","5","#","#","#","NULLABLE","NON_REQUIRED","#")
codeblock1= "dim max, min\nmax=99999\nmin=0\nx=(Int((max-min+1)*Rnd+min))"
arcpy.CalculateField_management(out1,"RANDOM","x","VB",codeblock1)
# Store each feature's FID (as numbered in out1) in a regular attribute.
arcpy.AddField_management(out1,"FID_COPY","SHORT","6","#","#","#","NULLABLE","NON_REQUIRED","#")
arcpy.CalculateField_management(out1,"FID_COPY","[FID]","VB","#")

# Delete records that are identical in latitude and longitude.
# Sorting by RANDOM first is meant to make the surviving record a random one.
arcpy.Sort_management(out1,out1_a,"RANDOM ASCENDING","#")
fields = ["LATITUDE","LONGITUDE"]
arcpy.DeleteIdentical_management(out1_a,fields,"#","0")

# Generate the list of point pairs that are within 'dist' of each other,
# give every pair row a random number (RANDOM2), and sort the table by it.
arcpy.GenerateNearTable_analysis(out1_a,out1_a,out2,dist,"NO_LOCATION","NO_ANGLE","ALL","0")
arcpy.AddField_management(out2,"RANDOM2","SHORT","5","#","#","#","NULLABLE","NON_REQUIRED","#")
arcpy.CalculateField_management(out2,"RANDOM2","x","VB",codeblock1)
arcpy.Sort_management(out2,out3,"RANDOM2 ASCENDING","#")
# Drop rows whose NEAR_DIST duplicates another row's.
# NOTE(review): presumably intended to keep only one of the A->B / B->A rows
# of each pair; unrelated pairs with an equal distance would also collapse
# into one row - confirm.
arcpy.DeleteIdentical_management(out3,"NEAR_DIST","#","0")


# ---------------------------------------------------------------------------
# Remove the flagged points and write the final output.
#
# Reads:  out3   - sorted, de-duplicated near table (one row per close pair)
#         out1_a - de-duplicated point shapefile the near table was built on
#         out4, out5, dist - final output, check-table name, search distance
# Writes: out4 (points kept) and out5 (near table computed on out4)
#
# Nothing is deleted from out1_a; the flagged points are selected on a
# feature layer, the selection is inverted, and the remainder is exported.
# ---------------------------------------------------------------------------

# Step 1: collect the FID of every point flagged for removal in a single pass.
# NEAR_FID holds the FID (as numbered in out1_a) of one point of each pair.
i_fid = "NEAR_FID"
near_fids = set()
row = None  # pre-bind so the clean-up below is safe when the table is empty
rows = arcpy.SearchCursor(out3)
for row in rows:
    i_fid1 = row.getValue(i_fid)
    arcpy.AddMessage("FID of row to delete is "+ str(i_fid1))
    near_fids.add(i_fid1)
# Drop the row and cursor objects so the lock on out3 is released.
del row
del rows

arcpy.MakeFeatureLayer_management(out1_a,"grumble")

# Step 2: one read-only pass over the points to turn the flagged FIDs into
# attribute queries on L_ID. A set lookup replaces re-scanning the whole
# shapefile with an update cursor for every near-table row.
# NOTE(review): the query text assumes L_ID is a numeric field that uniquely
# identifies a point - confirm against the data.
queries = []
row2 = None
rows2 = arcpy.SearchCursor(out1_a)
for row2 in rows2:
    i_fid2 = row2.getValue("FID")
    if i_fid2 in near_fids:
        cute = row2.getValue("L_ID")
        queries.append("L_ID = " + str(cute))
        arcpy.AddMessage("deleting row"+ str(i_fid2)+" and original loc is " + str(cute))
del row2
del rows2

# Step 3: select every flagged point on the layer ...
for f_query in queries:
    arcpy.SelectLayerByAttribute_management("grumble","ADD_TO_SELECTION",f_query)
    arcpy.AddMessage(f_query)

# ... then invert the selection so only the points to keep are selected,
# and export them as the final output.
arcpy.SelectLayerByAttribute_management("grumble","SWITCH_SELECTION","#")
arcpy.Select_analysis("grumble",out4,"#")

# Step 4: check the output - re-run the near analysis on the result. The check
# table should be empty; warn if any pair is still within the distance.
# NOTE(review): pairs can remain if the NEAR_DIST de-duplication of the near
# table dropped the only row of a pair that shared its distance with another.
arcpy.GenerateNearTable_analysis(out4,out4,out5,dist,"NO_LOCATION","NO_ANGLE","ALL","0")
remaining = int(arcpy.GetCount_management(out5).getOutput(0))
if remaining:
    arcpy.AddWarning(str(remaining) + " row(s) in check table " + out5 +
                     ": some points are still within " + dist + " of each other")

# TODO: delete out2 and out3