import arcpy from arcpy import da import os import pandas as pd ### USER MUST SET VARIABLES AS PARAMTERS IF SAVING SCRIPT TO TOOLBOX OTHERWISE CHANGE VARIABLES TO ABOSLUTE PATHS # Parameters for front-end use data_gdb = arcpy.GetParameterAsText(0) # this is the unzipped Survey123 file gdb which contains all survey data. Must be fresh from Survey123. outputFolder = arcpy.GetParameterAsText(1) # this is the output folder where the unzipped Survey123 file gdb is saved. Faster to save on C: drive. intermediaryFolder = arcpy.GetParameterAsText(2) # this is an intermediary folder where temp files will be saved and overwritten each time the tool is ran. Faster to save on C: drive. # allow overwrite arcpy.env.overwriteOutput = True # definitions def createFolder(folderPath): if not os.path.exists(folderPath): try: os.makedirs(folderPath) except OSError as e: if e.errno != errno.EEXIST: raise # this could change depending on schema of file geodatabase downloaded from survey123, routinely QC survey = data_gdb + "\\survey" survey__ATTACH = data_gdb + "\\survey__ATTACH" repeat_draw_field = data_gdb + "\\repeat_draw_field" ##### -------------------- Preparing data for loop -------------------- ###### #### ---------- Excel and Survey Attachments ---------- #### # 1. Using Survey123 fgdb, join survey__ATTACH to survey by global IDs survey__ATTACH_join = arcpy.management.AddJoin(survey__ATTACH, "REL_GLOBALID", survey, "GlobalID") # 2. Save join as a new table and read as new element arcpy.conversion.TableToTable(survey__ATTACH_join, data_gdb, "survey__ATTACH_join") survey__ATTACH_join_2 = data_gdb + "\\survey__ATTACH_join" # 3. Create CountyPQ column in survey fc and calculate field arcpy.management.AddField(survey__ATTACH_join_2, "CountyPQ", "STRING") arcpy.management.CalculateField(survey__ATTACH_join_2,"CountyPQ",'$feature.california_county+"_"+$feature.PQ',"ARCADE") # 4. Create excel of survey data then read in to create a list of unique file names arcpy.TableToExcel_conversion(survey__ATTACH_join_2,intermediaryFolder + "\survey__ATTACH_join.xls", "ALIAS") survey__ATTACH_join_df = pd.read_excel(intermediaryFolder + "\survey__ATTACH_join.xls") # 5. Create elements to refer to in loop list_of_CountyPQ = survey__ATTACH_join_df.loc[:,"CountyPQ"] list_of_PQ = survey__ATTACH_join_df.loc[:,"PQ Number"].astype(str) outputSubFolders = outputFolder + '\\' + list_of_CountyPQ #### ---------- End of Section ---------- #### #### ---------- Shapefile and field boundaries ---------- #### # 6. Using Survey123 fgdb, join repeat_draw_field to survey by global IDs repeat_draw_field_join = arcpy.management.AddJoin(repeat_draw_field, "parentglobalid", survey, "globalid") # 7. Select rows with shape data only and save as new feature class repeat_draw_field_sel = arcpy.SelectLayerByAttribute_management(repeat_draw_field_join, "NEW_SELECTION", "SHAPE_Length <> 0") arcpy.management.CopyFeatures(repeat_draw_field_sel, data_gdb + "\\repeat_draw_field_sel") # 8. Read new feature class, create excel and then a list of CountyPQs repeat_draw_field_NEW = data_gdb + "\\repeat_draw_field_sel" arcpy.TableToExcel_conversion(repeat_draw_field_sel,intermediaryFolder + "\\repeat_draw_field_sel.xls", "ALIAS") repeat_draw_field_sel_df = pd.read_excel(intermediaryFolder + "\\repeat_draw_field_sel.xls", converters = {'California county where grown': str}) # 9. Create a CountyPQ column repeat_draw_field_sel_df["CountyPQ"] = repeat_draw_field_sel_df["California county where grown"].astype(str) + "_" + repeat_draw_field_sel_df["PQ Number"].astype(str) # 10. Create lists to refer to in loop list_of_surveyfc_CountyPQ = repeat_draw_field_sel_df.loc[:,"CountyPQ"] list_of_surveyfc_PQ = repeat_draw_field_sel_df.loc[:,"PQ Number"].astype(str) outputFolderSurveyFC = outputFolder + '\\' + list_of_surveyfc_CountyPQ outputSurveyFC = "PQ_" + list_of_surveyfc_PQ #### ---------- End of Section ---------- #### ####### ---------- Loop to export Excel and survey attachment data ---------- ######## for i in range(0,len(outputSubFolders.index)): # allow overwrite arcpy.env.overwriteOutput = True # Create subfolders based on unique PQs createFolder(outputSubFolders[i]) # Create excel of PQ CountyPQ = list_of_CountyPQ[i] + "'" CountyPQ_query = str("CountyPQ = '" + CountyPQ) CountyPQ_sel = arcpy.SelectLayerByAttribute_management(data_gdb + "\\survey__ATTACH_join", "NEW_SELECTION", CountyPQ_query) # Copy join, save to gdb as new tbl arcpy.management.CopyRows(CountyPQ_sel, data_gdb + "\\survey__ATTACH_join_select") #Clear Selection CountyPQ_sel = arcpy.SelectLayerByAttribute_management(data_gdb + "\\survey__ATTACH_join", "CLEAR_SELECTION") # Read survey__ATTACH_join_select CountyPQ_sel_attach = data_gdb + "\\survey__ATTACH_join_select" # Export attachments to folder with da.SearchCursor(CountyPQ_sel_attach, ['DATA', 'ATT_NAME', 'REL_GLOBALID', 'PQ']) as cursor: for item in cursor: attachment = item[0] filenum = str(item[3]) filename = filenum + "_" + str(item[1]) open(outputSubFolders[i] + os.sep + filename, 'wb').write(attachment.tobytes()) del item del filenum del filename del attachment # Delete identical rows (based on PQ), remove some columns, export as .xls CountyPQ_sel_xl = arcpy.management.DeleteIdentical(CountyPQ_sel_attach, "CountyPQ") CountyPQ_sel_xl = arcpy.DeleteField_management(CountyPQ_sel_xl, ["GLOBALID", "REL_GLOBALID", "CONTENT_TYPE","ATT_NAME", "DATA_SIZE", "CountyPQ", "objectid", "GlobalID"]) arcpy.conversion.TableToExcel(CountyPQ_sel_xl, outputSubFolders[i] + "\\" + list_of_PQ[i] + "_survey.xls", "ALIAS") ######### ---------- Loop for shapefile data ---------- ######### shp_data = [] for i in range(0,len(list_of_surveyfc_CountyPQ.index)): # allow overwrite arcpy.env.overwriteOutput = True # Create query for PQs and select by attribute fc_PQ = list_of_surveyfc_PQ[i] + "'" fc_PQ_query = str("survey_PQ = '" + fc_PQ) repeat_draw_field_sel_2 = arcpy.SelectLayerByAttribute_management(repeat_draw_field_NEW, "NEW_SELECTION", fc_PQ_query) # Create file geodatabase arcpy.management.CreateFileGDB(outputFolderSurveyFC[i], outputSurveyFC[i]) # Copy selected features, save as fc in existing file gdb arcpy.management.CopyFeatures(repeat_draw_field_sel_2, outputFolderSurveyFC[i] + "\\" + outputSurveyFC[i] + ".gdb\\" + outputSurveyFC[i]) # Write Excel list of all shapefile data temp_row = outputFolderSurveyFC[i] + "\\" + outputSurveyFC[i] + ".gdb\\" + outputSurveyFC[i] shp_data.append(temp_row) shp_data_df = pd.DataFrame(list(zip(shp_data)), columns =['Feature Class Path Name']) shp_data_df.to_csv(outputFolder + "\\FeatureClass_List.csv", sep = ",", header = 'TRUE');