Hi!
So I want to use arcpy to run a composite band function. Essentially I have 450+ tif files in one folder, and each file is a separate band from drone imagery. I want to create a script that takes every 4 bands and outputs a composite image into a new folder. Additionally, it would be helpful to be able to label the bands accordingly. I am not sure how to create a loop that will accomplish this, for I am new to working with arcpy. Thanks!
import os

import arcpy

# Every single-band TIFF sits in the workspace folder; composites are written
# to the `output` sub-folder, which is created if it does not exist yet.
arcpy.env.workspace = ".\\"
output = "Stacked_bands"
os.makedirs(output, exist_ok=True)

# ListRasters returns names relative to the workspace. Sorting keeps the four
# bands of one capture next to each other, assuming they share a name prefix
# (verify this against your own file names before trusting the grouping).
rasters = sorted(arcpy.ListRasters("*", "TIF") or [])

# A trailing partial group cannot form a 4-band composite, so it is left out.
leftover = len(rasters) % 4
if leftover:
    print(f"Ignoring the last {leftover} raster(s): not a full group of 4")

# Walk the list four files at a time; each slice becomes one composite image.
for start in range(0, len(rasters) - leftover, 4):
    band1, band2, band3, band4 = rasters[start:start + 4]
    stacked_img = os.path.join(output, f"stacked_img_{start // 4 + 1:03d}.tif")
    arcpy.CompositeBands_management([band1, band2, band3, band4], stacked_img)
Solved! Go to Solution.
You could utilize multiprocessing to help speed up process of creating 112+ rasters.
In a separate Python script named WorkerProcess.py (the name must match the `import WorkerProcess` line in the main script):
"""
Description: Python script used by the multiprocessing main script to create rasters.
Usage: worker method
"""
# -------------------------------------------------------------------------------
# IMPORTS
import arcpy
import os
def comp_band(tifs=None):
    """Composite one group of single-band rasters into one multi-band raster.

    Failures are reported through the returned dictionary rather than raised,
    so a caller collecting many results can inspect each one afterwards.

    Args:
        tifs: Dictionary with two keys: 'tifPaths', the list of input band
            paths in band order, and 'outName', the output raster path.

    Returns:
        A dictionary with 'Task' (base name of the output raster, or None
        when no job dictionary was supplied) and 'Error' (None on success,
        otherwise the exception that stopped the job).
    """
    result = {'Task': None, 'Error': None}
    try:
        # Validate inside the try so that bad input is reported through the
        # result dictionary like any other failure instead of raising.
        if tifs is None:
            raise ValueError("comp_band needs a {'tifPaths': [...], 'outName': ...} dictionary")
        result['Task'] = os.path.basename(tifs['outName'])
        band_paths = tifs['tifPaths']
        # A None/empty entry means a band file was never found for this group;
        # say so directly rather than letting the tool fail on it.
        if not band_paths or any(not path for path in band_paths):
            raise ValueError(f'missing band path in {band_paths}')
        arcpy.CompositeBands_management(band_paths, tifs['outName'])
    except Exception as ex:
        # Deliberately broad: one failed group is recorded, not propagated.
        result['Error'] = ex
    return result
Save it in the same folder as the main script below. Borrowing Madhi_ch's parsing script above, which is great, I modified a couple of lines to output the generated paths into a list of dictionaries {tifs, output name} and added the multiprocessing code:
import os
import arcpy
import multiprocessing as mp
import WorkerProcess
input_dir = r'C:\blah' # input tifs are here
output_dir = r'C:\blah\Stacked_bands' # composed tifs will be saved here
def create_tif_groups(in_dir=None, out_dir=None):
    """Group single-band TIFFs by capture and pair each group with an output path.

    File names are expected to look like
    ``<prefix>_<timestamp>_<sequence>_<band>.tif`` where ``<band>`` is one of
    G, NIR, R or RE. The timestamp and sequence parts together
    (e.g. ``20230316165710_001``) identify one capture.

    Files that do not follow the pattern, and captures that lack any of the
    four bands, are reported with a printed message and left out.

    Args:
        in_dir: Folder to scan. Defaults to the module-level ``input_dir``.
        out_dir: Folder the composites are meant for. Defaults to the
            module-level ``output_dir``.

    Returns:
        A list of dictionaries, one per complete capture, each with
        'tifPaths' (band paths ordered G, NIR, R, RE) and 'outName'
        (``DJI_<timestamp>_<sequence>.tif`` inside ``out_dir``).
    """
    in_dir = input_dir if in_dir is None else in_dir
    out_dir = output_dir if out_dir is None else out_dir

    # Band suffixes in the order they are stacked in the composite.
    band_order = ('G', 'NIR', 'R', 'RE')

    # capture id -> {band suffix: full path of that band's file}
    relations = {}
    for file_name in sorted(os.listdir(in_dir)):
        # splitext keeps dots inside the name intact and tests the real extension.
        stem, ext = os.path.splitext(file_name)
        if ext.lower() != '.tif':
            continue
        parts = stem.split('_')
        if len(parts) < 4 or parts[-1] not in band_order:
            print(f'Skipping {file_name}: name does not match <prefix>_<timestamp>_<sequence>_<band>.tif')
            continue
        key = f'{parts[1]}_{parts[2]}'
        # Join with the name as found on disk so the extension's case is kept.
        relations.setdefault(key, {})[parts[-1]] = os.path.join(in_dir, file_name)

    tif_groups = []
    for key, bands in relations.items():
        missing = [band for band in band_order if band not in bands]
        if missing:
            # An incomplete capture cannot be composited; skip it instead of
            # handing the worker a list with holes in it.
            print(f'Skipping {key}: missing band(s) {", ".join(missing)}')
            continue
        tif_groups.append({
            'tifPaths': [bands[band] for band in band_order],
            'outName': os.path.join(out_dir, f'DJI_{key}.tif'),
        })
    return tif_groups
def mp_tifs():
    """Composite every tif group in parallel, one pool job per group.

    Builds the job list with create_tif_groups(), submits each job to
    WorkerProcess.comp_band through a process pool, then prints one line for
    every job whose result carries an error.
    """
    # get the list of tif groups and output names from create_tif_groups
    tif_groups = create_tif_groups()
    if not tif_groups:
        print('No tif groups found; nothing to do.')
        return

    # Make sure the destination folder exists before any worker writes to it.
    os.makedirs(output_dir, exist_ok=True)

    # Leave one core free for other tasks (drop the "- 1" to use them all),
    # but never request fewer than one worker: cpu_count() - 1 is 0 on a
    # single-core machine and Pool rejects that. There is also no point in
    # starting more workers than there are jobs.
    cores = max(1, min(mp.cpu_count() - 1, len(tif_groups)))
    with mp.Pool(processes=cores) as pool:
        # Create a job for each tif group and pass it to the worker function.
        jobs = [pool.apply_async(WorkerProcess.comp_band, (group,)) for group in tif_groups]
        # Collect inside the with-block: leaving it terminates the pool.
        res = [job.get() for job in jobs]

    # Report every failed job together with the output it was meant to create.
    for r in res:
        if r['Error'] is not None:
            print(f'{r["Task"]} failed: {r["Error"]}')


if __name__ == '__main__':
    mp_tifs()
Hi,
I never worked with tif images in arcpy , but generally if I want to handle the files I will do something like this.
I was not sure if the order of the bands really matter here, but just in case I tried to force the order you had in the screenshot.
Also, I was not sure if you wanted to move each new image into a new folder or all the new ones into one new folder.
import os
import arcpy
...
input_dir = r'C:\blah'  # input tifs are here
output_dir = r'C:\blah\Stacked_bands'  # composed tifs will be saved here

# Create the output folder when it is missing; do nothing when it exists.
os.makedirs(output_dir, exist_ok=True)

# Band suffixes in the order they are stacked in the composite.
band_order = ('G', 'NIR', 'R', 'RE')

# Map each capture id (e.g. 20230316165710_001) to {band suffix: file path}.
relations = {}
for file_name in sorted(os.listdir(input_dir)):
    # splitext keeps dots inside the name intact and tests the real extension.
    stem, ext = os.path.splitext(file_name)
    if ext.lower() != '.tif':
        continue
    parts = stem.split('_')
    if len(parts) < 4 or parts[-1] not in band_order:
        print(f'Skipping {file_name}: name does not match <prefix>_<timestamp>_<sequence>_<band>.tif')
        continue
    unique_part = f'{parts[1]}_{parts[2]}'
    relations.setdefault(unique_part, {})[parts[-1]] = os.path.join(input_dir, file_name)

for key, bands in relations.items():
    # Each capture gets its own band dictionary, so a path can never leak over
    # from the previous capture. Incomplete captures are skipped, not composited.
    missing = [band for band in band_order if band not in bands]
    if missing:
        print(f'Skipping {key}: missing band(s) {", ".join(missing)}')
        continue
    # Pass the bands as a list so paths need no manual ';' joining or quoting.
    input_path = [bands[band] for band in band_order]
    output_path = os.path.join(output_dir, f'DJI_{key}.tif')
    arcpy.CompositeBands_management(input_path, output_path)
Assuming CompositeBands_management() works the way you showed, it should (hopefully) work.
You could utilize multiprocessing to help speed up process of creating 112+ rasters.
In a separate Python script named WorkerProcess.py (the name must match the `import WorkerProcess` line in the main script):
"""
Description: Python script used by the multiprocessing main script to create rasters.
Usage: worker method
"""
# -------------------------------------------------------------------------------
# IMPORTS
import arcpy
import os
def comp_band(tifs=None):
    """Composite one group of single-band rasters into one multi-band raster.

    Failures are reported through the returned dictionary rather than raised,
    so a caller collecting many results can inspect each one afterwards.

    Args:
        tifs: Dictionary with two keys: 'tifPaths', the list of input band
            paths in band order, and 'outName', the output raster path.

    Returns:
        A dictionary with 'Task' (base name of the output raster, or None
        when no job dictionary was supplied) and 'Error' (None on success,
        otherwise the exception that stopped the job).
    """
    result = {'Task': None, 'Error': None}
    try:
        # Validate inside the try so that bad input is reported through the
        # result dictionary like any other failure instead of raising.
        if tifs is None:
            raise ValueError("comp_band needs a {'tifPaths': [...], 'outName': ...} dictionary")
        result['Task'] = os.path.basename(tifs['outName'])
        band_paths = tifs['tifPaths']
        # A None/empty entry means a band file was never found for this group;
        # say so directly rather than letting the tool fail on it.
        if not band_paths or any(not path for path in band_paths):
            raise ValueError(f'missing band path in {band_paths}')
        arcpy.CompositeBands_management(band_paths, tifs['outName'])
    except Exception as ex:
        # Deliberately broad: one failed group is recorded, not propagated.
        result['Error'] = ex
    return result
Save it in the same folder as the main script below. Borrowing Madhi_ch's parsing script above, which is great, I modified a couple of lines to output the generated paths into a list of dictionaries {tifs, output name} and added the multiprocessing code:
import os
import arcpy
import multiprocessing as mp
import WorkerProcess
input_dir = r'C:\blah' # input tifs are here
output_dir = r'C:\blah\Stacked_bands' # composed tifs will be saved here
def create_tif_groups(in_dir=None, out_dir=None):
    """Group single-band TIFFs by capture and pair each group with an output path.

    File names are expected to look like
    ``<prefix>_<timestamp>_<sequence>_<band>.tif`` where ``<band>`` is one of
    G, NIR, R or RE. The timestamp and sequence parts together
    (e.g. ``20230316165710_001``) identify one capture.

    Files that do not follow the pattern, and captures that lack any of the
    four bands, are reported with a printed message and left out.

    Args:
        in_dir: Folder to scan. Defaults to the module-level ``input_dir``.
        out_dir: Folder the composites are meant for. Defaults to the
            module-level ``output_dir``.

    Returns:
        A list of dictionaries, one per complete capture, each with
        'tifPaths' (band paths ordered G, NIR, R, RE) and 'outName'
        (``DJI_<timestamp>_<sequence>.tif`` inside ``out_dir``).
    """
    in_dir = input_dir if in_dir is None else in_dir
    out_dir = output_dir if out_dir is None else out_dir

    # Band suffixes in the order they are stacked in the composite.
    band_order = ('G', 'NIR', 'R', 'RE')

    # capture id -> {band suffix: full path of that band's file}
    relations = {}
    for file_name in sorted(os.listdir(in_dir)):
        # splitext keeps dots inside the name intact and tests the real extension.
        stem, ext = os.path.splitext(file_name)
        if ext.lower() != '.tif':
            continue
        parts = stem.split('_')
        if len(parts) < 4 or parts[-1] not in band_order:
            print(f'Skipping {file_name}: name does not match <prefix>_<timestamp>_<sequence>_<band>.tif')
            continue
        key = f'{parts[1]}_{parts[2]}'
        # Join with the name as found on disk so the extension's case is kept.
        relations.setdefault(key, {})[parts[-1]] = os.path.join(in_dir, file_name)

    tif_groups = []
    for key, bands in relations.items():
        missing = [band for band in band_order if band not in bands]
        if missing:
            # An incomplete capture cannot be composited; skip it instead of
            # handing the worker a list with holes in it.
            print(f'Skipping {key}: missing band(s) {", ".join(missing)}')
            continue
        tif_groups.append({
            'tifPaths': [bands[band] for band in band_order],
            'outName': os.path.join(out_dir, f'DJI_{key}.tif'),
        })
    return tif_groups
def mp_tifs():
    """Composite every tif group in parallel, one pool job per group.

    Builds the job list with create_tif_groups(), submits each job to
    WorkerProcess.comp_band through a process pool, then prints one line for
    every job whose result carries an error.
    """
    # get the list of tif groups and output names from create_tif_groups
    tif_groups = create_tif_groups()
    if not tif_groups:
        print('No tif groups found; nothing to do.')
        return

    # Make sure the destination folder exists before any worker writes to it.
    os.makedirs(output_dir, exist_ok=True)

    # Leave one core free for other tasks (drop the "- 1" to use them all),
    # but never request fewer than one worker: cpu_count() - 1 is 0 on a
    # single-core machine and Pool rejects that. There is also no point in
    # starting more workers than there are jobs.
    cores = max(1, min(mp.cpu_count() - 1, len(tif_groups)))
    with mp.Pool(processes=cores) as pool:
        # Create a job for each tif group and pass it to the worker function.
        jobs = [pool.apply_async(WorkerProcess.comp_band, (group,)) for group in tif_groups]
        # Collect inside the with-block: leaving it terminates the pool.
        res = [job.get() for job in jobs]

    # Report every failed job together with the output it was meant to create.
    for r in res:
        if r['Error'] is not None:
            print(f'{r["Task"]} failed: {r["Error"]}')


if __name__ == '__main__':
    mp_tifs()
@Anonymous User
Beautiful! Thank you. Didn't know about the multiprocessing handling in Arcpy.
By the way, I just realized that the part where I assign the band paths is somewhat redundant: the path can be built once before the assignments, as follows:
for file_name in val:
    # Build the full path once per file; every branch below reuses it.
    tif_path = os.path.join(input_dir, file_name + '.tif')
    # Names carrying none of the four band suffixes are reported and skipped;
    # the file name is printed to make such cases easier to track down.
    if not file_name.endswith(('_G', '_NIR', '_R', '_RE')):
        print(f'Error! Should not land here!- {file_name}')
        continue
    # The band slots are fixed by hand in case the order matters.
    # The four suffixes are mutually exclusive, so the order of these checks
    # does not affect which slot a file lands in.
    if file_name.endswith('_RE'):
        band_4 = tif_path
    elif file_name.endswith('_R'):
        band_3 = tif_path
    elif file_name.endswith('_NIR'):
        band_2 = tif_path
    else:
        # Only '_G' is left once the guard and the branches above have run.
        band_1 = tif_path
Thanks I really appreciate your response! I did get the code to run, and my output was generated how I intended! Thanks again!!