import fnmatch
import os
import re
import shutil


class rglob:
    '''A recursive/regex enhanced glob.

    Adapted from os-path-walk-example-3.py -
    http://effbot.org/librarybook/os-path.htm

    Instances are meant to be consumed once by a C{for} loop. Iteration is
    driven through C{__getitem__}, which yields the full path of each
    directory entry (file or directory) whose name matches the pattern.
    '''

    def __init__(self, directory, pattern="*", regex=False, regex_flags=0,
                 recurse=True):
        '''
        @type    directory: C{str}
        @param   directory: Path to search
        @type    pattern: C{str}
        @param   pattern: Regular expression/wildcard pattern to match
                          file names against
        @type    regex: C{boolean}
        @param   regex: Use regular expression matching (if False, use
                        fnmatch).
                        See U{http://docs.python.org/library/re.html}
        @type    regex_flags: C{int}
        @param   regex_flags: Flags to pass to the regular expression
                              compiler.
                              See U{http://docs.python.org/library/re.html}
        @type    recurse: C{boolean}
        @param   recurse: Recurse into the directory?
        '''
        self.stack = [directory]   # directories still waiting to be listed
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []            # listing of the directory being scanned
        self.index = 0             # position of the next entry in self.files

    def __getitem__(self, index):
        '''Return the full path of the next entry matching the pattern.

        @type    index: C{int}
        @param   index: Ignored. It is only accepted so that a C{for} loop
                        can drive the walk through the sequence protocol.
        @rtype:  C{str}
        @return: Directory joined with the matching entry name
        @raise IndexError: When no directories are left to scan, which is
                           what ends the C{for} loop.
        '''
        while True:
            try:
                name = self.files[self.index]
                self.index += 1
            except IndexError:
                # Current listing is exhausted: pop the next directory from
                # the stack. Popping an empty stack raises IndexError, which
                # terminates the iteration.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Unreadable or vanished directory: skip it and carry on
                    # with the remaining ones instead of aborting the walk.
                    self.files = []
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, name)
                if (self.recurse and os.path.isdir(fullname)
                        and not os.path.islink(fullname)):
                    # Real sub-directories are queued; symlinks are not
                    # followed.
                    self.stack.append(fullname)
                if self.regex:
                    matched = re.search(self.pattern, name, self.regex_flags)
                else:
                    matched = fnmatch.fnmatch(name, self.pattern)
                if matched:
                    return fullname


search_dir = r'C:\Project'
out_dir = r'C:\Workspace'

# Copy every *.jpg found anywhere below search_dir into out_dir.
for jpg in rglob(search_dir, '*.jpg'):
    print('Copying: ' + jpg)
    shutil.copy(jpg, out_dir)
import os
import shutil


class DirectoryWalker:
    '''Callously stolen (with attribution!) from os-path-walk-example-3.py
    Copyright © 1995-2010 by Fredrik Lundh
    http://effbot.org/librarybook/os-path.htm

    Walks a directory tree and yields the full path of every entry (files
    and directories alike). Instances are meant to be consumed once by a
    for loop, which drives the walk through __getitem__.
    '''

    def __init__(self, directory):
        self.stack = [directory]   # directories still waiting to be listed
        self.files = []            # listing of the directory being scanned
        self.index = 0             # position of the next entry in self.files

    def __getitem__(self, index):
        '''Return the full path of the next directory entry.

        The index argument is ignored; it is only accepted so that a for
        loop can drive the walk through the sequence protocol. IndexError
        is raised once no directories are left, which ends the loop.
        '''
        while True:
            try:
                name = self.files[self.index]
                self.index += 1
            except IndexError:
                # Current listing is exhausted: pop the next directory from
                # the stack. Popping an empty stack raises IndexError, which
                # terminates the iteration.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Unreadable or vanished directory: skip it rather than
                    # aborting the whole walk half-way through.
                    self.files = []
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, name)
                if os.path.isdir(fullname) and not os.path.islink(fullname):
                    # Real sub-directories are queued; symlinks are not
                    # followed.
                    self.stack.append(fullname)
                return fullname


search_dir = r'C:\Project'
out_dir = r'C:\Workspace'

# This filter takes all the results from DirectoryWalker and only iterates
# through those that match the condition 'file.endswith('.jpg')
for jpg in filter(lambda x: x.endswith('.jpg'), DirectoryWalker(search_dir)):
    print('Copying: ' + jpg)
    shutil.copy(jpg, out_dir)
# You could use a recursive 'glob':
import fnmatch
import os
import re
import shutil


class rglob:
    '''A recursive/regex enhanced glob.

    Adapted from os-path-walk-example-3.py -
    http://effbot.org/librarybook/os-path.htm

    Instances are meant to be consumed once by a C{for} loop. Iteration is
    driven through C{__getitem__}, which yields the full path of each
    directory entry (file or directory) whose name matches the pattern.
    '''

    def __init__(self, directory, pattern="*", regex=False, regex_flags=0,
                 recurse=True):
        '''
        @type    directory: C{str}
        @param   directory: Path to search
        @type    pattern: C{str}
        @param   pattern: Regular expression/wildcard pattern to match
                          file names against
        @type    regex: C{boolean}
        @param   regex: Use regular expression matching (if False, use
                        fnmatch).
                        See U{http://docs.python.org/library/re.html}
        @type    regex_flags: C{int}
        @param   regex_flags: Flags to pass to the regular expression
                              compiler.
                              See U{http://docs.python.org/library/re.html}
        @type    recurse: C{boolean}
        @param   recurse: Recurse into the directory?
        '''
        self.stack = [directory]   # directories still waiting to be listed
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []            # listing of the directory being scanned
        self.index = 0             # position of the next entry in self.files

    def __getitem__(self, index):
        '''Return the full path of the next entry matching the pattern.

        @type    index: C{int}
        @param   index: Ignored. It is only accepted so that a C{for} loop
                        can drive the walk through the sequence protocol.
        @rtype:  C{str}
        @return: Directory joined with the matching entry name
        @raise IndexError: When no directories are left to scan, which is
                           what ends the C{for} loop.
        '''
        while True:
            try:
                name = self.files[self.index]
                self.index += 1
            except IndexError:
                # Current listing is exhausted: pop the next directory from
                # the stack. Popping an empty stack raises IndexError, which
                # terminates the iteration.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Unreadable or vanished directory: skip it and carry on
                    # with the remaining ones instead of aborting the walk.
                    self.files = []
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, name)
                if (self.recurse and os.path.isdir(fullname)
                        and not os.path.islink(fullname)):
                    # Real sub-directories are queued; symlinks are not
                    # followed.
                    self.stack.append(fullname)
                if self.regex:
                    matched = re.search(self.pattern, name, self.regex_flags)
                else:
                    matched = fnmatch.fnmatch(name, self.pattern)
                if matched:
                    return fullname


search_dir = r'C:\Project'
out_dir = r'C:\Workspace'

# Copy every *.jpg found anywhere below search_dir into out_dir.
for jpg in rglob(search_dir, '*.jpg'):
    print('Copying: ' + jpg)
    shutil.copy(jpg, out_dir)
That.... is a thing of beauty!!! Thank you so much for sharing that, Luke!
# I just want to say that in the interest of giving the poster a smaller bit
# of code to dissect, one could make a smaller version using a filter and the
# exact example you were inspired by on the OS example page.
import os
import shutil


class DirectoryWalker:
    '''Callously stolen (with attribution!) from os-path-walk-example-3.py
    Copyright © 1995-2010 by Fredrik Lundh
    http://effbot.org/librarybook/os-path.htm

    Walks a directory tree and yields the full path of every entry (files
    and directories alike). Instances are meant to be consumed once by a
    for loop, which drives the walk through __getitem__.
    '''

    def __init__(self, directory):
        self.stack = [directory]   # directories still waiting to be listed
        self.files = []            # listing of the directory being scanned
        self.index = 0             # position of the next entry in self.files

    def __getitem__(self, index):
        '''Return the full path of the next directory entry.

        The index argument is ignored; it is only accepted so that a for
        loop can drive the walk through the sequence protocol. IndexError
        is raised once no directories are left, which ends the loop.
        '''
        while True:
            try:
                name = self.files[self.index]
                self.index += 1
            except IndexError:
                # Current listing is exhausted: pop the next directory from
                # the stack. Popping an empty stack raises IndexError, which
                # terminates the iteration.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Unreadable or vanished directory: skip it rather than
                    # aborting the whole walk half-way through.
                    self.files = []
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, name)
                if os.path.isdir(fullname) and not os.path.islink(fullname):
                    # Real sub-directories are queued; symlinks are not
                    # followed.
                    self.stack.append(fullname)
                return fullname


search_dir = r'C:\Project'
out_dir = r'C:\Workspace'

# This filter takes all the results from DirectoryWalker and only iterates
# through those that match the condition 'file.endswith('.jpg')
for jpg in filter(lambda x: x.endswith('.jpg'), DirectoryWalker(search_dir)):
    print('Copying: ' + jpg)
    shutil.copy(jpg, out_dir)
Diana: If you need any help understanding either of these examples, feel free to ask.
Cheers,
Marc
def main1():
    '''Benchmark body: collect every .pdf path via DirectoryWalker + filter.

    The collected list is discarded; only the time spent walking matters.
    '''
    pdf_paths = []
    pdf_paths.extend(
        filter(lambda path: path.endswith('.pdf'), DirectoryWalker(search_dir))
    )


def main2():
    '''Benchmark body: collect every .pdf path via rglob's own matching.

    The collected list is discarded; only the time spent walking matters.
    '''
    pdf_paths = []
    pdf_paths.extend(rglob(search_dir, '*.pdf'))


from timeit import Timer

# Each timer pulls its function from the running script, so this snippet has
# to be executed as __main__ alongside DirectoryWalker, rglob and search_dir.
t1 = Timer("main1()", "from __main__ import main1")
t2 = Timer("main2()", "from __main__ import main2")

# 100 complete walks of search_dir per implementation.
print("DirectoryWalker: " + str(t1.timeit(100)))
print("rglob: " + str(t2.timeit(100)))
DirectoryWalker: 155.908681642
rglob: 162.25917093
import os
import shutil

search_dir = r'C:\Project'
out_dir = r'C:\Workspace'

# Walk the whole tree below search_dir and copy every *.jpg into out_dir.
# The print calls take a single string so that the output is the same on
# Python 2 and Python 3.
for root, dirs, files in os.walk(search_dir):
    print('____________________________________')
    print('searching for files in ' + root)
    print('')
    for f in files:
        if f.endswith('.jpg'):
            infile = os.path.join(root, f)
            # All copies land directly in out_dir under their base name.
            outfile = os.path.join(out_dir, f)
            print('copying ' + infile)
            shutil.copy(infile, outfile)