class rglob:
    '''A recursive/regex enhanced glob
       adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm

       Iterate over an instance (C{for path in rglob(...)}) to get the full
       path of every directory entry whose name matches the pattern.
       The pattern is tested against every entry, so directories whose name
       matches are reported as well as files. Directories that cannot be
       listed are skipped.
    '''
    def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
        ''' @type    directory: C{str}
            @param   directory: Path to search
            @type    pattern: C{str}
            @param   pattern: Regular expression/wildcard pattern to match files against
            @type    regex: C{boolean}
            @param   regex: Use regular expression matching (if False, use fnmatch)
                            See U{http://docs.python.org/library/re.html}
            @type    regex_flags: C{int}
            @param   regex_flags: Flags to pass to the regular expression compiler.
                                  See U{http://docs.python.org/library/re.html}
            @type    recurse: C{boolean}
            @param   recurse: Recurse into the directory?
        '''
        self.stack = [directory]    # directories still waiting to be listed
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []             # entry names of the directory being scanned
        self.index = 0              # position of the next unread entry in self.files
    def __getitem__(self, index):
        ''' Return the full path of the next matching entry.

            @type    index: C{int}
            @param   index: Ignored. It is only accepted so that a C{for} loop
                            can drive the object through the sequence protocol.
            @rtype:  C{str}
            @return: Directory joined with the name of the next entry that matches.
            @raise   IndexError: When no directories are left to scan, which is
                                 what ends a C{for} loop over the instance.
        '''
        # Imported here, once per call, so the class works without any
        # module-level imports.
        import fnmatch
        import os
        import re
        while 1:
            if self.index >= len(self.files):
                # Current listing is used up: pop the next directory from the
                # stack. pop() on an empty stack raises IndexError, which is
                # the end-of-iteration signal.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Missing or unreadable directory: skip it, keep walking.
                    self.files = []
                self.index = 0
                continue
            name = self.files[self.index]
            self.index = self.index + 1
            fullname = os.path.join(self.directory, name)
            # Queue real sub-directories for later; symlinked ones are not followed.
            if self.recurse and os.path.isdir(fullname) and not os.path.islink(fullname):
                self.stack.append(fullname)
            # The pattern is applied to the bare entry name, not the full path.
            if self.regex:
                matched = re.search(self.pattern, name, self.regex_flags)
            else:
                matched = fnmatch.fnmatch(name, self.pattern)
            if matched:
                return fullname
import shutil
# Folder tree to scan, and the folder that receives the copies.
search_dir=r'C:\Project'
out_dir=r'C:\Workspace'
# NOTE: if out_dir is an existing directory, shutil.copy keeps only the base
# name, so same-named files from different sub-folders overwrite each other.
for jpg in rglob(search_dir,'*.jpg'):
    # A single parenthesised argument prints identically on Python 2 and 3.
    print('Copying: ' + jpg)
    shutil.copy(jpg,out_dir)
					
				
			
			
				
			
			
				
			
			
			
			
			
			
		import os
class DirectoryWalker:
    '''Callously stolen (with attribution!) from os-path-walk-example-3.py
    Copyright (c) 1995-2010 by Fredrik Lundh
    http://effbot.org/librarybook/os-path.htm

    Iterate over an instance (for path in DirectoryWalker(top)) to get the
    full path of every entry below the starting directory. Sub-directories
    are returned as entries too, and are then walked themselves.
    '''

    def __init__(self, directory):
        '''Start a walk rooted at directory (a path string).'''
        self.stack = [directory]    # directories still waiting to be listed
        self.files = []             # entry names of the directory being scanned
        self.index = 0              # position of the next unread entry in self.files

    def __getitem__(self, index):
        '''Return the full path of the next entry; index is ignored.

        Raises IndexError once every directory has been scanned, which is
        what ends a for loop over the instance. An error raised by
        os.listdir (for example on an unreadable directory) is not caught.
        '''
        # Local import so the class does not depend on module-level imports.
        import os
        while 1:
            try:
                name = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # pop next directory from stack; pop() on an empty stack
                # raises IndexError, which signals the end of the walk
                self.directory = self.stack.pop()
                self.files = os.listdir(self.directory)
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, name)
                # Queue real sub-directories; symlinked ones are not followed.
                if os.path.isdir(fullname) and not os.path.islink(fullname):
                    self.stack.append(fullname)
                return fullname
import shutil
# Folder tree to scan, and the folder that receives the copies.
search_dir = r'C:\Project'
out_dir = r'C:\Workspace'
# DirectoryWalker returns every path below search_dir (sub-directories
# included); filter() passes on only those for which x.endswith('.jpg')
# is true. The suffix test is case-sensitive.
for jpg in filter(lambda x: x.endswith('.jpg'), DirectoryWalker(search_dir)):
    print('Copying: ' + jpg)
    shutil.copy(jpg,out_dir)

You could use a recursive 'glob':

class rglob:
    '''A recursive/regex enhanced glob
       adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm
    '''
    def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
        ''' @type    directory: C{str}
            @param   directory: Path to search
            @type    pattern: C{type}
            @param   pattern: Regular expression/wildcard pattern to match files against
            @type    regex: C{boolean}
            @param   regex: Use regular expression matching (if False, use fnmatch)
                            See U{http://docs.python.org/library/re.html}
            @type    regex_flags: C{int}
            @param   regex_flags: Flags to pass to the regular expression compiler.
                                  See U{http://docs.python.org/library/re.html}
            @type    recurse: C{boolean}
            @param   recurse: Recurse into the directory?
        '''
        self.stack = [directory]
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []
        self.index = 0
    def __getitem__(self, index):
        import os
        while 1:
            try:
                file = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # pop next directory from stack
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                    print self.files
                    self.index = 0
                except:pass
            else:
                # got a filename
                fullname = os.path.join(self.directory, file)
                if os.path.isdir(fullname) and not os.path.islink(fullname) and self.recurse:
                    self.stack.append(fullname)
                if self.regex:
                    import re
                    if re.search(self.pattern,file,self.regex_flags):
                        return fullname
                else:
                    import fnmatch
                    if fnmatch.fnmatch(file, self.pattern):
                        return fullname
import shutil
search_dir=r'C:\Project'
out_dir=r'C:\Workspace'
for jpg in rglob(search_dir,'*.jpg'):
    print 'Copying: ' + jpg
    shutil.copy(jpg,out_dir)
That.... is a thing of beauty!!! Thank you so much for sharing that, Luke!
I just want to say that, in the interest of giving the poster a smaller bit of code to dissect, one could make a shorter version using a filter and the exact example you were inspired by on the OS example page:

import os
class DirectoryWalker:
    '''Callously stolen (with attribution!) from os-path-walk-example-3.py
    Copyright © 1995-2010 by Fredrik Lundh
    http://effbot.org/librarybook/os-path.htm'''

    def __init__(self, directory):
        self.stack = [directory]
        self.files = []
        self.index = 0
    def __getitem__(self, index):
        while 1:
            try:
                file = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # pop next directory from stack
                self.directory = self.stack.pop()
                self.files = os.listdir(self.directory)
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, file)
                if os.path.isdir(fullname) and not os.path.islink(fullname):
                    self.stack.append(fullname)
                return fullname
import shutil
search_dir = r'C:\Project'
out_dir = r'C:\Workspace'
# This filter takes all the results from DirectoryWalker and only iterates
# through those that match the condition 'file.endswith('.jpg')
for jpg in filter(lambda x: x.endswith('.jpg'), DirectoryWalker(search_dir)):
    print('Copying: ' + jpg)
    shutil.copy(jpg,out_dir)
Diana: If you need any help understanding either of these examples, feel free to ask.
Cheers,
Marc
def main1():
    '''Benchmark body: gather every path from DirectoryWalker ending in '.pdf'.

    The list is built and then discarded; nothing is returned. The function
    exists only so that it can be timed.
    '''
    pdf_paths = [path for path in DirectoryWalker(search_dir)
                 if path.endswith('.pdf')]
def main2():
    '''Benchmark body: gather every path rglob reports for the '*.pdf' pattern.

    The list is built and then discarded; nothing is returned. The function
    exists only so that it can be timed.
    '''
    pdf_paths = list(rglob(search_dir, '*.pdf'))
from timeit import Timer
# One Timer per approach; each setup string pulls the function under test
# out of the running script so the timed statement can call it.
t1 = Timer(stmt="main1()", setup="from __main__ import main1")
t2 = Timer(stmt="main2()", setup="from __main__ import main2")
# timeit() returns the total time, in seconds, for 100 executions.
directory_walker_seconds = t1.timeit(number=100)
print("DirectoryWalker: " + str(directory_walker_seconds))
print("rglob: "                  + str(t2.timeit(100)))

DirectoryWalker: 155.908681642
rglob:                 162.25917093
					
				
			
			
				
			
			
				
			
			
			
			
			
			
		search_dir=r'C:\Project'
out_dir=r'C:\Workspace'
import os
import shutil
# os.walk visits search_dir and every folder below it, giving the folder path
# plus the names of the sub-folders and files directly inside it.
for root, dirs, files in os.walk(search_dir):
    # print() is given one pre-built string each time so the output is the
    # same on Python 2 and Python 3.
    print('____________________________________')
    print('searching for files in ' + root)
    print('')
    for f in files:
        if f.endswith('.jpg'):
            infile = os.path.join(root, f)
            # The destination uses the bare file name only, so same-named
            # files from different folders end up at the same path.
            outfile = os.path.join(out_dir, f)
            print('copying ' + infile)
            shutil.copy(infile, outfile)