class rglob:
    '''A recursive/regex enhanced glob

       adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm

       Instances are consumed through the legacy sequence protocol: a C{for}
       loop calls C{__getitem__} repeatedly, and the C{IndexError} raised when
       the directory stack is empty ends the loop.
    '''
    def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
        ''' @type    directory: C{str}
            @param   directory: Path to search
            @type    pattern: C{str}
            @param   pattern: Regular expression/wildcard pattern to match files against
            @type    regex: C{boolean}
            @param   regex: Use regular expression matching (if False, use fnmatch)
                            See U{http://docs.python.org/library/re.html}
            @type    regex_flags: C{int}
            @param   regex_flags: Flags to pass to the regular expression compiler.
                                  See U{http://docs.python.org/library/re.html}
            @type    recurse: C{boolean}
            @param   recurse: Recurse into the directory?
        '''
        self.stack = [directory]        # directories still waiting to be listed
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []                 # entry names of the directory being scanned
        self.index = 0                  # position of the next entry in self.files

    def __getitem__(self, index):
        '''Return the full path of the next entry whose name matches the pattern.

            @type    index: C{int}
            @param   index: Ignored. It is only accepted so that the instance
                            satisfies the sequence protocol used by C{for}
                            loops; results always come in traversal order.
            @rtype:  C{str}
            @return: The containing directory joined with the matching name.
            @raise IndexError: When no directories are left to scan.
        '''
        import fnmatch
        import os
        import re
        while True:
            try:
                name = self.files[self.index]
            except IndexError:
                # Current listing is exhausted. pop() raises IndexError on an
                # empty stack, which is what terminates the iteration.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Unreadable or vanished directory: skip it, but let
                    # unrelated errors (e.g. KeyboardInterrupt) propagate.
                    self.files = []
                self.index = 0
                continue
            self.index += 1
            fullname = os.path.join(self.directory, name)
            # Queue real sub-directories; symlinks are not followed.
            if self.recurse and os.path.isdir(fullname) and not os.path.islink(fullname):
                self.stack.append(fullname)
            # Only the bare entry name is matched, never the full path.
            if self.regex:
                matched = re.search(self.pattern, name, self.regex_flags)
            else:
                matched = fnmatch.fnmatch(name, self.pattern)
            if matched:
                return fullname
import shutil
# Copy every entry named *.jpg found anywhere below search_dir into out_dir.
search_dir = r'C:\Project'
out_dir = r'C:\Workspace'
for jpg in rglob(search_dir, '*.jpg'):
    # A single-argument print() call behaves the same on Python 2 and 3.
    print('Copying: ' + jpg)
    # NOTE: the destination is a directory, so the copy keeps the source's
    # base name; same-named files from different folders overwrite each other.
    shutil.copy(jpg, out_dir)
import os
class DirectoryWalker:
    '''Walk a directory tree, handing back one path per access.

       Callously stolen (with attribution!) from os-path-walk-example-3.py
       Copyright © 1995-2010 by Fredrik Lundh
       http://effbot.org/librarybook/os-path.htm

       Iteration relies on the sequence protocol: the IndexError raised by
       popping the empty directory stack is what stops a for loop.
    '''

    def __init__(self, directory):
        self.stack = [directory]    # directories not yet listed
        self.files = []             # entry names of the current directory
        self.index = 0              # next position to read in self.files

    def __getitem__(self, index):
        '''Return the next path (file or directory); index is ignored.'''
        while True:
            if self.index >= len(self.files):
                # Listing exhausted: move on to the next pending directory.
                # pop() raises IndexError once nothing is left.
                self.directory = self.stack.pop()
                self.files = os.listdir(self.directory)
                self.index = 0
                continue
            entry = self.files[self.index]
            self.index += 1
            path = os.path.join(self.directory, entry)
            is_real_dir = os.path.isdir(path) and not os.path.islink(path)
            if is_real_dir:
                # Remember sub-directories so they get listed later.
                self.stack.append(path)
            return path
import shutil
search_dir = r'C:\Project'
out_dir = r'C:\Workspace'

# Keep only the walker results whose path ends in '.jpg', then copy each
# of them into out_dir.
jpg_paths = filter(lambda path: path.endswith('.jpg'),
                   DirectoryWalker(search_dir))
for jpg in jpg_paths:
    print('Copying: ' + jpg)
    shutil.copy(jpg, out_dir)
You could use a recursive 'glob':
class rglob:
    '''A recursive/regex enhanced glob

       adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm

       Instances are consumed through the legacy sequence protocol: a C{for}
       loop calls C{__getitem__} repeatedly, and the C{IndexError} raised when
       the directory stack is empty ends the loop.
    '''
    def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
        ''' @type    directory: C{str}
            @param   directory: Path to search
            @type    pattern: C{str}
            @param   pattern: Regular expression/wildcard pattern to match files against
            @type    regex: C{boolean}
            @param   regex: Use regular expression matching (if False, use fnmatch)
                            See U{http://docs.python.org/library/re.html}
            @type    regex_flags: C{int}
            @param   regex_flags: Flags to pass to the regular expression compiler.
                                  See U{http://docs.python.org/library/re.html}
            @type    recurse: C{boolean}
            @param   recurse: Recurse into the directory?
        '''
        self.stack = [directory]        # directories still waiting to be listed
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []                 # entry names of the directory being scanned
        self.index = 0                  # position of the next entry in self.files

    def __getitem__(self, index):
        '''Return the full path of the next entry whose name matches the pattern.

            @type    index: C{int}
            @param   index: Ignored. It is only accepted so that the instance
                            satisfies the sequence protocol used by C{for}
                            loops; results always come in traversal order.
            @rtype:  C{str}
            @return: The containing directory joined with the matching name.
            @raise IndexError: When no directories are left to scan.
        '''
        import fnmatch
        import os
        import re
        while True:
            try:
                name = self.files[self.index]
            except IndexError:
                # Current listing is exhausted. pop() raises IndexError on an
                # empty stack, which is what terminates the iteration.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Unreadable or vanished directory: skip it, but let
                    # unrelated errors (e.g. KeyboardInterrupt) propagate.
                    self.files = []
                self.index = 0
                continue
            self.index += 1
            fullname = os.path.join(self.directory, name)
            # Queue real sub-directories; symlinks are not followed.
            if self.recurse and os.path.isdir(fullname) and not os.path.islink(fullname):
                self.stack.append(fullname)
            # Only the bare entry name is matched, never the full path.
            if self.regex:
                matched = re.search(self.pattern, name, self.regex_flags)
            else:
                matched = fnmatch.fnmatch(name, self.pattern)
            if matched:
                return fullname
import shutil
# Copy every entry named *.jpg found anywhere below search_dir into out_dir.
search_dir = r'C:\Project'
out_dir = r'C:\Workspace'
for jpg in rglob(search_dir, '*.jpg'):
    # A single-argument print() call behaves the same on Python 2 and 3.
    print('Copying: ' + jpg)
    # NOTE: the destination is a directory, so the copy keeps the source's
    # base name; same-named files from different folders overwrite each other.
    shutil.copy(jpg, out_dir)
That.... is a thing of beauty!!! Thank you so much for sharing that, Luke!
I just want to say that, in the interest of giving the poster a smaller bit of code to dissect, one could make a smaller version using a filter and the exact example you were inspired by on the OS example page.
import os
class DirectoryWalker:
    '''Walk a directory tree, handing back one path per access.

       Callously stolen (with attribution!) from os-path-walk-example-3.py
       Copyright © 1995-2010 by Fredrik Lundh
       http://effbot.org/librarybook/os-path.htm

       Iteration relies on the sequence protocol: the IndexError raised by
       popping the empty directory stack is what stops a for loop.
    '''

    def __init__(self, directory):
        self.stack = [directory]    # directories not yet listed
        self.files = []             # entry names of the current directory
        self.index = 0              # next position to read in self.files

    def __getitem__(self, index):
        '''Return the next path (file or directory); index is ignored.'''
        while True:
            if self.index >= len(self.files):
                # Listing exhausted: move on to the next pending directory.
                # pop() raises IndexError once nothing is left.
                self.directory = self.stack.pop()
                self.files = os.listdir(self.directory)
                self.index = 0
                continue
            entry = self.files[self.index]
            self.index += 1
            path = os.path.join(self.directory, entry)
            is_real_dir = os.path.isdir(path) and not os.path.islink(path)
            if is_real_dir:
                # Remember sub-directories so they get listed later.
                self.stack.append(path)
            return path
import shutil
search_dir = r'C:\Project'
out_dir = r'C:\Workspace'

# Keep only the walker results whose path ends in '.jpg', then copy each
# of them into out_dir.
jpg_paths = filter(lambda path: path.endswith('.jpg'),
                   DirectoryWalker(search_dir))
for jpg in jpg_paths:
    print('Copying: ' + jpg)
    shutil.copy(jpg, out_dir)
Diana: If you need any help understanding either of these examples, feel free to ask.
Cheers,
Marc
def main1():
    """Benchmark body: collect every '.pdf' path produced by DirectoryWalker.

    The collected list is discarded; only the traversal cost matters.
    """
    def is_pdf(path):
        return path.endswith('.pdf')

    matches = list(filter(is_pdf, DirectoryWalker(search_dir)))
def main2():
    """Benchmark body: collect every '*.pdf' path produced by rglob.

    The collected list is discarded; only the traversal cost matters.
    """
    matches = [path for path in rglob(search_dir, '*.pdf')]
from timeit import Timer
# Time 100 runs of each traversal strategy and report the totals in seconds.
walker_timer = Timer("main1()", "from __main__ import main1")
rglob_timer = Timer("main2()", "from __main__ import main2")

walker_seconds = walker_timer.timeit(100)
print("DirectoryWalker: " + str(walker_seconds))

rglob_seconds = rglob_timer.timeit(100)
print("rglob: " + str(rglob_seconds))
DirectoryWalker: 155.908681642
rglob: 162.25917093
# Copy every file ending in '.jpg' found anywhere below search_dir into
# out_dir, reporting each directory visited and each file copied.
import os
import shutil

search_dir = r'C:\Project'
out_dir = r'C:\Workspace'

for root, _dirs, files in os.walk(search_dir):
    # Single-argument print() calls produce the same output on Python 2 and 3.
    print('____________________________________')
    print('searching for files in ' + root)
    print('')
    for f in files:
        if f.endswith('.jpg'):
            infile = os.path.join(root, f)
            # NOTE: only the base name is kept, so same-named files from
            # different folders overwrite each other in out_dir.
            outfile = os.path.join(out_dir, f)
            print('copying ' + infile)
            shutil.copy(infile, outfile)