Answered (Assumed Answered)

Python shutil.copytree: Don't copy empty directories

Question asked by Playa on Feb 27, 2017
Latest reply on Feb 27, 2017 by Dan_Patterson
from fnmatch import fnmatch, filter
from os.path import isdir, join
from shutil import copytree

def include_patterns(*patterns):
    """Build an ``ignore`` callable for copytree() from patterns to keep.

    copytree() normally takes patterns describing what to skip; this factory
    inverts that. The glob-style ``patterns`` name the files that must be
    copied, and the returned callable reports every other file as ignored.
    Directories are never reported, so the walk still descends into all of
    them.

    The returned callable takes ``(path, names)`` - the directory being
    visited and the entries it contains - and returns the set of entry names
    to skip.
    """

    def _ignore_patterns(path, names):
        # Gather every entry matched by at least one keep-pattern.
        wanted = set()
        for pattern in patterns:
            wanted.update(filter(names, pattern))

        skipped = set()
        for entry in names:
            if entry in wanted:
                continue
            # Directories are kept so that their contents get visited too.
            if isdir(join(path, entry)):
                continue
            skipped.add(entry)
        return skipped
    return _ignore_patterns

# sample usage
# NOTE: src_directory and dst_directory are not defined in this snippet;
# substitute your own source and destination paths.
# Only files matching *.dwg or *.dxf are copied. Directories are never
# ignored, so directories without matching files are still created (empty).

copytree(src_directory, dst_directory,
         ignore=include_patterns('*.dwg', '*.dxf'))

I have found the following Python factory function on Stack Overflow. It builds the ignore argument for shutil.copytree from the inverse of the patterns you pass in: every file that does not match include_patterns("*.dwg", "*.dxf") is ignored, so only the CAD files are copied. The problem is that it still copies directories that contain no CAD files, which results in empty directories.

 

The link to the Stack Overflow post is: post

 

Solution:

I've found a very simple solution: copy the tree as before, then remove the empty directories afterwards (with some amendments to the script).

 

'''
Created on 27 Feb 2017

Extract CAD files from within all directories

@author: PeterW
'''

# import site-packages and modules
from fnmatch import filter
from os.path import isdir, join
from shutil import copytree, rmtree
from os import listdir, rmdir, walk
import errno

# set arguments
# input_folder: root of the directory tree to copy from.
# output_folder: destination root; copytree_files() below deletes it first
# if it already exists.
input_folder = r"H:\Tanzania"
output_folder = r"E:\CAD"


# create ignore patterns dynamically based on include patterns
def include_patterns(*patterns):
    """Return a copytree() ``ignore`` callable that keeps only ``patterns``.

    The callable receives the directory being copied and its entry names and
    returns the set of names to skip: everything that matches none of the
    glob-style patterns, except directories, which are always kept.
    """
    def _ignore_patterns(path, names):
        matched = set()
        for pattern in patterns:
            matched |= set(filter(names, pattern))
        # Whatever did not match is a candidate for skipping ...
        candidates = set(names) - matched
        # ... unless it is a directory, which must stay so it gets walked.
        return set(entry for entry in candidates if not isdir(join(path, entry)))
    return _ignore_patterns


# copy directory structure and files based on pattern
def copytree_files(input_folder, output_folder, patterns=("*.dwg", "*.dxf")):
    """Mirror input_folder into output_folder, copying only matching files.

    Any existing output_folder is deleted first so the copy starts from a
    clean destination. Directories are always recreated, even when they end
    up holding no matching files.

    Arguments:
        input_folder: root of the tree to copy from.
        output_folder: destination root; removed and recreated.
        patterns: glob-style patterns of the files to copy. Defaults to the
            CAD extensions that were previously hard-coded.

    Raises:
        ValueError: if output_folder is input_folder itself or one of its
            parent directories. Deleting the destination would then delete
            the source before anything was copied.
    """
    from os.path import normcase, realpath, sep

    src = normcase(realpath(input_folder))
    dst = normcase(realpath(output_folder))
    # rstrip() keeps the check correct when dst is a filesystem root.
    if src == dst or src.startswith(dst.rstrip(sep) + sep):
        raise ValueError(
            "output_folder {0!r} must not be or contain input_folder {1!r}".format(
                output_folder, input_folder))

    if isdir(output_folder):
        rmtree(output_folder)
    copytree(input_folder, output_folder, ignore=include_patterns(*patterns))

# Runs at module level: copy from input_folder into output_folder.
copytree_files(input_folder, output_folder)


# remove empty directories
def remove_empty_dirs(output_folder):
    """Delete every empty directory under output_folder, deepest first.

    output_folder itself is removed as well if it ends up empty. Directories
    that still hold files are left alone and reported on stdout. Any other
    failure to remove a directory is also reported rather than raised, so
    one bad directory does not stop the clean-up.

    Arguments:
        output_folder: root of the tree to prune.
    """
    # Bottom-up order lets a parent become empty (and removable) once its
    # empty children are gone. The paths are collected before any deletion.
    candidates = [entry[0] for entry in walk(output_folder, topdown=False)]
    for folder in candidates:
        try:
            rmdir(folder)
        except OSError as err:
            # Depending on the platform, a non-empty directory is reported
            # as either ENOTEMPTY or EEXIST.
            if err.errno in (errno.ENOTEMPTY, errno.EEXIST):
                print("Directory: {0} not empty".format(folder))
            else:
                print("Directory: {0} could not be removed: {1}".format(folder, err))

# Runs at module level: prune the empty directories under output_folder.
remove_empty_dirs(output_folder)

 

I also found another approach, based on an answer on Stack Overflow, that could work, but due to time constraints I wasn't able to get it working: How do I copy an entire directory of files into an existing directory using Python?

 

Mital Vora provided the following solution:

 

def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy src into dst; dst may already exist.

    Files are copied only when they are missing from the destination or the
    source is more than one second newer, so repeated runs act as an update.

    Arguments:
        src: directory to copy from.
        dst: directory to copy into; created if it does not exist.
        symlinks: if true, symbolic links in src are recreated as links in
            dst instead of copying what they point to. A link whose
            destination name already exists is left untouched.
        ignore: optional callable ``ignore(directory, names)`` returning the
            names in ``directory`` to skip, as for shutil.copytree().
    """
    if not os.path.exists(dst):
        os.makedirs(dst)
    names = os.listdir(src)
    # The callable is consulted once per directory.
    ignored = set(ignore(src, names)) if ignore is not None else set()
    for item in names:
        if item in ignored:
            continue
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if symlinks and os.path.islink(s):
            if not os.path.lexists(d):
                os.symlink(os.readlink(s), d)
        elif os.path.isdir(s):
            copytree(s, d, symlinks, ignore)
        elif not os.path.exists(d) or os.stat(s).st_mtime - os.stat(d).st_mtime > 1:
            shutil.copy2(s, d)

 

The idea of the following was to create a list of directories that contain CAD drawings thereby eliminating the need of removing empty directories afterwards:

 

Incomplete Solution:

'''
Created on 27 Feb 2017

Extract CAD files from within all directories

@author: PeterW
'''

# import site-packages and modules
from fnmatch import fnmatch, filter
import os
from os.path import isdir, join
from shutil import copytree, copy2

# set arguments
# input_folder: root of the tree that directories_list() searches.
# output_folder: presumably the destination root for the copied CAD files;
# confirm against the copytree_files() call.
input_folder = r"H:\Tanzania\fromDropbox\Infrastructure"
output_folder = r"E:\CAD"

def directories_list(input_folder, patterns=("*.dwg", "*.dxf")):
    """Return the directories under input_folder that directly hold a match.

    File names are tested with fnmatch, the same matching include_patterns()
    uses, so both functions agree on what counts as a match (including
    case-insensitive matching on Windows).

    Arguments:
        input_folder: root of the tree to search; it is included in the
            result if it holds a matching file itself.
        patterns: glob-style file name patterns. Defaults to the CAD
            extensions that were previously hard-coded.

    Returns:
        A list of absolute directory paths in os.walk() order.
    """
    # Imported under an alias so the builtin filter() can never be picked up
    # by mistake.
    from fnmatch import filter as fnmatch_filter

    dirs_list = []
    for root, _dirs, files in os.walk(input_folder):
        if any(fnmatch_filter(files, pattern) for pattern in patterns):
            dirs_list.append(os.path.abspath(root))
    return dirs_list

# Runs at module level: collect the directories under input_folder that
# directly contain CAD files.
dirs_list = directories_list(input_folder)

# function
def include_patterns(*patterns):
    """Create a copytree() ``ignore`` function from the patterns to keep.

    The returned function is called with a directory path and the names it
    contains. It returns the set of names that are neither matched by one of
    the glob-style patterns nor directories.
    """
    def _ignore_patterns(path, names):
        keep = set()
        for pattern in patterns:
            keep.update(filter(names, pattern))
        # A name survives if it matched a pattern or is a directory;
        # everything else is ignored.
        return set(
            name for name in names
            if not (name in keep or isdir(join(path, name)))
        )
    return _ignore_patterns


# Copy only the matching files out of each listed directory. No recursive
# copytree() is needed: every directory that holds matching files is expected
# to be in dirs_list itself, so existing destinations and empty directories
# are no longer a problem.
def copytree_files(dirs_list, output_folder, base_folder=None,
                   patterns=("*.dwg", "*.dxf")):
    """Copy matching files from each directory in dirs_list to output_folder.

    Each directory's path relative to base_folder is recreated under
    output_folder, and a destination directory is created only when there is
    a file to put in it. A file is copied when it is missing from the
    destination or the source is more than one second newer, so the function
    can be re-run on an existing output_folder.

    Arguments:
        dirs_list: directories to copy files from (not searched recursively).
        output_folder: destination root; created if it does not exist.
        base_folder: directory the source paths are made relative to.
            Defaults to the deepest directory common to all of dirs_list.
        patterns: glob-style patterns selecting the files to copy.

    Raises:
        ValueError: if a directory in dirs_list lies outside base_folder,
            because its files would be written outside output_folder.
    """
    # Imported under an alias so the builtin filter() can never be picked up
    # by mistake.
    from fnmatch import filter as fnmatch_filter

    sources = [os.path.abspath(folder) for folder in dirs_list]
    if base_folder is None:
        # commonprefix() compares characters, not path components. With a
        # separator appended to each path, dirname() of the prefix is a real
        # shared directory.
        base_folder = os.path.dirname(
            os.path.commonprefix([folder + os.sep for folder in sources]))
    else:
        base_folder = os.path.abspath(base_folder)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for folder in sources:
        relative = os.path.relpath(folder, base_folder)
        if relative == os.pardir or relative.startswith(os.pardir + os.sep):
            raise ValueError(
                "{0!r} is not inside base_folder {1!r}".format(folder, base_folder))
        target = os.path.normpath(os.path.join(output_folder, relative))

        names = os.listdir(folder)
        keep = set(name for pattern in patterns
                   for name in fnmatch_filter(names, pattern))
        for item in sorted(keep):
            s = os.path.join(folder, item)
            if not os.path.isfile(s):
                continue
            if not os.path.isdir(target):
                os.makedirs(target)
            d = os.path.join(target, item)
            if not os.path.exists(d) or os.stat(s).st_mtime - os.stat(d).st_mtime > 1:
                copy2(s, d)

# The destination is output_folder; passing input_folder here would write
# the copies back into the source tree.
copytree_files(dirs_list, output_folder)

Outcomes