How to Copy Directory Trees in Python
Copying entire folder structures (including subdirectories, files, and metadata) is a common task for backups, deployments, and file synchronization. Python's shutil module provides robust functions for recursive directory copying.
Copy Entire Directory with shutil.copytree()
The copytree() function recursively copies a directory and all its contents.
import shutil
# Duplicate the whole ./project_v1 tree under a new name. The target
# must not exist yet, otherwise copytree raises FileExistsError.
source, destination = "./project_v1", "./project_backup"
shutil.copytree(src=source, dst=destination)
print(f"Copied {source} to {destination}")
By default, copytree() requires the destination directory to not exist. If it already exists, a FileExistsError is raised.
Copy into Existing Directory with dirs_exist_ok
Since Python 3.8, the dirs_exist_ok parameter allows copying into existing directories, merging contents.
import shutil
# Overlay ./updates onto the existing ./project tree; files present in
# both trees are replaced by the version from ./updates.
source, destination = "./updates", "./project"
shutil.copytree(src=source, dst=destination, dirs_exist_ok=True)
print("Files merged successfully")
Behavior with dirs_exist_ok=True
| Scenario | Result |
|---|---|
| File exists in destination | Overwritten with source file |
| File only in source | Copied to destination |
| File only in destination | Left unchanged |
| Subdirectory exists | Merged recursively |
Filter Files with ignore Patterns
Exclude specific files or directories during copying using ignore_patterns().
import shutil
source = "./project"
destination = "./project_clean"

# Build the filter once: any entry whose name matches one of these
# glob-style patterns is left out of the copy.
skip_unwanted = shutil.ignore_patterns(
    '*.pyc',
    '__pycache__',
    '*.tmp',
    '.git',
    'node_modules',
)

shutil.copytree(source, destination, ignore=skip_unwanted)
print("Copied without excluded patterns")
Create Custom Ignore Functions
For complex filtering logic, define a custom ignore function.
import shutil
import os
def ignore_large_files(directory, files, max_size_mb=10):
    """Return the names in *files* that are regular files above a size limit.

    Matches the callback signature used by ``shutil.copytree(ignore=...)``:
    it receives the directory being visited and the names inside it, and
    returns the names to skip. Entries that are not regular files (for
    example subdirectories) are never ignored.

    Args:
        directory: Path of the directory currently being copied.
        files: Names of the entries inside ``directory``.
        max_size_mb: Size limit in units of 1024 * 1024 bytes; files
            strictly larger than this are ignored. Defaults to 10. To use
            a different limit with copytree, bind it first, e.g.
            ``functools.partial(ignore_large_files, max_size_mb=50)``.

    Returns:
        A list of names from ``files`` to exclude from the copy.
    """
    # Compare in bytes so the limit is converted once, not per file.
    limit_bytes = max_size_mb * 1024 * 1024
    ignored = []
    for name in files:
        filepath = os.path.join(directory, name)
        if os.path.isfile(filepath) and os.path.getsize(filepath) > limit_bytes:
            ignored.append(name)
    return ignored
# Copy ./source to ./destination, letting ignore_large_files choose which
# entries of each visited directory are skipped.
shutil.copytree("./source", "./destination", ignore=ignore_large_files)
Combine Multiple Ignore Conditions
import shutil
import os
def custom_ignore(directory, files):
    """Pick the entries copytree should skip in one directory.

    A name is skipped when it starts with a dot, ends with ``.tmp`` or
    ``.bak``, or is exactly ``__pycache__`` or ``node_modules``. The
    ``directory`` argument is part of the copytree callback signature but
    is not consulted here.

    Returns:
        A set of the names from ``files`` to leave out of the copy.
    """
    temp_suffixes = ('.tmp', '.bak')
    skipped_dirs = ['__pycache__', 'node_modules']

    def should_skip(name):
        return (
            name.startswith('.')
            or name.endswith(temp_suffixes)
            or name in skipped_dirs
        )

    return {name for name in files if should_skip(name)}
# Copy the tree, asking custom_ignore which names to skip in each directory.
shutil.copytree(
    src="./source",
    dst="./destination",
    ignore=custom_ignore,
)
Copy Individual Files with copy2()
For granular control or custom copying logic, use copy2(), which preserves file metadata.
import shutil
import os
source_dir = "./documents"
dest_dir = "./backup"

# Ensure destination exists
os.makedirs(dest_dir, exist_ok=True)

# Copy only the .txt files that sit directly in source_dir (no recursion).
for filename in os.listdir(source_dir):
    source_path = os.path.join(source_dir, filename)
    # os.listdir() also returns subdirectories, and copy2() cannot copy a
    # directory, so skip anything that is not a regular file.
    if not filename.endswith(".txt") or not os.path.isfile(source_path):
        continue
    dest_path = os.path.join(dest_dir, filename)
    # copy2 preserves timestamps and metadata
    shutil.copy2(source_path, dest_path)
    print(f"Copied: {filename}")
Difference Between copy() and copy2()
import shutil
import os
source_file = "./data.txt"

# copy() duplicates the file's content and permission bits only.
shutil.copy(source_file, "./copy_basic.txt")
# copy2() does the same and additionally carries over the timestamps.
shutil.copy2(source_file, "./copy_full.txt")

# Stat the original and both copies so their modification times can be
# compared side by side.
original_stat, copy_stat, copy2_stat = (
    os.stat(path)
    for path in (source_file, "./copy_basic.txt", "./copy_full.txt")
)
print(f"Original mtime: {original_stat.st_mtime}")
print(f"copy() mtime: {copy_stat.st_mtime}")  # Different
print(f"copy2() mtime: {copy2_stat.st_mtime}")  # Same as original
| Function | Content | Permissions | Timestamps |
|---|---|---|---|
| copy() | ✅ | ✅ | ❌ |
| copy2() | ✅ | ✅ | ✅ |
| copyfile() | ✅ | ❌ | ❌ |
Handle Errors During Copying
Use error handling to manage permission issues or missing files.
import shutil
source, destination = "./protected_folder", "./backup"

# Handler order matters: FileExistsError, PermissionError and shutil.Error
# are all OSError subclasses, so the generic OSError clause has to be last.
try:
    shutil.copytree(src=source, dst=destination)
    print("Copy completed successfully")
except FileExistsError:
    print(f"Destination {destination} already exists")
except PermissionError:
    print("Permission denied - check file access rights")
except shutil.Error as e:
    print(f"Copy error occurred: {e}")
except OSError as e:
    print(f"OS error: {e}")
Continue Copying Despite Errors
import shutil
def copy_with_errors(src, dst):
    """Copy a directory tree and report per-entry failures instead of raising.

    ``shutil.copytree()`` keeps going when an individual entry cannot be
    copied and raises one ``shutil.Error`` at the end; its first argument
    is a list of ``(source, destination, reason)`` tuples. That exception
    is caught here and flattened into ``(path, error)`` pairs.

    Args:
        src: Directory to copy from.
        dst: Directory to copy into. It may already exist, in which case
            the trees are merged (``dirs_exist_ok=True``).

    Returns:
        A list of ``(source_path, reason)`` tuples, one per entry that
        could not be copied; empty when everything was copied.

    Raises:
        OSError: For failures copytree does not collect into
            ``shutil.Error`` (for example a missing ``src``); only
            ``shutil.Error`` is handled here.
    """
    errors = []
    try:
        shutil.copytree(src, dst, dirs_exist_ok=True)
    except shutil.Error as e:
        # Drop the destination path so every item unpacks as (path, error).
        errors.extend(
            (failed_src, reason) for failed_src, _failed_dst, reason in e.args[0]
        )
    return errors
# Run the copy, then list every entry that could not be copied.
errors = copy_with_errors(src="./source", dst="./destination")
if errors:
    print(f"Completed with {len(errors)} errors:")
# An empty list simply skips the loop, so it needs no guard of its own.
for path, error in errors:
    print(f" {path}: {error}")
Copy with Progress Tracking
For large directories, implement progress tracking with a custom copy function.
import shutil
import os
def copytree_with_progress(src, dst):
    """Copy a directory tree, printing a running file count.

    The files under ``src`` are counted up front with ``os.walk`` so the
    progress line can show ``copied/total``. Each file is then copied with
    ``shutil.copy2`` through copytree's ``copy_function`` hook.

    Args:
        src: Directory to copy from.
        dst: Directory to copy into; copytree is called with its default
            settings, so it must not exist yet.
    """
    # Count total files first
    total_files = sum(len(files) for _, _, files in os.walk(src))
    copied = 0

    def copy_function(source, destination):
        """Copy one file with copy2 and refresh the progress line."""
        nonlocal copied
        result = shutil.copy2(source, destination)
        copied += 1
        # "\r" rewrites the same terminal line. With no newline the text
        # would sit in the stdout buffer, so flush to show it immediately.
        print(f"\rProgress: {copied}/{total_files} files", end="", flush=True)
        return result

    shutil.copytree(src, dst, copy_function=copy_function)
    print(f"\nCompleted: {copied} files copied")
# Back up ./large_project into ./backup while reporting progress.
copytree_with_progress(src="./large_project", dst="./backup")
For very large directories, consider using rsync via subprocess for better performance and resume capability.
Quick Reference
| Task | Function | Example |
|---|---|---|
| Copy directory recursively | copytree(src, dst) | Full backup |
| Merge into existing | copytree(src, dst, dirs_exist_ok=True) | Update/sync |
| Exclude patterns | copytree(src, dst, ignore=ignore_patterns(...)) | Skip cache files |
| Copy single file with metadata | copy2(src, dst) | Preserve timestamps |
| Copy single file basic | copy(src, dst) | Content and permissions, no timestamps |
Conclusion
Use shutil.copytree() for recursive directory copying. It handles the complexity of walking directory trees and preserving structure. Add dirs_exist_ok=True for merge operations and ignore_patterns() to filter unwanted files. For individual files, prefer copy2() over copy() to preserve timestamps and metadata, which is essential for backup integrity and file synchronization.