How to Delete Files Older Than X Days in Python
Automating disk cleanup by removing old files based on modification time is a common maintenance task for logs, backups, and temporary files.
Using pathlib (Modern Approach)
Compare file modification timestamps against a calculated cutoff:
import time
from pathlib import Path
def delete_old_files(folder: str, days: int, dry_run: bool = True) -> int:
    """Delete files in *folder* whose modification time is older than *days* days.

    Args:
        folder: Directory to scan (non-recursive).
        days: Age threshold in days.
        dry_run: When True, only report candidates without deleting anything.

    Returns:
        Number of files deleted (or that would be deleted in dry-run mode).
    """
    threshold = time.time() - days * 86400  # 86400 seconds per day
    matched = 0
    for entry in Path(folder).iterdir():
        # Skip directories and anything newer than the threshold.
        if not entry.is_file() or entry.stat().st_mtime >= threshold:
            continue
        if dry_run:
            print(f"Would delete: {entry.name}")
        else:
            entry.unlink()
            print(f"Deleted: {entry.name}")
        matched += 1
    return matched
# Preview first: dry_run=True only reports candidates, nothing is removed
count = delete_old_files("./logs", days=30, dry_run=True)
print(f"Found {count} files to delete")
# Then actually delete by passing dry_run=False
# count = delete_old_files("./logs", days=30, dry_run=False)
Using datetime for Clearer Time Handling
More readable approach using datetime objects:
from datetime import datetime, timedelta
from pathlib import Path
def delete_old_files(folder: str, days: int) -> list[Path]:
    """Delete files older than *days* days and return the deleted paths.

    Uses datetime arithmetic for the cutoff instead of raw epoch seconds,
    which makes the age comparison easier to read.
    """
    limit = datetime.now() - timedelta(days=days)
    removed: list[Path] = []
    for entry in Path(folder).iterdir():
        if not entry.is_file():
            continue
        # Convert the file's mtime (epoch seconds) into a datetime for comparison.
        modified = datetime.fromtimestamp(entry.stat().st_mtime)
        if modified >= limit:
            continue
        entry.unlink()
        removed.append(entry)
        print(f"Deleted: {entry.name} (modified {modified.date()})")
    return removed
# Remove cache files older than one week and report how many were deleted
deleted_files = delete_old_files("./cache", days=7)
print(f"Removed {len(deleted_files)} files")
Recursive Deletion with os.walk
Clean files in subdirectories as well:
import os
import time
from pathlib import Path
def delete_old_files_recursive(
    folder: str,
    days: int,
    pattern: str = "*",
    dry_run: bool = True
) -> dict:
    """Recursively delete files older than *days* days that match *pattern*.

    Args:
        folder: Root directory; all subdirectories are walked.
        days: Age threshold in days (based on st_mtime).
        pattern: Glob pattern; "*" disables filtering entirely.
        dry_run: When True, report candidates without deleting.

    Returns:
        Dict with "deleted", "freed_bytes" and "errors" counters.
    """
    threshold = time.time() - days * 86400
    summary = {"deleted": 0, "freed_bytes": 0, "errors": 0}
    for dirpath, _subdirs, names in os.walk(folder):
        parent = Path(dirpath)
        for name in names:
            target = parent / name
            # Optional pattern filter; "*" means everything qualifies.
            if pattern != "*" and not target.match(pattern):
                continue
            try:
                info = target.stat()
                if info.st_mtime >= threshold:
                    continue  # young enough to keep
                if dry_run:
                    print(f"Would delete: {target}")
                else:
                    target.unlink()
                    print(f"Deleted: {target}")
                summary["deleted"] += 1
                summary["freed_bytes"] += info.st_size
            except OSError as e:
                # stat() or unlink() can fail (permissions, races); count and move on.
                print(f"Error processing {target}: {e}")
                summary["errors"] += 1
    return summary
# Example: preview recursive deletion of old log files (dry_run=True)
stats = delete_old_files_recursive(
    "./logs",
    days=30,
    pattern="*.log",
    dry_run=True
)
print(f"Files: {stats['deleted']}, Space: {stats['freed_bytes'] / 1024 / 1024:.2f} MB")
Always test with dry_run=True before actual deletion. Recursive deletion in the wrong folder can cause significant data loss.
Filtering by File Extension
Target specific file types:
from pathlib import Path
import time
def cleanup_by_extension(
    folder: str,
    days: int,
    extensions: set[str]
) -> int:
    """Recursively delete files older than *days* days with matching suffixes.

    Args:
        folder: Root directory; scanned recursively via rglob.
        days: Age threshold in days.
        extensions: Lowercase suffixes (including the dot), e.g. {".tmp", ".log"}.

    Returns:
        Number of files deleted.
    """
    threshold = time.time() - days * 86400
    removed = 0
    for candidate in Path(folder).rglob("*"):
        # Short-circuit order matters: stat() is only called for files whose
        # suffix already qualifies, same as checking each condition in turn.
        eligible = (
            candidate.is_file()
            and candidate.suffix.lower() in extensions
            and candidate.stat().st_mtime < threshold
        )
        if not eligible:
            continue
        candidate.unlink()
        removed += 1
        print(f"Deleted: {candidate}")
    return removed
# Clean old temporary and log files (one-week age threshold)
deleted = cleanup_by_extension(
    "./temp",
    days=7,
    extensions={".tmp", ".log", ".bak", ".cache"}
)
Keeping Recent Files Per Directory
More sophisticated cleanup that keeps N most recent files:
from pathlib import Path
from typing import Callable
def cleanup_keep_recent(
    folder: str,
    keep_count: int = 10,
    pattern: str = "*"
) -> int:
    """Keep only the *keep_count* most recently modified files in *folder*.

    Args:
        folder: Directory to scan (non-recursive glob).
        keep_count: How many of the newest files to retain.
        pattern: Glob pattern selecting which files are considered.

    Returns:
        Number of files deleted.
    """
    # Newest first, so everything past index keep_count is stale.
    candidates = sorted(
        (p for p in Path(folder).glob(pattern) if p.is_file()),
        key=lambda p: p.stat().st_mtime,
        reverse=True,
    )
    removed = 0
    for stale in candidates[keep_count:]:
        stale.unlink()
        removed += 1
        print(f"Deleted: {stale.name}")
    return removed
# Keep only the 10 most recent zip backups; anything older is deleted
deleted = cleanup_keep_recent("./backups", keep_count=10, pattern="*.zip")
Scheduled Cleanup Script
Complete script suitable for cron or Task Scheduler:
#!/usr/bin/env python3
"""Automated file cleanup script."""
import argparse
import logging
from datetime import datetime
from pathlib import Path
import time
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
def cleanup_old_files(
folder: str,
days: int,
extensions: set[str] | None = None,
recursive: bool = False,
dry_run: bool = False
) -> dict:
"""Clean up old files with comprehensive options."""
folder_path = Path(folder)
if not folder_path.exists():
logger.error(f"Folder does not exist: {folder}")
return {"error": "Folder not found"}
cutoff = time.time() - (days * 86400)
stats = {"scanned": 0, "deleted": 0, "freed_bytes": 0}
# Choose iteration method
iterator = folder_path.rglob("*") if recursive else folder_path.iterdir()
for file_path in iterator:
if not file_path.is_file():
continue
stats["scanned"] += 1
# Filter by extension if specified
if extensions and file_path.suffix.lower() not in extensions:
continue
try:
file_stat = file_path.stat()
if file_stat.st_mtime >= cutoff:
continue
if dry_run:
logger.info(f"Would delete: {file_path}")
else:
file_path.unlink()
logger.info(f"Deleted: {file_path}")
stats["deleted"] += 1
stats["freed_bytes"] += file_stat.st_size
except PermissionError:
logger.warning(f"Permission denied: {file_path}")
except OSError as e:
logger.error(f"Error deleting {file_path}: {e}")
return stats
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Delete old files")
    parser.add_argument("folder", help="Target folder")
    parser.add_argument("--days", type=int, default=30, help="Age threshold in days")
    parser.add_argument("--extensions", nargs="+", help="File extensions to target")
    parser.add_argument("--recursive", action="store_true", help="Include subdirectories")
    parser.add_argument("--dry-run", action="store_true", help="Preview without deleting")
    args = parser.parse_args()

    extensions = set(args.extensions) if args.extensions else None
    stats = cleanup_old_files(
        args.folder,
        args.days,
        extensions=extensions,
        recursive=args.recursive,
        dry_run=args.dry_run
    )
    # BUG FIX: cleanup_old_files signals a missing folder via an "error" key;
    # reading the counters unconditionally raised KeyError in that case.
    # Bail out with a non-zero exit code instead (the error is already logged).
    if "error" in stats:
        raise SystemExit(1)
    freed_mb = stats["freed_bytes"] / 1024 / 1024
    print(f"\nScanned: {stats['scanned']} files")
    print(f"Deleted: {stats['deleted']} files")
    print(f"Freed: {freed_mb:.2f} MB")
Usage:
# Preview deletion
python cleanup.py ./logs --days 30 --extensions .log .tmp --dry-run
# Actually delete
python cleanup.py ./logs --days 30 --extensions .log .tmp --recursive
File Timestamp Reference
| Attribute | Meaning | Best For |
|---|---|---|
| `st_mtime` | Last content modification | Cleanup scripts |
| `st_ctime` | Creation (Windows) / Metadata change (Unix) | Audit trails |
| `st_atime` | Last access time | Detecting unused files |
Use st_mtime for most cleanup scripts since it reflects when the file content was actually modified. Access time (st_atime) may be disabled on some systems for performance reasons.
Empty Directory Cleanup
Remove empty directories after file deletion:
from pathlib import Path
def remove_empty_dirs(folder: str) -> int:
    """Remove all empty directories under *folder*, deepest first.

    Returns:
        Number of directories removed.
    """
    removed = 0
    # Reverse-sorted paths put children before their parents, so nested
    # chains of empty directories collapse bottom-up in a single pass.
    for candidate in sorted(Path(folder).rglob("*"), reverse=True):
        if not candidate.is_dir():
            continue
        try:
            candidate.rmdir()  # rmdir only succeeds on an empty directory
        except OSError:
            continue  # directory not empty — leave it alone
        removed += 1
        print(f"Removed empty dir: {candidate}")
    return removed
Use pathlib for simple, readable scripts and os.walk for performance-critical recursive operations. Always include dry-run functionality to prevent accidental data loss.