Skip to main content

How to Delete Files Older Than X Days in Python

Automating disk cleanup by removing old files based on modification time is a common maintenance task for logs, backups, and temporary files.

Using pathlib (Modern Approach)

Compare file modification timestamps against a calculated cutoff:

import time
from pathlib import Path

def delete_old_files(folder: str, days: int, dry_run: bool = True) -> int:
    """Remove files in *folder* last modified more than *days* days ago.

    With dry_run=True (the default) nothing is removed; matches are only
    reported. Returns the number of files matched (and deleted, when not
    a dry run).
    """
    cutoff_ts = time.time() - days * 86400  # 86400 seconds per day
    matched = 0

    for entry in Path(folder).iterdir():
        # Skip subdirectories and anything newer than the cutoff.
        if not entry.is_file() or entry.stat().st_mtime >= cutoff_ts:
            continue
        if dry_run:
            print(f"Would delete: {entry.name}")
        else:
            entry.unlink()
            print(f"Deleted: {entry.name}")
        matched += 1

    return matched

# Preview first: dry_run=True only reports matches, nothing is removed
count = delete_old_files("./logs", days=30, dry_run=True)
print(f"Found {count} files to delete")

# Then actually delete (uncomment once the preview looks right)
# count = delete_old_files("./logs", days=30, dry_run=False)

Using datetime for Clearer Time Handling

More readable approach using datetime objects:

from datetime import datetime, timedelta
from pathlib import Path

def delete_old_files(folder: str, days: int) -> list[Path]:
    """Delete files in *folder* older than *days* days.

    Returns the list of paths that were removed.
    """
    threshold = datetime.now() - timedelta(days=days)
    removed: list[Path] = []

    for entry in Path(folder).iterdir():
        if not entry.is_file():
            continue

        # Compare as datetime objects rather than raw epoch seconds.
        modified = datetime.fromtimestamp(entry.stat().st_mtime)
        if modified >= threshold:
            continue

        entry.unlink()
        removed.append(entry)
        print(f"Deleted: {entry.name} (modified {modified.date()})")

    return removed

# Runs immediately: removes cache entries not modified in the last 7 days
deleted_files = delete_old_files("./cache", days=7)
print(f"Removed {len(deleted_files)} files")

Recursive Deletion with os.walk

Clean files in subdirectories as well:

import os
import time
from pathlib import Path

def delete_old_files_recursive(
    folder: str,
    days: int,
    pattern: str = "*",
    dry_run: bool = True
) -> dict:
    """Walk *folder* recursively and delete files older than *days* days.

    Only files matching *pattern* (glob syntax) are considered. Returns a
    dict with "deleted", "freed_bytes", and "errors" counters; in dry-run
    mode the counters describe what *would* be removed.
    """
    threshold = time.time() - days * 86400
    summary = {"deleted": 0, "freed_bytes": 0, "errors": 0}

    for current_root, _subdirs, names in os.walk(folder):
        base = Path(current_root)

        for name in names:
            candidate = base / name

            # "*" matches everything, so only test explicit patterns.
            if pattern != "*" and not candidate.match(pattern):
                continue

            try:
                info = candidate.stat()
                if info.st_mtime >= threshold:
                    continue

                if dry_run:
                    print(f"Would delete: {candidate}")
                else:
                    candidate.unlink()
                    print(f"Deleted: {candidate}")

                summary["deleted"] += 1
                summary["freed_bytes"] += info.st_size
            except OSError as e:
                print(f"Error processing {candidate}: {e}")
                summary["errors"] += 1

    return summary

# Example: preview deletion of old log files recursively (dry_run=True)
stats = delete_old_files_recursive(
"./logs",
days=30,
pattern="*.log",
dry_run=True
)

# freed_bytes is converted to megabytes for display
print(f"Files: {stats['deleted']}, Space: {stats['freed_bytes'] / 1024 / 1024:.2f} MB")
> **Warning:** Always test with `dry_run=True` before actual deletion. Recursive deletion in the wrong folder can cause significant data loss.

Filtering by File Extension

Target specific file types:

from pathlib import Path
import time

def cleanup_by_extension(
    folder: str,
    days: int,
    extensions: set[str]
) -> int:
    """Recursively delete files older than *days* whose suffix is in *extensions*.

    Suffix comparison is case-insensitive; *extensions* should contain
    lowercase, dot-prefixed suffixes such as ".tmp". Returns the number
    of files removed.
    """
    threshold = time.time() - days * 86400
    removed = 0

    # Restrict the walk to regular files with a targeted extension.
    candidates = (
        p for p in Path(folder).rglob("*")
        if p.is_file() and p.suffix.lower() in extensions
    )

    for candidate in candidates:
        if candidate.stat().st_mtime < threshold:
            candidate.unlink()
            removed += 1
            print(f"Deleted: {candidate}")

    return removed

# Clean old temporary and log files (suffixes must be lowercase with a dot)
deleted = cleanup_by_extension(
"./temp",
days=7,
extensions={".tmp", ".log", ".bak", ".cache"}
)

Keeping Recent Files Per Directory

More sophisticated cleanup that keeps N most recent files:

from pathlib import Path
from typing import Callable

def cleanup_keep_recent(
    folder: str,
    keep_count: int = 10,
    pattern: str = "*"
) -> int:
    """Retain only the *keep_count* most recently modified files.

    Files in *folder* matching *pattern* beyond the newest *keep_count*
    are deleted. Returns the number of files removed.
    """
    matches = sorted(
        (p for p in Path(folder).glob(pattern) if p.is_file()),
        key=lambda p: p.stat().st_mtime,
        reverse=True,  # newest first
    )

    # Everything past the keep window is stale.
    stale = matches[keep_count:]
    for victim in stale:
        victim.unlink()
        print(f"Deleted: {victim.name}")

    return len(stale)

# Keep only the 10 most recent backups; older *.zip archives are removed
deleted = cleanup_keep_recent("./backups", keep_count=10, pattern="*.zip")

Scheduled Cleanup Script

Complete script suitable for cron or Task Scheduler:

#!/usr/bin/env python3
"""Automated file cleanup script."""

import argparse
import logging
from datetime import datetime
from pathlib import Path
import time

logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

def cleanup_old_files(
folder: str,
days: int,
extensions: set[str] | None = None,
recursive: bool = False,
dry_run: bool = False
) -> dict:
"""Clean up old files with comprehensive options."""
folder_path = Path(folder)

if not folder_path.exists():
logger.error(f"Folder does not exist: {folder}")
return {"error": "Folder not found"}

cutoff = time.time() - (days * 86400)
stats = {"scanned": 0, "deleted": 0, "freed_bytes": 0}

# Choose iteration method
iterator = folder_path.rglob("*") if recursive else folder_path.iterdir()

for file_path in iterator:
if not file_path.is_file():
continue

stats["scanned"] += 1

# Filter by extension if specified
if extensions and file_path.suffix.lower() not in extensions:
continue

try:
file_stat = file_path.stat()

if file_stat.st_mtime >= cutoff:
continue

if dry_run:
logger.info(f"Would delete: {file_path}")
else:
file_path.unlink()
logger.info(f"Deleted: {file_path}")

stats["deleted"] += 1
stats["freed_bytes"] += file_stat.st_size

except PermissionError:
logger.warning(f"Permission denied: {file_path}")
except OSError as e:
logger.error(f"Error deleting {file_path}: {e}")

return stats

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Delete old files")
    parser.add_argument("folder", help="Target folder")
    parser.add_argument("--days", type=int, default=30, help="Age threshold in days")
    parser.add_argument("--extensions", nargs="+", help="File extensions to target")
    parser.add_argument("--recursive", action="store_true", help="Include subdirectories")
    parser.add_argument("--dry-run", action="store_true", help="Preview without deleting")

    args = parser.parse_args()

    # Normalize to the lowercase, dot-prefixed form cleanup_old_files
    # compares against (file_path.suffix.lower()), so "--extensions LOG
    # log .log" all match; previously ".LOG" or "log" silently matched
    # nothing.
    extensions = None
    if args.extensions:
        extensions = {
            ext.lower() if ext.startswith(".") else f".{ext.lower()}"
            for ext in args.extensions
        }

    stats = cleanup_old_files(
        args.folder,
        args.days,
        extensions=extensions,
        recursive=args.recursive,
        dry_run=args.dry_run
    )

    # Use .get() so a missing-folder error result cannot raise KeyError.
    freed_mb = stats.get("freed_bytes", 0) / 1024 / 1024
    print(f"\nScanned: {stats.get('scanned', 0)} files")
    print(f"Deleted: {stats.get('deleted', 0)} files")
    print(f"Freed: {freed_mb:.2f} MB")

Usage:

# Preview deletion
python cleanup.py ./logs --days 30 --extensions .log .tmp --dry-run

# Actually delete
python cleanup.py ./logs --days 30 --extensions .log .tmp --recursive

File Timestamp Reference

| Attribute | Meaning | Best For |
| --- | --- | --- |
| `st_mtime` | Last content modification | Cleanup scripts |
| `st_ctime` | Creation (Windows) / Metadata change (Unix) | Audit trails |
| `st_atime` | Last access time | Detecting unused files |
> **Tip:** Use `st_mtime` for most cleanup scripts since it reflects when the file content was actually modified. Access time (`st_atime`) may be disabled on some systems for performance reasons.

Empty Directory Cleanup

Remove empty directories after file deletion:

from pathlib import Path

def remove_empty_dirs(folder: str) -> int:
    """Remove empty directories under *folder*, deepest first.

    Returns the number of directories removed. Non-empty directories are
    left untouched.
    """
    root = Path(folder)
    removed = 0

    # Reverse-lexicographic order puts children before their parents, so
    # a parent emptied by this pass is still reached afterwards.
    for candidate in sorted(root.rglob("*"), reverse=True):
        if not candidate.is_dir():
            continue
        try:
            candidate.rmdir()  # raises OSError when not empty
        except OSError:
            continue
        removed += 1
        print(f"Removed empty dir: {candidate}")

    return removed

Use pathlib for simple, readable scripts and os.walk for performance-critical recursive operations. Always include dry-run functionality to prevent accidental data loss.