How to Implement File Versioning in Python
File versioning creates backups before modifications, preventing data loss and enabling rollback. Python's standard library provides all the tools needed for robust versioning systems.
Timestamp-Based Versioning
Append datetime to filenames for automatic, chronological backups:
import shutil
from datetime import datetime
from pathlib import Path
def backup_with_timestamp(filepath):
    """Create a timestamped backup of a file next to the original.

    The backup is named ``<stem>_<YYYYMMDD_HHMMSS><suffix>``. If a file with
    that name already exists (for example, two backups taken within the same
    second), a zero-padded counter is appended (``_001``, ``_002``, ...) so
    an earlier backup is never overwritten.

    Args:
        filepath: Path (``str`` or ``Path``) of the file to back up.

    Returns:
        ``Path`` of the newly created backup.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
    """
    p = Path(filepath)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = p.parent / f"{p.stem}_{timestamp}{p.suffix}"
    # The timestamp only has one-second resolution, so probe for a free name
    # instead of silently replacing a backup made moments ago.
    counter = 0
    while backup_path.exists():
        counter += 1
        backup_path = p.parent / f"{p.stem}_{timestamp}_{counter:03d}{p.suffix}"
    shutil.copy2(p, backup_path)
    return backup_path
# Usage
backup_with_timestamp("report.csv")
# Creates: report_20260115_093022.csv
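This approach makes sorting by date trivial. Because the timestamp has one-second resolution, the timestamp alone is unique only when backups are taken at least one second apart.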
Incremental Version Numbers
For user-facing documents, sequential version numbers are more intuitive:
from pathlib import Path
import shutil
def get_next_version(filepath):
    """Return the path for the next version of a file.

    Versioned copies live next to the original and are named
    ``<stem>_v<N><suffix>``. The result uses one more than the highest
    version number already present, so a gap left by a deleted version is
    never reused and newer copies always carry higher numbers.

    Args:
        filepath: Path (``str`` or ``Path``) of the original file.

    Returns:
        ``Path`` of the next versioned file (the file itself is not
        created). Numbering starts at 1 when no versions exist yet.
    """
    p = Path(filepath)
    prefix = f"{p.stem}_v"
    suffix = p.suffix
    highest = 0
    # A missing parent directory simply means there are no versions yet.
    siblings = p.parent.iterdir() if p.parent.is_dir() else ()
    for sibling in siblings:
        name = sibling.name
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        # Whatever sits between prefix and suffix must be the number itself;
        # zero-padded numbers such as "003" are accepted as well.
        number = name[len(prefix):len(name) - len(suffix)]
        if number.isdecimal():
            highest = max(highest, int(number))
    return p.parent / f"{prefix}{highest + 1}{suffix}"
def save_new_version(filepath):
    """Copy the file to its next versioned name and return that new path."""
    destination = get_next_version(filepath)
    shutil.copy2(Path(filepath), destination)
    return destination
# Usage
save_new_version("document.pdf")
# Creates: document_v1.pdf, document_v2.pdf, etc.
Use zero-padded version numbers (v001, v002) if you expect many versions. This ensures proper alphabetical sorting in file managers.
versioned_name = f"{p.stem}_v{version:03d}{p.suffix}"
# document_v001.pdf, document_v002.pdf
Automatic Backup on Modification
Create backups automatically before any file write:
from pathlib import Path
from datetime import datetime
import shutil
class VersionedFile:
    """File wrapper that snapshots the current file before every write.

    Backups are stored in ``backup_dir`` (default: a ``backups`` folder next
    to the file) and are named ``<stem>_<YYYYMMDD_HHMMSS><suffix>``.
    """

    def __init__(self, filepath, backup_dir=None):
        """Remember the target file and the directory that holds its backups.

        Args:
            filepath: Path (``str`` or ``Path``) of the file to manage.
            backup_dir: Optional directory for backups; defaults to
                ``<parent of filepath>/backups``.
        """
        self.path = Path(filepath)
        self.backup_dir = Path(backup_dir) if backup_dir else self.path.parent / "backups"

    def backup(self):
        """Copy the current file into the backup directory.

        Returns:
            ``Path`` of the new backup, or ``None`` when the file does not
            exist yet (there is nothing to back up).
        """
        if not self.path.exists():
            return None
        # parents=True so a nested custom backup_dir works on first use.
        self.backup_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        stem, suffix = self.path.stem, self.path.suffix
        backup_path = self.backup_dir / f"{stem}_{timestamp}{suffix}"
        # Several writes within one second would otherwise overwrite each
        # other's backup. The counter is zero-padded so that sorting by name
        # still matches creation order.
        counter = 0
        while backup_path.exists():
            counter += 1
            backup_path = self.backup_dir / f"{stem}_{timestamp}_{counter:03d}{suffix}"
        shutil.copy2(self.path, backup_path)
        return backup_path

    def write(self, content):
        """Back up the existing file (if any), then write ``content`` as text."""
        self.backup()
        self.path.write_text(content)

    def list_versions(self):
        """Return the backup paths for this file, newest first (by name)."""
        import glob  # local import: only needed to escape the pattern

        if not self.backup_dir.exists():
            return []
        # Escape the literal parts so names containing "[", "*" or "?" are
        # matched verbatim rather than interpreted as wildcards.
        pattern = f"{glob.escape(self.path.stem)}_*{glob.escape(self.path.suffix)}"
        return sorted(self.backup_dir.glob(pattern), reverse=True)
# Usage
vf = VersionedFile("config.json")
vf.write('{"setting": "new_value"}')
for version in vf.list_versions():
print(version)
Limiting Backup Count
Prevent disk space issues by removing old backups:
from pathlib import Path
import shutil
from datetime import datetime
def backup_with_limit(filepath, max_backups=5):
    """Create a backup and prune old ones, keeping only the newest few.

    Backups are written to a ``backups`` folder next to the file and named
    ``<stem>_<YYYYMMDD_HHMMSS_ffffff><suffix>``. Pruning decides what is
    "old" by sorting the names, so the timestamp includes microseconds and
    is re-read until the name is unused; this keeps name order equal to
    creation order even for backups taken in quick succession.

    Args:
        filepath: Path (``str`` or ``Path``) of the file to back up.
        max_backups: Number of most recent backups to keep. ``0`` removes
            every backup, including the one just created.

    Returns:
        ``Path`` of the backup that was just created.

    Raises:
        ValueError: If ``max_backups`` is negative.
        FileNotFoundError: If ``filepath`` does not exist.
    """
    import glob  # local import: only needed to escape the pattern

    # A negative limit would make the slice below delete the wrong files.
    if max_backups < 0:
        raise ValueError("max_backups must be non-negative")

    p = Path(filepath)
    backup_dir = p.parent / "backups"
    backup_dir.mkdir(exist_ok=True)

    # Create new backup under a name that is not taken yet.
    while True:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        backup_path = backup_dir / f"{p.stem}_{timestamp}{p.suffix}"
        if not backup_path.exists():
            break
    shutil.copy2(p, backup_path)

    # Remove old backups. The literal parts of the pattern are escaped so a
    # name containing "[", "*" or "?" cannot match unrelated files.
    pattern = f"{glob.escape(p.stem)}_*{glob.escape(p.suffix)}"
    backups = sorted(backup_dir.glob(pattern), reverse=True)
    for old_backup in backups[max_backups:]:
        old_backup.unlink()
        print(f"Removed old backup: {old_backup.name}")
    return backup_path
backup_with_limit("data.db", max_backups=3)
Always verify backups exist before deleting originals. File operations can fail due to permissions, disk space, or concurrent access.
Restoring from Backup
Implement restore functionality to complete the versioning system:
from pathlib import Path
import shutil
def list_backups(filepath):
    """List the available backups for a file, newest first (by name).

    Looks in the ``backups`` folder next to the file for entries named
    ``<stem>_*<suffix>``.

    Args:
        filepath: Path (``str`` or ``Path``) of the original file.

    Returns:
        List of backup ``Path`` objects sorted in descending name order;
        empty when the ``backups`` folder does not exist.
    """
    import glob  # local import: only needed to escape the pattern

    p = Path(filepath)
    backup_dir = p.parent / "backups"
    if not backup_dir.exists():
        return []
    # Escape the literal parts so a name such as "report[1].csv" matches its
    # own backups instead of being read as a character class.
    pattern = f"{glob.escape(p.stem)}_*{glob.escape(p.suffix)}"
    return sorted(backup_dir.glob(pattern), reverse=True)
def restore_backup(filepath, backup_path):
    """Restore a file from a specific backup.

    If the target file currently exists, it is first copied to
    ``backups/<stem>_prerestore_<YYYYMMDD_HHMMSS><suffix>`` next to the
    file, so the restore itself can be undone.

    Args:
        filepath: Path (``str`` or ``Path``) of the file to restore.
        backup_path: Path of the backup whose content should be restored.

    Returns:
        ``Path`` of the restored file.

    Raises:
        FileNotFoundError: If ``backup_path`` does not exist.
    """
    # Imported here so the function does not rely on a module-level
    # ``datetime`` import being present.
    from datetime import datetime

    p = Path(filepath)
    backup = Path(backup_path)
    if not backup.exists():
        raise FileNotFoundError(f"Backup not found: {backup}")
    # Backup current before restoring
    if p.exists():
        pre_restore_dir = p.parent / "backups"
        # The backup being restored may live elsewhere, so this folder is
        # not guaranteed to exist yet.
        pre_restore_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        pre_restore = pre_restore_dir / f"{p.stem}_prerestore_{timestamp}{p.suffix}"
        shutil.copy2(p, pre_restore)
    shutil.copy2(backup, p)
    return p
# Usage
backups = list_backups("config.json")
if backups:
print(f"Latest backup: {backups[0]}")
restore_backup("config.json", backups[0])
Why Use copy2?
The shutil.copy2 function preserves file metadata:
import shutil
# copy - copies content and permission bits, but not timestamps
shutil.copy("file.txt", "file_copy.txt")
# copy2 - copies content AND metadata
shutil.copy2("file.txt", "file_backup.txt")
# Preserves: modification time, permissions, flags
| Function | Content | Permissions | Timestamps |
|---|---|---|---|
| copy | ✅ | ✅ | ❌ |
| copy2 | ✅ | ✅ | ✅ |
| copyfile | ✅ | ❌ | ❌ |
Versioning Strategy Comparison
| Method | Best For | Filename Example |
|---|---|---|
| Timestamp | Automated backups, logs | report_20240115_093022.csv |
| Incremental | User documents, releases | document_v3.pdf |
| Hash-based | Deduplication, Git-style | a1b2c3d4.blob |
Summary
Use timestamp-based versioning for automated systems where chronological order matters. Use incremental version numbers for user-facing files where simplicity is preferred. Always use shutil.copy2 to preserve metadata, and implement backup limits to prevent disk space issues.