Skip to main content

How to Implement File Versioning in Python

File versioning creates backups before modifications, preventing data loss and enabling rollback. Python's standard library provides all the tools needed for robust versioning systems.

Timestamp-Based Versioning

Append datetime to filenames for automatic, chronological backups:

import shutil
from datetime import datetime
from pathlib import Path

def backup_with_timestamp(filepath):
    """Create a timestamped copy of a file next to the original.

    The copy is named ``<stem>_<YYYYMMDD_HHMMSS><suffix>`` using the current
    local time. If a backup with that name already exists (for example, two
    backups taken within the same second), a numeric suffix ``_1``, ``_2``,
    ... is appended so an earlier backup is never overwritten.

    Args:
        filepath: Path (``str`` or ``Path``) of the file to back up.

    Returns:
        ``Path`` of the newly created backup.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist (raised by
            ``shutil.copy2``).
    """
    source = Path(filepath)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = source.parent / f"{source.stem}_{timestamp}{source.suffix}"

    # The timestamp only has one-second resolution, so disambiguate
    # same-second backups instead of letting copy2 overwrite the earlier one.
    counter = 1
    while backup_path.exists():
        backup_path = (
            source.parent
            / f"{source.stem}_{timestamp}_{counter}{source.suffix}"
        )
        counter += 1

    shutil.copy2(source, backup_path)
    return backup_path

# Usage: copy report.csv to a timestamped file in the same directory.
backup_with_timestamp("report.csv")
# Creates e.g. report_20260115_093022.csv (the name reflects the current local time)

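This approach makes sorting by date trivial, and filenames are unique as long as backups of the same file are taken at least one second apart (the timestamp has one-second resolution).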

Incremental Version Numbers

For user-facing documents, sequential version numbers are more intuitive:

from pathlib import Path
import shutil

def get_next_version(filepath, width=0):
    """Return the first unused versioned path for a file.

    Candidates are named ``<stem>_v<N><suffix>`` in the same directory as
    ``filepath``, starting at ``N = 1`` and counting up until a name is found
    that does not exist yet. Nothing is created on disk.

    Args:
        filepath: Path (``str`` or ``Path``) of the base file.
        width: Minimum number of digits for the version number; shorter
            numbers are zero-padded (``width=3`` gives ``_v001``). The
            default ``0`` applies no padding. Only names using the same
            padding are checked, so use one width consistently per file.

    Returns:
        ``Path`` of the next free versioned filename.
    """
    base = Path(filepath)
    version = 1

    while True:
        number = str(version).zfill(width)
        candidate = base.parent / f"{base.stem}_v{number}{base.suffix}"
        if not candidate.exists():
            return candidate
        version += 1

def save_new_version(filepath):
    """Copy a file to its next free versioned filename.

    The destination is chosen by ``get_next_version`` and the file is copied
    there with ``shutil.copy2``.

    Args:
        filepath: Path (``str`` or ``Path``) of the file to snapshot.

    Returns:
        ``Path`` of the newly written versioned copy.
    """
    destination = get_next_version(filepath)
    shutil.copy2(Path(filepath), destination)
    return destination

# Usage: each call copies document.pdf to the next unused version name.
save_new_version("document.pdf")
# The first call creates document_v1.pdf, the next document_v2.pdf, and so on.
Tip:

Use zero-padded version numbers (v001, v002) if you expect many versions. This ensures proper alphabetical sorting in file managers.

# The ":03d" format spec zero-pads the version number to three digits.
versioned_name = f"{p.stem}_v{version:03d}{p.suffix}"
# e.g. document_v001.pdf, document_v002.pdf

Automatic Backup on Modification

Create backups automatically before any file write:

from pathlib import Path
from datetime import datetime
import shutil

class VersionedFile:
    """File wrapper that snapshots the current contents before every write.

    Backups are stored in ``backup_dir`` as
    ``<stem>_<YYYYMMDD_HHMMSS><suffix>``; when several backups are taken
    within the same second, later ones get a ``_1``, ``_2``, ... suffix so
    that no earlier backup is overwritten.
    """

    def __init__(self, filepath, backup_dir=None):
        """Bind the wrapper to a file.

        Args:
            filepath: Path (``str`` or ``Path``) of the managed file.
            backup_dir: Directory for backups. Defaults to a ``backups``
                directory next to the managed file.
        """
        self.path = Path(filepath)
        self.backup_dir = (
            Path(backup_dir) if backup_dir else self.path.parent / "backups"
        )

    def backup(self):
        """Copy the current file into the backup directory.

        Returns:
            ``Path`` of the new backup, or ``None`` if the managed file does
            not exist yet (there is nothing to back up).
        """
        if not self.path.exists():
            return None

        # parents=True so a nested, not-yet-existing backup_dir works too.
        self.backup_dir.mkdir(parents=True, exist_ok=True)

        stem, suffix = self.path.stem, self.path.suffix
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = self.backup_dir / f"{stem}_{timestamp}{suffix}"

        # One-second timestamps can collide; never overwrite an older backup.
        counter = 1
        while backup_path.exists():
            backup_path = (
                self.backup_dir / f"{stem}_{timestamp}_{counter}{suffix}"
            )
            counter += 1

        shutil.copy2(self.path, backup_path)
        return backup_path

    def write(self, content):
        """Back up the existing file (if any), then write ``content`` to it.

        Args:
            content: Text to write to the managed file.
        """
        self.backup()
        self.path.write_text(content)

    def list_versions(self):
        """List this file's backups, newest first.

        Only names produced by ``backup`` are returned, so backups that
        belong to a different file with a similar name (``config_extra_...``
        next to ``config_...``) are not included.

        Returns:
            List of backup ``Path`` objects ordered by the timestamp (and
            same-second counter) embedded in their names, newest first.
            Empty if the backup directory does not exist.
        """
        # Imported locally so the class does not rely on a module-level import.
        import re

        if not self.backup_dir.is_dir():
            return []

        name_re = re.compile(
            re.escape(self.path.stem)
            + r"_(\d{8}_\d{6})(?:_(\d+))?"
            + re.escape(self.path.suffix)
        )

        found = []
        for candidate in self.backup_dir.iterdir():
            match = name_re.fullmatch(candidate.name)
            if match:
                # Sort on (timestamp, counter); a plain name sort would
                # misorder counters of different lengths (_10 before _2).
                found.append(
                    (match.group(1), int(match.group(2) or 0), candidate)
                )

        found.sort(reverse=True)
        return [path for _, _, path in found]

# Usage: every write first snapshots whatever config.json currently contains.
vf = VersionedFile("config.json")
vf.write('{"setting": "new_value"}')

# Print the path of each stored backup.
for backup_path in vf.list_versions():
    print(backup_path)

Limiting Backup Count

Prevent disk space issues by removing old backups:

from pathlib import Path
import shutil
from datetime import datetime

def backup_with_limit(filepath, max_backups=5):
    """Create a timestamped backup and prune the oldest ones.

    The backup is written to a ``backups`` directory next to the file as
    ``<stem>_<YYYYMMDD_HHMMSS><suffix>``. Afterwards only the ``max_backups``
    newest backups of *this* file are kept. Pruning matches the exact backup
    name format, so backups of other files that merely share a name prefix
    (``data_extra_...`` next to ``data_...``) are never deleted.

    Note: the timestamp has one-second resolution, so two calls within the
    same second write to the same backup file.

    Args:
        filepath: Path (``str`` or ``Path``) of the file to back up.
        max_backups: Number of most recent backups to keep (>= 0).

    Returns:
        ``Path`` of the backup that was just created.

    Raises:
        ValueError: If ``max_backups`` is negative.
        FileNotFoundError: If ``filepath`` does not exist (raised by
            ``shutil.copy2``).
    """
    # Imported locally so this function does not rely on a module-level import.
    import re

    if max_backups < 0:
        # A negative value would slice from the end and delete the wrong files.
        raise ValueError(f"max_backups must be >= 0, got {max_backups}")

    source = Path(filepath)
    backup_dir = source.parent / "backups"
    backup_dir.mkdir(exist_ok=True)

    # Create new backup
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    new_backup = backup_dir / f"{source.stem}_{timestamp}{source.suffix}"
    shutil.copy2(source, new_backup)

    # Remove old backups. Fixed-width timestamps sort chronologically, so a
    # reverse name sort puts the newest backup first.
    name_re = re.compile(
        re.escape(source.stem) + r"_\d{8}_\d{6}" + re.escape(source.suffix)
    )
    backups = sorted(
        (
            candidate
            for candidate in backup_dir.iterdir()
            if name_re.fullmatch(candidate.name)
        ),
        reverse=True,
    )

    for old_backup in backups[max_backups:]:
        old_backup.unlink()
        print(f"Removed old backup: {old_backup.name}")

    return new_backup

# Usage: back up data.db into the sibling backups/ directory, keeping at most 3 copies.
backup_with_limit("data.db", max_backups=3)
Warning:

Always verify backups exist before deleting originals. File operations can fail due to permissions, disk space, or concurrent access.

Restoring from Backup

Implement restore functionality to complete the versioning system:

import re
import shutil
from datetime import datetime
from pathlib import Path

def list_backups(filepath):
    """List the available backups of a file, newest first.

    Looks in the ``backups`` directory next to ``filepath`` for files named
    ``<stem>_<YYYYMMDD_HHMMSS><suffix>`` or
    ``<stem>_prerestore_<YYYYMMDD_HHMMSS><suffix>``. Files belonging to other
    originals that merely share a name prefix (``config_extra_...`` next to
    ``config_...``) are ignored.

    Args:
        filepath: Path (``str`` or ``Path``) of the original file.

    Returns:
        List of backup ``Path`` objects ordered by the timestamp embedded in
        their names, newest first. Empty if there is no backup directory.
    """
    source = Path(filepath)
    backup_dir = source.parent / "backups"

    if not backup_dir.is_dir():
        return []

    name_re = re.compile(
        re.escape(source.stem)
        + r"_(?:prerestore_)?(\d{8}_\d{6})"
        + re.escape(source.suffix)
    )

    dated = []
    for candidate in backup_dir.iterdir():
        match = name_re.fullmatch(candidate.name)
        if match:
            # Order by the timestamp itself: a plain name sort would place
            # every "_prerestore_" snapshot ahead of all regular backups.
            dated.append((match.group(1), candidate))

    dated.sort(reverse=True)
    return [path for _, path in dated]

def restore_backup(filepath, backup_path):
    """Restore a file from a specific backup.

    If the target file currently exists, it is first copied to
    ``backups/<stem>_prerestore_<YYYYMMDD_HHMMSS><suffix>`` next to the
    target so the restore itself can be undone. The ``backups`` directory is
    created if it is missing.

    Args:
        filepath: Path (``str`` or ``Path``) of the file to overwrite.
        backup_path: Path (``str`` or ``Path``) of the backup to restore from.

    Returns:
        ``Path`` of the restored file (``filepath`` as a ``Path``).

    Raises:
        FileNotFoundError: If ``backup_path`` does not exist.
    """
    target = Path(filepath)
    backup = Path(backup_path)

    if not backup.exists():
        raise FileNotFoundError(f"Backup not found: {backup}")

    # Backup current before restoring
    if target.exists():
        backup_dir = target.parent / "backups"
        # The backup being restored may live elsewhere, so the default
        # backups directory is not guaranteed to exist yet.
        backup_dir.mkdir(exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        pre_restore = (
            backup_dir / f"{target.stem}_prerestore_{timestamp}{target.suffix}"
        )
        shutil.copy2(target, pre_restore)

    shutil.copy2(backup, target)
    return target

# Usage: restore config.json from the first listed backup, if there is one.
backups = list_backups("config.json")
if backups:
    latest = backups[0]
    print(f"Latest backup: {latest}")
    restore_backup("config.json", latest)

Why Use copy2?

The shutil.copy2 function preserves file metadata:

import shutil

# copy - copies file content and permission bits, but not timestamps
shutil.copy("file.txt", "file_copy.txt")

# copy2 - copies content AND attempts to preserve file metadata
shutil.copy2("file.txt", "file_backup.txt")
# Preserves where the platform allows: modification time, permissions, flags
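| Function | Content | Permissions | Timestamps |
|----------|---------|-------------|------------|
| copy     | Yes     | Yes         | No         |
| copy2    | Yes     | Yes         | Yes        |
| copyfile | Yes     | No          | No         |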

Versioning Strategy Comparison

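| Method      | Best For                 | Filename Example           |
|-------------|--------------------------|----------------------------|
| Timestamp   | Automated backups, logs  | report_20240115_093022.csv |
| Incremental | User documents, releases | document_v3.pdf            |
| Hash-based  | Deduplication, Git-style | a1b2c3d4.blob              |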

Summary

Use timestamp-based versioning for automated systems where chronological order matters. Use incremental version numbers for user-facing files where simplicity is preferred. Always use shutil.copy2 to preserve metadata, and implement backup limits to prevent disk space issues.