Skip to main content

How to Compare Two Files Line-by-Line in Python

Comparing text files is a common task in DevOps, testing, and configuration management. Python provides two main approaches: a manual loop for speed and custom logic, or difflib for generating human-readable diff reports.

This guide covers both methods along with handling edge cases like files of different lengths.

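The examples in this guide use the following two files. The first six lines below are config_1.txt and the last six are config_2.txt; they differ only in the port and log_level values: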

# Sample configuration
host = 127.0.0.1
port = 8080
debug = True
max_connections = 100
log_level = INFO
# Sample configuration
host = 127.0.0.1
port = 9090
debug = True
max_connections = 100
log_level = DEBUG

The zip() Loop (Fast and Memory Efficient)

When you need to know if files differ or find the first difference, use zip() to stream both files simultaneously:

def compare_lines(file1, file2):
    """Report the first line at which two text files differ.

    Both files are streamed in lockstep with zip(), so only the lines up
    to the end of the shorter file are examined.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        False if a differing pair of lines was found (the pair is printed),
        True otherwise.
    """
    with open(file1) as left, open(file2) as right:
        numbered_pairs = enumerate(zip(left, right), start=1)
        # next() pulls pairs lazily and stops reading at the first mismatch.
        mismatch = next(
            ((number, old, new)
             for number, (old, new) in numbered_pairs
             if old != new),
            None,
        )

    if mismatch is None:
        print("Files are identical.")
        return True

    number, old, new = mismatch
    print(f"Difference at line {number}:")
    print(f" < {old.strip()}")
    print(f" > {new.strip()}")
    return False

# Usage: compare the two sample configuration files
compare_lines(file1="config_1.txt", file2="config_2.txt")

Output:

Difference at line 3:
< port = 8080
> port = 9090
zip() Stops at Shortest File

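The zip() function stops when the shorter file ends. If one file has extra lines, they are never compared, so compare_lines() reports the files as identical even when one is just a truncated copy of the other. See the next section for handling files of different lengths.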

Handling Files of Different Lengths

Use itertools.zip_longest to compare files that may have different line counts:

from itertools import zip_longest

def compare_files_full(file1, file2):
    """Collect every line-level difference between two text files.

    Lines are paired by position; when one file runs out first, the
    remaining lines of the other are reported as added or removed.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        A list of tuples, one per differing line number:
        (line_num, "ADDED", text) when only file2 has the line,
        (line_num, "REMOVED", text) when only file1 has the line,
        (line_num, "CHANGED", old_text, new_text) when both have it.
        The texts are stripped of surrounding whitespace.
    """
    found = []
    with open(file1) as left, open(file2) as right:
        pairs = zip_longest(left, right, fillvalue=None)
        for number, (old, new) in enumerate(pairs, start=1):
            if old is None:
                entry = (number, "ADDED", new.strip())
            elif new is None:
                entry = (number, "REMOVED", old.strip())
            elif old != new:
                entry = (number, "CHANGED", old.strip(), new.strip())
            else:
                continue
            found.append(entry)
    return found

# Usage: print a short report for every difference found
diffs = compare_files_full("config_1.txt", "config_2.txt")

if diffs:
    # Each entry is (line number, kind, text...); CHANGED carries two texts.
    for line_no, kind, *texts in diffs:
        if kind == "CHANGED":
            before, after = texts
            print(f"Line {line_no}: Changed")
            print(f" - {before}")
            print(f" + {after}")
        elif kind == "ADDED":
            print(f"Line {line_no}: Added")
            print(f" + {texts[0]}")
        else:
            print(f"Line {line_no}: Removed")
            print(f" - {texts[0]}")
else:
    print("Files are identical")

Output:

Line 3: Changed
- port = 8080
+ port = 9090
Line 6: Changed
- log_level = INFO
+ log_level = DEBUG

Using difflib for Diff Reports

For Git-style diff output that's familiar to developers, use the standard library difflib:

import difflib

def print_unified_diff(file1, file2):
    """Print a unified diff of two text files, similar to ``git diff``.

    Args:
        file1: Path of the original file (shown as the ``---`` side).
        file2: Path of the modified file (shown as the ``+++`` side).

    Prints the diff, or "Files are identical" when there is none.
    """
    with open(file1) as f1, open(file2) as f2:
        # Drop the line endings up front: the diff is produced with
        # lineterm='' and joined with "\n" below, so keeping the original
        # "\n" on every line would print a blank line after each one.
        lines1 = [line.rstrip("\n") for line in f1]
        lines2 = [line.rstrip("\n") for line in f2]

    diff = difflib.unified_diff(
        lines1,
        lines2,
        fromfile=file1,
        tofile=file2,
        lineterm='',
    )

    output = list(diff)
    if output:
        print('\n'.join(output))
    else:
        print("Files are identical")


# Usage
print_unified_diff(file1="config_1.txt", file2="config_2.txt")

Output:

--- config_1.txt
+++ config_2.txt
@@ -1,6 +1,6 @@
# Sample configuration

host = 127.0.0.1

-port = 8080

+port = 9090

debug = True

max_connections = 100

-log_level = INFO

+log_level = DEBUG
note

The difflib examples in this guide read both files entirely into memory before comparing them. For very large files, prefer the streaming zip() approach.

Side-by-Side Comparison

For a visual side-by-side view:

import difflib

def side_by_side_diff(file1, file2):
"""Generate HTML side-by-side comparison."""
with open(file1) as f1, open(file2) as f2:
lines1 = f1.readlines()
lines2 = f2.readlines()

differ = difflib.HtmlDiff()
html = differ.make_file(
lines1,
lines2,
fromdesc=file1,
todesc=file2
)

with open("diff_report.html", "w") as out:
out.write(html)

print("Diff report saved to diff_report.html")

# For terminal output, use context_diff
def context_diff(file1, file2):
    """Print a context diff of two text files to the terminal.

    Changed lines are marked with "!" and shown with up to two lines of
    surrounding context. Nothing is printed when the files are identical.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.
    """
    with open(file1) as f1, open(file2) as f2:
        # Strip the line endings and let print() supply them. This way a
        # file whose last line has no trailing newline cannot run into the
        # diff line that follows it.
        lines1 = [line.rstrip("\n") for line in f1]
        lines2 = [line.rstrip("\n") for line in f2]

    diff = difflib.context_diff(
        lines1,
        lines2,
        fromfile=file1,
        tofile=file2,
        n=2,  # Lines of context
        lineterm='',
    )

    for line in diff:
        print(line)

# Usage: write the HTML report, then show a context diff in the terminal
side_by_side_diff(file1="config_1.txt", file2="config_2.txt")
context_diff(file1="config_1.txt", file2="config_2.txt")

Output:

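Diff report saved to diff_report.html
*** config_1.txt
--- config_2.txt
***************
*** 1,6 ****
  # Sample configuration
  host = 127.0.0.1
! port = 8080
  debug = True
  max_connections = 100
! log_level = INFO
--- 1,6 ----
  # Sample configuration
  host = 127.0.0.1
! port = 9090
  debug = True
  max_connections = 100
! log_level = DEBUG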

Ignoring Whitespace Differences

Sometimes you want to compare content while ignoring formatting:

from itertools import zip_longest

def compare_ignore_whitespace(file1, file2):
    """Find the first line that differs once surrounding whitespace is ignored.

    Lines are paired by position. When one file is shorter, its missing
    lines are treated as empty strings.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        False at the first mismatch (which is printed), True if none is found.
    """
    with open(file1) as left, open(file2) as right:
        pairs = zip_longest(left, right, fillvalue='')
        for number, (old, new) in enumerate(pairs, start=1):
            if old.strip() == new.strip():
                continue
            # Only trailing whitespace is trimmed for display, so leading
            # indentation stays visible in the report.
            print(f"First difference at line {number}:")
            print(f" < {old.rstrip()}")
            print(f" > {new.rstrip()}")
            return False

    print("Files are identical (ignoring leading/trailing whitespace).")
    return True


def compare_ignore_blank_lines(file1, file2):
    """Compare two text files while skipping blank lines entirely.

    Whitespace-only lines are dropped from both files and trailing
    whitespace is trimmed from the remaining lines before comparing.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        True if the remaining lines match, False otherwise. On a mismatch
        the first differing pair is printed. The reported number is a
        "logical" line number that counts non-blank lines only, so it can
        be smaller than the physical line number in either file.
    """
    with open(file1) as f1, open(file2) as f2:
        lines1 = [line.rstrip() for line in f1 if line.strip()]
        lines2 = [line.rstrip() for line in f2 if line.strip()]

    if lines1 == lines2:
        print("Files are identical (ignoring blank lines).")
        return True

    # Show the first difference. A file that runs out of lines is padded
    # with empty strings so the extra line on the other side is reported.
    for i, (line1, line2) in enumerate(
        zip_longest(lines1, lines2, fillvalue=''), start=1
    ):
        if line1 != line2:
            print(f"First Difference at logical line {i}:")
            print(f" < {line1}")
            print(f" > {line2}")
            break

    return False


# Usage: run both comparisons, separated by a blank line
compare_ignore_whitespace(file1="config_1.txt", file2="config_2.txt")
print()
compare_ignore_blank_lines(file1="config_1.txt", file2="config_2.txt")

Output:

First difference at line 3:
< port = 8080
> port = 9090

First Difference at logical line 3:
< port = 8080
> port = 9090

Finding Similar Lines with Ratios

Use SequenceMatcher to find how similar two files are:

import difflib

def similarity_ratio(file1, file2):
    """Return how similar the contents of two files are, as a percentage.

    The full text of each file is compared as a single string with
    difflib.SequenceMatcher.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        A float between 0 and 100.
    """
    with open(file1) as left, open(file2) as right:
        texts = (left.read(), right.read())

    matcher = difflib.SequenceMatcher(None, *texts)
    return matcher.ratio() * 100

# Usage: report the similarity with one decimal place
similarity = similarity_ratio(file1="config_1.txt", file2="config_2.txt")
print(f"Files are {similarity:.1f}% similar")

Output:

Files are 93.8% similar

Practical Example: Config File Comparison

A complete example for comparing configuration files:

import difflib
from pathlib import Path

def compare_configs(file1, file2, ignore_comments=True):
    """Compare two config files and print a unified diff of the differences.

    Args:
        file1: Path of the first config file (str or path-like).
        file2: Path of the second config file (str or path-like).
        ignore_comments: When True (the default), lines whose first
            non-blank character is "#" are left out of the comparison.

    Returns:
        True if the compared lines are equivalent, False otherwise.
        A missing newline at the very end of a file is not treated as a
        difference.
    """

    def read_config_lines(filepath):
        with open(filepath) as f:
            # Give every line a trailing newline. Without this, a final
            # line that lacks one gets glued to the next diff line when
            # the diff is joined for printing (e.g. "-b = 2+b = 3").
            lines = [
                line if line.endswith("\n") else line + "\n"
                for line in f
            ]
        if ignore_comments:
            lines = [
                line for line in lines
                if not line.strip().startswith('#')
            ]
        return lines

    lines1 = read_config_lines(file1)
    lines2 = read_config_lines(file2)

    diff = list(difflib.unified_diff(
        lines1, lines2,
        fromfile=str(file1),
        tofile=str(file2),
    ))

    if diff:
        print("Configuration differences found:")
        print(''.join(diff))
        return False

    print("Configurations are equivalent")
    return True

# Usage: comment lines are ignored by default
compare_configs(file1="config_1.txt", file2="config_2.txt")

Output:

Configuration differences found:
--- config_1.txt
+++ config_2.txt
@@ -1,5 +1,5 @@
host = 127.0.0.1
-port = 8080
+port = 9090
debug = True
max_connections = 100
-log_level = INFO
+log_level = DEBUG

Summary

| Method | Memory Usage | Output Type | Best For |
| --- | --- | --- | --- |
| zip() loop | Low (streaming) | Boolean / Custom | Quick checks, large files |
| zip_longest() | Low (streaming) | Custom | Files of different lengths |
| difflib.unified_diff | High (loads file) | Standard diff | Developer-friendly reports |
| difflib.HtmlDiff | High | HTML report | Visual comparison |
Best Practice

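For simple checks ("Are these configs identical?"), use a zip() loop for memory efficiency and full control, switching to zip_longest() whenever the files may differ in length, since plain zip() silently ignores extra lines. For displaying changes to users or generating reports, use difflib.unified_diff, which produces familiar Git-style output.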