Skip to main content

How to Convert Text File to CSV in Python

Text files from logs, legacy systems, or data exports often contain structured data that needs standardization for analysis. Converting to CSV format enables compatibility with spreadsheets, databases, and data processing tools.

Convert Delimited Text Files with Pandas

Pandas provides robust parsing with automatic type detection, delimiter handling, and encoding management.

import pandas as pd

# Load the tab-delimited source file into a DataFrame.
frame = pd.read_csv("data.txt", sep="\t")

# Write it back out comma-separated, dropping the positional index.
frame.to_csv("data.csv", index=False)

print(f"Converted {len(frame)} rows")

Handle Various Delimiters

import pandas as pd

# Pipe-delimited file
pipe_frame = pd.read_csv("data.txt", sep="|")

# Semicolon-delimited (common in European exports)
semi_frame = pd.read_csv("data.txt", sep=";")

# Treat any run of whitespace as the separator (needs the python engine)
space_frame = pd.read_csv("data.txt", sep=r"\s+", engine="python")

# sep=None lets the python engine sniff the delimiter from the data
auto_frame = pd.read_csv("data.txt", sep=None, engine="python")
> **Tip:** Use `sep=None` with `engine="python"` to let Pandas automatically detect the delimiter. This works well for files whose format is unknown.

Parse Fixed-Width Text Files

Legacy mainframe exports and report files often use fixed-width columns aligned by spaces.

ID    Name         Department   Salary
001   Alice        Engineering  75000
002   Bob          Marketing    65000
003   Charlie      Sales        70000
import pandas as pd

# read_fwf infers the column boundaries from the whitespace layout.
report = pd.read_fwf("legacy_report.txt")

print(report)
#    ID     Name   Department  Salary
# 0   1    Alice  Engineering   75000
# 1   2      Bob    Marketing   65000
# 2   3  Charlie        Sales   70000

# Persist the parsed table as a standard CSV.
report.to_csv("legacy_report.csv", index=False)

Specify Column Widths Manually

import pandas as pd

# Explicit (start, end) character offsets for each fixed-width column.
column_positions = [
    (0, 5),    # ID
    (6, 18),   # Name
    (19, 31),  # Department
    (32, 40),  # Salary
]

report = pd.read_fwf(
    "legacy_report.txt",
    colspecs=column_positions,
    names=["ID", "Name", "Department", "Salary"],
)

Convert Using Standard Library

For lightweight conversions without external dependencies, use Python's built-in csv module.

import csv

# Convert a pipe-delimited text file to a standard comma-separated CSV
# using only the standard library.
# newline='' on BOTH files: the csv module performs its own newline
# handling, and the docs require it for reader and writer file objects.
with open('input.txt', 'r', newline='') as infile, \
        open('output.csv', 'w', newline='') as outfile:

    # Configure reader for the source format (pipe-delimited)
    reader = csv.reader(infile, delimiter='|')

    # Standard CSV writer
    writer = csv.writer(outfile)

    # Stream every row straight across without buffering the whole file
    writer.writerows(reader)

print("Conversion complete")

Process and Transform During Conversion

import csv

# Convert a tab-delimited file to CSV, cleaning each row on the way through.
with open('input.txt', 'r', newline='') as infile, \
        open('output.csv', 'w', newline='') as outfile:

    reader = csv.reader(infile, delimiter='\t')
    writer = csv.writer(outfile)

    # Copy the header through unchanged. next(reader, None) instead of
    # next(reader) so a completely empty input file does not raise
    # StopIteration.
    header = next(reader, None)
    if header is not None:
        writer.writerow(header)

        # Process each row
        for row in reader:
            # Skip rows whose fields are all empty
            if not any(row):
                continue

            # Clean whitespace from each field
            writer.writerow([field.strip() for field in row])

Parse Log Files with Custom Patterns

Log files require regex-based parsing to extract structured data.

import re
import csv

# Sample log format: [2024-01-15 10:30:45] INFO: User login - user=alice ip=192.168.1.1
# Compile once — the pattern is applied to every line of the log file.
log_pattern = re.compile(
    r'\[([\d-]+ [\d:]+)\] (\w+): (.+) - user=(\w+) ip=([\d.]+)'
)

with open('app.log', 'r') as infile, \
        open('logs.csv', 'w', newline='') as outfile:

    writer = csv.writer(outfile)
    writer.writerow(['timestamp', 'level', 'message', 'user', 'ip'])

    # Lines that do not match the expected format are skipped.
    for line in infile:
        match = log_pattern.match(line)
        if match:
            writer.writerow(match.groups())

Parse Common Log Format (Web Server Logs)

import pandas as pd
import re

# Apache/Nginx common log format: host ident authuser [date] "request" status bytes.
# The bytes field is "-" (not a digit) when the response carried no body,
# so match \S+ there and translate the dash to 0 below; a plain (\d+)
# would silently drop those lines.
log_pattern = re.compile(r'(\S+) \S+ \S+ \[(.*?)\] "(.*?)" (\d+) (\S+)')

records = []

with open('access.log', 'r') as f:
    for line in f:
        match = log_pattern.match(line)
        if match:
            size = match.group(5)
            records.append({
                'ip': match.group(1),
                'timestamp': match.group(2),
                'request': match.group(3),
                'status': int(match.group(4)),
                # "-" means no body was returned; record it as 0 bytes.
                'bytes': 0 if size == '-' else int(size),
            })

df = pd.DataFrame(records)
df.to_csv('access_logs.csv', index=False)

print(f"Parsed {len(df)} log entries")

Handle Encoding Issues

Text files from different systems may use various encodings.

import pandas as pd

# Try common encodings until one decodes the file cleanly.
# NOTE: latin-1 (iso-8859-1) maps every possible byte to a character, so
# it never raises UnicodeDecodeError — it must come LAST, otherwise
# encodings listed after it (like cp1252) are unreachable.
encodings = ['utf-8', 'cp1252', 'latin-1', 'iso-8859-1']

df = None
for encoding in encodings:
    try:
        df = pd.read_csv("data.txt", sep="\t", encoding=encoding)
        print(f"Successfully read with {encoding}")
        break
    except UnicodeDecodeError:
        continue

# Fail loudly instead of hitting a NameError on an unbound df below.
if df is None:
    raise ValueError("Could not decode data.txt with any candidate encoding")

# Save as UTF-8 CSV (universal compatibility)
df.to_csv("data.csv", index=False, encoding='utf-8')
> **Warning:** Windows systems often produce files in cp1252 encoding. If you see garbled characters after reading with utf-8, try cp1252 or latin-1.

Handle Files Without Headers

import pandas as pd

# The source file carries raw data only, so supply the column names here
# and tell the parser there is no header row to consume.
column_names = ["ID", "Name", "Value", "Date"]
df = pd.read_csv("data.txt", sep="\t", header=None, names=column_names)

df.to_csv("data.csv", index=False)

Skip Metadata and Comments

Many text files include header comments or metadata before the actual data.

import pandas as pd

# Jump past the first 5 lines of metadata before the real header.
df = pd.read_csv("data.txt", sep="\t", skiprows=5)

# Ignore any line that starts with the '#' comment marker.
df = pd.read_csv("data.txt", sep="\t", comment='#')

# Drop the trailing 3 summary lines (skipfooter needs the python engine).
df = pd.read_csv("data.txt", sep="\t", skipfooter=3, engine='python')

Quick Reference

| Text Format    | Pandas Method                             | Standard Library                |
| -------------- | ----------------------------------------- | ------------------------------- |
| Tab-delimited  | `read_csv(f, sep='\t')`                   | `csv.reader(f, delimiter='\t')` |
| Pipe-delimited | `read_csv(f, sep='\|')`                   | `csv.reader(f, delimiter='\|')` |
| Fixed-width    | `read_fwf(f)`                             | Manual slicing                  |
| Auto-detect    | `read_csv(f, sep=None, engine='python')`  | Not available                   |
| Log files      | Regex + DataFrame                         | Regex + `csv.writer`            |

Conclusion

Use Pandas for complex text file parsing. It handles delimiters, encodings, missing data, and type inference automatically. The read_fwf() function specifically addresses fixed-width legacy formats. For simple conversions on systems without Pandas, the standard csv module provides lightweight delimiter transformation. For log files and irregular formats, combine regex pattern matching with either approach.