How to Format Phone Numbers with Regex in Python
Standardizing phone numbers from various input formats into a consistent format like (XXX) XXX-XXXX is a common data cleaning task. Regular expressions make this straightforward.
Basic Phone Formatting
Strip non-digits and rearrange using capture groups:
import re
def format_phone(raw):
    """Format a 10-digit phone number as (XXX) XXX-XXXX.

    Every non-digit character in ``raw`` is discarded first, so dashes,
    dots, spaces and parentheses in the input are all accepted.

    Args:
        raw: Phone number text in any punctuation style.

    Returns:
        The formatted number when exactly 10 digits remain; otherwise the
        bare digit string, left unformatted.
    """
    # Remove all non-digit characters
    digits = re.sub(r'\D', '', raw)
    # Anchor both ends: an unanchored pattern would rewrite only the first
    # 10 digits of a longer string and leave the rest dangling
    # (e.g. "11234567890" -> "(112) 345-67890").
    pattern = r'\A(\d{3})(\d{3})(\d{4})\Z'
    return re.sub(pattern, r'(\1) \2-\3', digits)
# Four punctuation styles, one normalized result
for sample in ("1234567890", "123-456-7890", "(123) 456 7890", "123.456.7890"):
    print(format_phone(sample))  # (123) 456-7890 every time
Handling Country Codes
Handle optional +1 or 1 prefix for US numbers:
import re
def format_phone_with_country(raw):
    """Format a US phone number as (XXX) XXX-XXXX, tolerating a country code.

    Non-digit characters are stripped from ``raw``. If 11 digits remain and
    the first is ``1``, that leading digit is treated as the country code
    and dropped.

    Args:
        raw: Phone number text, with or without a ``+1``/``1`` prefix.

    Returns:
        The formatted number, or ``None`` when the remaining digit count
        is not exactly 10.
    """
    number = re.sub(r'\D', '', raw)
    has_country_code = len(number) == 11 and number[0] == '1'
    if has_country_code:
        number = number[1:]
    if len(number) == 10:
        area, exchange, line = number[:3], number[3:6], number[6:]
        return f"({area}) {exchange}-{line}"
    # Anything other than 10 digits is rejected
    return None
# With "+1", with a bare leading 1, and with no country code at all
for sample in ("+1 234 567 8901", "12345678901", "234-567-8901"):
    print(format_phone_with_country(sample))  # (234) 567-8901 every time
Using a single anchored regex with capture groups and an optional country-code prefix:
import re
def format_phone_regex(raw):
    """Format a US phone number using one anchored regular expression.

    Non-digit characters are stripped from ``raw``; the remaining digits
    must be exactly 10 digits, optionally preceded by a single ``1``.

    Args:
        raw: Phone number text in any punctuation style.

    Returns:
        The number as ``(XXX) XXX-XXXX``, or ``None`` when the digits do
        not match the expected shape.
    """
    stripped = re.sub(r'\D', '', raw)
    # Optional leading 1, then area code / exchange / line number
    found = re.match(r'^1?(\d{3})(\d{3})(\d{4})$', stripped)
    if found is None:
        return None
    area, exchange, line = found.groups()
    return f"({area}) {exchange}-{line}"
Formatting Multiple Numbers in Text
Find and format all phone numbers within a larger text:
import re
def format_phones_in_text(text):
    """Find and format all US phone numbers embedded in text.

    Each match of the phone pattern is rewritten as ``(XXX) XXX-XXXX``.
    A leading ``1`` country code (with or without ``+``) is dropped from
    the formatted result. Text around the numbers is left untouched.

    Args:
        text: Arbitrary text that may contain phone numbers.

    Returns:
        A copy of ``text`` with every recognized phone number reformatted.
    """
    def replacer(match):
        token = match.group()
        # With no country code present, the pattern's first optional
        # separator can swallow the character *before* the number (usually
        # a space). Remember it so it is written back instead of dropped.
        lead = token[0] if token[0] in '-.' or token[0].isspace() else ''

        # Skip matches that are only a slice of a longer run of digits
        # (order numbers, IDs) rather than a standalone phone number.
        source = match.string
        begin, end = match.span()
        if end < len(source) and source[end].isdecimal():
            return token
        if not lead and begin > 0 and source[begin - 1].isdecimal():
            return token

        digits = re.sub(r'\D', '', token)
        # Drop the US country code so "+1 ..." numbers get formatted too
        if len(digits) == 11 and digits[0] == '1':
            digits = digits[1:]
        if len(digits) == 10:
            return f"{lead}({digits[:3]}) {digits[3:6]}-{digits[6:]}"
        return token

    # Pattern matches common phone formats
    pattern = r'\+?1?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
    return re.sub(pattern, replacer, text)


text = "Call 1112223333 or 444-555-6666. Fax: (777) 888 9999"
print(format_phones_in_text(text))
# Call (111) 222-3333 or (444) 555-6666. Fax: (777) 888-9999
The pattern \+?1?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4} handles most US phone formats including:
- 1234567890
- 123-456-7890
- (123) 456-7890
- +1 123 456 7890
Validation Before Formatting
Ensure numbers are valid before formatting:
import re
def validate_and_format(raw):
    """Validate a US phone number and return it as (XXX) XXX-XXXX.

    Non-digit characters are stripped from ``raw`` and a leading ``1``
    country code is dropped when 11 digits are present.

    Args:
        raw: Phone number text in any punctuation style.

    Returns:
        The formatted 10-digit number.

    Raises:
        ValueError: If the digit count is not 10, or if the area code
            begins with ``0`` or ``1``.
    """
    number = re.sub(r'\D', '', raw)
    # An 11-digit number starting with 1 carries the country code
    if len(number) == 11 and number.startswith('1'):
        number = number[1:]

    count = len(number)
    if count != 10:
        raise ValueError(f"Invalid phone number: expected 10 digits, got {count}")

    area, exchange, line = number[:3], number[3:6], number[6:]
    # Area codes may not begin with 0 or 1
    if area[0] in ('0', '1'):
        raise ValueError(f"Invalid area code: {area}")
    return f"({area}) {exchange}-{line}"
# The first number is valid; the second has an area code starting with 0
for candidate in ("234-567-8901", "034-567-8901"):
    print(validate_and_format(candidate))
# (234) 567-8901
# ValueError: Invalid area code: 034
Multiple Output Formats
Support different formatting styles:
import re
def format_phone_style(raw, style="standard"):
    """Format a US phone number in one of several named styles.

    Non-digit characters are stripped from ``raw`` and a leading ``1``
    country code is dropped when 11 digits are present.

    Args:
        raw: Phone number text in any punctuation style.
        style: One of ``"standard"``, ``"dashes"``, ``"dots"``, ``"plain"``,
            ``"e164"`` or ``"international"``. Any other value falls back
            to ``"standard"``.

    Returns:
        The formatted number, or ``None`` when the remaining digit count
        is not exactly 10.
    """
    number = re.sub(r'\D', '', raw)
    if len(number) == 11 and number.startswith('1'):
        number = number[1:]
    if len(number) != 10:
        return None

    area, exchange, line = number[:3], number[3:6], number[6:]
    parts = (area, exchange, line)
    standard = f"({area}) {exchange}-{line}"
    renderings = {
        "standard": standard,
        "dashes": "-".join(parts),
        "dots": ".".join(parts),
        "plain": number,
        "e164": "+1" + number,
        "international": " ".join(("+1",) + parts),
    }
    # Unknown style names quietly use the standard layout
    return renderings.get(style, standard)
phone = "123-456-7890"
for style_name in ("standard", "dashes", "e164", "international"):
    print(format_phone_style(phone, style_name))
# Output, one line per style:
# (123) 456-7890
# 123-456-7890
# +11234567890
# +1 123 456 7890
Using the phonenumbers Library
For production use with international numbers, use the phonenumbers library:
import phonenumbers
def format_international(raw, region="US"):
    """Parse a phone number with ``phonenumbers`` and render it three ways.

    Args:
        raw: Phone number text to parse.
        region: Region code passed to ``phonenumbers.parse``; defaults
            to ``"US"``.

    Returns:
        A dict with ``"national"``, ``"international"`` and ``"e164"``
        renderings, or ``None`` when the number cannot be parsed or is
        reported as invalid by ``phonenumbers.is_valid_number``.
    """
    try:
        number = phonenumbers.parse(raw, region)
        if not phonenumbers.is_valid_number(number):
            return None
        styles = phonenumbers.PhoneNumberFormat
        # Output key paired with the library format constant it uses
        wanted = (
            ("national", styles.NATIONAL),
            ("international", styles.INTERNATIONAL),
            ("e164", styles.E164),
        )
        return {
            label: phonenumbers.format_number(number, code)
            for label, code in wanted
        }
    except phonenumbers.NumberParseException:
        return None
# Uses the default region ("US")
print(format_international(raw="234-567-8901"))
# {'national': '(234) 567-8901', 'international': '+1 234-567-8901', 'e164': '+12345678901'}
Regex-based formatting works well for US numbers but struggles with international formats. The phonenumbers library handles 200+ countries with proper validation rules.
Regex Quick Reference
| Task | Pattern | Example |
|---|---|---|
| Remove non-digits | \D | re.sub(r'\D', '', s) |
| Match 3 digits | \d{3} | Matches 123 |
| Capture group | (\d{3}) | Access with \1 |
| Non-capturing group | (?:\d{3}) | Match but don't save |
| Optional character | \+? | + or nothing |
Summary
Use re.sub(r'\D', '', raw) to strip non-digits, then apply capture groups to rearrange into your desired format.
- For simple US number formatting, regex works well.
- For production applications handling international numbers or requiring validation, use the phonenumbers library.