Skip to main content

How to Format Phone Numbers with Regex in Python

Standardizing phone numbers from various input formats into a consistent format like (XXX) XXX-XXXX is a common data cleaning task. Regular expressions make this straightforward.

Basic Phone Formatting

Strip non-digits and rearrange using capture groups:

import re

def format_phone(raw):
    """Format a 10-digit phone number as (XXX) XXX-XXXX.

    Every non-digit character in ``raw`` is discarded first, so any
    punctuation or spacing style is accepted.

    Args:
        raw: Phone number text in any common layout.

    Returns:
        The formatted number when ``raw`` contains exactly 10 digits;
        otherwise the bare digit string, left unformatted.
    """
    # Remove all non-digit characters
    digits = re.sub(r'\D', '', raw)

    # Anchored on purpose: an unanchored pattern rewrites the first 10 digits
    # of a longer run and yields output such as "(123) 456-78901".
    pattern = r'^(\d{3})(\d{3})(\d{4})$'
    return re.sub(pattern, r'(\1) \2-\3', digits)

# The same number written four different ways; each line prints the
# formatted result shown next to the sample.
for sample in (
    "1234567890",      # (123) 456-7890
    "123-456-7890",    # (123) 456-7890
    "(123) 456 7890",  # (123) 456-7890
    "123.456.7890",    # (123) 456-7890
):
    print(format_phone(sample))

Handling Country Codes

Handle optional +1 or 1 prefix for US numbers:

import re

def format_phone_with_country(raw):
    """Format a US phone number that may carry a leading country code.

    Args:
        raw: Phone number text; all non-digit characters are ignored.

    Returns:
        The number as (XXX) XXX-XXXX, or None when the input does not
        reduce to 10 digits (after dropping a leading 1 from an
        11-digit input).
    """
    digits = re.sub(r'\D', '', raw)

    # An 11-digit number beginning with 1 carries the US country code
    has_country_code = digits.startswith('1') and len(digits) == 11
    if has_country_code:
        digits = digits[1:]

    if len(digits) == 10:
        return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"
    return None  # Invalid length

# With "+1", with a bare leading 1, and with no country code at all
for sample in (
    "+1 234 567 8901",  # (234) 567-8901
    "12345678901",      # (234) 567-8901
    "234-567-8901",     # (234) 567-8901
):
    print(format_phone_with_country(sample))

The same result using a single anchored regex (the optional leading 1 is matched but not captured):

import re

def format_phone_regex(raw):
    """Format a US phone number using one anchored regular expression.

    Args:
        raw: Phone number text; all non-digit characters are ignored.

    Returns:
        The number as (XXX) XXX-XXXX, or None when the digits are not
        exactly 10 digits optionally preceded by a single 1.
    """
    digits = re.sub(r'\D', '', raw)

    # Optional leading 1 (not captured), then area code, exchange and
    # line number as groups 1-3; the anchors reject any extra digits.
    match = re.match(r'^1?(\d{3})(\d{3})(\d{4})$', digits)
    if match is None:
        return None
    return f"({match.group(1)}) {match.group(2)}-{match.group(3)}"

Formatting Multiple Numbers in Text

Find and format all phone numbers within a larger text:

import re

def format_phones_in_text(text):
    """Find and format all US phone numbers embedded in free text.

    Each number is rewritten as (XXX) XXX-XXXX; a leading "+1" or "1"
    country code is dropped. Text around the numbers is left untouched.

    Args:
        text: Arbitrary text that may contain phone numbers.

    Returns:
        A copy of ``text`` with every recognised phone number reformatted.
    """

    def replacer(match):
        found = match.group()
        source = match.string
        first = found[0]

        # The pattern's optional separator can also match the character
        # sitting just before a number that has no country code. Put it
        # back so the number does not fuse with the preceding word.
        lead = first if first in '-.' or first.isspace() else ''

        # Leave fragments of longer digit runs (card numbers, order ids)
        # alone: a digit glued to either end means this is not a phone.
        glued_before = (
            first.isdecimal()
            and source[match.start() - 1:match.start()].isdecimal()
        )
        glued_after = source[match.end():match.end() + 1].isdecimal()
        if glued_before or glued_after:
            return found

        digits = re.sub(r'\D', '', found)
        # Drop the country code the pattern allows in front of the number
        if len(digits) == 11 and digits[0] == '1':
            digits = digits[1:]
        if len(digits) != 10:
            return found
        return f"{lead}({digits[:3]}) {digits[3:6]}-{digits[6:]}"

    # Pattern matches common phone formats
    pattern = r'\+?1?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
    return re.sub(pattern, replacer, text)


text = "Call 1112223333 or 444-555-6666. Fax: (777) 888 9999"
print(format_phones_in_text(text))
# Call (111) 222-3333 or (444) 555-6666. Fax: (777) 888-9999
tip

The pattern \+?1?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4} handles most US phone formats including:

  • 1234567890
  • 123-456-7890
  • (123) 456-7890
  • +1 123 456 7890

Validation Before Formatting

Ensure numbers are valid before formatting:

import re

def validate_and_format(raw):
    """Validate a US phone number and return it as (XXX) XXX-XXXX.

    Args:
        raw: Phone number text; all non-digit characters are ignored.

    Returns:
        The formatted 10-digit number.

    Raises:
        ValueError: If the input does not reduce to 10 digits (after
            dropping a leading 1 from an 11-digit input), or if the
            area code begins with 0 or 1.
    """
    digits = re.sub(r'\D', '', raw)

    # Strip the country code from an 11-digit number
    if digits.startswith('1') and len(digits) == 11:
        digits = digits[1:]

    # Length is checked first so the area-code test below can index safely
    if len(digits) != 10:
        raise ValueError(f"Invalid phone number: expected 10 digits, got {len(digits)}")

    # Area codes never begin with 0 or 1
    if digits.startswith(('0', '1')):
        raise ValueError(f"Invalid area code: {digits[:3]}")

    return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"

# The first sample is accepted; the second has an area code starting with 0,
# so validate_and_format raises instead of returning a value.
for sample in (
    "234-567-8901",  # (234) 567-8901
    "034-567-8901",  # ValueError: Invalid area code: 034
):
    print(validate_and_format(sample))

Multiple Output Formats

Support different formatting styles:

import re

def format_phone_style(raw, style="standard"):
    """Render a US phone number in one of several named styles.

    Args:
        raw: Phone number text; all non-digit characters are ignored.
        style: One of "standard", "dashes", "dots", "plain", "e164" or
            "international". Any other value falls back to "standard".

    Returns:
        The number rendered in the requested style, or None when the
        input does not reduce to 10 digits (after dropping a leading 1
        from an 11-digit input).
    """
    digits = re.sub(r'\D', '', raw)

    # Strip the country code from an 11-digit number
    if digits[:1] == '1' and len(digits) == 11:
        digits = digits[1:]

    if len(digits) != 10:
        return None

    formats = {
        "standard": f"({digits[:3]}) {digits[3:6]}-{digits[6:]}",
        "dashes": f"{digits[:3]}-{digits[3:6]}-{digits[6:]}",
        "dots": f"{digits[:3]}.{digits[3:6]}.{digits[6:]}",
        "plain": digits,
        "e164": f"+1{digits}",
        "international": f"+1 {digits[:3]} {digits[3:6]} {digits[6:]}",
    }

    # Unknown style names quietly use the standard layout
    if style in formats:
        return formats[style]
    return formats["standard"]

phone = "123-456-7890"

# One number, four renderings; expected output is shown beside each style
for style in (
    "standard",       # (123) 456-7890
    "dashes",         # 123-456-7890
    "e164",           # +11234567890
    "international",  # +1 123 456 7890
):
    print(format_phone_style(phone, style))

Using the phonenumbers Library

For production use with international numbers, use the phonenumbers library:

import phonenumbers

def format_international(raw, region="US"):
    """Parse a phone number with the phonenumbers library and format it.

    Args:
        raw: Phone number text to parse.
        region: Region code passed to ``phonenumbers.parse`` alongside
            ``raw``; defaults to "US".

    Returns:
        A dict with "national", "international" and "e164" renderings,
        or None when parsing raises NumberParseException or the parsed
        number is reported as not valid.
    """
    # Output key -> library format constant, in the order they are returned
    styles = {
        "national": phonenumbers.PhoneNumberFormat.NATIONAL,
        "international": phonenumbers.PhoneNumberFormat.INTERNATIONAL,
        "e164": phonenumbers.PhoneNumberFormat.E164,
    }

    try:
        parsed = phonenumbers.parse(raw, region)

        if not phonenumbers.is_valid_number(parsed):
            return None

        return {
            label: phonenumbers.format_number(parsed, number_format)
            for label, number_format in styles.items()
        }
    except phonenumbers.NumberParseException:
        return None

# Prints a single dict holding all three renderings of the number:
# {'national': '(234) 567-8901', 'international': '+1 234-567-8901', 'e164': '+12345678901'}
formatted = format_international("234-567-8901")
print(formatted)
warning

Regex-based formatting works well for US numbers but struggles with international formats. The phonenumbers library handles 200+ countries with proper validation rules.

Regex Quick Reference

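| Task | Pattern | Example |
|------|---------|---------|
| Remove non-digits | `\D` | `re.sub(r'\D', '', s)` |
| Match 3 digits | `\d{3}` | Matches `123` |
| Capture group | `(\d{3})` | Access with `\1` |
| Non-capturing group | `(?:\d{3})` | Match but don't save |
| Optional character | `\+?` | `+` or nothing |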

Summary

Use re.sub(r'\D', '', raw) to strip non-digits, then apply capture groups to rearrange into your desired format.

  • For simple US number formatting, regex works well.
  • For production applications handling international numbers or requiring validation, use the phonenumbers library.