Skip to main content

How to Convert Folder Structure to JSON in Python

Creating a JSON representation of directory trees is useful for file explorers, documentation generators, and backup manifests. Python's pathlib module makes this task straightforward.

Basic Recursive Conversion

Use depth-first traversal to build a nested dictionary structure:

import json
from pathlib import Path

def dir_to_dict(path):
    """Describe a directory tree as a nested dictionary.

    Args:
        path: Directory to walk, as a string or ``Path``.

    Returns:
        A dict with ``"name"``, ``"type": "directory"`` and a ``"children"``
        list. Sub-directories appear as nested dicts of the same shape;
        every other entry appears as ``{"name": ..., "type": "file"}``.
        If a ``PermissionError`` interrupts the listing, the node also
        carries ``"error": "Access Denied"`` and keeps whatever children
        were collected before the failure.
    """
    root = Path(path)
    children = []
    tree = {"name": root.name, "type": "directory", "children": children}

    try:
        # Sort so the output order does not depend on the filesystem.
        entries = sorted(root.iterdir())
        for entry in entries:
            children.append(
                dir_to_dict(entry)
                if entry.is_dir()
                else {"name": entry.name, "type": "file"}
            )
    except PermissionError:
        tree["error"] = "Access Denied"

    return tree

# Convert the project directory into a nested dictionary
tree = dir_to_dict("./my_project")

# Print the structure to the terminal
print(json.dumps(tree, indent=2))

# Save the structure to a file
with open("structure.json", "w") as f:
    json.dump(tree, f, indent=2)

Example Output

{
"name": "my_project",
"type": "directory",
"children": [
{"name": "main.py", "type": "file"},
{
"name": "src",
"type": "directory",
"children": [
{"name": "config.py", "type": "file"},
{"name": "utils.py", "type": "file"}
]
}
]
}

Adding File Metadata

Include size, modification time, and extension for richer output:

import json
from pathlib import Path
from datetime import datetime

def dir_to_dict_detailed(path):
    """Convert a directory tree to a nested dictionary with file metadata.

    Args:
        path: Directory (or single regular file) to describe, as a string
            or ``Path``.

    Returns:
        For a regular file, a dict with ``"name"``, ``"type": "file"``,
        ``"extension"`` (the path suffix), ``"size"`` in bytes and
        ``"modified"`` (naive local time in ISO 8601 format).
        For a directory, a dict with ``"name"``, ``"type": "directory"``
        and a sorted ``"children"`` list; ``"error": "Access Denied"`` is
        added when a ``PermissionError`` interrupts the listing.
        Directory entries that are neither a regular file nor a directory
        (dangling symlinks, FIFOs, sockets, ...) are reported as
        ``{"name": ..., "type": "other"}``.
    """
    p = Path(path)

    if p.is_file():
        stat = p.stat()
        return {
            "name": p.name,
            "type": "file",
            "extension": p.suffix,
            "size": stat.st_size,
            "modified": datetime.fromtimestamp(stat.st_mtime).isoformat(),
        }

    node = {
        "name": p.name,
        "type": "directory",
        "children": [],
    }

    try:
        for item in sorted(p.iterdir()):
            if item.is_dir() or item.is_file():
                node["children"].append(dir_to_dict_detailed(item))
            else:
                # Recursing into a dangling symlink or special file would
                # call iterdir() on a non-directory and abort the whole
                # walk with an uncaught OSError, so record it as a leaf.
                node["children"].append({"name": item.name, "type": "other"})
    except PermissionError:
        node["error"] = "Access Denied"

    return node

# Build the tree with per-file metadata and print it as indented JSON.
tree = dir_to_dict_detailed("./my_project")
print(json.dumps(tree, indent=2))

Output with metadata:

{
"name": "my_project",
"type": "directory",
"children": [
{
"name": "main.py",
"type": "file",
"extension": ".py",
"size": 1024,
"modified": "2024-01-15T09:30:22"
}
]
}
tip

Use sorted(p.iterdir()) to ensure consistent ordering across runs. Without sorting, directory order depends on the filesystem and may vary.

Filtering Files and Directories

Exclude certain patterns like hidden files or specific directories:

from pathlib import Path
import json

def dir_to_dict_filtered(path, exclude_patterns=None):
    """Convert a directory tree to a nested dictionary, skipping exclusions.

    Args:
        path: Directory to walk, as a string or ``Path``.
        exclude_patterns: Optional list of entry names or glob patterns
            (for example ``"node_modules"``, ``"*.pyc"`` or ``".*"`` for
            hidden entries). An entry is skipped when its name equals a
            pattern or when ``Path.match`` accepts the pattern. Excluded
            directories are not descended into.

    Returns:
        A dict with ``"name"``, ``"type": "directory"`` and a sorted
        ``"children"`` list; ``"error": "Access Denied"`` is added when a
        ``PermissionError`` interrupts the listing.
    """
    exclude_patterns = exclude_patterns or []
    p = Path(path)

    def should_exclude(item):
        # Each pattern is matched on its own terms. A dot-prefixed pattern
        # such as ".git" must exclude only ".git", not every hidden entry;
        # the glob ".*" still hides all dot-files through Path.match.
        name = item.name
        return any(
            name == pattern or item.match(pattern)
            for pattern in exclude_patterns
        )

    node = {"name": p.name, "type": "directory", "children": []}

    try:
        for item in sorted(p.iterdir()):
            if should_exclude(item):
                continue

            if item.is_dir():
                node["children"].append(
                    dir_to_dict_filtered(item, exclude_patterns)
                )
            else:
                node["children"].append({"name": item.name, "type": "file"})
    except PermissionError:
        node["error"] = "Access Denied"

    return node

# Exclude hidden files, __pycache__, node_modules, and compiled .pyc files
tree = dir_to_dict_filtered(
    "./my_project",
    exclude_patterns=[".*", "__pycache__", "node_modules", "*.pyc"]
)
print(json.dumps(tree, indent=2))

Limiting Depth

Prevent deep recursion for large directory trees:

from pathlib import Path
import json

def dir_to_dict_limited(path, max_depth=3, current_depth=0):
    """Convert a directory tree to a nested dictionary with a depth limit.

    Args:
        path: Directory to walk, as a string or ``Path``.
        max_depth: Depth at which directories stop being listed. The root
            is depth 0, so ``max_depth=0`` returns only a truncated root.
        current_depth: Depth of ``path`` relative to the original root;
            used by the recursive calls and normally left at 0.

    Returns:
        A dict with ``"name"``, ``"type": "directory"`` and a sorted
        ``"children"`` list. Directories at ``max_depth`` are returned
        with an empty ``"children"`` list and ``"truncated": True``.
        ``"error": "Access Denied"`` is added when a ``PermissionError``
        interrupts the listing.
    """
    p = Path(path)
    # Path(".") and Path("./") have an empty name; fall back to the
    # resolved directory name so the root node is still labelled.
    name = p.name or p.resolve().name
    node = {"name": name, "type": "directory", "children": []}

    if current_depth >= max_depth:
        node["truncated"] = True
        return node

    try:
        for item in sorted(p.iterdir()):
            if item.is_dir():
                child = dir_to_dict_limited(item, max_depth, current_depth + 1)
                node["children"].append(child)
            else:
                node["children"].append({"name": item.name, "type": "file"})
    except PermissionError:
        node["error"] = "Access Denied"

    return node

# Walk the current directory with max_depth=2 and print the result as JSON.
tree = dir_to_dict_limited("./", max_depth=2)
print(json.dumps(tree, indent=2))
warning

Very deep directory trees can exceed Python's recursion limit (about 1,000 frames by default) and raise RecursionError when walked with pure recursion. For extremely deep structures, consider an iterative approach that manages an explicit stack.

Generating File Paths List

Sometimes a flat list of paths is more useful than nested structure:

from pathlib import Path
import json

def get_all_paths(path, relative=True):
    """Return a sorted, flat list of every file path under ``path``.

    Args:
        path: Directory to search recursively, as a string or ``Path``.
        relative: When true (the default), paths are expressed relative
            to ``path``; otherwise each path is rendered as found, i.e.
            prefixed with ``path`` itself.

    Returns:
        A list of path strings, sorted as plain strings. Directories are
        not included.
    """
    root = Path(path)
    files = (entry for entry in root.rglob("*") if entry.is_file())

    if relative:
        collected = [str(entry.relative_to(root)) for entry in files]
    else:
        collected = [str(entry) for entry in files]

    collected.sort()
    return collected

# Collect the project's file paths as a flat list and print them as JSON.
files = get_all_paths("./my_project")
print(json.dumps(files, indent=2))

Output:

[
"main.py",
"src/config.py",
"src/utils.py",
"tests/test_main.py"
]

Complete Solution with Options

Combine all features into a flexible function:

from pathlib import Path
from datetime import datetime
import json

def directory_to_json(
    path,
    include_metadata=False,
    exclude_patterns=None,
    max_depth=None,
    output_file=None
):
    """Convert a directory tree to a JSON-ready dict with configurable options.

    Args:
        path: Directory (or single regular file) to describe.
        include_metadata: When true, file nodes also carry ``"size"`` in
            bytes and ``"modified"`` (naive local time, ISO 8601).
        exclude_patterns: Optional list of glob patterns checked with
            ``Path.match``; matching entries are skipped and matching
            directories are not descended into.
        max_depth: Optional depth limit. The root is depth 0; directories
            deeper than ``max_depth`` are not listed and are marked
            ``"truncated": True``. ``None`` means no limit.
        output_file: Optional path; when given, the tree is also written
            there as JSON with ``indent=2``.

    Returns:
        The tree as nested dicts. Directory nodes have ``"name"``,
        ``"type": "directory"`` and ``"children"`` (plus
        ``"error": "Access Denied"`` if listing was interrupted by a
        ``PermissionError``). File nodes have ``"name"`` and
        ``"type": "file"``. Entries that are neither a regular file nor a
        directory are reported as ``{"name": ..., "type": "other"}``.
    """
    exclude_patterns = exclude_patterns or []

    def should_exclude(item):
        return any(item.match(pattern) for pattern in exclude_patterns)

    def build_tree(p, depth=0):
        # Handle files before the depth check: the limit only governs how
        # far directories are expanded, and a file past the limit must not
        # be reported as a truncated directory.
        if p.is_file():
            node = {"name": p.name, "type": "file"}
            if include_metadata:
                stat = p.stat()
                node["size"] = stat.st_size
                node["modified"] = datetime.fromtimestamp(stat.st_mtime).isoformat()
            return node

        if max_depth is not None and depth > max_depth:
            # Keep "children" present so every directory node has the
            # same shape for consumers that iterate it.
            return {
                "name": p.name,
                "type": "directory",
                "children": [],
                "truncated": True,
            }

        node = {"name": p.name, "type": "directory", "children": []}

        try:
            for item in sorted(p.iterdir()):
                if should_exclude(item):
                    continue
                if item.is_dir() or item.is_file():
                    node["children"].append(build_tree(item, depth + 1))
                else:
                    # Dangling symlinks and special files cannot be
                    # listed; recursing would raise an uncaught OSError.
                    node["children"].append(
                        {"name": item.name, "type": "other"}
                    )
        except PermissionError:
            node["error"] = "Access Denied"

        return node

    tree = build_tree(Path(path))

    if output_file:
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(tree, f, indent=2)

    return tree

# Usage: build the tree with metadata, skip caches, compiled files and hidden
# entries, limit the depth, and also write the result to structure.json.
tree = directory_to_json(
    "./my_project",
    include_metadata=True,
    exclude_patterns=["__pycache__", "*.pyc", ".*"],
    max_depth=3,
    output_file="structure.json"
)
print(json.dumps(tree, indent=2))

Summary

| Feature | Implementation |
| --- | --- |
| Basic tree | Recursive iterdir() |
| Metadata | Add stat() info |
| Filtering | Pattern matching before recursion |
| Depth limit | Track and check depth parameter |
| Error handling | Catch PermissionError |

Use pathlib for cross-platform compatibility and json.dump() with indent=2 for readable output. Always handle permission errors to prevent crashes when traversing system directories.