How to Convert Folder Structure to JSON in Python
Creating a JSON representation of directory trees is useful for file explorers, documentation generators, and backup manifests. Python's pathlib module makes this task straightforward.
Basic Recursive Conversion
Use depth-first traversal to build a nested dictionary structure:
import json
from pathlib import Path
def dir_to_dict(path):
    """Convert a directory tree to a nested dictionary.

    Args:
        path: Directory to describe (string or path-like).

    Returns:
        A dict with the keys "name", "type" (always "directory") and
        "children". Each child is either a nested directory dict of the
        same shape or a ``{"name": ..., "type": "file"}`` leaf; children
        are listed in sorted order. If a PermissionError interrupts the
        listing, "error" is set to "Access Denied" and "children" keeps
        whatever was collected before the failure.
    """
    p = Path(path)
    # Path(".").name is "", so fall back to the resolved directory's name
    # (or to the anchor, e.g. "/", when the path is a filesystem root).
    named = p if p.name else p.resolve()
    node = {
        "name": named.name or named.anchor,
        "type": "directory",
        "children": [],
    }
    try:
        for item in sorted(p.iterdir()):
            if item.is_dir():
                node["children"].append(dir_to_dict(item))
            else:
                node["children"].append({"name": item.name, "type": "file"})
    except PermissionError:
        node["error"] = "Access Denied"
    return node
# Build the tree once, then reuse the rendered JSON for both outputs.
tree = dir_to_dict("./my_project")
rendered = json.dumps(tree, indent=2)

# Print the structure to the terminal.
print(rendered)

# Save the same structure to a file.
with open("structure.json", "w") as f:
    f.write(rendered)
Example Output
{
"name": "my_project",
"type": "directory",
"children": [
{"name": "main.py", "type": "file"},
{
"name": "src",
"type": "directory",
"children": [
{"name": "config.py", "type": "file"},
{"name": "utils.py", "type": "file"}
]
}
]
}
Adding File Metadata
Include size, modification time, and extension for richer output:
import json
from pathlib import Path
from datetime import datetime
def dir_to_dict_detailed(path):
    """Convert a directory tree to a nested dict that includes file metadata.

    Args:
        path: File or directory to describe (string or path-like).

    Returns:
        For a directory: a dict with "name", "type" ("directory") and
        "children" (sorted, same shapes recursively), plus "error" set to
        "Access Denied" if a PermissionError interrupted the listing.
        For anything else: a dict with "name", "type" ("file"),
        "extension" (the suffix, possibly ""), "size" in bytes and
        "modified" (st_mtime as a naive local ISO 8601 string).

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    p = Path(path)
    if not p.is_dir():
        # Everything that is not a directory is reported as a file. Testing
        # is_file() instead would send dangling symlinks, FIFOs and sockets
        # down the directory branch, where iterdir() raises.
        try:
            stat = p.stat()
        except OSError:
            # Dangling or looping symlink: describe the link itself.
            stat = p.lstat()
        return {
            "name": p.name,
            "type": "file",
            "extension": p.suffix,
            "size": stat.st_size,
            "modified": datetime.fromtimestamp(stat.st_mtime).isoformat(),
        }
    # Path(".").name is "", so fall back to the resolved directory's name
    # (or to the anchor, e.g. "/", when the path is a filesystem root).
    named = p if p.name else p.resolve()
    node = {
        "name": named.name or named.anchor,
        "type": "directory",
        "children": [],
    }
    try:
        for item in sorted(p.iterdir()):
            node["children"].append(dir_to_dict_detailed(item))
    except PermissionError:
        node["error"] = "Access Denied"
    return node
# Build the metadata-enriched tree and pretty-print it as JSON.
tree = dir_to_dict_detailed("./my_project")
rendered = json.dumps(tree, indent=2)
print(rendered)
Output with metadata:
{
"name": "my_project",
"type": "directory",
"children": [
{
"name": "main.py",
"type": "file",
"extension": ".py",
"size": 1024,
"modified": "2024-01-15T09:30:22"
}
]
}
Use sorted(p.iterdir()) to ensure consistent ordering across runs. Without sorting, directory order depends on the filesystem and may vary.
Filtering Files and Directories
Exclude certain patterns like hidden files or specific directories:
from pathlib import Path
import json
def dir_to_dict_filtered(path, exclude_patterns=None):
    """Convert a directory tree to a nested dict, skipping excluded entries.

    Args:
        path: Directory to describe (string or path-like).
        exclude_patterns: Optional list of patterns. An entry is skipped
            when its name equals a pattern or its path matches the pattern
            as a glob (``Path.match``), e.g. ".*", "__pycache__", "*.pyc".
            Excluded directories are not descended into.

    Returns:
        A dict with "name", "type" ("directory") and "children" (sorted;
        nested directory dicts or ``{"name": ..., "type": "file"}``
        leaves), plus "error" set to "Access Denied" if a PermissionError
        interrupted the listing.
    """
    exclude_patterns = exclude_patterns or []
    p = Path(path)

    def should_exclude(item):
        """Return True if *item* matches any exclusion pattern."""
        # Dot patterns get no special treatment: ".git" must exclude only
        # ".git", not every hidden entry. Use ".*" to hide all dot-files.
        return any(
            item.name == pattern or item.match(pattern)
            for pattern in exclude_patterns
        )

    # Path(".").name is "", so fall back to the resolved directory's name
    # (or to the anchor, e.g. "/", when the path is a filesystem root).
    named = p if p.name else p.resolve()
    node = {
        "name": named.name or named.anchor,
        "type": "directory",
        "children": [],
    }
    try:
        for item in sorted(p.iterdir()):
            if should_exclude(item):
                continue
            if item.is_dir():
                node["children"].append(
                    dir_to_dict_filtered(item, exclude_patterns)
                )
            else:
                node["children"].append({"name": item.name, "type": "file"})
    except PermissionError:
        node["error"] = "Access Denied"
    return node
# Skip hidden entries, __pycache__, node_modules, and compiled bytecode.
patterns = [".*", "__pycache__", "node_modules", "*.pyc"]
tree = dir_to_dict_filtered("./my_project", exclude_patterns=patterns)
rendered = json.dumps(tree, indent=2)
print(rendered)
Limiting Depth
Prevent deep recursion for large directory trees:
from pathlib import Path
import json
def dir_to_dict_limited(path, max_depth=3, current_depth=0):
    """Convert a directory tree to a nested dict, stopping at a depth limit.

    Args:
        path: Directory to describe (string or path-like).
        max_depth: Number of directory levels to list, counting the root as
            level 0. A directory at depth ``max_depth`` or deeper is
            returned with empty "children" and "truncated": True.
            ``None`` disables the limit.
        current_depth: Depth of *path* relative to the root; used by the
            recursion and normally left at its default.

    Returns:
        A dict with "name", "type" ("directory") and "children" (sorted;
        nested directory dicts or ``{"name": ..., "type": "file"}``
        leaves), plus "truncated" when the limit was hit or "error" set to
        "Access Denied" if a PermissionError interrupted the listing.
    """
    p = Path(path)
    # Path("./").name is "", so fall back to the resolved directory's name
    # (or to the anchor, e.g. "/", when the path is a filesystem root).
    named = p if p.name else p.resolve()
    node = {
        "name": named.name or named.anchor,
        "type": "directory",
        "children": [],
    }
    if max_depth is not None and current_depth >= max_depth:
        node["truncated"] = True
        return node
    try:
        for item in sorted(p.iterdir()):
            if item.is_dir():
                child = dir_to_dict_limited(item, max_depth, current_depth + 1)
                node["children"].append(child)
            else:
                node["children"].append({"name": item.name, "type": "file"})
    except PermissionError:
        node["error"] = "Access Denied"
    return node
# Describe the current directory with a depth limit of 2.
tree = dir_to_dict_limited("./", max_depth=2)
rendered = json.dumps(tree, indent=2)
print(rendered)
Deep directory trees can exhaust Python's recursion limit (1000 frames by default) and raise RecursionError when traversed with pure recursion. For extremely deep structures, consider an iterative approach with an explicit stack.
Generating File Paths List
Sometimes a flat list of paths is more useful than a nested structure:
from pathlib import Path
import json
def get_all_paths(path, relative=True):
    """Return a sorted, flat list of every file path found under *path*.

    Args:
        path: Directory to search recursively (string or path-like).
        relative: When True, paths are given relative to *path*; when
            False, each path is rendered as yielded by ``rglob``, i.e.
            prefixed with *path* itself.

    Returns:
        A list of path strings, sorted as plain strings. Only entries for
        which ``is_file()`` is true are included.
    """
    root = Path(path)
    files = (entry for entry in root.rglob("*") if entry.is_file())
    if relative:
        listing = [str(entry.relative_to(root)) for entry in files]
    else:
        listing = [str(entry) for entry in files]
    listing.sort()
    return listing
# Collect every file under the project and print the list as JSON.
files = get_all_paths("./my_project")
rendered = json.dumps(files, indent=2)
print(rendered)
Output:
[
"main.py",
"src/config.py",
"src/utils.py",
"tests/test_main.py"
]
Complete Solution with Options
Combine all features into a flexible function:
from pathlib import Path
from datetime import datetime
import json
def directory_to_json(
    path,
    include_metadata=False,
    exclude_patterns=None,
    max_depth=None,
    output_file=None
):
    """Convert a directory to a JSON-serializable tree with optional features.

    Args:
        path: File or directory to describe (string or path-like).
        include_metadata: When True, file nodes also carry "size" (bytes)
            and "modified" (st_mtime as a naive local ISO 8601 string).
        exclude_patterns: Optional list of glob patterns; entries whose
            path matches any of them (``Path.match``) are skipped.
        max_depth: Optional depth limit, counting the root as depth 0.
            A directory deeper than ``max_depth`` is emitted as
            ``{"name", "type": "directory", "truncated": True}`` without
            children. Files are always emitted as files. ``None`` means
            no limit.
        output_file: Optional path; when given, the tree is also written
            there as indented JSON.

    Returns:
        The tree as nested dicts: directory nodes have "name", "type" and
        "children" (sorted), plus "error" set to "Access Denied" if a
        PermissionError interrupted the listing; file nodes have "name"
        and "type" ("file"), plus metadata when requested.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    exclude_patterns = exclude_patterns or []

    def should_exclude(item):
        """Return True if *item* matches any exclusion pattern."""
        return any(item.match(pattern) for pattern in exclude_patterns)

    def build_tree(p, depth=0):
        """Build the node for *p*, which sits *depth* levels below the root."""
        # Classify the entry before applying the depth limit so that a file
        # below the limit is never mislabelled as a truncated directory.
        # Everything that is not a directory (including dangling symlinks,
        # FIFOs and sockets) is reported as a file.
        if not p.is_dir():
            node = {"name": p.name, "type": "file"}
            if include_metadata:
                try:
                    stat = p.stat()
                except OSError:
                    # Dangling or looping symlink: describe the link itself.
                    stat = p.lstat()
                node["size"] = stat.st_size
                node["modified"] = datetime.fromtimestamp(
                    stat.st_mtime
                ).isoformat()
            return node
        # Path(".").name is "", so fall back to the resolved directory's
        # name (or to the anchor, e.g. "/", for a filesystem root).
        named = p if p.name else p.resolve()
        name = named.name or named.anchor
        if max_depth is not None and depth > max_depth:
            return {"name": name, "type": "directory", "truncated": True}
        node = {"name": name, "type": "directory", "children": []}
        try:
            for item in sorted(p.iterdir()):
                if not should_exclude(item):
                    node["children"].append(build_tree(item, depth + 1))
        except PermissionError:
            node["error"] = "Access Denied"
        return node

    root = Path(path)
    if not root.is_dir():
        # A missing path must keep raising FileNotFoundError rather than
        # being silently reported as a file.
        root.lstat()
    tree = build_tree(root)
    if output_file:
        with open(output_file, "w") as f:
            json.dump(tree, f, indent=2)
    return tree
# Usage: collect the options first, then build, save, and print the tree.
options = {
    "include_metadata": True,
    "exclude_patterns": ["__pycache__", "*.pyc", ".*"],
    "max_depth": 3,
    "output_file": "structure.json",
}
tree = directory_to_json("./my_project", **options)
rendered = json.dumps(tree, indent=2)
print(rendered)
Summary
| Feature | Implementation |
|---|---|
| Basic tree | Recursive iterdir() |
| Metadata | Add stat() info |
| Filtering | Pattern matching before recursion |
| Depth limit | Track and check depth parameter |
| Error handling | Catch PermissionError |
Use pathlib for cross-platform compatibility and json.dump() with indent=2 for readable output. Always handle permission errors to prevent crashes when traversing system directories.