ta-tocgen
readme
md
# ta-tocgen
指定したディレクトリ配下を走査し、Markdown ドキュメント群の目次を生成するツール。
- 出力モードは `tree` または `table`
- `table` の出力形式は複数対応。詳細は [table 出力仕様](docs/specs/output-table.md) を参照
- ディレクトリ単位 / ファイル単位で Metadata を持たせ、表示名や並び順、追加列に反映できる
## 最小実行例
```bash
python ta-tocgen.py docs
python ta-tocgen.py docs --out-mode table --table-format csv
python ta-tocgen.py docs --out-mode table --table-format md
python ta-tocgen.py docs --out-mode table --table-format xlsx --table-columns sequence,dirnames-title,filename-title,description
```
## 入出力イメージ
### tree モード
```md
# ${title}
## Files and Directories
- [aaa](aaa/)/
- [aaa-1](aaa/aaa-1/)/
- [aaa-11](aaa/aaa-1/aaa-11.md)
```
### table モード
```csv
${title}
directories , , , , filename
aaa , aaa-1 , , , aaa-11.md
, , , , aaa-12.md
, aaa-2 , , , aaa-21.md
bbb , bbb-1 , bbb-11 , , bbb-111.md
, bbb-2 , , , bbb-21.md
, , , , bbb-22.md
```
## Metadata 記述例
### ディレクトリ Metadata
`_toc-metadata.md`
```md
- title: Getting Started
- description: Initial Setup
- order: 10
```
### ファイル Metadata
```md
<!-- toc-metadata
- description: CLI の基本操作
- order: 20
- category: reference
-->
# Command Reference
```
## 詳細仕様
- [CLI 仕様](docs/specs/cli.md)
- [Metadata 仕様](docs/specs/metadata.md)
- [tree 出力仕様](docs/specs/output-tree.md)
- [table 出力仕様](docs/specs/output-table.md)
- [Excel 罫線仕様](docs/specs/table-xlsx-ruled-line-format.md)
## メモ
- このファイルは README の簡約版ドラフト
scripts
md
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import csv
import fnmatch
import html
import os
import re
import sys
import zipfile
from dataclasses import dataclass, field
from pathlib import Path
from xml.sax.saxutils import escape as xml_escape
# Filename of the optional per-directory metadata file (case-sensitive match enforced).
METADATA_FILENAME = "_toc-metadata.md"
# Default --tree-node-format template; ${...} placeholders are expanded per row.
DEFAULT_TREE_FORMAT = '${indent}- [${title}](${fullpath})${dir-separator}${description[prefix=" : "]}'
# Default --table-columns value.
DEFAULT_TABLE_COLUMNS = "sequence,dirnames,filename"
# One "- key: value" metadata line.
LINE_METADATA_RE = re.compile(r"^\s*-\s*(?P<key>[^:]+?)\s*:\s*(?P<value>.*?)\s*$")
# The "<!-- toc-metadata ... -->" block embedded at the top of a markdown file.
FILE_METADATA_BLOCK_RE = re.compile(r"<!--\s*toc-metadata\s*\n(?P<body>.*?)\n-->", re.DOTALL)
# H1 / H2 heading lines (after stripping surrounding whitespace).
H1_RE = re.compile(r"^# (.+?)\s*$")
H2_RE = re.compile(r"^## (.+?)\s*$")
# A ${...} placeholder inside format strings.
PLACEHOLDER_RE = re.compile(r"\$\{([^}]+)\}")
# One key=value attribute inside [...] lists; value may be double-quoted or bare.
ATTR_VALUE_RE = re.compile(r'([A-Za-z][A-Za-z0-9_-]*)=(?:"([^"]*)"|([^,\]]*))')
class TocError(Exception):
    """Raised for any user-facing ta-tocgen failure (bad options, metadata, columns)."""
@dataclass
class Metadata:
    """Parsed metadata for one directory or file.

    Free-form string keys live in ``values``; ``order`` and ``hidden`` are
    stored typed and re-serialized to strings by :meth:`get`.
    """

    values: dict[str, str] = field(default_factory=dict)
    order: int | None = None
    hidden: bool = False

    def get(self, key: str) -> str:
        """Return the value for *key* as a string ("" when absent)."""
        if key == "order":
            if self.order is None:
                return ""
            return str(self.order)
        if key == "hidden":
            return "true" if self.hidden else "false"
        return self.values.get(key, "")
@dataclass
class Node:
    """One entry (directory or file) in the scanned documentation tree."""

    path: Path
    rel_path: str  # relative to the base dir, "/"-separated; "" for the root
    is_dir: bool
    depth: int
    parent: Node | None = None
    dir_metadata: Metadata = field(default_factory=Metadata)
    file_metadata: Metadata = field(default_factory=Metadata)
    h1_title: str = ""  # first-line H1 title of a markdown file
    h2_values: dict[str, str] = field(default_factory=dict)
    children: list["Node"] = field(default_factory=list)

    @property
    def basename(self) -> str:
        """Last path segment; "/" for the root node."""
        if not self.rel_path:
            return "/"
        return self.rel_path.rsplit("/", 1)[-1]

    @property
    def dirname(self) -> str:
        """Parent-directory portion of rel_path ("" at the top level)."""
        head, sep, _tail = self.rel_path.rpartition("/")
        return head if sep else ""

    @property
    def fullpath(self) -> str:
        """Alias of rel_path, used by output placeholders."""
        return self.rel_path

    @property
    def active_metadata(self) -> Metadata:
        """The metadata record matching this node's kind."""
        if self.is_dir:
            return self.dir_metadata
        return self.file_metadata

    @property
    def title(self) -> str:
        """Display title: directory metadata title, file H1, or the basename."""
        if self.is_dir:
            return self.dir_metadata.values.get("title", self.basename)
        return self.h1_title or self.basename

    @property
    def description(self) -> str:
        """Metadata 'description' value ("" when absent)."""
        return self.active_metadata.values.get("description", "")

    def key_value(self, key_name: str) -> str:
        """Look up an arbitrary metadata key ("" when absent)."""
        return self.active_metadata.values.get(key_name, "")
@dataclass
class ColumnSpec:
    """One --table-columns entry: a key plus its bracketed attributes."""

    key: str
    attrs: dict[str, str]

    @property
    def label(self) -> str:
        """Header label: explicit ``label`` attribute, else derived from the key."""
        explicit = self.attrs.get("label")
        if explicit is not None:
            return explicit
        if self.key == "spacer":
            return ""
        for prefix in ("key-", "h2-"):
            if self.key.startswith(prefix):
                return self.key[len(prefix):]
        return self.key

    @property
    def width(self) -> float | None:
        """``width`` attribute as a float; None when unset or empty.

        Raises:
            TocError: when the attribute is present but not a number.
        """
        raw = self.attrs.get("width")
        if not raw:
            return None
        try:
            return float(raw)
        except ValueError as exc:
            raise TocError(f"invalid width for column '{self.key}': {raw}") from exc
@dataclass
class Row:
    """One output row: its source node plus both numbering schemes."""

    node: Node
    sequence: int  # flat 1-based row number
    sequence_separated: str  # hierarchical number like "1-2-3."
@dataclass
class FlatLayout:
    """Fully expanded table plus the bookkeeping the renderers need."""

    header: list[str]  # header labels, one per expanded column
    rows: list[list[str]]  # rendered cell strings per data row
    raw_dir_values: list[list[list[str] | None]]  # per row/group: uncollapsed dir values
    column_specs: list[ColumnSpec]  # original, unexpanded specs
    expanded_specs: list[ColumnSpec]  # spec behind each expanded column
    row_defs: list[Row]  # source Row for each data row
    expanded_column_kinds: list[str]  # "dirnames" | "filename" | "other"
def normalize_slashes(value: str) -> str:
    """Convert Windows backslashes in *value* to forward slashes."""
    return "/".join(value.split("\\"))
def normalize_rel_path(path: Path, base_dir: Path) -> str:
    """Path of *path* relative to *base_dir*, "/"-separated; "" for the base itself."""
    rel = normalize_slashes(str(path.relative_to(base_dir)))
    if rel == ".":
        return ""
    return rel
def parse_args() -> argparse.Namespace:
    """Define and parse the ta-tocgen command line."""
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add("base_dir")
    add("--out-mode", default="tree", choices=["tree", "table"])
    add("--node-type", default="file-and-directory", choices=["file-only", "directory-only", "file-and-directory"])
    add("--out-file-path")
    add("--table-format", default="csv", choices=["csv", "md", "xlsx", "html"])
    add("--table-columns", default=DEFAULT_TABLE_COLUMNS)
    add("--tree-node-format", default=DEFAULT_TREE_FORMAT)
    add("--with-root-node", action="store_true")
    add("--max-dir-columns", type=int)
    add("--exclude-path", nargs="+", default=[])
    add("--cell-start", default="B2")
    add("--margin-column-width", type=float, default=2)
    add("--header-color", default="#F2F2F2")
    # --with-title with no value means "use an empty title line".
    add("--with-title", nargs="?", const="")
    return parser.parse_args()
def validate_args(args: argparse.Namespace) -> None:
    """Reject option combinations that do not fit the selected --out-mode.

    Raises:
        TocError: when a table-only option is changed in tree mode, a
            tree-only option is used in table mode, or a value is invalid.
    """
    if args.out_mode == "tree":
        # Table-only options must keep their defaults in tree mode.
        table_only = [
            (args.table_columns != DEFAULT_TABLE_COLUMNS, "--table-columns"),
            (args.table_format != "csv", "--table-format"),
            (args.max_dir_columns is not None, "--max-dir-columns"),
            (args.cell_start != "B2", "--cell-start"),
            (args.margin_column_width != 2, "--margin-column-width"),
            (args.header_color != "#F2F2F2", "--header-color"),
        ]
        for changed, option in table_only:
            if changed:
                raise TocError(f"{option} is available only in table mode")
    elif args.with_root_node:
        raise TocError("--with-root-node is available only in tree mode")
    if args.max_dir_columns is not None and args.max_dir_columns <= 0:
        raise TocError("--max-dir-columns must be positive")
    if not re.fullmatch(r"#[0-9A-Fa-f]{6}", args.header_color):
        raise TocError("--header-color must be #RRGGBB")
def parse_metadata_lines(text: str, source: str, is_dir: bool) -> Metadata:
    """Parse "- key: value" lines into a Metadata record.

    *source* is used only in error messages; *is_dir* controls whether a
    'title' key is permitted (directory metadata only). Lines that do not
    look like metadata entries are silently ignored.

    Raises:
        TocError: on duplicate keys, miscased reserved keys, 'title' in file
            metadata, or malformed order/hidden/custom-key values.
    """
    metadata = Metadata()
    keys_seen: dict[str, str] = {}
    # First-seen exact casing of the reserved keys title/description.
    reserved_lower_seen: dict[str, str] = {}
    for line in text.splitlines():
        match = LINE_METADATA_RE.match(line)
        if not match:
            continue
        key = match.group("key").strip()
        value = match.group("value").strip()
        if key in keys_seen:
            raise TocError(f"duplicate metadata key '{key}' in {source}")
        lowered = key.lower()
        if lowered in {"title", "description"}:
            # The same reserved key must not recur with differing case.
            existing = reserved_lower_seen.get(lowered)
            if existing is not None and existing != key:
                raise TocError(f"invalid key case '{key}' in {source}")
            reserved_lower_seen[lowered] = key
        keys_seen[key] = value
        if key == "title":
            if not is_dir:
                raise TocError(f"file metadata must not contain 'title' in {source}")
            metadata.values[key] = value
        elif key == "description":
            metadata.values[key] = value
        elif key == "order":
            try:
                metadata.order = int(value)
            except ValueError as exc:
                raise TocError(f"invalid order value in {source}: {value}") from exc
        elif key == "hidden":
            if value not in {"true", "false"}:
                raise TocError(f"invalid hidden value in {source}: {value}")
            metadata.hidden = value == "true"
        else:
            # Custom key: separators forbidden; miscased reserved names rejected.
            if ":" in key or "," in key:
                raise TocError(f"invalid metadata key '{key}' in {source}")
            if lowered in {"title", "description", "order", "hidden"} and key != lowered:
                raise TocError(f"invalid key case '{key}' in {source}")
            metadata.values[key] = value
    return metadata
def read_directory_metadata(dir_path: Path) -> Metadata:
    """Load ``_toc-metadata.md`` under *dir_path*; empty Metadata when absent.

    Raises:
        TocError: when several case-variant candidates exist, or the single
            candidate's filename case is wrong.
    """
    candidates = [
        entry.name
        for entry in dir_path.iterdir()
        if entry.name.lower() == METADATA_FILENAME.lower()
    ]
    if len(candidates) > 1:
        raise TocError(f"multiple metadata files found under {dir_path}")
    if candidates and candidates[0] != METADATA_FILENAME:
        raise TocError(f"invalid metadata filename case under {dir_path}: {candidates[0]}")
    metadata_path = dir_path / METADATA_FILENAME
    if not metadata_path.exists():
        return Metadata()
    return parse_metadata_lines(metadata_path.read_text(encoding="utf-8"), str(metadata_path), is_dir=True)
def strip_toc_metadata_block(text: str) -> str:
    """Remove the first ``<!-- toc-metadata ... -->`` block from *text*."""
    return re.sub(FILE_METADATA_BLOCK_RE, "", text, count=1)
def parse_h1_title(text: str) -> str:
    """Return the H1 title when it is the first non-blank line, else ""."""
    for raw in text.splitlines():
        candidate = raw.strip()
        if not candidate:
            continue
        # Only the very first non-blank line may supply the title.
        match = H1_RE.match(candidate)
        return match.group(1).strip() if match else ""
    return ""
def parse_h2_values(text: str, requested_h2_keys: set[str]) -> dict[str, str]:
    """Collect, for each requested H2 heading, the first non-blank body line.

    Only the first occurrence of a heading wins; an H1 line terminates the
    currently open H2 section.
    """
    if not requested_h2_keys:
        return {}
    values: dict[str, str] = {}
    current_key: str | None = None  # H2 section currently being collected
    waiting_for_body = False  # True until that section's first body line is found
    for line in text.splitlines():
        stripped = line.rstrip("\n")
        h2_match = H2_RE.match(stripped.strip())
        if h2_match:
            key = h2_match.group(1).strip()
            # Start collecting only for requested, not-yet-seen headings.
            current_key = key if key in requested_h2_keys and key not in values else None
            waiting_for_body = current_key is not None
            continue
        if re.match(r"^#\s+", stripped.strip()):
            # An H1 closes any open H2 section.
            current_key = None
            waiting_for_body = False
            continue
        if current_key is None or not waiting_for_body:
            continue
        if stripped.strip():
            # First non-blank line after the heading becomes its value.
            values[current_key] = stripped.strip()
            waiting_for_body = False
    return values
def read_file_metadata(file_path: Path, requested_h2_keys: set[str]) -> tuple[Metadata, str, dict[str, str]]:
    """Read a file's toc-metadata block, H1 title and requested H2 values.

    Non-markdown files yield empty results without being read.

    Raises:
        TocError: when more than one toc-metadata block is present.
    """
    if file_path.suffix.lower() != ".md":
        return Metadata(), "", {}
    text = file_path.read_text(encoding="utf-8")
    blocks = list(FILE_METADATA_BLOCK_RE.finditer(text))
    if len(blocks) > 1:
        raise TocError(f"multiple toc-metadata blocks found in {file_path}")
    if blocks:
        metadata = parse_metadata_lines(blocks[0].group("body"), str(file_path), is_dir=False)
    else:
        metadata = Metadata()
    # Title/H2 extraction works on the text with the metadata block removed.
    body = strip_toc_metadata_block(text)
    return metadata, parse_h1_title(body), parse_h2_values(body, requested_h2_keys)
def parse_attributes(attr_text: str, source: str) -> dict[str, str]:
    """Parse a comma-separated ``key=value`` attribute list into a dict.

    Values may be double-quoted (kept verbatim) or bare (trimmed). *source*
    is used only in error messages.

    Raises:
        TocError: on malformed attribute syntax or a bad separator.
    """
    attrs: dict[str, str] = {}
    pos = 0
    text = attr_text.strip()
    while pos < len(text):
        # Skip any leading spaces/commas before the next attribute.
        while pos < len(text) and text[pos] in " ,":
            pos += 1
        if pos >= len(text):
            break
        match = ATTR_VALUE_RE.match(text, pos)
        if not match:
            raise TocError(f"invalid attribute syntax in {source}: {attr_text}")
        key = match.group(1)
        # Group 2 is the quoted form; group 3 the bare (trimmed) form.
        value = match.group(2) if match.group(2) is not None else (match.group(3) or "").strip()
        attrs[key] = value
        pos = match.end()
        # After an attribute: optional spaces, then at most one comma.
        while pos < len(text) and text[pos] == " ":
            pos += 1
        if pos < len(text):
            if text[pos] != ",":
                raise TocError(f"invalid attribute separator in {source}: {attr_text}")
            pos += 1
    return attrs
def parse_column_spec(part: str) -> ColumnSpec:
    """Parse one --table-columns entry like ``filename[label="X", width=10]``.

    Raises:
        TocError: on empty/malformed entries, unknown keys, or unsupported
            attributes.
    """
    part = part.strip()
    if not part:
        raise TocError("empty column in --table-columns")
    if "[" in part:
        if not part.endswith("]"):
            raise TocError(f"invalid column syntax: {part}")
        key, attr_text = part.split("[", 1)
        attrs = parse_attributes(attr_text[:-1], f"column '{part}'")
    else:
        key = part
        attrs = {}
    key = key.strip()
    validate_key(key, "--table-columns", allow_dirnames=True, allow_spacer=True, allow_link_keys=True)
    for attr_key in attrs:
        if attr_key not in {"label", "width", "prefix", "suffix"}:
            raise TocError(f"unsupported column attribute '{attr_key}' in {part}")
    return ColumnSpec(key=key, attrs=attrs)
def split_top_level_commas(value: str, source: str) -> list[str]:
    """Split *value* on commas that are not nested inside ``[...]`` brackets.

    Raises:
        TocError: on unbalanced brackets.
    """
    parts: list[str] = []
    segment_start = 0
    bracket_depth = 0
    for position, char in enumerate(value):
        if char == "[":
            bracket_depth += 1
        elif char == "]":
            bracket_depth -= 1
            if bracket_depth < 0:
                raise TocError(f"unbalanced brackets in {source}: {value}")
        elif char == "," and bracket_depth == 0:
            parts.append(value[segment_start:position])
            segment_start = position + 1
    if bracket_depth != 0:
        raise TocError(f"unbalanced brackets in {source}: {value}")
    parts.append(value[segment_start:])
    return parts
def parse_table_columns(value: str) -> list[ColumnSpec]:
    """Split --table-columns on top-level commas and parse each entry."""
    parts = split_top_level_commas(value, "--table-columns")
    return [parse_column_spec(item) for item in parts]
def parse_placeholder_expr(expr: str, source: str) -> tuple[str, dict[str, str]]:
    """Parse a ``${...}`` placeholder body into (key, attributes).

    Only ``prefix``/``suffix`` attributes are allowed on placeholders.

    Raises:
        TocError: on malformed syntax, unknown keys, or other attributes.
    """
    if "[" in expr:
        if not expr.endswith("]"):
            raise TocError(f"invalid placeholder syntax in {source}: {expr}")
        key, attr_text = expr.split("[", 1)
        attrs = parse_attributes(attr_text[:-1], source)
    else:
        key = expr.strip()
        attrs = {}
    key = key.strip()
    validate_key(key, source, allow_dirnames=False, allow_spacer=False, allow_link_keys=False)
    for attr_key in attrs:
        if attr_key not in {"prefix", "suffix"}:
            raise TocError(f"unsupported placeholder attribute '{attr_key}' in {source}")
    return key, attrs
def validate_key(key: str, source: str, allow_dirnames: bool, allow_spacer: bool, allow_link_keys: bool) -> None:
    """Check that *key* is a recognized output key in this context.

    ``key-*`` / ``h2-*`` prefixes (with a non-empty remainder) are always
    allowed; the flag arguments enable context-specific keys.

    Raises:
        TocError: for an unrecognized key.
    """
    # Prefixed custom keys need at least one character after the prefix.
    if key.startswith("key-") and len(key) > 4:
        return
    if key.startswith("h2-") and len(key) > 3:
        return
    allowed = {
        "sequence",
        "sequence-separated",
        "fullpath",
        "fullpath-from-toc",
        "dirname",
        "filename",
        "filename-title",
        "depth",
        "description",
        "title",
        "order",
        "dir-separator",
        "indent",
    }
    if allow_dirnames:
        allowed |= {"dirnames", "dirnames-title"}
    if allow_spacer:
        allowed.add("spacer")
    if allow_link_keys:
        allowed |= {"filename-with-link", "filename-title-with-link"}
    if key not in allowed:
        raise TocError(f"unsupported key in {source}: {key}")
def collect_requested_h2_keys(table_specs: list[ColumnSpec], tree_format: str) -> set[str]:
    """Gather every H2 heading referenced via ``h2-*`` keys in the table
    columns or the tree format string."""
    keys = {spec.key[3:] for spec in table_specs if spec.key.startswith("h2-")}
    for match in PLACEHOLDER_RE.finditer(tree_format):
        key, _attrs = parse_placeholder_expr(match.group(1), "--tree-node-format")
        if key.startswith("h2-"):
            keys.add(key[3:])
    return keys
def should_exclude(rel_path: str, patterns: list[str]) -> bool:
    """True when *rel_path* matches any --exclude-path pattern.

    A pattern matches as an exact path or subtree prefix, as an fnmatch glob
    against the whole path, or as a glob against the basename alone.
    """
    rel_path = normalize_slashes(rel_path)
    basename = Path(rel_path).name
    for raw_pattern in patterns:
        pattern = normalize_slashes(raw_pattern)
        prefix = pattern.rstrip("/")
        if prefix and (rel_path == prefix or rel_path.startswith(prefix + "/")):
            return True
        if fnmatch.fnmatch(rel_path, pattern):
            return True
        if fnmatch.fnmatch(basename, pattern):
            return True
    return False
def should_skip_builtin_entry(name: str) -> bool:
    """True for entries never listed: the metadata file, .DS_Store, ``~$`` temp files."""
    if name.startswith("~$"):
        return True
    return name.lower() in {METADATA_FILENAME.lower(), ".ds_store"}
def build_tree(base_dir: Path, requested_h2_keys: set[str], exclude_patterns: list[str]) -> list[Node]:
    """Walk *base_dir* and build the Node forest of its visible entries.

    Excluded paths, hidden entries (``hidden: true`` metadata), the metadata
    file itself, and housekeeping files are skipped. Children are gathered in
    case-insensitive name order; display sorting happens later in sort_nodes.

    Raises:
        TocError: when a metadata filename has the wrong case.
    """

    def visit_dir(dir_path: Path, parent: Node | None) -> Node | None:
        # Returns None when the directory is excluded or hidden.
        rel_path = normalize_rel_path(dir_path, base_dir)
        if rel_path and should_exclude(rel_path, exclude_patterns):
            return None
        dir_metadata = read_directory_metadata(dir_path)
        if dir_metadata.hidden:
            return None
        node = Node(
            path=dir_path,
            rel_path=rel_path,
            is_dir=True,
            depth=0 if not rel_path else rel_path.count("/"),
            parent=parent,
            dir_metadata=dir_metadata,
        )
        for entry in sorted(dir_path.iterdir(), key=lambda item: item.name.lower()):
            if should_skip_builtin_entry(entry.name):
                # The metadata file is skipped, but a miscased variant is an error.
                if entry.name.lower() == METADATA_FILENAME.lower() and entry.name != METADATA_FILENAME:
                    raise TocError(f"invalid metadata filename case under {dir_path}: {entry.name}")
                continue
            child_rel_path = normalize_rel_path(entry, base_dir)
            if should_exclude(child_rel_path, exclude_patterns):
                continue
            if entry.is_dir():
                child = visit_dir(entry, node)
            elif entry.is_file():
                child = visit_file(entry, node)
            else:
                child = None  # non-file, non-dir entries are ignored
            if child is not None:
                node.children.append(child)
        return node

    def visit_file(file_path: Path, parent: Node) -> Node | None:
        # Returns None when the file's metadata marks it hidden.
        rel_path = normalize_rel_path(file_path, base_dir)
        file_metadata, h1_title, h2_values = read_file_metadata(file_path, requested_h2_keys)
        if file_metadata.hidden:
            return None
        return Node(
            path=file_path,
            rel_path=rel_path,
            is_dir=False,
            depth=rel_path.count("/"),
            parent=parent,
            file_metadata=file_metadata,
            h1_title=h1_title,
            h2_values=h2_values,
        )

    # Top level: same scan as visit_dir's loop, but without creating a root Node.
    roots: list[Node] = []
    for entry in sorted(base_dir.iterdir(), key=lambda item: item.name.lower()):
        if should_skip_builtin_entry(entry.name):
            if entry.name.lower() == METADATA_FILENAME.lower() and entry.name != METADATA_FILENAME:
                raise TocError(f"invalid metadata filename case under {base_dir}: {entry.name}")
            continue
        rel_path = normalize_rel_path(entry, base_dir)
        if should_exclude(rel_path, exclude_patterns):
            continue
        if entry.is_dir():
            child = visit_dir(entry, None)
        elif entry.is_file():
            child = visit_file(entry, None)
        else:
            child = None
        if child is not None:
            roots.append(child)
    return roots
def sort_nodes(nodes: list[Node], out_mode: str) -> list[Node]:
if out_mode == "tree":
type_rank = lambda node: 0 if node.is_dir else 1
else:
type_rank = lambda node: 0 if not node.is_dir else 1
return sorted(
nodes,
key=lambda node: (
type_rank(node),
0 if node.active_metadata.order is not None else 1,
node.active_metadata.order if node.active_metadata.order is not None else 0,
node.basename.lower(),
node.basename,
),
)
def iter_rows(nodes: list[Node], out_mode: str, node_type: str) -> list[Row]:
    """Flatten the tree into display rows, honoring --node-type filtering.

    Filtered-out nodes still contribute their children, numbered under the
    parent's prefix. ``sequence`` is the flat 1-based row number and
    ``sequence_separated`` the hierarchical "1-2." style number.
    """
    rows: list[Row] = []

    def include(node: Node) -> bool:
        if node_type == "file-only":
            return not node.is_dir
        if node_type == "directory-only":
            return node.is_dir
        return True

    def walk(children: list[Node], prefix: list[int]) -> None:
        counter = 0
        for node in sort_nodes(children, out_mode):
            if not include(node):
                walk(node.children, prefix)
                continue
            counter += 1
            chain = prefix + [counter]
            separated = "-".join(str(part) for part in chain) + "."
            rows.append(Row(node=node, sequence=len(rows) + 1, sequence_separated=separated))
            walk(node.children, chain)

    walk(nodes, [])
    return rows
def collect_dir_title_map(nodes: list[Node]) -> dict[str, str]:
mapping: dict[str, str] = {}
def visit(items: list[Node]) -> None:
for node in items:
if node.is_dir:
mapping[node.fullpath] = node.title
visit(node.children)
visit(nodes)
return mapping
def apply_prefix_suffix(value: str, attrs: dict[str, str]) -> str:
    """Wrap a non-empty *value* with the optional prefix/suffix attributes;
    empty values stay empty (no dangling decorations)."""
    if not value:
        return ""
    prefix = attrs.get("prefix", "")
    suffix = attrs.get("suffix", "")
    return f"{prefix}{value}{suffix}"
def relative_path_from_output(target_path: Path, output_dir: Path) -> str:
    """Path of *target_path* relative to *output_dir*, with forward slashes."""
    rel = os.path.relpath(target_path, output_dir)
    return normalize_slashes(rel)
def row_value(row: Row, key: str, attrs: dict[str, str], dir_title_map: dict[str, str], output_dir: Path | None = None) -> str:
    """Resolve one placeholder/column *key* for *row*, then apply the
    optional prefix/suffix attributes.

    NOTE(review): *dir_title_map* is unused here (dirnames columns are
    resolved in build_flat_layout) — kept for signature stability; confirm
    before removing.

    Raises:
        TocError: for an unknown key, or ``fullpath-from-toc`` without
            *output_dir* context.
    """
    node = row.node
    if key == "sequence":
        value = str(row.sequence)
    elif key == "sequence-separated":
        value = row.sequence_separated
    elif key == "fullpath":
        value = node.fullpath
    elif key == "fullpath-from-toc":
        if output_dir is None:
            raise TocError("fullpath-from-toc requires output directory context")
        value = relative_path_from_output(node.path, output_dir)
    elif key == "dirname":
        value = node.dirname
    elif key == "filename":
        value = "" if node.is_dir else node.basename
    elif key == "filename-title":
        value = "" if node.is_dir else node.title
    elif key == "filename-with-link":
        # Link wrapping is applied by the renderer; here the cell text is plain.
        value = "" if node.is_dir else node.basename
    elif key == "filename-title-with-link":
        value = "" if node.is_dir else node.title
    elif key == "depth":
        value = str(node.depth)
    elif key == "description":
        value = node.description
    elif key == "title":
        value = node.title
    elif key == "order":
        value = node.active_metadata.get("order")
    elif key == "spacer":
        value = ""
    elif key == "dir-separator":
        value = "/" if node.is_dir else ""
    elif key == "indent":
        value = " " * (node.depth * 4)  # four spaces per tree level
    elif key.startswith("key-"):
        value = node.key_value(key[4:])
    elif key.startswith("h2-"):
        value = node.h2_values.get(key[3:], "")
    else:
        raise TocError(f"unsupported key: {key}")
    return apply_prefix_suffix(value, attrs)
def link_target(row: Row, output_dir: Path) -> str:
    """Relative hyperlink target for a file row; "" for directory rows."""
    node = row.node
    return "" if node.is_dir else relative_path_from_output(node.path, output_dir)
def dir_values(node: Node, title_mode: bool, dir_title_map: dict[str, str]) -> list[str]:
if not node.rel_path:
return []
segments = node.rel_path.split("/")
targets = segments if node.is_dir else segments[:-1]
paths: list[str] = []
current: list[str] = []
for segment in targets:
current.append(segment)
path = "/".join(current)
if title_mode:
paths.append(dir_title_map.get(path, segment))
else:
paths.append(segment)
return paths
def build_flat_layout(rows: list[Row], specs: list[ColumnSpec], max_dir_columns: int | None, dir_title_map: dict[str, str], output_dir: Path) -> FlatLayout:
    """Expand column specs and rows into the flat string table used by all
    table renderers.

    ``dirnames``/``dirnames-title`` specs expand into one column per
    directory level; a ``spacer`` immediately following them joins that
    group. Repeated directory prefixes are blanked afterwards by
    collapse_repeated_dir_values.

    Raises:
        TocError: when any row is deeper than --max-dir-columns.
    """
    max_depth = max((len(dir_values(row.node, False, dir_title_map)) for row in rows), default=0)
    dir_col_count = max_depth if max_dir_columns is None else max_dir_columns
    if max_dir_columns is not None and max_depth > max_dir_columns:
        raise TocError("row depth exceeds --max-dir-columns")
    header: list[str] = []
    table_rows: list[list[str]] = []
    raw_dir_values: list[list[list[str] | None]] = []
    expanded_column_specs: list[ColumnSpec] = []
    expanded_column_kinds: list[str] = []
    # (spec, dir column index within the group or None, dir group index or None)
    expanded_specs: list[tuple[ColumnSpec, int | None, int | None]] = []
    group_count = 0
    spacer_group_index: int | None = None  # group a following spacer may attach to
    for spec in specs:
        if spec.key in {"dirnames", "dirnames-title"}:
            group_index = group_count
            group_count += 1
            spacer_group_index = group_index
            for index in range(dir_col_count):
                expanded_specs.append((spec, index, group_index))
                expanded_column_specs.append(spec)
                expanded_column_kinds.append("dirnames")
                # Only the first expanded column carries the group label.
                header.append(apply_prefix_suffix(spec.label if index == 0 else "", {"prefix": "", "suffix": ""}))
        elif spec.key == "spacer" and spacer_group_index is not None:
            # A spacer extends the preceding dirnames group by one trailing column.
            expanded_specs.append((spec, dir_col_count, spacer_group_index))
            expanded_column_specs.append(spec)
            expanded_column_kinds.append("dirnames")
            header.append(spec.label)
        else:
            spacer_group_index = None
            expanded_specs.append((spec, None, None))
            expanded_column_specs.append(spec)
            if spec.key in {"filename", "filename-title"}:
                expanded_column_kinds.append("filename")
            else:
                expanded_column_kinds.append("other")
            header.append(spec.label)
    for row in rows:
        output_row: list[str] = []
        dir_group_values: list[list[str] | None] = [None] * group_count
        for spec, dir_index, group_index in expanded_specs:
            if spec.key in {"dirnames", "dirnames-title"}:
                values = dir_values(row.node, spec.key == "dirnames-title", dir_title_map)
                values = [apply_prefix_suffix(value, spec.attrs) for value in values]
                # Remember the first resolved value list per group for border logic.
                if group_index is not None and dir_group_values[group_index] is None:
                    dir_group_values[group_index] = values
                output_row.append(values[dir_index] if dir_index is not None and dir_index < len(values) else "")
            elif spec.key == "spacer" and group_index is not None:
                if dir_group_values[group_index] is None:
                    dir_group_values[group_index] = dir_values(row.node, False, dir_title_map)
                output_row.append("")
            else:
                output_row.append(row_value(row, spec.key, spec.attrs, dir_title_map, output_dir))
        table_rows.append(output_row)
        raw_dir_values.append(dir_group_values)
    collapse_repeated_dir_values(table_rows, raw_dir_values, expanded_specs)
    return FlatLayout(
        header=header,
        rows=table_rows,
        raw_dir_values=raw_dir_values,
        column_specs=specs,
        expanded_specs=expanded_column_specs,
        row_defs=rows,
        expanded_column_kinds=expanded_column_kinds,
    )
def collapse_repeated_dir_values(rows: list[list[str]], raw_dir_values: list[list[list[str] | None]], expanded_specs: list[tuple[ColumnSpec, int | None, int | None]]) -> None:
    """Blank directory cells that repeat the previous row's value (in place).

    A cell is blanked only when its whole path prefix up to that level is
    unchanged, so a same-named directory under a different parent still shows.
    """
    # Collect, per dir group, the expanded column indexes belonging to it.
    groups: dict[int, list[int]] = {}
    for col_index, (_spec, _dir_index, group_index) in enumerate(expanded_specs):
        if group_index is not None:
            groups.setdefault(group_index, []).append(col_index)
    for group_index, columns in groups.items():
        # previous[pos] holds the last displayed path prefix for that level.
        previous: list[list[str] | None] = [None] * len(columns)
        for row_index, row in enumerate(rows):
            values = raw_dir_values[row_index][group_index] or []
            for pos, col_index in enumerate(columns):
                current = values[pos] if pos < len(values) else ""
                if current and previous[pos] == values[: pos + 1]:
                    row[col_index] = ""  # identical prefix: suppress the repeat
                elif current:
                    previous[pos] = values[: pos + 1]
                else:
                    previous[pos] = None  # level absent: reset tracking
def render_tree(
    rows: list[Row],
    tree_format: str,
    with_root_node: bool,
    dir_title_map: dict[str, str],
    title: str | None,
    out_path: Path,
) -> str:
    """Render tree-mode output: optional title header, optional root line,
    then one formatted line per row using --tree-node-format placeholders."""
    lines: list[str] = []
    if title is not None:
        lines += [f"# {title}", "", "## Files and Directories", ""]
    if with_root_node:
        lines.append("- /")
    for row in rows:
        def substitute(match: re.Match[str]) -> str:
            key, attrs = parse_placeholder_expr(match.group(1), "--tree-node-format")
            return row_value(row, key, attrs, dir_title_map, out_path.parent)

        lines.append(PLACEHOLDER_RE.sub(substitute, tree_format))
    return "\n".join(lines).rstrip() + "\n"
def render_csv(path: Path, layout: FlatLayout, title: str | None) -> None:
    """Write the table as UTF-8-with-BOM CSV; an optional title occupies the
    first row followed by a blank separator row."""
    with path.open("w", encoding="utf-8-sig", newline="") as handle:
        writer = csv.writer(handle, quoting=csv.QUOTE_MINIMAL, lineterminator="\n")
        if title is not None:
            writer.writerow([title])
            writer.writerow([])
        writer.writerow(layout.header)
        writer.writerows(layout.rows)
def markdown_escape_cell(value: str) -> str:
    """Escape a value for use inside a Markdown pipe-table cell."""
    # Backslashes first, so the escapes added below are not themselves escaped.
    escaped = value.replace("\\", "\\\\")
    escaped = escaped.replace("|", "\\|")
    return escaped.replace("\n", "<br>")
def render_markdown(path: Path, layout: FlatLayout, title: str | None, output_dir: Path) -> None:
    """Write the table as a Markdown pipe table, wrapping *-with-link cells
    in ``[text](<target>)`` links."""
    lines: list[str] = []
    if title is not None:
        lines += [f"# {title}", ""]
    escaped_header = [markdown_escape_cell(value) for value in layout.header]
    lines.append("| " + " | ".join(escaped_header) + " |")
    lines.append("| " + " | ".join(["---"] * len(layout.header)) + " |")
    for row_index, row in enumerate(layout.rows):
        cells: list[str] = []
        for col_index, value in enumerate(row):
            spec = layout.expanded_specs[col_index]
            is_link_column = spec.key in {"filename-with-link", "filename-title-with-link"}
            if is_link_column and value != "":
                target = link_target(layout.row_defs[row_index], output_dir)
                cells.append(f"[{markdown_escape_cell(value)}](<{target}>)")
            else:
                cells.append(markdown_escape_cell(value))
        lines.append("| " + " | ".join(cells) + " |")
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
def coordinate_to_indexes(cell: str) -> tuple[int, int]:
    """Convert an A1-style reference to 1-based (row, column) indexes.

    Raises:
        TocError: for anything that is not LETTERS followed by digits.
    """
    match = re.fullmatch(r"([A-Z]+)([1-9][0-9]*)", cell)
    if not match:
        raise TocError(f"invalid --cell-start: {cell}")
    letters, digits = match.groups()
    column = 0
    for letter in letters:
        column = column * 26 + ord(letter) - ord("A") + 1
    return int(digits), column
def column_letter(index: int) -> str:
    """Convert a 1-based column index to spreadsheet letters (1 -> "A")."""
    result = ""
    while index > 0:
        index, remainder = divmod(index - 1, 26)
        result = chr(ord("A") + remainder) + result
    return result
def excel_cell(value: str, style_id: int) -> str:
    """Render one worksheet cell as SpreadsheetML: empty styled cell or an
    inline string."""
    if not value:
        return f'<c s="{style_id}"/>'
    # Keep significant leading/trailing whitespace in the stored string.
    preserve = "" if value == value.strip() else ' xml:space="preserve"'
    return f'<c s="{style_id}" t="inlineStr"><is><t{preserve}>{xml_escape(value)}</t></is></c>'
def excel_hyperlink_rel(cell_ref: str, rel_id: str) -> str:
    """One <hyperlink> entry binding a cell reference to a relationship id."""
    return '<hyperlink ref="' + cell_ref + '" r:id="' + rel_id + '"/>'
def excel_hyperlink_relationship(rel_id: str, target: str) -> str:
    """External-hyperlink <Relationship> entry for the sheet's rels part."""
    rel_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
    return (
        f'<Relationship Id="{rel_id}" '
        f'Type="{rel_type}" '
        f'Target="{xml_escape(target)}" TargetMode="External"/>'
    )
def depth_for_row(row: Row) -> int:
if not row.node.rel_path:
return 0
parts = row.node.rel_path.split("/")
return len(parts) if row.node.is_dir else len(parts) - 1
def border_tuple(left: bool, right: bool, top: bool, bottom: bool) -> tuple[bool, bool, bool, bool]:
    """Pack the four border flags in (left, right, top, bottom) order."""
    flags = (left, right, top, bottom)
    return flags
def needs_trailing_border_row(layout: FlatLayout) -> bool:
return bool(layout.row_defs) and not layout.row_defs[-1].node.is_dir
def contiguous_dir_column_groups(layout: FlatLayout) -> list[list[int]]:
groups: list[list[int]] = []
current: list[int] = []
for index, kind in enumerate(layout.expanded_column_kinds):
if kind == "dirnames":
current.append(index)
elif current:
groups.append(current)
current = []
if current:
groups.append(current)
return groups
def dir_group_change_index(previous: list[str] | None, current: list[str] | None, group_len: int) -> int | None:
    """First position (over *group_len* slots, missing slots read as "")
    where two dir-value rows differ; None when equal or both rows absent."""
    if previous is None and current is None:
        return None
    prev_values = previous or []
    curr_values = current or []
    for position in range(group_len):
        first = prev_values[position] if position < len(prev_values) else ""
        second = curr_values[position] if position < len(curr_values) else ""
        if first != second:
            return position
    return None
def common_prefix_len(left: list[str] | None, right: list[str] | None) -> int:
    """Length of the shared leading run of two dir-value lists (None == [])."""
    matched = 0
    for first, second in zip(left or [], right or []):
        if first != second:
            break
        matched += 1
    return matched
def build_table_border_matrix(layout: FlatLayout) -> list[list[tuple[bool, bool, bool, bool]]]:
    """Compute (left, right, top, bottom) border flags for every table cell.

    Row 0 is the header, followed by one row per data row, plus a trailing
    border-only row when the table ends with a file row. Directory columns
    get tree-like borders that open and close with the directory structure;
    mixed dir/file tables use different rules than uniform ones.
    """
    if not layout.rows:
        return []
    row_count = len(layout.rows) + 1  # +1 for the header row
    col_count = len(layout.header)
    matrix = [[border_tuple(False, False, False, False) for _ in range(col_count)] for _ in range(row_count)]
    dir_col_indexes = [index for index, kind in enumerate(layout.expanded_column_kinds) if kind == "dirnames"]
    first_dir_col = dir_col_indexes[0] if dir_col_indexes else None
    last_dir_col = dir_col_indexes[-1] if dir_col_indexes else None
    dir_groups = contiguous_dir_column_groups(layout)
    has_any_dir_row = any(item.node.is_dir for item in layout.row_defs)
    has_mixed_rows = has_any_dir_row and any(not item.node.is_dir for item in layout.row_defs)
    # col_index -> (group index, position within group, group length)
    dir_group_lookup: dict[int, tuple[int, int, int]] = {}
    for group_index, columns in enumerate(dir_groups):
        for pos, col_index in enumerate(columns):
            dir_group_lookup[col_index] = (group_index, pos, len(columns))
    # Header row: dirnames columns draw only the group's outer verticals.
    for col_index, kind in enumerate(layout.expanded_column_kinds):
        if kind == "dirnames":
            matrix[0][col_index] = border_tuple(
                col_index == first_dir_col,
                col_index == last_dir_col,
                True,
                True,
            )
        else:
            matrix[0][col_index] = border_tuple(True, True, True, True)
    for row_index, row in enumerate(layout.row_defs, start=1):
        self_depth = depth_for_row(row)
        for col_index, kind in enumerate(layout.expanded_column_kinds):
            # Final file row of a mixed table: its bottom border comes from
            # the appended trailing border row instead.
            is_mixed_final_file_row = has_any_dir_row and row_index == len(layout.row_defs) and needs_trailing_border_row(layout) and not row.node.is_dir
            if kind == "filename":
                if row.node.is_dir:
                    matrix[row_index][col_index] = border_tuple(False, True, True, True)
                else:
                    if has_mixed_rows:
                        left = True
                    else:
                        # Uniform file table: suppress left edge when the row
                        # fills all dir columns.
                        left = not (dir_col_indexes and self_depth == len(dir_col_indexes))
                    bottom = not is_mixed_final_file_row
                    matrix[row_index][col_index] = border_tuple(left, True, True, bottom)
                continue
            if kind != "dirnames":
                # Plain data columns: full box, bottom deferred on the final
                # mixed file row.
                bottom = not is_mixed_final_file_row
                matrix[row_index][col_index] = border_tuple(True, True, True, bottom)
                continue
            # -- dirnames columns --
            group_index, pos_in_group, group_len = dir_group_lookup[col_index]
            current_values = layout.raw_dir_values[row_index - 1][group_index] or []
            prev_values = None if row_index == 1 else (layout.raw_dir_values[row_index - 2][group_index] or [])
            next_values = None if row_index == len(layout.row_defs) else (layout.raw_dir_values[row_index][group_index] or [])
            if not row.node.is_dir:
                if has_mixed_rows:
                    # File row inside a mixed table.
                    next_same_dir = next_values == current_values
                    if next_same_dir:
                        bottom_threshold = None  # siblings follow: no bottom rule
                    elif self_depth == 1:
                        bottom_threshold = 0
                    else:
                        bottom_threshold = max(common_prefix_len(current_values, next_values), 2)
                    left = pos_in_group < self_depth
                    right = pos_in_group == group_len - 1 and self_depth == 3 and pos_in_group >= self_depth
                    top = False
                    bottom = bottom_threshold is not None and pos_in_group >= bottom_threshold
                    matrix[row_index][col_index] = border_tuple(left, right, top, bottom)
                    continue
                # File row in a files-only table: borders track where the
                # directory path changes against the neighbouring rows.
                top_change = 1 if row_index == 1 else dir_group_change_index(prev_values, current_values, group_len)
                bottom_change = 0 if row_index == len(layout.row_defs) else dir_group_change_index(current_values, next_values, group_len)
                rendered_value = layout.rows[row_index - 1][col_index]
                left = pos_in_group < self_depth
                right = pos_in_group < self_depth - 1 or pos_in_group == group_len - 1
                if top_change is not None and pos_in_group == top_change and row_index != 1:
                    right = pos_in_group == group_len - 1
                if rendered_value == "" and self_depth >= 4 and pos_in_group == self_depth - 2:
                    right = False
                if top_change is not None and top_change > 0 and pos_in_group == top_change - 1 and pos_in_group >= 2 and rendered_value == "":
                    right = False
                if bottom_change is not None and bottom_change > 0 and pos_in_group == bottom_change - 1 and pos_in_group >= 2 and rendered_value == "":
                    right = False
                top = top_change is not None and pos_in_group >= top_change
                bottom_start = bottom_change
                if bottom_start == 0 and top_change is not None and top_change > 0:
                    bottom_start = top_change - 1
                bottom = bottom_start is not None and pos_in_group >= bottom_start
                matrix[row_index][col_index] = border_tuple(left, right, top, bottom)
                continue
            if row.node.is_dir:
                # Directory row: relation to neighbouring rows decides which
                # edges open downward (children) or close (new subtree).
                prev_depth = len(prev_values or [])
                next_depth = len(next_values or [])
                prev_lcp = common_prefix_len(prev_values, current_values)
                next_lcp = common_prefix_len(current_values, next_values)
                next_is_same_dir_file = row_index < len(layout.row_defs) and (not layout.row_defs[row_index].node.is_dir) and next_values == current_values
                next_is_child = bool(next_values) and len(next_values) > len(current_values) and next_values[:len(current_values)] == current_values
                if has_mixed_rows:
                    if pos_in_group < self_depth - 1:
                        bottom = False
                        if not next_is_same_dir_file and not next_is_child and row_index > 1 and layout.row_defs[row_index - 2].node.is_dir:
                            bottom = pos_in_group >= next_lcp
                        matrix[row_index][col_index] = border_tuple(True, False, False, bottom)
                        continue
                    if pos_in_group == self_depth - 1:
                        # The directory's own cell: leave open toward children.
                        bottom = not (next_is_same_dir_file or next_is_child)
                        matrix[row_index][col_index] = border_tuple(True, False, True, bottom)
                        continue
                    bottom = not next_is_same_dir_file
                    matrix[row_index][col_index] = border_tuple(False, False, True, bottom)
                    continue
                if pos_in_group < self_depth - 1:
                    right = pos_in_group == 0 and pos_in_group == self_depth - 2 and prev_lcp > pos_in_group and prev_depth > self_depth
                    bottom = pos_in_group >= next_lcp and not right
                    matrix[row_index][col_index] = border_tuple(True, right, False, bottom)
                    continue
                if pos_in_group == self_depth - 1:
                    if self_depth == 1:
                        left = prev_depth <= 1
                    else:
                        left = prev_depth <= self_depth + 1
                    bottom = row_index == len(layout.row_defs) or next_depth <= self_depth
                    matrix[row_index][col_index] = border_tuple(left, False, True, bottom)
                    continue
                matrix[row_index][col_index] = border_tuple(False, False, True, True)
                continue
            # NOTE(review): the file/dir branches above both end in
            # `continue`, so this fallback looks unreachable — confirm
            # before relying on it or removing it.
            if self_depth <= 0:
                matrix[row_index][col_index] = border_tuple(False, False, False, False)
                continue
            if pos_in_group < self_depth - 1:
                matrix[row_index][col_index] = border_tuple(True, True, False, False)
            elif pos_in_group == self_depth - 1:
                if row.node.is_dir:
                    matrix[row_index][col_index] = border_tuple(True, False, True, False)
                else:
                    matrix[row_index][col_index] = border_tuple(True, False, False, False)
            else:
                if row.node.is_dir:
                    matrix[row_index][col_index] = border_tuple(False, False, True, True)
                else:
                    matrix[row_index][col_index] = border_tuple(False, False, False, False)
    if needs_trailing_border_row(layout):
        # Closes the bottom of a table whose final data row is a file.
        matrix.append([border_tuple(False, False, True, False) for _ in range(col_count)])
    return matrix
def build_xlsx_sheet_xml(
    layout: FlatLayout,
    start_row: int,
    start_col: int,
    title: str | None,
    margin_column_width: float,
    output_dir: Path,
    style_resolver,
) -> tuple[str, str]:
    """Build the xlsx worksheet XML and its optional relationships XML.

    Returns ``(sheet_xml, rels_xml)``; ``rels_xml`` is ``""`` when the sheet
    contains no hyperlinks.  ``style_resolver(kind, border_sides, is_dir_header)``
    must return a cellXfs style id (the ``ensure_style`` closure produced by
    ``build_styles_xml``) — calling it registers the style as a side effect.
    """
    # (row number, [(column number, serialized <c> cell xml)]) for every emitted row.
    sheet_rows: list[tuple[int, list[tuple[int, str]]]] = []
    # (cell reference such as "B3", relative link target) for every linked cell.
    hyperlinks: list[tuple[str, str]] = []
    # column number -> width in characters, grown as cells are emitted.
    widths: dict[int, float] = {}
    current_row = start_row
    border_matrix = build_table_border_matrix(layout)
    # Columns to the left of the table act as fixed-width margin columns.
    for col in range(1, start_col):
        widths[col] = margin_column_width
    if title is not None:
        # Title cell, then skip an extra row so there is a blank spacer row.
        sheet_rows.append((current_row, [(start_col, excel_cell(title, style_resolver("title", None, False)))]))
        widths[start_col] = max(widths.get(start_col, 0), len(title) + 2)
        current_row += 2
    header_row = current_row
    header_cells: list[tuple[int, str]] = []
    for offset, value in enumerate(layout.header):
        # border_matrix row 0 holds the header borders.
        style_id = style_resolver(
            "header",
            border_matrix[0][offset] if border_matrix else border_tuple(False, False, False, False),
            layout.expanded_column_kinds[offset] == "dirnames",
        )
        col_index = start_col + offset
        header_cells.append((col_index, excel_cell(value, style_id)))
        widths[col_index] = max(widths.get(col_index, 0), len(value) + 2)
    sheet_rows.append((header_row, header_cells))
    current_row += 1
    # Data rows; row_offset starts at 1 so it lines up with border_matrix
    # (whose row 0 is the header row).
    for row_offset, row_values in enumerate(layout.rows, start=1):
        cells: list[tuple[int, str]] = []
        for offset, value in enumerate(row_values):
            col_index = start_col + offset
            cell_ref = f"{column_letter(col_index)}{current_row}"
            style_kind = "data"
            spec = layout.expanded_specs[offset]
            if spec.key in {"filename-with-link", "filename-title-with-link"} and value != "":
                target = link_target(layout.row_defs[row_offset - 1], output_dir)
                hyperlinks.append((cell_ref, target))
                style_kind = "data-link"
            style_id = style_resolver(style_kind, border_matrix[row_offset][offset], False)
            cells.append((col_index, excel_cell(value, style_id)))
            widths[col_index] = max(widths.get(col_index, 0), len(value) + 2)
        sheet_rows.append((current_row, cells))
        current_row += 1
    if needs_trailing_border_row(layout):
        # Extra empty row whose borders close off the bottom edge of the table.
        footer_cells: list[tuple[int, str]] = []
        footer_row_index = len(layout.rows) + 1
        for offset in range(len(layout.header)):
            col_index = start_col + offset
            footer_cells.append((col_index, excel_cell("", style_resolver("data", border_matrix[footer_row_index][offset], False))))
        sheet_rows.append((current_row, footer_cells))
        current_row += 1
    apply_column_width_overrides(widths, layout, start_col)
    cols_xml = "".join(
        f'<col min="{col}" max="{col}" width="{max(width, 4):.2f}" customWidth="1"/>'
        for col, width in sorted(widths.items())
    )
    rows_xml = []
    for row_index, cells in sheet_rows:
        # excel_cell emits '<c ...'; splice the r="A1" coordinate in right after
        # the '<c' prefix so every cell carries its reference.
        cells_xml = "".join(f'<c r="{column_letter(col_index)}{row_index}"{cell_xml[2:] if cell_xml.startswith("<c ") else cell_xml}' for col_index, cell_xml in cells)
        rows_xml.append(f'<row r="{row_index}">{cells_xml}</row>')
    # Freeze everything above the first data row (title + header).
    freeze_row = header_row + 1
    freeze_cell = f"{column_letter(start_col)}{freeze_row}"
    hyperlinks_xml = ""
    rels_xml = ""
    if hyperlinks:
        # Each hyperlink needs a matching Relationship in the sheet's .rels part.
        hyperlink_tags = []
        rel_tags = []
        for index, (cell_ref, target) in enumerate(hyperlinks, start=1):
            rel_id = f"rId{index}"
            hyperlink_tags.append(excel_hyperlink_rel(cell_ref, rel_id))
            rel_tags.append(excel_hyperlink_relationship(rel_id, target))
        hyperlinks_xml = f"<hyperlinks>{''.join(hyperlink_tags)}</hyperlinks>"
        rels_xml = f'''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
{''.join(rel_tags)}
</Relationships>
'''
    sheet_xml = f'''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheetViews>
<sheetView workbookViewId="0" showGridLines="0">
<pane ySplit="{freeze_row - 1}" topLeftCell="{freeze_cell}" activePane="bottomLeft" state="frozen"/>
</sheetView>
</sheetViews>
<cols>{cols_xml}</cols>
<sheetData>{''.join(rows_xml)}</sheetData>{hyperlinks_xml}
</worksheet>
'''
    return sheet_xml, rels_xml
def apply_column_width_overrides(widths: dict[int, float], layout: FlatLayout, start_col: int) -> None:
    """Overwrite computed widths with any explicit widths from the column specs.

    Specs with ``width is None`` leave the auto-computed width untouched.
    """
    for column, spec in enumerate(layout.expanded_specs, start=start_col):
        if spec.width is not None:
            widths[column] = spec.width
def build_styles_xml(header_color: str) -> tuple[object, object]:
    """Create lazy builders for xl/styles.xml.

    Returns ``(build_xml, ensure_style)``:

    * ``ensure_style(kind, border_sides, is_dir_header)`` registers (and
      deduplicates) a cell style and returns its cellXfs index.
    * ``build_xml()`` serializes everything registered so far into the
      styles.xml document; call it only after all styles were requested.

    ``header_color`` is a ``#RRGGBB`` string used as the header fill.

    Fix: the original annotated the first tuple element as ``str`` although a
    callable is returned, and used backslash-escaped quotes inside f-string
    expressions, which is a SyntaxError before Python 3.12 (PEP 701) even
    though the rest of this file only requires 3.10.
    """
    fill_color = header_color[1:].upper()  # strip '#' -> RRGGBB, prefixed "FF" (opaque ARGB) below
    # border sides (left, right, top, bottom) -> borderId; id 0 is the empty border.
    border_registry: dict[tuple[bool, bool, bool, bool], int] = {border_tuple(False, False, False, False): 0}
    # (kind, borderId, is_dir_header) -> cellXfs index, kept in registration order.
    style_registry: dict[tuple[str, int, bool], int] = {}
    style_defs: list[tuple[str, int, bool]] = []
    def ensure_border_id(sides: tuple[bool, bool, bool, bool] | None) -> int:
        # None means "no border requested" -> the shared empty border (id 0).
        if sides is None:
            return 0
        if sides not in border_registry:
            border_registry[sides] = len(border_registry)
        return border_registry[sides]
    def ensure_style(kind: str, sides: tuple[bool, bool, bool, bool] | None, is_dir_header: bool) -> int:
        border_id = ensure_border_id(sides)
        key = (kind, border_id, is_dir_header)
        if key not in style_registry:
            style_registry[key] = len(style_registry)
            style_defs.append(key)
        return style_registry[key]
    # Pre-register the fixed styles so "base" is cellXfs 0 and "title" is 1.
    ensure_style("base", None, False)
    ensure_style("title", None, False)
    def build_xml() -> str:
        borders_xml = ['<border/>']
        # Kept in a variable: a backslash-escaped quote inside an f-string
        # expression only became legal in Python 3.12 (PEP 701).
        thin = ' style="thin"'
        # Skip entry 0 (the empty border literal above); emit the rest in id order.
        for sides, _border_id in sorted(border_registry.items(), key=lambda item: item[1])[1:]:
            left, right, top, bottom = sides
            borders_xml.append(
                "<border>"
                f"<left{thin if left else ''}/>"
                f"<right{thin if right else ''}/>"
                f"<top{thin if top else ''}/>"
                f"<bottom{thin if bottom else ''}/>"
                "</border>"
            )
        xf_xml: list[str] = []
        # style_defs is in registration order, which equals cellXfs index order.
        for kind, border_id, is_dir_header in style_defs:
            if kind == "base":
                xf_xml.append('<xf fontId="0" fillId="0" borderId="0" xfId="0"/>')
            elif kind == "title":
                xf_xml.append('<xf fontId="1" fillId="0" borderId="0" xfId="0" applyFont="1"/>')
            elif kind == "data-link":
                # fontId 2 = underlined blue hyperlink font.
                xf_xml.append(f'<xf fontId="2" fillId="0" borderId="{border_id}" xfId="0" applyFont="1" applyBorder="1"/>')
            elif kind == "header":
                # centerContinuous lets a directory header visually span its columns.
                align = "centerContinuous" if is_dir_header else "center"
                xf_xml.append(
                    f'<xf fontId="0" fillId="2" borderId="{border_id}" xfId="0" applyFill="1" applyBorder="1" applyAlignment="1">'
                    f'<alignment horizontal="{align}"/>'
                    "</xf>"
                )
            else:
                xf_xml.append(f'<xf fontId="0" fillId="0" borderId="{border_id}" xfId="0" applyBorder="1"/>')
        return f'''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
<fonts count="3">
<font><name val="Meiryo UI"/><sz val="9"/></font>
<font><name val="Meiryo UI"/><sz val="11"/><b/></font>
<font><name val="Meiryo UI"/><sz val="9"/><color rgb="FF0563C1"/><u/></font>
</fonts>
<fills count="3">
<fill><patternFill patternType="none"/></fill>
<fill><patternFill patternType="gray125"/></fill>
<fill><patternFill patternType="solid"><fgColor rgb="FF{fill_color}"/><bgColor indexed="64"/></patternFill></fill>
</fills>
<borders count="{len(border_registry)}">
{''.join(borders_xml)}
</borders>
<cellStyleXfs count="1"><xf/></cellStyleXfs>
<cellXfs count="{len(style_defs)}">
{''.join(xf_xml)}
</cellXfs>
</styleSheet>
'''
    return build_xml, ensure_style
def write_xlsx(path: Path, layout: FlatLayout, start_cell: str, title: str | None, margin_column_width: float, header_color: str) -> None:
    """Assemble a minimal OOXML spreadsheet package (.xlsx) at ``path``.

    The package contains one "TOC" sheet.  Styles are registered as a side
    effect while the sheet XML is built (via ``style_resolver``), so the sheet
    must be built before ``styles_builder()`` is invoked.
    """
    start_row, start_col = coordinate_to_indexes(start_cell)
    styles_builder, style_resolver = build_styles_xml(header_color)
    # Building the sheet registers every style/border it needs via style_resolver.
    sheet_xml, sheet_rels_xml = build_xlsx_sheet_xml(layout, start_row, start_col, title, margin_column_width, path.parent, style_resolver)
    styles_xml = styles_builder()
    with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        # Static package plumbing: content types, package-level rels, doc properties.
        zf.writestr("[Content_Types].xml", '''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
<Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
<Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>
<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
</Types>
''')
        zf.writestr("_rels/.rels", '''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>
<Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/>
</Relationships>
''')
        zf.writestr("docProps/app.xml", '''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties">
<Application>ta-tocgen</Application>
</Properties>
''')
        zf.writestr("docProps/core.xml", '''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title>ta-tocgen</dc:title>
</cp:coreProperties>
''')
        # Single-sheet workbook; sheet1 is wired up through xl/_rels below.
        zf.writestr("xl/workbook.xml", '''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheets><sheet name="TOC" sheetId="1" r:id="rId1"/></sheets>
</workbook>
''')
        zf.writestr("xl/_rels/workbook.xml.rels", '''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
</Relationships>
''')
        zf.writestr("xl/styles.xml", styles_xml)
        zf.writestr("xl/worksheets/sheet1.xml", sheet_xml)
        # A sheet-level .rels part only exists when the sheet has hyperlinks.
        if sheet_rels_xml:
            zf.writestr("xl/worksheets/_rels/sheet1.xml.rels", sheet_rels_xml)
def render_html(path: Path, layout: FlatLayout, title: str | None, output_dir: Path, header_color: str) -> None:
    """Write the layout as a Markdown file holding an inline-styled HTML table.

    Directory columns use rowspans (from compute_dir_rowspans) so repeated
    path segments render as one merged cell; link columns become anchors.
    """
    spans = compute_dir_rowspans(layout)
    table_style = 'border-collapse: collapse; border: 1px solid #d0d7de;'
    cell_style = 'vertical-align: top; border: 1px solid #d0d7de; background-color: white;'
    header_style = (
        f'vertical-align: top; border: 1px solid #d0d7de; background-color: {header_color}; '
        'text-align: center; font-weight: normal;'
    )
    out: list[str] = []
    if title is not None:
        out += [f"# {title}", ""]
    out += [f'<table style="{table_style}">', " <thead>", " <tr>"]
    # Header: consecutive "dirnames" columns collapse into one spanning <th>
    # labelled with the first column's header text.
    i = 0
    column_count = len(layout.header)
    while i < column_count:
        if layout.expanded_column_kinds[i] != "dirnames":
            out.append(f' <th style="{header_style}">{html.escape(layout.header[i])}</th>')
            i += 1
        else:
            run_end = i
            while run_end < column_count and layout.expanded_column_kinds[run_end] == "dirnames":
                run_end += 1
            out.append(f' <th colspan="{run_end - i}" style="{header_style}">{html.escape(layout.header[i])}</th>')
            i = run_end
    out += [" </tr>", " </thead>", " <tbody>"]
    for r, values in enumerate(layout.rows):
        out.append(" <tr>")
        for c, text in enumerate(values):
            span = spans.get((r, c))
            if span == 0:
                continue  # cell is covered by a rowspan started above
            span_attr = "" if not span or span <= 1 else f' rowspan="{span}"'
            if layout.expanded_specs[c].key in {"filename-with-link", "filename-title-with-link"} and text != "":
                href = html.escape(link_target(layout.row_defs[r], output_dir), quote=True)
                body = f'<a href="{href}">{html.escape(text)}</a>'
            else:
                body = html.escape(text)
            out.append(f' <td{span_attr} style="{cell_style}">{body}</td>')
        out.append(" </tr>")
    out += [" </tbody>", "</table>"]
    path.write_text("\n".join(out) + "\n", encoding="utf-8")
def compute_dir_rowspans(layout: FlatLayout) -> dict[tuple[int, int], int]:
    """Map (row, col) -> rowspan for directory columns.

    A value of 0 marks a cell hidden because a span started in a row above;
    cells absent from the map render normally (span 1).
    """
    result: dict[tuple[int, int], int] = {}
    row_total = len(layout.rows)
    for group_index, group_columns in enumerate(contiguous_dir_column_groups(layout)):
        for depth, column in enumerate(group_columns):
            # Run-length encode this column: a run continues while the path
            # prefix up to `depth` (or the "empty" marker) stays the same.
            run_start = 0
            run_token: tuple[str, tuple[str, ...]] | None = None
            for row in range(row_total):
                path_values = layout.raw_dir_values[row][group_index] or []
                if depth < len(path_values):
                    token = ("path", tuple(path_values[: depth + 1]))
                else:
                    token = ("empty", tuple(path_values))
                if run_token is None:
                    run_token, run_start = token, row
                elif token != run_token:
                    result[(run_start, column)] = row - run_start
                    for covered in range(run_start + 1, row):
                        result[(covered, column)] = 0
                    run_token, run_start = token, row
            # Flush the final run down to the last row.
            if run_token is not None:
                result[(run_start, column)] = row_total - run_start
                for covered in range(run_start + 1, row_total):
                    result[(covered, column)] = 0
    return result
def resolve_title(base_dir: Path, raw_value: str | None) -> str | None:
    """Resolve the document title from the --with-title value.

    None means "no title"; a non-empty value is used verbatim; an empty value
    asks for auto-detection: the H1 of BASE_DIR/README.md if present, else the
    directory's own name.
    """
    if raw_value is None:
        return None
    if raw_value:
        return raw_value
    readme = base_dir / "README.md"
    if readme.exists():
        heading = parse_h1_title(readme.read_text(encoding="utf-8"))
        if heading:
            return heading
    return base_dir.name
def default_output_path(base_dir: Path, out_mode: str, table_format: str) -> Path:
    """Return the conventional output file for the chosen mode/format.

    tree mode always writes TOC.md; table mode picks a name per format, with
    the html variant (any unrecognized format) written as a .md file.
    """
    if out_mode == "tree":
        return base_dir / "TOC.md"
    names = {
        "csv": "TOC-table.csv",
        "md": "TOC-table.md",
        "xlsx": "TOC-table.xlsx",
    }
    return base_dir / names.get(table_format, "TOC-table-html.md")
def main() -> int:
    """CLI entry point.

    Returns the process exit status: 0 on success, 1 when a TocError is
    raised anywhere in the pipeline (reported on stderr).
    """
    try:
        args = parse_args()
        validate_args(args)
        base_dir = Path(normalize_slashes(args.base_dir)).expanduser().resolve()
        if not (base_dir.exists() and base_dir.is_dir()):
            raise TocError(f"BASE_DIR is not a directory: {args.base_dir}")
        specs = parse_table_columns(args.table_columns)
        h2_keys = collect_requested_h2_keys(specs, args.tree_node_format)
        excludes = [normalize_slashes(value) for value in args.exclude_path]
        roots = build_tree(base_dir, h2_keys, excludes)
        rows = iter_rows(roots, args.out_mode, args.node_type)
        title = resolve_title(base_dir, args.with_title)
        if args.out_file_path:
            out_path = Path(args.out_file_path).expanduser()
        else:
            out_path = default_output_path(base_dir, args.out_mode, args.table_format)
        dir_titles = collect_dir_title_map(roots)
        if args.out_mode == "tree":
            rendered = render_tree(rows, args.tree_node_format, args.with_root_node, dir_titles, title, out_path)
            out_path.write_text(rendered, encoding="utf-8")
            return 0
        # table mode: flatten once, then dispatch on the requested format.
        layout = build_flat_layout(rows, specs, args.max_dir_columns, dir_titles, out_path.parent)
        fmt = args.table_format
        if fmt == "csv":
            render_csv(out_path, layout, title)
        elif fmt == "md":
            render_markdown(out_path, layout, title, out_path.parent)
        elif fmt == "xlsx":
            write_xlsx(out_path, layout, args.cell_start.upper(), title, args.margin_column_width, args.header_color)
        else:
            render_html(out_path, layout, title, out_path.parent, args.header_color)
        return 0
    except TocError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 1
# Script entry point: exit with main()'s status code.
if __name__ == "__main__":
    sys.exit(main())