mirror of
https://github.com/Vonng/ddia.git
synced 2026-06-21 00:47:05 +08:00
220 lines
6.6 KiB
Python
Executable file
220 lines
6.6 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""PDF generation from Markdown using pandoc + LaTeX."""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import subprocess
|
|
import argparse
|
|
from pathlib import Path
|
|
from typing import Optional, List, Dict
|
|
import shutil
|
|
import importlib.util
|
|
|
|
|
|
# File names in the order they are handed to pandoc.  generate_pdf() skips
# any entry that does not exist in its working directory.
CHAPTER_ORDER = [
    "_index.md",
    "preface.md",
    "part-i.md",
    *(f"ch{n}.md" for n in range(1, 5)),      # ch1.md .. ch4.md
    "part-ii.md",
    *(f"ch{n}.md" for n in range(5, 10)),     # ch5.md .. ch9.md
    "part-iii.md",
    *(f"ch{n}.md" for n in range(10, 15)),    # ch10.md .. ch14.md
    "colophon.md",
    "glossary.md",
]

# Font names that main() passes to generate_pdf().
DEFAULT_FONTS = dict(
    mainfont="PingFang SC",
    sansfont="Heiti SC",
)
|
|
|
|
# YAML front matter at the very start of a document; group 1 is the text
# between the opening and closing "---" lines.
YAML_FRONT_RE = re.compile(r'^---\n(.*?)\n---\n', flags=re.DOTALL)

# A "title:" line inside the front matter; group 1 is the title with an
# optional surrounding quote character removed.
TITLE_RE = re.compile(
    r'^title:\s*["\']?([^"\'\n]+)["\']?\s*$',
    flags=re.MULTILINE,
)

# Leading "<digits>." prefix (plus any following whitespace) of a title.
CHAPTER_NUM_RE = re.compile(r'^\d+\.\s*')

# File names of the form "ch<digits>.md"; group 1 is the number.
CHAPTER_FILE_RE = re.compile(r'^ch(\d+)\.md$')

# Blockquote callout marker such as "> [!NOTE] " at the start of a line;
# the trailing space after "]" is part of the match.
CALLOUT_RE = re.compile(
    r'^> \[!(NOTE|TIP|WARNING|CAUTION|DANGER)\] ',
    flags=re.MULTILINE,
)
|
|
|
|
|
|
def convert_pdf_markdown(text: str, filename: str) -> str:
    """Apply the PDF-specific Markdown rewrites to one document.

    Callout markers are rewritten first, then a title heading derived from
    the YAML front matter is inserted.

    Args:
        text: Markdown source of the document.
        filename: Base name of the file; it selects the heading level.

    Returns:
        The rewritten Markdown text.
    """
    return _add_title_heading(_convert_callouts(text), filename)
|
|
|
|
|
|
def _convert_callouts(text: str) -> str:
    """Replace ``> [!NOTE] ``-style callout markers with bold Chinese labels.

    After the markers are replaced, the leading ``>`` (and one optional
    following space) is removed from the start of every line, so all
    blockquote prefixes in the text are dropped, not only callout ones.
    """
    labels = {
        'note': '注',
        'tip': '提示',
        'warning': '警告',
        'caution': '注意',
        'danger': '危险'
    }

    def _to_label(found):
        kind = found.group(1).lower()
        # Fall back to the raw callout type if it has no translation.
        return f"**{labels.get(kind, kind)}**: "

    labelled = CALLOUT_RE.sub(_to_label, text)
    return re.sub(r'^> ?', '', labelled, flags=re.MULTILINE)
|
|
|
|
|
|
def _add_title_heading(text: str, filename: str) -> str:
    """Insert a Markdown heading built from the front-matter ``title``.

    The text is returned unchanged when it does not start with a YAML
    front-matter block, or when that block has no ``title`` line.
    Otherwise the front matter is re-emitted, followed by the heading and
    the rest of the document.  A leading ``<digits>.`` prefix is removed
    from the title.  Files named ``ch<digits>.md`` get a level-1 heading;
    every other file gets a level-2 heading.
    """
    front = YAML_FRONT_RE.match(text)
    if not front:
        return text

    frontmatter = front.group(1)
    found = TITLE_RE.search(frontmatter)
    if not found:
        return text

    remainder = text[front.end():]
    clean_title = CHAPTER_NUM_RE.sub('', found.group(1))
    marker = "#" if CHAPTER_FILE_RE.match(filename) else "##"
    heading = f"{marker} {clean_title}"
    return f"---\n{frontmatter}\n---\n\n{heading}\n\n{remainder}"
|
|
|
|
|
|
def check_cmd(cmd: str) -> bool:
    """Return True if *cmd* resolves to an executable on the current PATH.

    Uses ``shutil.which`` rather than spawning the external ``which``
    program, so the check needs no subprocess and also works on systems
    that do not ship a ``which`` binary.
    """
    return shutil.which(cmd) is not None
|
|
|
|
|
|
def get_available_engine() -> Optional[str]:
    """Return the first installed LaTeX engine, or None if neither exists.

    ``xelatex`` is tried before ``lualatex``.
    """
    for candidate in ("xelatex", "lualatex"):
        if check_cmd(candidate):
            return candidate
    return None
|
|
|
|
|
|
def check_dependencies() -> List[str]:
    """Return human-readable names of required tools that are not installed.

    The list is empty when pandoc and at least one LaTeX engine are found.
    """
    pandoc_found = check_cmd("pandoc")
    engine_found = get_available_engine()

    problems: List[str] = []
    if not pandoc_found:
        problems.append("pandoc")
    if not engine_found:
        problems.append("xelatex or lualatex (LaTeX engine)")
    return problems
|
|
|
|
|
|
def preprocess_markdown(input_dir: Path, output_dir: Path) -> None:
    """Preprocess every ``*.md`` file in *input_dir* into *output_dir*.

    Each file is first run through ``process_file`` from the sibling
    ``preprocess-epub.py`` script (loaded by path, since its file name is
    not a valid module name), then through ``convert_pdf_markdown``, and
    finally written to *output_dir* under its original file name.

    Args:
        input_dir: Directory containing the source Markdown files.
        output_dir: Directory for the processed files; created if missing.

    Raises:
        RuntimeError: If the preprocess script cannot be loaded as a module.
    """
    script_dir = Path(__file__).parent
    preprocess_script = script_dir / "preprocess-epub.py"
    spec = importlib.util.spec_from_file_location("preprocess_epub", preprocess_script)
    if spec is None:
        raise RuntimeError("Failed to load preprocess module")
    module = importlib.util.module_from_spec(spec)
    if spec.loader is None:
        raise RuntimeError("Failed to load preprocess module loader")
    spec.loader.exec_module(module)

    output_dir.mkdir(parents=True, exist_ok=True)
    md_files = sorted(input_dir.glob("*.md"))

    print(f"Preprocessing {len(md_files)} files...")

    # One scratch file is reused for the intermediate output of every input.
    temp_output = output_dir / "tmp_preprocess.md"
    for md_file in md_files:
        try:
            module.process_file(str(md_file), str(temp_output))
            content = temp_output.read_text(encoding='utf-8')
        finally:
            # Remove the scratch file even when preprocessing or reading
            # fails, so a failed run does not leave it behind.
            temp_output.unlink(missing_ok=True)

        content = convert_pdf_markdown(content, md_file.name)
        (output_dir / md_file.name).write_text(content, encoding='utf-8')
|
|
|
|
|
|
def generate_pdf(
    temp_dir: Path,
    output_file: Path,
    metadata_file: Optional[str],
    engine: str,
    fonts: Dict[str, str],
    margin: str = "1in",
) -> None:
    """Run pandoc to build one PDF from the preprocessed chapter files.

    Args:
        temp_dir: Directory holding the preprocessed Markdown files.
        output_file: Path of the PDF to write.
        metadata_file: Path of a pandoc metadata YAML file.  When None or
            empty, no ``--metadata-file`` option is passed.
        engine: Value for pandoc's ``--pdf-engine`` option.
        fonts: Template font variables (for example ``mainfont``); each
            entry is passed to pandoc as ``-V name=value``.  ``mainfont``
            defaults to "PingFang SC" when the mapping does not set it.
        margin: Page margin used for the ``geometry:margin`` variable.

    Raises:
        ValueError: If none of the files in CHAPTER_ORDER exist in temp_dir.
        RuntimeError: If pandoc exits with a non-zero status; the message
            carries pandoc's stderr.
    """
    chapters = [str(temp_dir / ch) for ch in CHAPTER_ORDER if (temp_dir / ch).exists()]

    if not chapters:
        raise ValueError("No valid chapter files found")

    script_dir = Path(__file__).parent
    header_file = script_dir / "header.tex"

    cmd = ["pandoc", "-o", str(output_file)]
    # Add the option and its value together: a lone "--metadata-file" would
    # consume the following option as its argument.
    if metadata_file:
        cmd += ["--metadata-file", metadata_file]
    cmd += [
        "-H", str(header_file),
        "--toc",
        "--toc-depth=2",
        "--top-level-division=chapter",
        "--file-scope",
        f"--pdf-engine={engine}",
    ]

    variables = [
        f"geometry:margin={margin}",
        "linestretch=1.5",
        "book=true",
        "classoption=openany",
    ]
    font_settings = {"mainfont": "PingFang SC", **(fonts or {})}
    variables += [f"{name}={value}" for name, value in font_settings.items()]
    # "-V" and its value must be separate argv entries; passing them as one
    # "-V key=value" string hands pandoc a variable name with a leading space.
    for variable in variables:
        cmd += ["-V", variable]

    cmd.extend(chapters)

    print(f"Generating PDF with {engine}...")
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        raise RuntimeError(f"PDF generation failed: {result.stderr}")
|
|
|
|
|
|
def main():
    """Command-line entry point: build ``ddia.pdf`` from the Markdown sources.

    The input and output directories are joined onto the parent of this
    script's directory.  The process exits with status 1 when pandoc or a
    LaTeX engine is missing.  Any existing ``ddia.pdf`` is deleted before
    the build, and the temporary directory is removed afterwards unless
    ``--no-cleanup`` is given.
    """
    cli = argparse.ArgumentParser(description="Generate PDF from Markdown")
    cli.add_argument("-i", "--input", default="content/zh", help="Input directory")
    cli.add_argument("-o", "--output", default="output", help="Output directory")
    cli.add_argument("-m", "--metadata", help="Metadata YAML file")
    cli.add_argument("-e", "--engine", choices=["xelatex", "lualatex"], help="PDF engine")
    cli.add_argument("--no-cleanup", action="store_true", help="Keep temp files")
    args = cli.parse_args()

    project_root = Path(__file__).parent.parent
    input_dir = project_root / args.input
    output_dir = project_root / args.output
    temp_dir = output_dir / "temp"

    missing = check_dependencies()
    if missing:
        report = ["Error: Missing dependencies:"]
        report.extend(f" - {dep}" for dep in missing)
        report.append("\nInstall: brew install pandoc && brew install --cask mactex")
        print("\n".join(report))
        sys.exit(1)

    detected_engine = get_available_engine()
    if detected_engine is None:
        print("Error: No PDF engine available")
        sys.exit(1)

    # An explicit --engine / --metadata wins over the detected defaults.
    engine = args.engine if args.engine else detected_engine
    metadata = args.metadata if args.metadata else str(project_root / "metadata.yaml")

    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / "ddia.pdf"
    # Remove any previous PDF before starting the build.
    output_file.unlink(missing_ok=True)

    preprocess_markdown(input_dir, temp_dir)
    generate_pdf(temp_dir, output_file, metadata, engine, DEFAULT_FONTS)

    keep_temp = args.no_cleanup
    if temp_dir.exists() and not keep_temp:
        shutil.rmtree(temp_dir)

    print(f"PDF created: {output_file}")
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|