diff --git a/bin/epub b/bin/epub index 2ff80af..2dc5723 100755 --- a/bin/epub +++ b/bin/epub @@ -28,7 +28,7 @@ convert_to_epub() { pandoc -o "$OUTPUT_BOOK" --metadata-file="$meta_file" \ --toc-depth=2 \ --top-level-division=chapter \ - --file-scope=true \ + --split-level=1 \ --css="$css_file" \ --webtex \ --wrap=preserve \ @@ -39,14 +39,14 @@ convert_to_epub() { "${TEMP_DIR}"/ch2.md \ "${TEMP_DIR}"/ch3.md \ "${TEMP_DIR}"/ch4.md \ - "${TEMP_DIR}"/part-ii.md \ "${TEMP_DIR}"/ch5.md \ + "${TEMP_DIR}"/part-ii.md \ "${TEMP_DIR}"/ch6.md \ "${TEMP_DIR}"/ch7.md \ "${TEMP_DIR}"/ch8.md \ "${TEMP_DIR}"/ch9.md \ - "${TEMP_DIR}"/part-iii.md \ "${TEMP_DIR}"/ch10.md \ + "${TEMP_DIR}"/part-iii.md \ "${TEMP_DIR}"/ch11.md \ "${TEMP_DIR}"/ch12.md \ "${TEMP_DIR}"/ch13.md \ diff --git a/bin/preprocess-epub.py b/bin/preprocess-epub.py index 758f41a..b52c112 100755 --- a/bin/preprocess-epub.py +++ b/bin/preprocess-epub.py @@ -13,8 +13,18 @@ import sys from pathlib import Path FIGURE_SHORTCODE_RE = re.compile(r"\{\{<\s*figure\b(.*?)>\}\}", re.DOTALL) +CALLOUT_SHORTCODE_RE = re.compile(r"\{\{<\s*callout\b(.*?)>\}\}(.*?)\{\{<\s*/callout\s*>\}\}", re.DOTALL) ATTR_RE = re.compile(r'([\w-]+)="([^"]*)"') ABS_IMAGE_RE = re.compile(r'!\[([^\]]*)\]\(/(?!static/)([^)]+)\)') +FRONT_MATTER_RE = re.compile(r"\A---\s*\n(.*?)\n---\s*\n?", re.DOTALL) +TITLE_RE = re.compile(r'^title:\s*(?:"([^"]*)"|\'([^\']*)\'|(.+?))\s*$', re.MULTILINE) +LINK_HEADING_RE = re.compile(r"^(#{2,6})\s+(\[[^\]]+\]\([^)]+\))\s*$", re.MULTILINE) +HEADER_ID_RE = re.compile(r"\{#([A-Za-z0-9_:-]+)\}") +RAW_ID_RE = re.compile(r'(]*\bid=")([^"]+)(")', re.IGNORECASE) +RAW_HREF_RE = re.compile(r'(]*\bhref=")(/[^"#?)]*)(#[^"]*)?(")', re.IGNORECASE) +MD_HREF_RE = re.compile(r"(? {line}" if line else ">" for line in body.splitlines()) + return f"> **注意**\n>\n{quoted}" + + text = CALLOUT_SHORTCODE_RE.sub(replace_callout_shortcode, text) text = FIGURE_SHORTCODE_RE.sub(replace_figure_shortcode, text) # 把 Markdown 里的绝对路径图片 ![](/map/ch01.png) 转为 static/map/ch01.png text = ABS_IMAGE_RE.sub(r'![\1](static/\2)', text) + # 网站目录页里用二级标题承载跳转链接;EPUB 目录应指向真实章节页。 + text = LINK_HEADING_RE.sub(r"**\2**", text) + + text = HEADER_ID_RE.sub(lambda m: "{#" + _page_anchor(slug, m.group(1)) + "}", text) + text = RAW_ID_RE.sub(lambda m: f"{m.group(1)}{_page_anchor(slug, m.group(2))}{m.group(3)}", text) + text = _rewrite_links(text, slug, known_pages) + text = _rewrite_footnotes(text, slug) + + title = meta.get("title") + if title: + text = f"# {title} {{#{slug}}}\n\n{text.lstrip()}" + return text -def process_file(input_path, output_path): +def process_file(input_path, output_path, known_pages=None): """ 处理单个 Markdown 文件 @@ -68,7 +175,7 @@ def process_file(input_path, output_path): content = f.read() # 转换内容 - converted_content = convert_markdown(content) + converted_content = convert_markdown(content, _slug_for_path(input_path), known_pages) # 写入输出文件 os.makedirs(os.path.dirname(output_path), exist_ok=True) @@ -97,10 +204,11 @@ def main(): # 获取所有 .md 文件 md_files = sorted(input_dir.glob('*.md')) + known_pages = {_slug_for_path(path) for path in md_files} for md_file in md_files: output_file = os.path.join(output_dir, md_file.name) - process_file(str(md_file), output_file) + process_file(str(md_file), output_file, known_pages) print(f"\nTotal processed: {len(md_files)} files") else: