From 573bb53a0557e3231560cd0a7a7067c4675dbe39 Mon Sep 17 00:00:00 2001
From: Feng Ruohang <rh@vonng.com>
Date: Tue, 24 Feb 2026 12:20:37 +0800
Subject: [PATCH] fix(epub): handle figure attribute variants and absolute
 image paths

---
 bin/preprocess-epub.py | 61 ++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 32 deletions(-)

diff --git a/bin/preprocess-epub.py b/bin/preprocess-epub.py
index afb538d..758f41a 100755
--- a/bin/preprocess-epub.py
+++ b/bin/preprocess-epub.py
@@ -7,14 +7,24 @@
 2. {{< figure ... >}} (无 src) → 移除（通常用于代码示例）
 """
 
+import os
 import re
 import sys
-import os
 from pathlib import Path
 
-def convert_figure_shortcode(text):
+FIGURE_SHORTCODE_RE = re.compile(r"\{\{<\s*figure\b(.*?)>\}\}", re.DOTALL)
+ATTR_RE = re.compile(r'([\w-]+)="([^"]*)"')
+ABS_IMAGE_RE = re.compile(r'!\[([^\]]*)\]\(/(?!/|static/)([^)]+)\)')  # skip //host URLs and /static/ paths
+
+
+def _escape_alt_text(text):
+    """Return *text* with every `]` backslash-escaped so it stays inside `![...]`."""
+    return text.translate({ord("]"): r"\]"})
+
+
+def convert_markdown(text):
     """
-    转换 Hugo figure shortcode 为 Markdown 图片语法
+    转换 Hugo figure shortcode 和绝对路径图片引用。
 
     Args:
         text: Markdown 文本内容
@@ -22,40 +32,27 @@ def convert_figure_shortcode(text):
     Returns:
         转换后的文本
     """
+    def replace_figure_shortcode(match):
+        attrs_text = match.group(1)
+        attrs = dict(ATTR_RE.findall(attrs_text))
+        src = attrs.get("src")
 
-    # 先处理有 caption 的 figure shortcode
-    # 例如: {{< figure src="/fig/ddia_0302.png" caption="图 3-2. xxx" >}}
-    pattern_with_caption = r'\{\{< figure\s+src="([^"]+)"[^>]*\scaption="([^"]*)"[^>]*>\}\}'
+        # 没有 src 的 figure 一般是代码示例占位，直接移除
+        if not src:
+            return ""
 
-    def replace_with_caption(match):
-        src = match.group(1)
-        caption = match.group(2)
-
-        # 移除开头的斜杠，添加 static 前缀
+        # 绝对路径资源转为相对 static 路径，便于 Pandoc 打包
         if src.startswith('/'):
             src = 'static' + src
 
-        # 返回 Markdown 图片语法
-        return f'![{caption}]({src})'
+        # 优先 caption，fallback 到 title，至少保证图片可渲染
+        alt = _escape_alt_text(attrs.get("caption") or attrs.get("title") or "")
+        return f'![{alt}]({src})'
 
-    text = re.sub(pattern_with_caption, replace_with_caption, text)
+    text = FIGURE_SHORTCODE_RE.sub(replace_figure_shortcode, text)
 
-    # 再处理没有 caption 的 figure shortcode
-    pattern_without_caption = r'\{\{< figure\s+src="([^"]+)"[^>]*>\}\}'
-
-    def replace_without_caption(match):
-        src = match.group(1)
-
-        if src.startswith('/'):
-            src = 'static' + src
-
-        return f'[]({src})'
-
-    text = re.sub(pattern_without_caption, replace_without_caption, text)
-
-    # 移除完全没有 src 属性的 figure shortcode（例如用于代码块的）
-    pattern_no_src = r'\{\{< figure[^>]*>\}\}'
-    text = re.sub(pattern_no_src, '', text)
+    # 把 Markdown 里的绝对路径图片 ![](/map/ch01.png) 转为 static/map/ch01.png
+    text = ABS_IMAGE_RE.sub(r'![\1](static/\2)', text)
 
     return text
 
@@ -71,7 +68,7 @@ def process_file(input_path, output_path):
         content = f.read()
 
     # 转换内容
-    converted_content = convert_figure_shortcode(content)
+    converted_content = convert_markdown(content)
 
     # 写入输出文件
     os.makedirs(os.path.dirname(output_path), exist_ok=True)
@@ -99,7 +96,7 @@ def main():
         input_dir = Path(input_path)
 
         # 获取所有 .md 文件
-        md_files = list(input_dir.glob('*.md'))
+        md_files = sorted(input_dir.glob('*.md'))
 
         for md_file in md_files:
             output_file = os.path.join(output_dir, md_file.name)