mirror of
https://github.com/Vonng/ddia.git
synced 2026-06-23 01:47:00 +08:00
fix(epub): handle figure attribute variants and absolute image paths
This commit is contained in:
parent
26878ff081
commit
573bb53a05
1 changed files with 29 additions and 32 deletions
|
|
@ -7,14 +7,24 @@
|
||||||
2. {{< figure ... >}} (无 src) → 移除(通常用于代码示例)
|
2. {{< figure ... >}} (无 src) → 移除(通常用于代码示例)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import os
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
def convert_figure_shortcode(text):
|
FIGURE_SHORTCODE_RE = re.compile(r"\{\{<\s*figure\b(.*?)>\}\}", re.DOTALL)
|
||||||
|
ATTR_RE = re.compile(r'([\w-]+)="([^"]*)"')
|
||||||
|
ABS_IMAGE_RE = re.compile(r'!\[([^\]]*)\]\(/(?!static/)([^)]+)\)')
|
||||||
|
|
||||||
|
|
||||||
|
def _escape_alt_text(text):
|
||||||
|
"""Escape `]` in alt text to avoid breaking Markdown image syntax."""
|
||||||
|
return text.replace("]", r"\]")
|
||||||
|
|
||||||
|
|
||||||
|
def convert_markdown(text):
|
||||||
"""
|
"""
|
||||||
转换 Hugo figure shortcode 为 Markdown 图片语法
|
转换 Hugo figure shortcode 和绝对路径图片引用。
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: Markdown 文本内容
|
text: Markdown 文本内容
|
||||||
|
|
@ -22,40 +32,27 @@ def convert_figure_shortcode(text):
|
||||||
Returns:
|
Returns:
|
||||||
转换后的文本
|
转换后的文本
|
||||||
"""
|
"""
|
||||||
|
def replace_figure_shortcode(match):
|
||||||
|
attrs_text = match.group(1)
|
||||||
|
attrs = dict(ATTR_RE.findall(attrs_text))
|
||||||
|
src = attrs.get("src")
|
||||||
|
|
||||||
# 先处理有 caption 的 figure shortcode
|
# 没有 src 的 figure 一般是代码示例占位,直接移除
|
||||||
# 例如: {{< figure src="/fig/ddia_0302.png" caption="图 3-2. xxx" >}}
|
if not src:
|
||||||
pattern_with_caption = r'\{\{< figure\s+src="([^"]+)"[^>]*\scaption="([^"]*)"[^>]*>\}\}'
|
return ""
|
||||||
|
|
||||||
def replace_with_caption(match):
|
# 绝对路径资源转为相对 static 路径,便于 Pandoc 打包
|
||||||
src = match.group(1)
|
|
||||||
caption = match.group(2)
|
|
||||||
|
|
||||||
# 移除开头的斜杠,添加 static 前缀
|
|
||||||
if src.startswith('/'):
|
if src.startswith('/'):
|
||||||
src = 'static' + src
|
src = 'static' + src
|
||||||
|
|
||||||
# 返回 Markdown 图片语法
|
# 优先 caption,fallback 到 title,至少保证图片可渲染
|
||||||
return f''
|
alt = _escape_alt_text(attrs.get("caption") or attrs.get("title") or "")
|
||||||
|
return f''
|
||||||
|
|
||||||
text = re.sub(pattern_with_caption, replace_with_caption, text)
|
text = FIGURE_SHORTCODE_RE.sub(replace_figure_shortcode, text)
|
||||||
|
|
||||||
# 再处理没有 caption 的 figure shortcode
|
# 把 Markdown 里的绝对路径图片  转为 static/map/ch01.png
|
||||||
pattern_without_caption = r'\{\{< figure\s+src="([^"]+)"[^>]*>\}\}'
|
text = ABS_IMAGE_RE.sub(r'', text)
|
||||||
|
|
||||||
def replace_without_caption(match):
|
|
||||||
src = match.group(1)
|
|
||||||
|
|
||||||
if src.startswith('/'):
|
|
||||||
src = 'static' + src
|
|
||||||
|
|
||||||
return f'[]({src})'
|
|
||||||
|
|
||||||
text = re.sub(pattern_without_caption, replace_without_caption, text)
|
|
||||||
|
|
||||||
# 移除完全没有 src 属性的 figure shortcode(例如用于代码块的)
|
|
||||||
pattern_no_src = r'\{\{< figure[^>]*>\}\}'
|
|
||||||
text = re.sub(pattern_no_src, '', text)
|
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
@ -71,7 +68,7 @@ def process_file(input_path, output_path):
|
||||||
content = f.read()
|
content = f.read()
|
||||||
|
|
||||||
# 转换内容
|
# 转换内容
|
||||||
converted_content = convert_figure_shortcode(content)
|
converted_content = convert_markdown(content)
|
||||||
|
|
||||||
# 写入输出文件
|
# 写入输出文件
|
||||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||||
|
|
@ -99,7 +96,7 @@ def main():
|
||||||
input_dir = Path(input_path)
|
input_dir = Path(input_path)
|
||||||
|
|
||||||
# 获取所有 .md 文件
|
# 获取所有 .md 文件
|
||||||
md_files = list(input_dir.glob('*.md'))
|
md_files = sorted(input_dir.glob('*.md'))
|
||||||
|
|
||||||
for md_file in md_files:
|
for md_file in md_files:
|
||||||
output_file = os.path.join(output_dir, md_file.name)
|
output_file = os.path.join(output_dir, md_file.name)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue