2
0
Fork 0
mirror of https://github.com/Vonng/ddia.git synced 2026-06-21 00:47:05 +08:00
ddia/bin/preprocess-epub.py
zexuan.peng 909e56f915 🐛 fix(epub): 恢复 EPUB 导出功能,修复图片显示问题 (Fixes #388)
Co-Authored-By: Zexuan Peng <pengzexuan2001@gmail.com>
2026-02-23 22:55:56 +08:00

114 lines
3.2 KiB
Python
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Preprocess Markdown files, converting Hugo shortcodes into a format Pandoc can read.

Two kinds of shortcode are handled:
1. {{< figure src="/fig/xxx.png" caption="xxx" >}} → ![xxx](static/fig/xxx.png)
2. {{< figure ... >}} (no src) → removed (typically used for code examples)
"""
import re
import sys
import os
from pathlib import Path
def convert_figure_shortcode(text):
    """Convert Hugo ``figure`` shortcodes into Markdown image syntax.

    Three passes run in order, each on the output of the previous one:

    1. Shortcodes whose first attribute is ``src`` and that also carry a
       ``caption`` become ``![caption](src)``.
    2. Remaining shortcodes whose first attribute is ``src`` become
       ``![](src)``.
    3. Every ``figure`` shortcode still left (no leading ``src``) is removed.

    A ``src`` that starts with ``/`` gets ``static`` prepended, so
    ``/fig/x.png`` becomes ``static/fig/x.png``; other paths are unchanged.

    Args:
        text: Markdown text content.

    Returns:
        The text with figure shortcodes converted or removed.
    """
    def to_local_path(src):
        # The leading slash is kept: 'static' + '/fig/x.png' -> 'static/fig/x.png'.
        return 'static' + src if src.startswith('/') else src

    # Pass 1: figures with a caption, e.g.
    # {{< figure src="/fig/ddia_0302.png" caption="Figure 3-2. xxx" >}}
    pattern_with_caption = r'\{\{< figure\s+src="([^"]+)"[^>]*\scaption="([^"]*)"[^>]*>\}\}'

    def replace_with_caption(match):
        return f'![{match.group(2)}]({to_local_path(match.group(1))})'

    text = re.sub(pattern_with_caption, replace_with_caption, text)

    # Pass 2: figures with a src but no caption.
    pattern_without_caption = r'\{\{< figure\s+src="([^"]+)"[^>]*>\}\}'

    def replace_without_caption(match):
        # The leading '!' is required: without it Markdown renders an empty
        # link instead of an image.
        return f'![]({to_local_path(match.group(1))})'

    text = re.sub(pattern_without_caption, replace_without_caption, text)

    # Pass 3: drop figure shortcodes that were not converted above
    # (for example those wrapping code blocks, which have no src).
    pattern_no_src = r'\{\{< figure[^>]*>\}\}'
    return re.sub(pattern_no_src, '', text)
def process_file(input_path, output_path):
    """Convert the figure shortcodes of one Markdown file.

    Reads ``input_path`` as UTF-8, runs ``convert_figure_shortcode`` on its
    content, writes the result to ``output_path`` as UTF-8 and prints a
    one-line progress message.

    Args:
        input_path: Path of the Markdown file to read.
        output_path: Path of the file to write; may equal ``input_path``.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        content = f.read()

    converted_content = convert_figure_shortcode(content)

    # os.path.dirname() returns '' for a bare filename, and os.makedirs('')
    # raises FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(converted_content)

    print(f"Processed: {input_path} -> {output_path}")
def main():
    """Command-line entry point.

    Usage:
        preprocess.py <input_file> [output_file]
        preprocess.py <input_dir> <output_dir>

    With a file, the output path defaults to the input path (the file is
    rewritten in place). With a directory, every ``*.md`` file directly
    inside it is processed into ``output_dir`` under the same file name.
    Exits with status 1 on missing arguments or an invalid input path.
    """
    if len(sys.argv) < 2:
        print("Usage: preprocess.py <input_file> [output_file]")
        print(" or: preprocess.py <input_dir> <output_dir>")
        sys.exit(1)

    input_path = sys.argv[1]

    if os.path.isfile(input_path):
        # Single file: overwrite in place unless an output path is given.
        output_path = sys.argv[2] if len(sys.argv) > 2 else input_path
        process_file(input_path, output_path)
    elif os.path.isdir(input_path):
        # Directory mode has no sensible default output location, so a
        # missing second argument is a usage error, not an IndexError.
        if len(sys.argv) < 3:
            print("Error: an output directory is required when the input is a directory")
            sys.exit(1)
        output_dir = sys.argv[2]

        # Sorted so files are processed in a deterministic order.
        md_files = sorted(Path(input_path).glob('*.md'))
        for md_file in md_files:
            output_file = os.path.join(output_dir, md_file.name)
            process_file(str(md_file), output_file)
        print(f"\nTotal processed: {len(md_files)} files")
    else:
        print(f"Error: {input_path} is not a valid file or directory")
        sys.exit(1)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()