mirror of
https://github.com/Vonng/ddia.git
synced 2026-06-21 00:47:05 +08:00
114 lines
3.2 KiB
Python
Executable file
114 lines
3.2 KiB
Python
Executable file
#!/usr/bin/env python3
|
||
"""
|
||
预处理 Markdown 文件,将 Hugo shortcode 转换为 Pandoc 可识别的格式
|
||
|
||
处理两种 shortcode:
|
||
1. {{< figure src="/fig/xxx.png" caption="xxx" >}} → ![xxx](static/fig/xxx.png)
|
||
2. {{< figure ... >}} (无 src) → 移除(通常用于代码示例)
|
||
"""
|
||
|
||
import re
|
||
import sys
|
||
import os
|
||
from pathlib import Path
|
||
|
||
def convert_figure_shortcode(text):
    """Convert Hugo ``figure`` shortcodes into Markdown image syntax.

    Three passes run in order, each working on what the previous one left:

    1. ``{{< figure src="..." ... caption="..." >}}`` becomes
       ``![caption](src)``.
    2. Any remaining ``{{< figure src="..." ... >}}`` becomes ``![](src)``.
    3. Any remaining ``{{< figure ... >}}`` (one that does not start with a
       ``src`` attribute) is removed from the text.

    A ``src`` beginning with ``/`` gets ``static`` prepended, so
    ``/fig/a.png`` becomes ``static/fig/a.png``; other values are kept as-is.

    Args:
        text: Markdown text.

    Returns:
        The text with every figure shortcode converted or removed.
    """

    def to_static_path(src):
        # Site-absolute paths are rewritten relative to the "static" directory.
        return 'static' + src if src.startswith('/') else src

    # Captioned figures must be handled first: the caption-less pattern below
    # would also match them and discard the caption.
    # Example: {{< figure src="/fig/ddia_0302.png" caption="Figure 3-2. xxx" >}}
    pattern_with_caption = r'\{\{< figure\s+src="([^"]+)"[^>]*\scaption="([^"]*)"[^>]*>\}\}'

    def replace_with_caption(match):
        src = to_static_path(match.group(1))
        caption = match.group(2)
        # Emit a Markdown image with the caption as its alt text.
        return f'![{caption}]({src})'

    text = re.sub(pattern_with_caption, replace_with_caption, text)

    # Figures that have a src but no caption.
    pattern_without_caption = r'\{\{< figure\s+src="([^"]+)"[^>]*>\}\}'

    def replace_without_caption(match):
        src = to_static_path(match.group(1))
        # The leading "!" makes this an image rather than an empty-text link.
        return f'![]({src})'

    text = re.sub(pattern_without_caption, replace_without_caption, text)

    # Whatever figure shortcodes are left have no leading src attribute
    # (e.g. ones used inside code samples); drop them.
    pattern_no_src = r'\{\{< figure[^>]*>\}\}'
    text = re.sub(pattern_no_src, '', text)

    return text
|
||
|
||
def process_file(input_path, output_path):
    """Convert the figure shortcodes of a single Markdown file.

    Reads ``input_path`` as UTF-8, passes the text through
    ``convert_figure_shortcode`` and writes the result to ``output_path`` as
    UTF-8, creating the output's parent directory when it does not exist.
    A one-line progress message is printed afterwards.

    Args:
        input_path: Path of the Markdown file to read.
        output_path: Path of the file to write. It may be the same as
            ``input_path``; the input is read completely before the output
            is opened for writing.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        content = f.read()

    converted_content = convert_figure_shortcode(content)

    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(converted_content)

    print(f"Processed: {input_path} -> {output_path}")
|
||
|
||
def main():
    """Command-line entry point.

    Reads ``sys.argv`` and runs in one of two modes:

    * ``<input_file> [output_file]``: process one file. Without an output
      path the input file is rewritten in place.
    * ``<input_dir> <output_dir>``: process every ``*.md`` file directly
      inside ``input_dir`` (no recursion) and write each result under
      ``output_dir`` with the same file name.

    Exits with status 1 after printing a message when the arguments are
    missing, when the output directory is omitted in directory mode, or when
    the input path is neither a file nor a directory.
    """
    usage_lines = (
        "Usage: preprocess.py <input_file> [output_file]",
        " or: preprocess.py <input_dir> <output_dir>",
    )

    if len(sys.argv) < 2:
        for line in usage_lines:
            print(line)
        sys.exit(1)

    input_path = sys.argv[1]

    if os.path.isfile(input_path):
        # Single-file mode: default to overwriting the input.
        output_path = sys.argv[2] if len(sys.argv) > 2 else input_path
        process_file(input_path, output_path)
    elif os.path.isdir(input_path):
        # Directory mode has no in-place default, so the output directory is
        # mandatory; report it instead of failing with an IndexError.
        if len(sys.argv) < 3:
            print("Error: an output directory is required when the input is a directory")
            for line in usage_lines:
                print(line)
            sys.exit(1)

        output_dir = sys.argv[2]

        # Sorted so files are processed (and logged) in a stable order.
        md_files = sorted(Path(input_path).glob('*.md'))

        for md_file in md_files:
            output_file = os.path.join(output_dir, md_file.name)
            process_file(str(md_file), output_file)

        print(f"\nTotal processed: {len(md_files)} files")
    else:
        print(f"Error: {input_path} is not a valid file or directory")
        sys.exit(1)
|
||
|
||
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
|