From 909e56f915ec37e190fd557f901527d654ddd0bd Mon Sep 17 00:00:00 2001 From: "zexuan.peng" Date: Mon, 23 Feb 2026 22:55:56 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=90=9B=20fix(epub):=20=E6=81=A2?= =?UTF-8?q?=E5=A4=8D=20EPUB=20=E5=AF=BC=E5=87=BA=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E5=A4=8D=E5=9B=BE=E7=89=87=E6=98=BE=E7=A4=BA?= =?UTF-8?q?=E9=97=AE=E9=A2=98=20(Fixes=20#388)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Zexuan Peng --- bin/epub | 52 ++++++---- bin/preprocess-epub.py | 114 +++++++++++++++++++++ js/epub.css | 221 +++++++++++++++++++++++++++++++++++++++++ metadata.yaml | 5 + 4 files changed, 372 insertions(+), 20 deletions(-) create mode 100755 bin/preprocess-epub.py create mode 100644 js/epub.css create mode 100644 metadata.yaml diff --git a/bin/epub b/bin/epub index 5412a4a..2ff80af 100755 --- a/bin/epub +++ b/bin/epub @@ -1,12 +1,20 @@ #!/usr/bin/env bash +set -e + # Set the directory containing Markdown files SCRIPT_DIR=$(dirname "$0") INPUT_DIR=$(cd "$(dirname "$SCRIPT_DIR")" && pwd) OUTPUT_DIR="$INPUT_DIR/output" +TEMP_DIR="$OUTPUT_DIR/temp" # Create output directory if it doesn't exist mkdir -p "$OUTPUT_DIR" +mkdir -p "$TEMP_DIR" + +# Preprocess Markdown files to convert Hugo shortcodes +echo "Preprocessing Markdown files..." 
+python3 "${SCRIPT_DIR}/preprocess-epub.py" "${INPUT_DIR}/content/zh" "$TEMP_DIR" convert_to_epub() { # convert all EPUB files into a single EPUB book @@ -24,28 +32,32 @@ convert_to_epub() { --css="$css_file" \ --webtex \ --wrap=preserve \ - "${INPUT_DIR}"/SUMMARY.md \ - "${INPUT_DIR}"/README.md \ - "${INPUT_DIR}"/preface.md \ - "${INPUT_DIR}"/part-i.md \ - "${INPUT_DIR}"/ch1.md \ - "${INPUT_DIR}"/ch2.md \ - "${INPUT_DIR}"/ch3.md \ - "${INPUT_DIR}"/ch4.md \ - "${INPUT_DIR}"/part-ii.md \ - "${INPUT_DIR}"/ch5.md \ - "${INPUT_DIR}"/ch6.md \ - "${INPUT_DIR}"/ch7.md \ - "${INPUT_DIR}"/ch8.md \ - "${INPUT_DIR}"/ch9.md \ - "${INPUT_DIR}"/part-iii.md \ - "${INPUT_DIR}"/ch10.md \ - "${INPUT_DIR}"/ch11.md \ - "${INPUT_DIR}"/ch12.md \ - "${INPUT_DIR}"/colophon.md \ - "${INPUT_DIR}"/glossary.md + "${TEMP_DIR}"/_index.md \ + "${TEMP_DIR}"/preface.md \ + "${TEMP_DIR}"/part-i.md \ + "${TEMP_DIR}"/ch1.md \ + "${TEMP_DIR}"/ch2.md \ + "${TEMP_DIR}"/ch3.md \ + "${TEMP_DIR}"/ch4.md \ + "${TEMP_DIR}"/part-ii.md \ + "${TEMP_DIR}"/ch5.md \ + "${TEMP_DIR}"/ch6.md \ + "${TEMP_DIR}"/ch7.md \ + "${TEMP_DIR}"/ch8.md \ + "${TEMP_DIR}"/ch9.md \ + "${TEMP_DIR}"/part-iii.md \ + "${TEMP_DIR}"/ch10.md \ + "${TEMP_DIR}"/ch11.md \ + "${TEMP_DIR}"/ch12.md \ + "${TEMP_DIR}"/ch13.md \ + "${TEMP_DIR}"/ch14.md \ + "${TEMP_DIR}"/colophon.md \ + "${TEMP_DIR}"/glossary.md echo "Converted EPUB book created at $OUTPUT_BOOK." } convert_to_epub + +# Clean up temporary files +rm -rf "$TEMP_DIR" diff --git a/bin/preprocess-epub.py b/bin/preprocess-epub.py new file mode 100755 index 0000000..afb538d --- /dev/null +++ b/bin/preprocess-epub.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +""" +预处理 Markdown 文件,将 Hugo shortcode 转换为 Pandoc 可识别的格式 + +处理两种 shortcode: +1. {{< figure src="/fig/xxx.png" caption="xxx" >}} → ![xxx](static/fig/xxx.png) +2. {{< figure ... 
# Preprocess Markdown so Pandoc can read it: Hugo ``figure`` shortcodes are
# rewritten before the EPUB build.
#
#   {{< figure src="/fig/x.png" caption="c" >}}  ->  ![c](static/fig/x.png)
#   {{< figure src="/fig/x.png" >}}              ->  ![](static/fig/x.png)
#   {{< figure id="..." title="..." >}} (no src)  ->  removed (used around
#                                                    code examples)

import os
import re
import sys
from pathlib import Path

# One complete ``{{< figure ... >}}`` shortcode.  The attribute area is a run
# of either plain characters or whole double-quoted strings, so a ``>`` inside
# a quoted caption does not end the match early.
_FIGURE_SHORTCODE = re.compile(r'\{\{<\s*figure\b((?:[^">]|"[^"]*")*)>\}\}')

# A single ``name="value"`` attribute inside the shortcode.
_SHORTCODE_ATTR = re.compile(r'([\w-]+)\s*=\s*"([^"]*)"')


def _figure_to_markdown(match):
    """Return the Markdown replacement for one matched figure shortcode."""
    attrs = {}
    for name, value in _SHORTCODE_ATTR.findall(match.group(1)):
        # Keep the first occurrence if an attribute is repeated.
        attrs.setdefault(name, value)

    src = attrs.get('src')
    if not src:
        # No image to show: drop the shortcode entirely.
        return ''

    # Site-absolute paths ("/fig/x.png") live under the repository's
    # ``static`` directory, so "/fig/x.png" becomes "static/fig/x.png".
    if src.startswith('/'):
        src = 'static' + src

    caption = attrs.get('caption', '')
    # The leading "!" makes this an image; without it Markdown renders an
    # empty link and the picture is lost.
    return f'![{caption}]({src})'


def convert_figure_shortcode(text):
    """
    Convert Hugo ``figure`` shortcodes to Markdown image syntax.

    Attributes may appear in any order.  A shortcode with a non-empty ``src``
    becomes ``![caption](src)`` (empty alt text when there is no ``caption``);
    a shortcode without ``src`` is removed.

    Args:
        text: Markdown text content.

    Returns:
        The converted text.
    """
    return _FIGURE_SHORTCODE.sub(_figure_to_markdown, text)


def process_file(input_path, output_path):
    """
    Convert a single Markdown file.

    Args:
        input_path: Path of the file to read (UTF-8).
        output_path: Path of the file to write (UTF-8); may equal
            ``input_path`` to convert in place.  Missing parent directories
            are created.
    """
    content = Path(input_path).read_text(encoding='utf-8')
    converted_content = convert_figure_shortcode(content)

    target = Path(output_path)
    # ``Path.parent`` is "." for a bare filename, so this also works when the
    # output path has no directory component (os.makedirs("") would raise).
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(converted_content, encoding='utf-8')

    print(f"Processed: {input_path} -> {output_path}")


def _print_usage():
    """Print the command-line synopsis."""
    print("Usage: preprocess-epub.py <input_file> [output_file]")
    print("   or: preprocess-epub.py <input_dir> <output_dir>")


def main():
    """Command-line entry point: convert one file, or every ``*.md`` in a directory."""
    if len(sys.argv) < 2:
        _print_usage()
        sys.exit(1)

    input_path = sys.argv[1]

    if os.path.isfile(input_path):
        # Single file: without an explicit output path, convert in place.
        output_path = sys.argv[2] if len(sys.argv) > 2 else input_path
        process_file(input_path, output_path)
    elif os.path.isdir(input_path):
        # Directory mode needs an explicit output directory.
        if len(sys.argv) < 3:
            print("Error: an output directory is required when the input is a directory")
            _print_usage()
            sys.exit(1)
        output_dir = sys.argv[2]

        # Only top-level Markdown files; sorted so the log order is stable.
        md_files = sorted(Path(input_path).glob('*.md'))

        for md_file in md_files:
            output_file = os.path.join(output_dir, md_file.name)
            process_file(str(md_file), output_file)

        print(f"\nTotal processed: {len(md_files)} files")
    else:
        print(f"Error: {input_path} is not a valid file or directory")
        sys.exit(1)
output_path) + elif os.path.isdir(input_path): + # 处理目录 + output_dir = sys.argv[2] + input_dir = Path(input_path) + + # 获取所有 .md 文件 + md_files = list(input_dir.glob('*.md')) + + for md_file in md_files: + output_file = os.path.join(output_dir, md_file.name) + process_file(str(md_file), output_file) + + print(f"\nTotal processed: {len(md_files)} files") + else: + print(f"Error: {input_path} is not a valid file or directory") + sys.exit(1) + +if __name__ == '__main__': + main() diff --git a/js/epub.css b/js/epub.css new file mode 100644 index 0000000..01efd4c --- /dev/null +++ b/js/epub.css @@ -0,0 +1,221 @@ +/* This defines styles and classes used in the book */ +@page { + margin: 10px; +} +html, body, div, span, applet, object, iframe, h1, h2, h3, h4, h5, h6, p, +blockquote, pre, a, abbr, acronym, address, big, cite, code, del, dfn, em, img, +ins, kbd, q, s, samp, small, strike, strong, sub, sup, tt, var, b, u, i, center, +fieldset, form, label, legend, table, caption, tbody, tfoot, thead, tr, th, td, +article, aside, canvas, details, embed, figure, figcaption, footer, header, +hgroup, menu, nav, output, ruby, section, summary, time, mark, audio, video, ol, +ul, li, dl, dt, dd { + margin: 0; + padding: 0; + border: 0; + font-size: 100%; + vertical-align: baseline; +} +html { + line-height: 1.2; + font-family: Georgia, serif; + color: #1a1a1a; +} +p { + text-indent: 0; + margin: 1em 0; + widows: 2; + orphans: 2; +} +a, a:visited { + color: #1a1a1a; +} +img { + max-width: 100%; +} +sup { + vertical-align: super; + font-size: smaller; +} +sub { + vertical-align: sub; + font-size: smaller; +} +h1 { + margin: 3em 0 0 0; + font-size: 2em; + page-break-before: always; + line-height: 150%; +} +h2 { + margin: 1.5em 0 0 0; + font-size: 1.5em; + line-height: 135%; +} +h3 { + margin: 1.3em 0 0 0; + font-size: 1.3em; +} +h4 { + margin: 1.2em 0 0 0; + font-size: 1.2em; +} +h5 { + margin: 1.1em 0 0 0; + font-size: 1.1em; +} +h6 { + font-size: 1em; +} +h1, h2, h3, h4, h5, h6 { + 
text-indent: 0; + text-align: left; + font-weight: bold; + page-break-after: avoid; + page-break-inside: avoid; +} + +ol, ul { + margin: 1em 0 0 1.7em; +} +li > ol, li > ul { + margin-top: 0; +} +blockquote { + margin: 1em 0 1em 1.7em; +} +code { + font-family: Menlo, Monaco, 'Lucida Console', Consolas, monospace; + font-size: 85%; + margin: 0; + hyphens: manual; +} +/*pre {*/ +/* margin: 1em 0;*/ +/* overflow: auto;*/ +/*}*/ +pre code { + white-space: pre-wrap; + word-wrap: break-word; + background-color: #f5f5f5; + padding: 1em; +} +.sourceCode { + background-color: transparent; + overflow: visible; +} +hr { + background-color: #1a1a1a; + border: none; + height: 1px; + margin: 1em 0; +} +table { + margin: 1em 0; + border-collapse: collapse; + width: 100%; + overflow-x: auto; + display: block; +} +table caption { + margin-bottom: 0.75em; +} +tbody { + margin-top: 0.5em; + border-top: 1px solid #1a1a1a; + border-bottom: 1px solid #1a1a1a; +} +th, td { + padding: 0.25em 0.5em 0.25em 0.5em; +} +th { + border-top: 1px solid #1a1a1a; +} +header { + margin-bottom: 4em; + text-align: center; +} +#TOC li { + list-style: none; +} +#TOC ul { + padding-left: 1.3em; +} +#TOC > ul { + padding-left: 0; +} +#TOC a:not(:hover) { + text-decoration: none; +} +code { + white-space: pre-wrap; +} +span.smallcaps { + font-variant: small-caps; +} + +/* This is the most compatible CSS, but it only allows two columns: */ +div.column { + display: inline-block; + vertical-align: top; + width: 50%; +} +/* If you can rely on CSS3 support, use this instead: */ +/* div.columns { + display: flex; + gap: min(4vw, 1.5em); +} +div.column { + flex: auto; + overflow-x: auto; +} */ + +div.hanging-indent { + margin-left: 1.5em; + text-indent: -1.5em; +} +ul.task-list { + list-style: none; +} +ul.task-list li input[type="checkbox"] { + width: 0.8em; + margin: 0 0.8em 0.2em -1.6em; + vertical-align: middle; +} +.display.math { + display: block; + text-align: center; + margin: 0.5rem auto; +} + +/* For 
title, author, and date on the cover page */ +h1.title { } +p.author { } +p.date { } + +nav#toc ol, nav#landmarks ol { + padding: 0; + margin-left: 1em; +} +nav#toc ol li, nav#landmarks ol li { + list-style-type: none; + margin: 0; + padding: 0; +} +a.footnote-ref { + vertical-align: super; +} +em, em em em, em em em em em { + font-style: italic; +} +em em, em em em em { + font-style: normal; +} +q { + quotes: """''"'"; +} +@media screen { /* Workaround for iBooks issue; see #6242 */ + .sourceCode { + overflow: visible !important; + white-space: pre-wrap !important; + } +} diff --git a/metadata.yaml b/metadata.yaml new file mode 100644 index 0000000..e1a75c7 --- /dev/null +++ b/metadata.yaml @@ -0,0 +1,5 @@ +--- +title: 设计数据密集型应用 +author: Martin Kleppmann +rights: Creative Commons Non-Commercial Share Alike 3.0 +language: 中文 From 63bf9f5d397529b24a2f0297e800ee243559cb1f Mon Sep 17 00:00:00 2001 From: "zexuan.peng" Date: Tue, 24 Feb 2026 00:24:55 +0800 Subject: [PATCH 2/3] =?UTF-8?q?feat(epub):=20=E6=B7=BB=E5=8A=A0=E5=B0=81?= =?UTF-8?q?=E9=9D=A2=E5=9B=BE=E7=89=87=E5=88=B0=20EPUB=20=E5=85=83?= =?UTF-8?q?=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metadata.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/metadata.yaml b/metadata.yaml index e1a75c7..19bc80f 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -3,3 +3,4 @@ title: 设计数据密集型应用 author: Martin Kleppmann rights: Creative Commons Non-Commercial Share Alike 3.0 language: 中文 +cover-image: ./static/title.jpg From b9f59a646a5333f9bd89ccd62cfe44319dc76c85 Mon Sep 17 00:00:00 2001 From: Zexuan Peng <87429578+demo-zexuan@users.noreply.github.com> Date: Tue, 24 Feb 2026 00:34:00 +0800 Subject: [PATCH 3/3] Update metadata.yaml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata.yaml b/metadata.yaml index 19bc80f..0cea259 100644 --- 
a/metadata.yaml +++ b/metadata.yaml @@ -2,5 +2,5 @@ title: 设计数据密集型应用 author: Martin Kleppmann rights: Creative Commons Non-Commercial Share Alike 3.0 -language: 中文 +language: zh cover-image: ./static/title.jpg