在 Word 中插入高亮代码

申请软件著作权需要提供申请表、操作说明书与源代码(源程序和文档应提交前、后各连续 30 页,不足 60 页的,应当全部提交,源程序每页不少于 50 行)。当我使用 python-docx 将全部代码插入 Word 后,突发奇想:如果能自动将代码高亮展示该多好。于是,在选定语法高亮库 pygments 后,我开发了这一功能。代码如下,核心代码已高亮标出;最终生成的示例(已转换为 PDF 格式)见链接。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
'''
pygments = "^2.12.0"
python-docx = "^0.8.11"
'''
import pathlib as p
import typing as t

from docx import Document
from docx.shared import Pt, RGBColor
from pygments.lexers import guess_lexer_for_filename
from pygments.styles import get_style_by_name


# Anything this module accepts as a filesystem location: a plain string or a
# pathlib path object.
Path = t.Union[str, p.Path]


class Word:
    '''Chainable builder that collects source files into one Word document.

    Each call to :meth:`add` appends a heading, the file's content (either
    syntax-highlighted through pygments or as plain text) and, optionally, a
    page break. :meth:`save` writes the accumulated document out.
    '''

    # Class name as a string; used as a forward-reference return annotation
    # by the chainable methods below.
    Self = __qualname__

    def __init__(
        self,
        root: Path = '.', style: str = 'default', page_break: bool = True,
        font_name: str = 'Times New Roman', font_size: int = 9,
    ) -> None:
        '''Create an empty document and configure its ``Normal`` style font.

        Args:
            root: Directory that default headings are made relative to.
            style: pygments style name used when :meth:`add` gets none.
            page_break: Whether :meth:`add` ends a file with a page break
                when the caller does not say otherwise.
            font_name: Font name applied to the ``Normal`` style.
            font_size: Font size in points applied to the ``Normal`` style.
        '''
        self._root = p.Path(root).absolute()
        self._default = {
            'style': style,
            'page_break': page_break,
        }
        self._doc = Document()
        font = self._doc.styles['Normal'].font
        font.name = font_name
        font.size = Pt(font_size)

    @classmethod
    def new(cls, *args, **kwargs) -> Self:
        '''Alternate constructor; forwards all arguments to ``__init__``.'''
        return cls(*args, **kwargs)

    @property
    def root(self) -> p.Path:
        '''Absolute root directory given at construction time.'''
        return self._root

    def add(
        self,
        path: Path, plain: bool = False,
        style: t.Optional[str] = None, page_break: t.Optional[bool] = None,
        title: t.Optional[str] = None,
    ) -> Self:
        '''Append one file to the document.

        Args:
            path: File whose text is inserted.
            plain: When true, insert the text as a single unstyled paragraph
                and skip lexing entirely (``style`` is then ignored).
            style: pygments style name; ``None`` (or an empty string) falls
                back to the instance default.
            page_break: ``True``/``False`` forces or suppresses the trailing
                page break; ``None`` uses the instance default.
            title: Heading text; when empty or ``None`` the path relative to
                :attr:`root` (POSIX form) is used.

        Returns:
            This instance, so calls can be chained.

        Raises:
            ValueError: From ``Path.relative_to`` when no ``title`` is given
                and ``path`` is not located under :attr:`root`.
        '''
        if plain:
            return self._add_plain(path, page_break, title)
        path = p.Path(path).absolute()
        code = path.read_text()
        lexer = guess_lexer_for_filename(path.name, code)
        # Mapping of token type -> style attributes for the chosen theme.
        token_styles = dict(get_style_by_name(style or self._default['style']))
        self._add_heading(path, title)
        paragraph = self._doc.add_paragraph()
        # One run per token so each token carries its own formatting.
        for token_type, text in lexer.get_tokens(code):
            # Token types the theme does not list get no explicit styling.
            token_style = token_styles.get(token_type, {})
            run = paragraph.add_run(text)
            run.bold = token_style.get('bold', False)
            run.italic = token_style.get('italic', False)
            run.underline = token_style.get('underline', False)
            color = token_style.get('color', None)
            if color is not None:
                run.font.color.rgb = RGBColor.from_string(color)
        self._end_section(page_break)
        return self

    def save(self, path: Path) -> Self:
        '''Write the document to ``path`` and return this instance.'''
        self._doc.save(path)
        return self

    def _add_plain(
        self,
        path: Path,
        page_break: t.Optional[bool] = None, title: t.Optional[str] = None,
    ) -> Self:
        '''Append a file as one unstyled paragraph (no syntax highlighting).

        ``page_break`` and ``title`` behave as documented on :meth:`add`.
        '''
        path = p.Path(path).absolute()
        self._add_heading(path, title)
        self._doc.add_paragraph(path.read_text())
        self._end_section(page_break)
        return self

    def _add_heading(self, path: p.Path, title: t.Optional[str]) -> None:
        '''Add the level-0 heading: ``title`` or the root-relative path.'''
        self._doc.add_heading(title or path.relative_to(self._root).as_posix(), 0)

    def _end_section(self, page_break: t.Optional[bool]) -> None:
        '''Add a page break if requested, deferring to the default on None.

        Only ``None`` defers to the instance default, so an explicit
        ``False`` suppresses the break even when the default is ``True``.
        '''
        if page_break is None:
            page_break = self._default['page_break']
        if page_break:
            self._doc.add_page_break()


if __name__ == '__main__':
    from pygments.styles import get_all_styles

    # Demo: render this very file once per pygments style known to the
    # installed library, using the style name as the heading, and write the
    # result to demo.docx in the current directory.
    demo = Word.new(root='.', page_break=False, font_size=7)
    for style_name in get_all_styles():
        demo.add(__file__, style=style_name, title=style_name)
    demo.save('demo.docx')