Skip to content

Commit f0f2f9b

Browse files
committed
Support Taiwanese phrases
1 parent 2f29bb6 commit f0f2f9b

6 files changed

Lines changed: 125 additions & 40 deletions

File tree

.github/workflows/build.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
name: Build
2+
3+
on:
4+
push:
5+
branches: [ main ]
6+
7+
jobs:
8+
build:
9+
runs-on: macos-latest
10+
steps:
11+
- uses: actions/checkout@v2
12+
- name: Install otfcc
13+
run: |
14+
brew tap caryll/tap
15+
brew install otfcc-mac64
16+
- name: Set up Python 3.8
17+
uses: actions/setup-python@v2
18+
with:
19+
python-version: 3.8
20+
- name: Install dependencies
21+
run: |
22+
python -m pip install --upgrade pip
23+
python -m pip install -r requirements.txt
24+
- name: Build
25+
run: |
26+
python build/main.py
27+
- name: Upload artifact
28+
uses: actions/upload-artifact@v2
29+
with:
30+
name: Font files
31+
path: output/*.ttf

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Fan Wun Ming 繁媛明朝
1+
# Fan Wun Ming 繁媛明朝 [![](https://github.com/ayaka14732/FanWunMing/workflows/Build/badge.svg)](https://github.com/ayaka14732/FanWunMing/actions?query=workflow%3ABuild)
22

33
![](demo.png)
44

@@ -17,8 +17,8 @@ See [release page](https://github.com/ayaka14732/FanWunMing/releases).<br/>
1717

1818
## Build 構建
1919

20-
Install Python and [otfcc](https://github.com/caryll/otfcc). Then run `python build/main.py`.<br/>
21-
安裝 Python 與 [otfcc](https://github.com/caryll/otfcc),然後執行 `python build/main.py`
20+
See [build script](.github/workflows/build.yml).<br/>
21+
參見[建置腳本](.github/workflows/build.yml)
2222

2323
## License 授權條款
2424

build/main.py

Lines changed: 68 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@
33
from glob import glob
44
from itertools import chain
55
import json
6+
from opencc import OpenCC
67
import os
78
import subprocess
89

9-
FONT_VERSION = 1.001
10+
FONT_VERSION = 1.002
1011

1112
# Define the maximum number of entries in a subtable.
1213
# We define a number that is small enough here, so that the entries will not exceed
1314
# the size limit.
14-
SUBTABLE_MAX_COUNT = 5000
15+
SUBTABLE_MAX_COUNT = 4000
1516

1617
# This function is used to split a GSUB table into several subtables.
1718
def grouper(lst, n, start=0):
@@ -27,11 +28,16 @@ def grouper(lst, n, start=0):
2728
def prepare_files():
2829
'''Download necessary files for the next steps.'''
2930
os.system('mkdir -p output')
30-
os.system('wget -nc -P cache https://github.com/ButTaiwan/genyo-font/releases/download/v1.501/GenYoMin.zip')
31-
os.system('wget -nc -P cache https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/STCharacters.txt')
32-
os.system('wget -nc -P cache https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/STPhrases.txt')
33-
os.system('wget -nc -P cache https://gist.githubusercontent.com/fatum12/941a10f31ac1ad48ccbc/raw/59d7e29b307ae3439317a975ef390cd729f9bc17/ttc2ttf.pe')
34-
os.system('wget -nc -P cache https://raw.githubusercontent.com/rime-aca/character_set/e7d009a8a185a83f62ad2c903565b8bb85719221/通用規範漢字表.txt')
31+
os.system('wget -q -nc -P cache https://github.com/ButTaiwan/genyo-font/releases/download/v1.501/GenYoMin.zip')
32+
os.system('wget -q -nc -P cache https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/STCharacters.txt')
33+
os.system('wget -q -nc -P cache https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/STPhrases.txt')
34+
os.system('wget -q -nc -P cache https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/TWPhrasesIT.txt')
35+
os.system('wget -q -nc -P cache https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/TWPhrasesName.txt')
36+
os.system('wget -q -nc -P cache https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/TWPhrasesOther.txt')
37+
os.system('wget -q -nc -P cache https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/TWVariants.txt')
38+
os.system('cat cache/TWPhrasesIT.txt cache/TWPhrasesName.txt cache/TWPhrasesOther.txt > cache/TWPhrases.txt')
39+
os.system('wget -q -nc -P cache https://gist.githubusercontent.com/fatum12/941a10f31ac1ad48ccbc/raw/59d7e29b307ae3439317a975ef390cd729f9bc17/ttc2ttf.pe')
40+
os.system('wget -q -nc -P cache https://raw.githubusercontent.com/rime-aca/character_set/e7d009a8a185a83f62ad2c903565b8bb85719221/通用規範漢字表.txt')
3541
os.system('unzip -n -d cache cache/GenYoMin.zip')
3642

3743
# An OpenType font can hold at most 65535 glyphs.
@@ -100,17 +106,19 @@ def build_codepoints_non_han():
100106
# We restrict the Simplified Chinese characters (on the left side of the OpenCC dictionary
101107
# file) to the range of Tongyong Guifan Hanzi Biao, and discard those conversions that are
102108
# out of range. The remaining conversions are stored in the entries variable.
109+
#
103110
# Then we calculate the range of “Which Traditional Chinese characters are needed if we
104111
# convert Tongyong Guifan Hanzi Biao to Traditional Chinese”. The range is stored in the
105112
# codepoints variable.
106-
def build_opencc_char_table(codepoints_tonggui, codepoints_font):
113+
def build_opencc_char_table(codepoints_tonggui, codepoints_font, twp=False):
107114
entries = []
108115
codepoints = set()
109116

110-
with open('cache/STCharacters.txt') as f:
117+
with open('cache/STCharacters.txt') as f: # s2t
111118
for line in f:
112119
k, vx = line.rstrip('\n').split('\t')
113120
v = vx.split(' ')[0] # Only select the first candidate
121+
v = t2twp(v) if twp else v # s2t -> s2twp
114122
codepoint_k = ord(k)
115123
codepoint_v = ord(v)
116124
if codepoint_k in codepoints_tonggui and codepoint_v in codepoints_font:
@@ -119,22 +127,37 @@ def build_opencc_char_table(codepoints_tonggui, codepoints_font):
119127

120128
return entries, codepoints
121129

122-
def build_opencc_word_table(codepoints_tonggui, codepoints_font):
123-
entries = []
130+
def build_opencc_word_table(codepoints_tonggui, codepoints_font, twp=False):
131+
entries = {}
124132
codepoints = set()
125133

126-
with open('cache/STPhrases.txt') as f:
134+
with open('cache/STPhrases.txt') as f: # s2t
127135
for line in f:
128136
k, vx = line.rstrip('\n').split('\t')
129137
v = vx.split(' ')[0] # Only select the first candidate
130-
codepoints_k = [ord(c) for c in k]
131-
codepoints_v = [ord(c) for c in v]
138+
v = t2twp(v) if twp else v # s2t -> s2twp
139+
codepoints_k = tuple(ord(c) for c in k)
140+
codepoints_v = tuple(ord(c) for c in v)
132141
if all(codepoint in codepoints_tonggui for codepoint in codepoints_k) \
133142
and all(codepoint in codepoints_font for codepoint in codepoints_v):
134-
entries.append((codepoints_k, codepoints_v))
143+
entries[codepoints_k] = codepoints_v
135144
codepoints.update(codepoints_v)
136145

137-
return entries, codepoints
146+
if twp:
147+
with open('cache/TWPhrases.txt') as f: # t2twp
148+
for line in f:
149+
k, vx = line.rstrip('\n').split('\t')
150+
v = vx.split(' ')[0] # Only select the first candidate
151+
k = t2s(k) # t2twp -> s2twp
152+
codepoints_k = tuple(ord(c) for c in k)
153+
codepoints_v = tuple(ord(c) for c in v)
154+
if all(codepoint in codepoints_tonggui for codepoint in codepoints_k) \
155+
and all(codepoint in codepoints_font for codepoint in codepoints_v):
156+
entries[codepoints_k] = codepoints_v
157+
codepoints.update(codepoints_v)
158+
159+
# Sort from longest to shortest to force longest match
160+
return sorted(((k, v) for k, v in entries.items()), key=lambda k_v: (-len(k_v[0]), k_v[0])), codepoints
138161

139162
def disassociate_codepoint_and_glyph_name(obj, codepoint, glyph_name):
140163
'''
@@ -293,49 +316,55 @@ def create_pseu2word_table(obj, feature_name, conversions):
293316
}
294317
obj['GSUB']['lookupOrder'].append('pseu2word')
295318

296-
def build_fanwunming_name_header(style, version, date):
319+
def build_fanwunming_name_header(style, version, date, twp=False):
297320
with open('build/name.json') as f:
298321
name_header = json.load(f)
299322

300323
for item in name_header:
301324
item['nameString'] = item['nameString'] \
302-
.replace('<Style>', style) \
303-
.replace('<Version>', version) \
304-
.replace('<Date>', date)
325+
.replace('<Style>', style) \
326+
.replace('<Version>', version) \
327+
.replace('<Date>', date)
328+
329+
if twp:
330+
item['nameString'] = item['nameString'] \
331+
.replace('繁媛明朝', '繁媛明朝 TW') \
332+
.replace('Fan Wun Ming', 'Fan Wun Ming TW') \
333+
.replace('FanWunMing', 'FanWunMing-TW')
305334

306335
return name_header
307336

308-
def modify_metadata(obj):
337+
def modify_metadata(obj, twp=False):
309338
style = next(item['nameString'] for item in obj['name'] if item['nameID'] == 17)
310339
today = date.today().strftime('%b %d, %Y')
311340

312-
name_header = build_fanwunming_name_header(style, str(FONT_VERSION), today)
341+
name_header = build_fanwunming_name_header(style, str(FONT_VERSION), today, twp=twp)
313342

314343
obj['head']['fontRevision'] = FONT_VERSION
315344
obj['name'] = name_header
316345

317-
def build_dest_path_from_src_path(path):
346+
def build_dest_path_from_src_path(path, twp=False):
318347
'''
319348
>>> build_dest_path_from_src_path('cache/GenYoMin-R.ttc')
320349
'output/FanWunMing-R.ttf'
321350
'''
322351
return path \
323-
.replace('cache/', 'output/') \
324-
.replace('GenYoMin', 'FanWunMing') \
325-
.replace('ttc', 'ttf')
352+
.replace('cache/', 'output/') \
353+
.replace('GenYoMin', 'FanWunMing' + ('-TW' if twp else '')) \
354+
.replace('ttc', 'ttf')
326355

327-
def go(path):
356+
def go(path, twp=False):
328357
font = load_font(path, ttc_index=0)
329358

330359
codepoints_font = build_codepoints_font(font)
331360
codepoints_tonggui = build_codepoints_tonggui() & codepoints_font
332361

333362
codepoints_final = codepoints_tonggui | build_codepoints_non_han() & codepoints_font
334363

335-
entries_char, codepoints_char = build_opencc_char_table(codepoints_tonggui, codepoints_font)
364+
entries_char, codepoints_char = build_opencc_char_table(codepoints_tonggui, codepoints_font, twp=twp)
336365
codepoints_final |= codepoints_char
337366

338-
entries_word, codepoints_word = build_opencc_word_table(codepoints_tonggui, codepoints_font)
367+
entries_word, codepoints_word = build_opencc_word_table(codepoints_tonggui, codepoints_font, twp=twp)
339368
codepoints_final |= codepoints_word
340369

341370
remove_codepoints(font, codepoints_font - codepoints_final)
@@ -367,13 +396,15 @@ def go(path):
367396
create_char2char_table(font, feature_name, char2char_table)
368397
create_pseu2word_table(font, feature_name, pseu2word_table)
369398

370-
modify_metadata(font)
371-
save_font(font, build_dest_path_from_src_path(path))
399+
modify_metadata(font, twp=twp)
400+
save_font(font, build_dest_path_from_src_path(path, twp=twp))
401+
402+
prepare_files()
372403

373-
def main():
374-
prepare_files()
375-
for path in glob('cache/GenYoMin-*.ttc'):
376-
go(path)
404+
# Initialize OpenCC converters
405+
t2s = OpenCC('t2s').convert
406+
t2twp = OpenCC('./build/t2twp').convert
377407

378-
if __name__ == '__main__':
379-
main()
408+
for path in glob('cache/GenYoMin-*.ttc'):
409+
go(path)
410+
go(path, twp=True)

build/t2twp.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"name": "Traditional Chinese to Traditional Chinese (Taiwan standard, with phrases)",
3+
"segmentation": {
4+
"type": "mmseg",
5+
"dict": {
6+
"type": "text",
7+
"file": "../cache/TWPhrases.txt"
8+
}
9+
},
10+
"conversion_chain": [{
11+
"dict": {
12+
"type": "group",
13+
"dicts": [{
14+
"type": "text",
15+
"file": "../cache/TWPhrases.txt"
16+
}, {
17+
"type": "text",
18+
"file": "../cache/TWVariants.txt"
19+
}]
20+
}
21+
}]
22+
}

demo.png

238 KB
Loading

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
opencc<1.2

0 commit comments

Comments
 (0)