33from glob import glob
44from itertools import chain
55import json
6+ from opencc import OpenCC
67import os
78import subprocess
89
FONT_VERSION = 1.002

# Maximum number of entries placed in a single subtable.
# The value is deliberately small so that the entries of a subtable never
# exceed the size limit.
SUBTABLE_MAX_COUNT = 4000
1516
1617# This function is used to split a GSUB table into several subtables.
1718def grouper (lst , n , start = 0 ):
@@ -27,11 +28,16 @@ def grouper(lst, n, start=0):
def prepare_files():
    '''
    Download necessary files for the next steps.

    Creates the `output` directory, fetches the source font archive, the
    OpenCC dictionaries, the ttc2ttf script and the Tongyong Guifan Hanzi Biao
    character list into `cache`, merges the three TWPhrases dictionaries into
    `cache/TWPhrases.txt`, and unpacks the font archive.

    Everything is done through shell commands; their exit statuses are not
    checked.
    '''
    urls = (
        'https://github.com/ButTaiwan/genyo-font/releases/download/v1.501/GenYoMin.zip',
        'https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/STCharacters.txt',
        'https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/STPhrases.txt',
        'https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/TWPhrasesIT.txt',
        'https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/TWPhrasesName.txt',
        'https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/TWPhrasesOther.txt',
        'https://cdn.jsdelivr.net/npm/opencc-data@1.0.3/data/TWVariants.txt',
        'https://gist.githubusercontent.com/fatum12/941a10f31ac1ad48ccbc/raw/59d7e29b307ae3439317a975ef390cd729f9bc17/ttc2ttf.pe',
        'https://raw.githubusercontent.com/rime-aca/character_set/e7d009a8a185a83f62ad2c903565b8bb85719221/通用規範漢字表.txt',
    )

    os.system('mkdir -p output')

    # -q: quiet, -nc: skip files that are already present, -P: target directory
    for url in urls:
        os.system(f'wget -q -nc -P cache {url}')

    # The three TWPhrases parts are merged into a single dictionary file
    os.system('cat cache/TWPhrasesIT.txt cache/TWPhrasesName.txt cache/TWPhrasesOther.txt > cache/TWPhrases.txt')

    # -n: never overwrite files that were extracted before
    os.system('unzip -n -d cache cache/GenYoMin.zip')
3642
3743# An opentype font can hold at most 65535 glyphs.
@@ -100,17 +106,19 @@ def build_codepoints_non_han():
# We restrict the Simplified Chinese characters (on the left side of the OpenCC dictionary
# file) to the range of Tongyong Guifan Hanzi Biao, and discard those conversions that are
# out of range. The remaining conversions are stored in the entries variable.
#
# Then we calculate the range of “Which Traditional Chinese characters are needed if we
# convert Tongyong Guifan Hanzi Biao to Traditional Chinese”. The range is stored in the
# codepoints variable.
106- def build_opencc_char_table (codepoints_tonggui , codepoints_font ):
113+ def build_opencc_char_table (codepoints_tonggui , codepoints_font , twp = False ):
107114 entries = []
108115 codepoints = set ()
109116
110- with open ('cache/STCharacters.txt' ) as f :
117+ with open ('cache/STCharacters.txt' ) as f : # s2t
111118 for line in f :
112119 k , vx = line .rstrip ('\n ' ).split ('\t ' )
113120 v = vx .split (' ' )[0 ] # Only select the first candidate
121+ v = t2twp (v ) if twp else v # s2t -> s2twp
114122 codepoint_k = ord (k )
115123 codepoint_v = ord (v )
116124 if codepoint_k in codepoints_tonggui and codepoint_v in codepoints_font :
@@ -119,22 +127,37 @@ def build_opencc_char_table(codepoints_tonggui, codepoints_font):
119127
120128 return entries , codepoints
121129
def _collect_phrase_conversions(path, entries, codepoints, codepoints_tonggui,
                                codepoints_font, convert_key=None, convert_value=None):
    '''Merge the in-range conversions of one OpenCC phrase dictionary into entries and codepoints.'''
    # The dictionaries hold Chinese text, so do not depend on the locale's
    # default encoding.
    with open(path, encoding='utf-8') as f:
        for line in f:
            k, vx = line.rstrip('\n').split('\t')
            v = vx.split(' ')[0]  # Only select the first candidate
            if convert_key is not None:
                k = convert_key(k)
            if convert_value is not None:
                v = convert_value(v)
            codepoints_k = tuple(ord(c) for c in k)
            codepoints_v = tuple(ord(c) for c in v)
            if all(codepoint in codepoints_tonggui for codepoint in codepoints_k) \
            and all(codepoint in codepoints_font for codepoint in codepoints_v):
                # A later line with the same key replaces the earlier conversion
                entries[codepoints_k] = codepoints_v
                codepoints.update(codepoints_v)

def build_opencc_word_table(codepoints_tonggui, codepoints_font, twp=False):
    '''
    Build the phrase conversion table from the OpenCC phrase dictionaries.

    A conversion is kept only if every code point of its key is in
    `codepoints_tonggui` and every code point of its value is in
    `codepoints_font`.

    When `twp` is true, the values of `cache/STPhrases.txt` are passed through
    `t2twp`, and the conversions of `cache/TWPhrases.txt` are added with their
    keys passed through `t2s`.

    Returns a tuple `(entries, codepoints)`: `entries` is a list of
    `(key code points, value code points)` pairs of tuples, ordered from the
    longest key to the shortest; `codepoints` is the set of code points that
    occurred in accepted values.
    '''
    entries = {}
    codepoints = set()

    # s2t, or s2t -> s2twp
    _collect_phrase_conversions(
        'cache/STPhrases.txt', entries, codepoints, codepoints_tonggui, codepoints_font,
        convert_value=t2twp if twp else None)

    if twp:
        # t2twp -> s2twp
        _collect_phrase_conversions(
            'cache/TWPhrases.txt', entries, codepoints, codepoints_tonggui, codepoints_font,
            convert_key=t2s)

    # Sort from longest to shortest to force longest match
    return sorted(entries.items(), key=lambda k_v: (-len(k_v[0]), k_v[0])), codepoints
138161
139162def disassociate_codepoint_and_glyph_name (obj , codepoint , glyph_name ):
140163 '''
@@ -293,49 +316,55 @@ def create_pseu2word_table(obj, feature_name, conversions):
293316 }
294317 obj ['GSUB' ]['lookupOrder' ].append ('pseu2word' )
295318
def build_fanwunming_name_header(style, version, date, twp=False):
    '''
    Build the name records of the output font from the template in
    `build/name.json`.

    In every record, the `<Style>`, `<Version>` and `<Date>` placeholders of
    `nameString` are replaced by the given strings. When `twp` is true, the
    family names are additionally renamed to their TW variants.

    Returns the parsed template with the substitutions applied.
    '''
    # The family names matched below include CJK text, so the template is
    # read as UTF-8 instead of the locale's default encoding.
    with open('build/name.json', encoding='utf-8') as f:
        name_header = json.load(f)

    for item in name_header:
        name_string = item['nameString'] \
            .replace('<Style>', style) \
            .replace('<Version>', version) \
            .replace('<Date>', date)

        if twp:
            name_string = name_string \
                .replace('繁媛明朝', '繁媛明朝 TW') \
                .replace('Fan Wun Ming', 'Fan Wun Ming TW') \
                .replace('FanWunMing', 'FanWunMing-TW')

        item['nameString'] = name_string

    return name_header
307336
def modify_metadata(obj, twp=False):
    '''
    Replace the naming metadata of the font with the Fan Wun Ming name header.

    `obj` is modified in place: `obj['name']` is replaced by the generated
    name header and `obj['head']['fontRevision']` is set to `FONT_VERSION`.
    When `twp` is true, the TW variant of the names is generated.

    Raises StopIteration if the font has no name record with nameID 17.
    '''
    # nameID 17 is the Typographic Subfamily name in the OpenType name table;
    # the style of the source font is carried over to the new names.
    style = next(item['nameString'] for item in obj['name'] if item['nameID'] == 17)
    today = date.today().strftime('%b %d, %Y')

    name_header = build_fanwunming_name_header(style, str(FONT_VERSION), today, twp=twp)

    obj['head']['fontRevision'] = FONT_VERSION
    obj['name'] = name_header
316345
def build_dest_path_from_src_path(path, twp=False):
    '''
    Derive the path of the output font from the path of a source font.

    The `cache/` directory becomes `output/`, the family name `GenYoMin`
    becomes `FanWunMing` (`FanWunMing-TW` when `twp` is true), and `ttc`
    becomes `ttf`.

    >>> build_dest_path_from_src_path('cache/GenYoMin-R.ttc')
    'output/FanWunMing-R.ttf'
    >>> build_dest_path_from_src_path('cache/GenYoMin-R.ttc', twp=True)
    'output/FanWunMing-TW-R.ttf'
    '''
    family = 'FanWunMing' + ('-TW' if twp else '')
    return path \
        .replace('cache/', 'output/') \
        .replace('GenYoMin', family) \
        .replace('ttc', 'ttf')
326355
327- def go (path ):
356+ def go (path , twp = False ):
328357 font = load_font (path , ttc_index = 0 )
329358
330359 codepoints_font = build_codepoints_font (font )
331360 codepoints_tonggui = build_codepoints_tonggui () & codepoints_font
332361
333362 codepoints_final = codepoints_tonggui | build_codepoints_non_han () & codepoints_font
334363
335- entries_char , codepoints_char = build_opencc_char_table (codepoints_tonggui , codepoints_font )
364+ entries_char , codepoints_char = build_opencc_char_table (codepoints_tonggui , codepoints_font , twp = twp )
336365 codepoints_final |= codepoints_char
337366
338- entries_word , codepoints_word = build_opencc_word_table (codepoints_tonggui , codepoints_font )
367+ entries_word , codepoints_word = build_opencc_word_table (codepoints_tonggui , codepoints_font , twp = twp )
339368 codepoints_final |= codepoints_word
340369
341370 remove_codepoints (font , codepoints_font - codepoints_final )
@@ -367,13 +396,15 @@ def go(path):
367396 create_char2char_table (font , feature_name , char2char_table )
368397 create_pseu2word_table (font , feature_name , pseu2word_table )
369398
370- modify_metadata (font )
371- save_font (font , build_dest_path_from_src_path (path ))
399+ modify_metadata (font , twp = twp )
400+ save_font (font , build_dest_path_from_src_path (path , twp = twp ))
401+
prepare_files()

# Initialize OpenCC converters.
# The table builders look up `t2s` and `t2twp` as module-level names when
# twp is enabled, so both must be bound before go() is called.
t2s = OpenCC('t2s').convert
t2twp = OpenCC('./build/t2twp').convert

# Build the regular font and its TW variant from every source font
for path in glob('cache/GenYoMin-*.ttc'):
    go(path)
    go(path, twp=True)
0 commit comments