11from collections import defaultdict
22from datetime import date
33from glob import glob
4- from itertools import chain
4+ from itertools import chain , groupby
55import json
66from opencc import OpenCC
77import os
88import subprocess
99
10- FONT_VERSION = 1.003
10+ FONT_VERSION = 1.004
1111
# Define the max entries size in a subtable.
# We define a number that is small enough here, so that the entries will not exceed
# the size limit.
SUBTABLE_MAX_COUNT = 4000

# The following two functions are used to split a GSUB table into several subtables.
def grouper(iterable, n=SUBTABLE_MAX_COUNT):
    '''
    Split an iterable into consecutive lists of at most n items.

    Every chunk holds exactly n items, except possibly the last one, which
    holds whatever remains. An empty iterable produces no chunks at all.
    The input is consumed lazily, one chunk at a time, so plain iterators
    (not only lists) are accepted.

    Parameters:
        iterable: The items to split.
        n: Maximum number of items per chunk; must be at least 1.

    Yields:
        Lists of items, in their original order.

    Raises:
        ValueError: If n is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts.

    >>> list(grouper([1, 2, 3, 4, 5], n=2))
    [[1, 2], [3, 4], [5]]
    >>> list(grouper([1, 2, 3, 4, 5, 6], n=2))
    [[1, 2], [3, 4], [5, 6]]
    >>> list(grouper([], n=2))
    []
    '''
    # A chunk size below 1 can never consume an item, so the generator would
    # yield empty lists forever; fail loudly instead.
    if n < 1:
        raise ValueError('n must be at least 1, got %r' % (n,))

    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == n:
            yield chunk
            chunk = []
    # Emit the trailing partial chunk, if any.
    if chunk:
        yield chunk
37+
def grouper2(iterable, n=SUBTABLE_MAX_COUNT, key=None):
    '''
    Split an iterable into chunks of at most n items that share the same key.

    Consecutive items for which key returns equal values form one run
    (see itertools.groupby); each run is then cut into lists of at most
    n items. A chunk therefore never mixes items with different keys.

    >>> list(grouper2(['AA', 'BBB', 'CCC', 'DDD', 'EE'], n=3, key=len))
    [['AA'], ['BBB', 'CCC', 'DDD'], ['EE']]
    >>> list(grouper2(['AA', 'BBB', 'CCC', 'DDD', 'EE'], n=2, key=len))
    [['AA'], ['BBB', 'CCC'], ['DDD'], ['EE']]
    '''
    for _, same_key_run in groupby(iterable, key=key):
        yield from grouper(same_key_run, n)
2749
2850# An OpenType font can hold at most 65535 glyphs.
2951MAX_GLYPH_COUNT = 65535
@@ -142,7 +164,8 @@ def build_opencc_word_table(codepoints_tonggui, codepoints_font, twp=False):
142164 codepoints .update (codepoints_v )
143165
144166 # Sort from longest to shortest to force longest match
145- return sorted (((k , v ) for k , v in entries .items ()), key = lambda k_v : (- len (k_v [0 ]), k_v [0 ])), codepoints
167+ conversion_item_len = lambda conversion_item : len (conversion_item [0 ])
168+ return sorted (entries .items (), key = conversion_item_len , reverse = True ), codepoints
146169
147170def disassociate_codepoint_and_glyph_name (obj , codepoint , glyph_name ):
148171 '''
@@ -275,29 +298,34 @@ def insert_empty_feature(obj, feature_name):
275298 obj ['GSUB' ]['features' ][feature_name ] = []
276299
def create_word2pseu_table(obj, feature_name, conversions):
    '''
    Add the 'word2pseu' ligature lookup to the GSUB table of obj.

    conversions is an iterable of (glyph_names, pseudo_glyph_name) pairs.
    The pairs are split with grouper2, keyed on the length of glyph_names,
    so that every subtable only holds entries whose source sequences have
    the same length. The lookup is registered under feature_name and
    appended to the lookup order.
    '''
    def source_length(entry):
        return len(entry[0])

    subtables = []
    for chunk in grouper2(conversions, key=source_length):
        # Each substitution has the shape {from: [a1, a2, ...], to: b}
        substitutions = [
            {'from': source_glyphs, 'to': target_glyph}
            for source_glyphs, target_glyph in chunk
        ]
        subtables.append({'substitutions': substitutions})

    gsub = obj['GSUB']
    gsub['features'][feature_name].append('word2pseu')
    gsub['lookups']['word2pseu'] = {
        'type': 'gsub_ligature',
        'flags': {},
        'subtables': subtables
    }
    gsub['lookupOrder'].append('word2pseu')
285310
def create_char2char_table(obj, feature_name, conversions):
    '''
    Add the 'char2char' single-substitution lookup to the GSUB table of obj.

    conversions is an iterable of (from, to) pairs. The pairs are split
    with grouper using its default chunk size, and every chunk becomes one
    subtable mapping each source to its replacement. The lookup is
    registered under feature_name and appended to the lookup order.
    '''
    subtables = []
    for chunk in grouper(conversions):
        mapping = {}
        for source, target in chunk:
            mapping[source] = target
        subtables.append(mapping)

    gsub = obj['GSUB']
    gsub['features'][feature_name].append('char2char')
    gsub['lookups']['char2char'] = {
        'type': 'gsub_single',
        'flags': {},
        'subtables': subtables
    }
    gsub['lookupOrder'].append('char2char')
294320
def create_pseu2word_table(obj, feature_name, conversions):
    '''
    Add the 'pseu2word' multiple-substitution lookup to the GSUB table of obj.

    conversions is an iterable of (pseudo_glyph_name, glyph_names) pairs.
    The pairs are split with grouper2, keyed on the length of glyph_names,
    so that every subtable only holds entries whose replacement sequences
    have the same length. Every chunk becomes one subtable mapping each
    pseudo glyph to its replacement sequence. The lookup is registered
    under feature_name and appended to the lookup order.
    '''
    def target_length(entry):
        return len(entry[1])

    subtables = []
    for chunk in grouper2(conversions, key=target_length):
        mapping = {}
        for pseudo_glyph, target_glyphs in chunk:
            mapping[pseudo_glyph] = target_glyphs
        subtables.append(mapping)

    gsub = obj['GSUB']
    gsub['features'][feature_name].append('pseu2word')
    gsub['lookups']['pseu2word'] = {
        'type': 'gsub_multiple',
        'flags': {},
        'subtables': subtables
    }
    gsub['lookupOrder'].append('pseu2word')
303331
@@ -341,6 +369,8 @@ def build_dest_path_from_src_path(path, twp=False):
341369def go (path , twp = False ):
342370 font = load_font (path , ttc_index = 0 )
343371
372+ # Determine the final Unicode range by the original font and OpenCC convert tables
373+
344374 codepoints_font = build_codepoints_font (font )
345375 codepoints_tonggui = build_codepoints_tonggui () & codepoints_font
346376
@@ -358,6 +388,8 @@ def go(path, twp=False):
358388 available_glyph_count = MAX_GLYPH_COUNT - get_glyph_count (font )
359389 assert available_glyph_count >= len (entries_word )
360390
391+ # Build glyph substitution tables and insert into font
392+
361393 word2pseu_table = []
362394 char2char_table = []
363395 pseu2word_table = []
@@ -367,7 +399,7 @@ def go(path, twp=False):
367399 glyph_names_k = [codepoint_to_glyph_name (font , codepoint ) for codepoint in codepoints_k ]
368400 glyph_names_v = [codepoint_to_glyph_name (font , codepoint ) for codepoint in codepoints_v ]
369401 insert_empty_glyph (font , pseudo_glyph_name )
370- word2pseu_table .append ({ 'from' : glyph_names_k , 'to' : pseudo_glyph_name } )
402+ word2pseu_table .append (( glyph_names_k , pseudo_glyph_name ) )
371403 pseu2word_table .append ((pseudo_glyph_name , glyph_names_v ))
372404
373405 for codepoint_k , codepoint_v in entries_char :
0 commit comments