android_frameworks_base/tools/fonts/fontchain_lint.py
Roozbeh Pournader 7b822e5fc1 Add more emoji-related tests
1. Check that dual-use text/emoji characters that default to text
style are available in a font before the emoji font. (Exceptions are
Unicode 7.0 characters that Noto Symbols doesn't support yet and four
odd characters that should be added to the Noto Symbols subset.)

2. Remove previous workaround for default emoji-style characters that
were in the Noto CJK fonts, since they are now subsetted out from
Noto CJK.

Bug: 26113320
Bug: 26187231
Change-Id: Ib9ea2f4f7e6ca0cd8d291f1dc3d7ee5053d4928e
2016-03-17 08:50:43 -07:00

264 lines
8.5 KiB
Python
Executable File

#!/usr/bin/env python
import collections
import glob
from os import path
import sys
from xml.etree import ElementTree
from fontTools import ttLib
# Maps a lowercase language code to the four-letter code of the script that
# language is written in (e.g. 'Latn'). lang_to_script() consults this table
# for language tags that do not carry an explicit script subtag.
LANG_TO_SCRIPT = {
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'eu': 'Latn',
    'ja': 'Jpan',
    'ko': 'Kore',
    'hu': 'Latn',
    'hy': 'Armn',
    'nb': 'Latn',
    'nn': 'Latn',
    'pt': 'Latn',
}
def lang_to_script(lang_code):
    """Return the four-letter script code for a language tag.

    The tag is lowercased and looked up in LANG_TO_SCRIPT. While there is no
    match, the last hyphen-separated subtag is inspected: if it is four
    alphabetic characters it is taken to be the script itself and returned
    title-cased (e.g. 'zsye' -> 'Zsye'); otherwise it is dropped and the
    shortened tag is looked up again.

    Args:
        lang_code: a language tag such as 'en', 'en-US' or 'und-Zsye'.

    Returns:
        The script code as a string.

    Raises:
        AssertionError: if no script can be determined for lang_code.
    """
    lang = lang_code.lower()
    while lang not in LANG_TO_SCRIPT:
        hyphen_idx = lang.rfind('-')
        if hyphen_idx == -1:
            # Raised explicitly instead of via `assert`: this check is the
            # only way out of the loop for an unknown language, so if it
            # were stripped by `python -O` the loop would keep chopping
            # characters off `lang` and then spin forever on ''.
            raise AssertionError(
                'We do not know what script the "%s" language is written in.'
                % lang_code)
        assumed_script = lang[hyphen_idx+1:]
        if len(assumed_script) == 4 and assumed_script.isalpha():
            # This is actually the script
            return assumed_script.title()
        lang = lang[:hyphen_idx]
    return LANG_TO_SCRIPT[lang]
def get_best_cmap(font):
    """Return the most complete Unicode character map of a font.

    Args:
        font: a (font_file, index) tuple. font_file is resolved relative to
            the module-level _fonts_dir; index is None or the value passed
            to TTFont as fontNumber (presumably the face index inside a
            font collection -- confirm against fontTools).

    Returns:
        The `cmap` attribute of the format 12 / platform 3 / encoding 10
        subtable if the font has one, otherwise that of the format 4 /
        platform 3 / encoding 1 subtable. Callers test integer code points
        for membership in it.

    Raises:
        AssertionError: if the font has more than one subtable of either
            kind, or has neither kind.
    """
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is not None:
        ttfont = ttLib.TTFont(font_path, fontNumber=index)
    else:
        ttfont = ttLib.TTFont(font_path)

    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    best = all_unicode_cmap if all_unicode_cmap else bmp_cmap
    # Without this check a font lacking both subtables would fail below with
    # an uninformative "'NoneType' object has no attribute 'cmap'".
    assert best is not None, (
        'No BMP (format 4) or UCS-4 (format 12) cmap found in %s' % (font, ))
    return best.cmap
def assert_font_supports_any_of_chars(font, chars):
    """Exit with an error unless the font maps at least one of chars.

    Args:
        font: a (font_file, index) tuple as accepted by get_best_cmap().
        chars: an iterable of integer code points.
    """
    cmap = get_best_cmap(font)
    if not any(code_point in cmap for code_point in chars):
        sys.exit('None of characters in %s were found in %s' % (chars, font))
def assert_font_supports_all_of_chars(font, chars):
    """Assert that the font maps every code point in chars.

    Args:
        font: a (font_file, index) tuple as accepted by get_best_cmap().
        chars: an iterable of integer code points.

    Raises:
        AssertionError: naming the first code point (in iteration order)
            that is absent from the font's character map.
    """
    cmap = get_best_cmap(font)
    # Locate the first unsupported code point, if any.
    first_missing = next(
        (code_point for code_point in chars if code_point not in cmap), None)
    assert first_missing is None, (
        'U+%04X was not found in %s' % (first_missing, font))
def assert_font_supports_none_of_chars(font, chars):
    """Assert that the font maps none of the code points in chars.

    Args:
        font: a (font_file, index) tuple as accepted by get_best_cmap().
        chars: an iterable of integer code points.

    Raises:
        AssertionError: naming the first code point (in iteration order)
            that is present in the font's character map.
    """
    cmap = get_best_cmap(font)
    # Locate the first code point the font unexpectedly supports, if any.
    first_present = next(
        (code_point for code_point in chars if code_point in cmap), None)
    assert first_present is None, (
        'U+%04X was found in %s' % (first_present, font))
def check_hyphens(hyphens_dir):
    """Check that fonts for hyphenated scripts contain a hyphen character.

    Every '*.hyb' file in hyphens_dir must be named 'hyph-LANG.hyb'; the
    LANG part is mapped to a script with lang_to_script(). Each font
    registered for such a script in _script_to_font_map must then support
    U+002D or U+2010.

    Args:
        hyphens_dir: directory containing the '*.hyb' hyphenation files.
    """
    # Find all the scripts that need automatic hyphenation
    hyphenated_scripts = set()
    for hyb_path in glob.iglob(path.join(hyphens_dir, '*.hyb')):
        base_name = path.basename(hyb_path)
        assert base_name.startswith('hyph-'), (
            'Unknown hyphenation file %s' % base_name)
        # The language code sits between the first '-' and the first '.'.
        code_start = base_name.index('-') + 1
        code_end = base_name.index('.')
        hyphenated_scripts.add(lang_to_script(base_name[code_start:code_end]))

    hyphen_chars = {0x002D, 0x2010}
    for script in hyphenated_scripts:
        script_fonts = _script_to_font_map[script]
        assert script_fonts, 'No fonts found for the "%s" script' % script
        for font in script_fonts:
            assert_font_supports_any_of_chars(font, hyphen_chars)
def parse_fonts_xml(fonts_xml_path):
    """Parse fonts.xml into the module-level font tables.

    Rebuilds two globals from scratch:
      _fallback_chain: a list with one
          (name, scripts, variant, weight, style, (font_file, index))
          tuple per <font> element, in document order.
      _script_to_font_map: a defaultdict mapping a script code to the set
          of (font_file, index) tuples registered for that script.

    Args:
        fonts_xml_path: path of the fonts.xml file to read.

    Raises:
        AssertionError: if a family or font element has unexpected
            attributes, tags, weights or styles.
    """
    global _script_to_font_map, _fallback_chain
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chain = []

    tree = ElementTree.parse(fonts_xml_path)
    for family in tree.findall('family'):
        name = family.get('name')
        variant = family.get('variant')
        lang_attr = family.get('lang')

        if not name:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)
        else:
            assert variant is None, (
                'No variant expected for LGC font %s.' % name)
            assert lang_attr is None, (
                'No language expected for LGC fonts %s.' % name)

        scripts = set()
        if lang_attr:
            scripts = set(lang_to_script(tag) for tag in lang_attr.split())
        family_scripts = frozenset(scripts)

        # non-empty names are used for default LGC fonts
        target_scripts = {'Latn', 'Grek', 'Cyrl'} if name else scripts

        for font_elem in family:
            assert font_elem.tag == 'font', (
                'Unknown tag <%s>' % font_elem.tag)
            file_name = font_elem.text

            weight = int(font_elem.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = font_elem.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            # A missing or empty index attribute is kept as-is.
            font_index = font_elem.get('index')
            if font_index:
                font_index = int(font_index)

            font = (file_name, font_index)
            _fallback_chain.append(
                (name, family_scripts, variant, weight, style, font))
            for script in target_scripts:
                _script_to_font_map[script].add(font)
def check_emoji_availability():
    """Assert that every emoji font supports all 'Emoji' characters.

    A font counts as an emoji font when its entry in _fallback_chain lists
    the 'Zsye' script.
    """
    all_emoji = _emoji_properties['Emoji']
    for _name, scripts, _variant, _weight, _style, font in _fallback_chain:
        if 'Zsye' in scripts:
            assert_font_supports_all_of_chars(font, all_emoji)
def check_emoji_defaults():
    """Check how non-emoji fonts treat emoji characters.

    Walks _fallback_chain in order and verifies that:
      * no checked font supports a character whose default presentation is
        emoji ('Emoji_Presentation'). Emoji fonts are skipped, as are fonts
        without a script that come after the first emoji font.
      * every 'Emoji' character that defaults to text presentation is
        supported by some font placed before the first emoji font, apart
        from the known exceptions subtracted at the end.

    Raises:
        AssertionError: if either condition is violated.
    """
    default_emoji_chars = _emoji_properties['Emoji_Presentation']
    missing_text_chars = _emoji_properties['Emoji'] - default_emoji_chars
    # Loop-invariant, so sort once instead of once per font.
    sorted_default_emoji_chars = sorted(default_emoji_chars)

    emoji_font_seen = False
    for _name, scripts, _variant, _weight, _style, font in _fallback_chain:
        if 'Zsye' in scripts:
            emoji_font_seen = True
            # No need to check the emoji font
            continue

        # For later fonts, we only check them if they have a script
        # defined, since the defined script may get them to a higher
        # score even if they appear after the emoji font.
        if emoji_font_seen and not scripts:
            continue

        # Check default emoji-style characters
        assert_font_supports_none_of_chars(font, sorted_default_emoji_chars)

        # Mark default text-style characters appearing in fonts above the
        # emoji font as seen. difference_update() consumes the cmap's keys
        # directly, avoiding a temporary set per font.
        if not emoji_font_seen:
            missing_text_chars.difference_update(get_best_cmap(font))

    # Noto does not have monochrome symbols for Unicode 7.0 wingdings and
    # webdings
    missing_text_chars -= _chars_by_age['7.0']
    # TODO: Remove these after b/26113320 is fixed
    missing_text_chars -= {
        0x263A,  # WHITE SMILING FACE
        0x270C,  # VICTORY HAND
        0x2744,  # SNOWFLAKE
        0x2764,  # HEAVY BLACK HEART
    }
    # Name the offending code points so a failure is actionable.
    assert missing_text_chars == set(), (
        'Text style version of some emoji characters are missing: %s'
        % ', '.join('U+%04X' % char for char in sorted(missing_text_chars)))
def parse_unicode_datafile(file_path, reverse=False):
    """Parse a 'code point(s) ; property' Unicode data file.

    Text from '#' to the end of a line is ignored, as are blank lines.
    Every remaining line must hold exactly two ';'-separated fields: a
    hexadecimal code point or a 'START..END' inclusive range, and a
    property value. The file is assumed to carry only one property.

    Args:
        file_path: path of the data file to read.
        reverse: if true, return a dictionary that maps each property value
            to the set of characters having it, which is useful for binary
            properties. Otherwise return a dictionary that maps each
            character to its property value.

    Returns:
        A collections.defaultdict(set) when reverse is true, else a dict.
        Characters are integer code points.

    Raises:
        AssertionError: in non-reverse mode, if a character is listed more
            than once.
    """
    if reverse:
        output_dict = collections.defaultdict(set)
    else:
        output_dict = {}
    with open(file_path) as datafile:
        for line in datafile:
            # Drop any trailing comment, then surrounding whitespace.
            line = line.partition('#')[0].strip()
            if not line:
                continue

            char_range, prop = line.split(';')
            char_range = char_range.strip()
            prop = prop.strip()

            if '..' in char_range:
                char_start, char_end = char_range.split('..')
            else:
                char_start = char_end = char_range
            char_start = int(char_start, 16)
            char_end = int(char_end, 16)

            # `range` rather than the Python 2-only `xrange`, so the script
            # runs under both Python 2 and Python 3.
            char_range = range(char_start, char_end + 1)
            if reverse:
                output_dict[prop].update(char_range)
            else:
                for char in char_range:
                    assert char not in output_dict, (
                        'U+%04X is listed more than once in %s'
                        % (char, file_path))
                    output_dict[char] = prop
    return output_dict
def parse_ucd(ucd_path):
    """Load the Unicode data files needed by the emoji checks.

    Sets two module-level globals, each mapping a property value to the set
    of characters having it:
      _emoji_properties: read from 'emoji-data.txt'.
      _chars_by_age: read from 'DerivedAge.txt'.

    Args:
        ucd_path: directory containing both data files.
    """
    global _emoji_properties, _chars_by_age
    emoji_data_path = path.join(ucd_path, 'emoji-data.txt')
    derived_age_path = path.join(ucd_path, 'DerivedAge.txt')
    _emoji_properties = parse_unicode_datafile(emoji_data_path, reverse=True)
    _chars_by_age = parse_unicode_datafile(derived_age_path, reverse=True)
def main():
    """Run all font chain checks.

    Expects two command-line arguments:
      1. the target output directory, which must contain 'fonts/',
         'etc/fonts.xml' and 'usr/hyphen-data/';
      2. the directory holding the Unicode data files read by parse_ucd().

    Exits with a usage message if either argument is missing. Any extra
    arguments are ignored.
    """
    global _fonts_dir

    # Validate up front so a missing argument yields a usage message rather
    # than an IndexError raised only after part of the checks have run.
    if len(sys.argv) < 3:
        sys.exit('Usage: fontchain_lint.py TARGET_OUT_DIR UCD_DIR')
    target_out = sys.argv[1]
    ucd_path = sys.argv[2]

    _fonts_dir = path.join(target_out, 'fonts')

    fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')
    parse_fonts_xml(fonts_xml_path)

    hyphens_dir = path.join(target_out, 'usr', 'hyphen-data')
    check_hyphens(hyphens_dir)

    parse_ucd(ucd_path)
    check_emoji_availability()
    check_emoji_defaults()


if __name__ == '__main__':
    main()