From 353491d69a74b2e98a30752bbf65caef40b2977b Mon Sep 17 00:00:00 2001
From: Rod S <rsheeter@google.com>
Date: Sat, 26 Mar 2022 16:16:41 -0700
Subject: [PATCH] Update fontchain_lint to detect lack of PUA in emoji font

Bug: 226676748
Test: Confirmed that the updated fontchain lint fails on the COLR font w/o PUA and passes once the PUA mappings are added
Change-Id: If831ed689ce80f26564279c6a6243cedddc56c36
---
 tools/fonts/fontchain_linter.py | 102 ++++++++++++++++++++++++++++----
 1 file changed, 90 insertions(+), 12 deletions(-)

diff --git a/tools/fonts/fontchain_linter.py b/tools/fonts/fontchain_linter.py
index 0d9ea1b70680..35a0ce60e359 100755
--- a/tools/fonts/fontchain_linter.py
+++ b/tools/fonts/fontchain_linter.py
@@ -340,29 +340,104 @@ def check_emoji_coverage(all_emoji, equivalent_emoji):
 def get_emoji_fonts():
     return [ record.font for record in _all_fonts if 'Zsye' in record.scripts ]
 
+def seq_any(sequence, pred):
+    # A non-tuple (e.g. a single code point) is treated as a one-item sequence.
+    if isinstance(sequence, tuple):
+        return any(pred(x) for x in sequence)
+    return pred(sequence)
+
+def seq_all(sequence, pred):
+    # A non-tuple (e.g. a single code point) is treated as a one-item sequence.
+    if isinstance(sequence, tuple):
+        return all(pred(x) for x in sequence)
+    return pred(sequence)
+
+def is_regional_indicator(x):
+    # Regional Indicator Symbol Letters A..Z, U+1F1E6..U+1F1FF
+    return 0x1F1E6 <= x <= 0x1F1FF
+
+def is_tag(x):
+    # Unicode Tags block, U+E0000..U+E007F
+    return 0xE0000 <= x <= 0xE007F
+
 def is_pua(x):
     return 0xE000 <= x <= 0xF8FF or 0xF0000 <= x <= 0xFFFFD or 0x100000 <= x <= 0x10FFFD
 
 def contains_pua(sequence):
-  if type(sequence) is tuple:
-    return any([is_pua(x) for x in sequence])
-  else:
-    return is_pua(sequence)
+    return seq_any(sequence, is_pua)
+
+def contains_regional_indicator(sequence):
+    return seq_any(sequence, is_regional_indicator)
+
+def only_tags(sequence):
+    return seq_all(sequence, is_tag)
 
 def get_psname(ttf):
     return str(next(x for x in ttf['name'].names
         if x.platformID == 3 and x.platEncID == 1 and x.nameID == 6))
 
-def check_emoji_compat():
+def hex_strs(sequence):
+    if isinstance(sequence, tuple):
+        return tuple(f"{s:X}" for s in sequence)
+    return f"{sequence:X}"  # same upper-case, prefix-free form as the tuple case
+
+def check_plausible_compat_pua(coverage, all_emoji, equivalent_emoji):
+    # Every emoji sequence, plus its equivalent sequence if any, should map to a
+    # single glyph that at least one PUA entry of the coverage map also maps to.
+    pua_glyphs = {g for s, g in coverage.items() if contains_pua(s)}
+    problems = []
+    for seq in all_emoji:
+        # We're looking to match not-PUA with PUA so filter out existing PUA
+        if contains_pua(seq):
+            continue
+
+        # Filter out non-RGI things that end up in all_emoji
+        if only_tags(seq) or seq in {ZWJ, COMBINING_KEYCAP, EMPTY_FLAG_SEQUENCE}:
+            continue
+
+        equivalents = [seq]
+        if seq in equivalent_emoji:
+            equivalents.append(equivalent_emoji[seq])
+
+        # If there are problems the hex code is much more useful
+        log_equivalents = [hex_strs(s) for s in equivalents]
+
+        # The system compat font should NOT include regional indicators as these have been split out
+        if contains_regional_indicator(seq):
+            assert not any(s in coverage for s in equivalents), f"Regional indicators not expected in compat font, found {log_equivalents}"
+            continue
+
+        # A missing mapping is collected as a problem (None marks it) rather
+        # than aborting the whole lint run with a bare KeyError.
+        glyph = {coverage.get(e) for e in equivalents}
+        if None in glyph:
+            problems.append(f"{log_equivalents} should all be covered by the compat font")
+        elif len(glyph) != 1:
+            problems.append(f"{log_equivalents} should all point to the same glyph")
+        elif glyph.isdisjoint(pua_glyphs):
+            problems.append(f"Expected PUA for {log_equivalents} but none exist")
+
+    assert not problems, "\n".join(sorted(problems)) + f"\n{len(problems)} PUA problems"
+
+def check_emoji_compat(all_emoji, equivalent_emoji):
+    compat_psnames = set()
     for emoji_font in get_emoji_fonts():
         ttf = open_font(emoji_font)
         psname = get_psname(ttf)
 
-        # If the font file is NotoColorEmoji, it must be Compat font.
-        if psname == 'NotoColorEmoji':
-            meta = ttf['meta']
-            assert meta, 'Compat font must have meta table'
-            assert 'Emji' in meta.data, 'meta table should have \'Emji\' data.'
+        is_compat_font = "meta" in ttf and 'Emji' in ttf["meta"].data
+        if not is_compat_font:
+            continue
+        compat_psnames.add(psname)
+
+        # If the font has compat metadata it should have PUAs for emoji sequences
+        coverage = get_emoji_map(emoji_font)
+        check_plausible_compat_pua(coverage, all_emoji, equivalent_emoji)
+
+
+    # NotoColorEmoji must be a Compat font.
+    assert 'NotoColorEmoji' in compat_psnames, 'NotoColorEmoji MUST be a compat font'
+
 
 def check_emoji_font_coverage(emoji_fonts, all_emoji, equivalent_emoji):
     coverages = []
@@ -611,6 +686,8 @@ SAME_FLAG_MAPPINGS = [
 
 ZWJ = 0x200D
 
+EMPTY_FLAG_SEQUENCE = (0x1F3F4, 0xE007F)
+
 def is_fitzpatrick_modifier(cp):
     return 0x1F3FB <= cp <= 0x1F3FF
 
@@ -636,7 +713,7 @@ def compute_expected_emoji():
     adjusted_emoji_zwj_sequences.update(_emoji_zwj_sequences)
 
     # Add empty flag tag sequence that is supported as fallback
-    _emoji_sequences[(0x1F3F4, 0xE007F)] = 'Emoji_Tag_Sequence'
+    _emoji_sequences[EMPTY_FLAG_SEQUENCE] = 'Emoji_Tag_Sequence'
 
     for sequence in _emoji_sequences.keys():
         sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
@@ -751,6 +828,7 @@ def main():
     _fonts_dir = path.join(target_out, 'fonts')
 
     fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')
+
     parse_fonts_xml(fonts_xml_path)
 
     check_compact_only_fallback()
@@ -769,7 +847,7 @@ def main():
         ucd_path = sys.argv[3]
         parse_ucd(ucd_path)
         all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
-        check_emoji_compat()
+        check_emoji_compat(all_emoji, equivalent_emoji)
         check_emoji_coverage(all_emoji, equivalent_emoji)
         check_emoji_defaults(default_emoji)