diff --git a/tool/Makefile b/tool/Makefile index ffd0ec81..95d7b9e3 100644 --- a/tool/Makefile +++ b/tool/Makefile @@ -10,6 +10,7 @@ PROP_FILES = \ $(UNICODE_VERSION)/PropList.txt \ $(UNICODE_VERSION)/Scripts.txt \ $(UNICODE_VERSION)/UnicodeData.txt \ + $(UNICODE_VERSION)/EastAsianWidth.txt \ $(UNICODE_VERSION)/auxiliary/GraphemeBreakProperty.txt \ $(UNICODE_VERSION)/emoji-data.txt diff --git a/tool/download-ucd.sh b/tool/download-ucd.sh index 4f60dafe..41e74c6a 100755 --- a/tool/download-ucd.sh +++ b/tool/download-ucd.sh @@ -1,6 +1,6 @@ #!/bin/bash -files='Blocks.txt CaseFolding.txt DerivedAge.txt DerivedCoreProperties.txt PropertyAliases.txt PropertyValueAliases.txt PropList.txt Scripts.txt SpecialCasing.txt UnicodeData.txt auxiliary/GraphemeBreakProperty.txt' +files='Blocks.txt CaseFolding.txt DerivedAge.txt DerivedCoreProperties.txt PropertyAliases.txt PropertyValueAliases.txt PropList.txt Scripts.txt SpecialCasing.txt UnicodeData.txt EastAsianWidth.txt auxiliary/GraphemeBreakProperty.txt' emoji_files='emoji-data.txt' if [ -z $1 ] || [ -z $2 ]; then diff --git a/tool/enc-unicode.rb b/tool/enc-unicode.rb index 8f6f333c..2442900a 100755 --- a/tool/enc-unicode.rb +++ b/tool/enc-unicode.rb @@ -159,6 +159,31 @@ def parse_scripts(data, categories) names.values.flatten << 'Unknown' end +def parse_east_asian_width(data) + width_types = {} + aliases = {} + data_foreach('PropertyValueAliases.txt') do |line| + if /^ea\s*; (\w+)\s*; (\w+)(?:\s*; (\w+))?/ =~ line + aliases[$1] = $2 + end + end + data_foreach('EastAsianWidth.txt') do |line| + if /^([0-9a-fA-F]+)(?:\.\.([0-9a-fA-F]+))?\s*;\s*(\w+)/ =~ line + type = aliases[$3] + width_types[type] ||= [] + if $2 + width_types[type].concat(($1.to_i(16)..$2.to_i(16)).to_a) + else + width_types[type].push($1.to_i(16)) + end + end + end + width_types.each_pair do |type, cps| + data[type] = cps + end + width_types.keys +end + def parse_aliases(data) kv = {} data_foreach('PropertyAliases.txt') do |line| @@ -398,6 +423,7 @@ def write(str) props, data = parse_unicode_data(get_file('UnicodeData.txt')) categories = {} props.concat parse_scripts(data, categories) +props.concat parse_east_asian_width(data) aliases = parse_aliases(data) ages = blocks = graphemeBreaks = nil define_posix_props(data)