Skip to content

Latest commit

 

History

History
451 lines (419 loc) · 13.2 KB

ranges.md

File metadata and controls

451 lines (419 loc) · 13.2 KB

Concrete ranges of each term

Note: generated by:

node --run print-ranges

To get the code in your preferred language, add the -l option (the reference source code below can be generated by commands like this):

node --run print-ranges -- -l js

To see all supported languages, run:

node --run print-ranges -- -h

Unicode Version

16

CJK code points without variation selector

  • U+1100..U+11FF (ᄀ..ᇿ)
  • U+20A9 (₩)
  • U+2329..U+232A (〈..〉)
  • U+2630..U+2637 (☰..☷)
  • U+268A..U+268F (⚊..⚏)
  • U+2E80..U+2E99 (⺀..⺙)
  • U+2E9B..U+2EF3 (⺛..⻳)
  • U+2F00..U+2FD5 (⼀..⿕)
  • U+2FF0..U+303E (⿰..〾)
  • U+3041..U+3096 (ぁ..ゖ)
  • U+3099..U+30FF (゙..ヿ)
  • U+3105..U+312F (ㄅ..ㄯ)
  • U+3131..U+318E (ㄱ..ㆎ)
  • U+3190..U+31E5 (㆐..㇥)
  • U+31EF..U+321E (㇯..㈞)
  • U+3220..U+3247 (㈠..㉇)
  • U+3250..U+A48C (㉐..ꒌ)
  • U+A490..U+A4C6 (꒐..꓆)
  • U+A960..U+A97C (ꥠ..ꥼ)
  • U+AC00..U+D7A3 (가..힣)
  • U+D7B0..U+D7C6 (ힰ..ퟆ)
  • U+D7CB..U+D7FB (ퟋ..ퟻ)
  • U+F900..U+FAFF (豈..﫿)
  • U+FE10..U+FE19 (︐..︙)
  • U+FE30..U+FE52 (︰..﹒)
  • U+FE54..U+FE66 (﹔..﹦)
  • U+FE68..U+FE6B (﹨..﹫)
  • U+FF01..U+FFBE (!..ᄒ)
  • U+FFC2..U+FFC7 (ᅡ..ᅦ)
  • U+FFCA..U+FFCF (ᅧ..ᅬ)
  • U+FFD2..U+FFD7 (ᅭ..ᅲ)
  • U+FFDA..U+FFDC (ᅳ..ᅵ)
  • U+FFE0..U+FFE6 (¢..₩)
  • U+FFE8..U+FFEE (│..○)
  • U+16FE0..U+16FE4 (𖿠..𖿤)
  • U+16FF0..U+16FF1 (𖿰..𖿱)
  • U+17000..U+187F7 (𗀀..𘟷)
  • U+18800..U+18CD5 (𘠀..𘳕)
  • U+18CFF..U+18D08 (𘳿..𘴈)
  • U+1AFF0..U+1AFF3 (𚿰..𚿳)
  • U+1AFF5..U+1AFFB (𚿵..𚿻)
  • U+1AFFD..U+1AFFE (𚿽..𚿾)
  • U+1B000..U+1B122 (𛀀..𛄢)
  • U+1B132 (𛄲)
  • U+1B150..U+1B152 (𛅐..𛅒)
  • U+1B155 (𛅕)
  • U+1B164..U+1B167 (𛅤..𛅧)
  • U+1B170..U+1B2FB (𛅰..𛋻)
  • U+1D300..U+1D356 (𝌀..𝍖)
  • U+1D360..U+1D376 (𝍠..𝍶)
  • U+1F200 (🈀)
  • U+1F202 (🈂)
  • U+1F210..U+1F219 (🈐..🈙)
  • U+1F21B..U+1F22E (🈛..🈮)
  • U+1F230..U+1F231 (🈰..🈱)
  • U+1F237 (🈷)
  • U+1F23B (🈻)
  • U+1F240..U+1F248 (🉀..🉈)
  • U+1F260..U+1F265 (🉠..🉥)
  • U+20000..U+3FFFD (𠀀..𿿽)
Reference Source Code for C (C99 or later) / C++
const bool is_cjk = 0x1100 <= cp && cp <= 0x11ff
  || cp == 0x20a9
  || 0x2329 <= cp && cp <= 0x232a
  || 0x2630 <= cp && cp <= 0x2637
  || 0x268a <= cp && cp <= 0x268f
  || 0x2e80 <= cp && cp <= 0x2e99
  || 0x2e9b <= cp && cp <= 0x2ef3
  || 0x2f00 <= cp && cp <= 0x2fd5
  || 0x2ff0 <= cp && cp <= 0x303e
  || 0x3041 <= cp && cp <= 0x3096
  || 0x3099 <= cp && cp <= 0x30ff
  || 0x3105 <= cp && cp <= 0x312f
  || 0x3131 <= cp && cp <= 0x318e
  || 0x3190 <= cp && cp <= 0x31e5
  || 0x31ef <= cp && cp <= 0x321e
  || 0x3220 <= cp && cp <= 0x3247
  || 0x3250 <= cp && cp <= 0xa48c
  || 0xa490 <= cp && cp <= 0xa4c6
  || 0xa960 <= cp && cp <= 0xa97c
  || 0xac00 <= cp && cp <= 0xd7a3
  || 0xd7b0 <= cp && cp <= 0xd7c6
  || 0xd7cb <= cp && cp <= 0xd7fb
  || 0xf900 <= cp && cp <= 0xfaff
  || 0xfe10 <= cp && cp <= 0xfe19
  || 0xfe30 <= cp && cp <= 0xfe52
  || 0xfe54 <= cp && cp <= 0xfe66
  || 0xfe68 <= cp && cp <= 0xfe6b
  || 0xff01 <= cp && cp <= 0xffbe
  || 0xffc2 <= cp && cp <= 0xffc7
  || 0xffca <= cp && cp <= 0xffcf
  || 0xffd2 <= cp && cp <= 0xffd7
  || 0xffda <= cp && cp <= 0xffdc
  || 0xffe0 <= cp && cp <= 0xffe6
  || 0xffe8 <= cp && cp <= 0xffee
  || 0x16fe0 <= cp && cp <= 0x16fe4
  || 0x16ff0 <= cp && cp <= 0x16ff1
  || 0x17000 <= cp && cp <= 0x187f7
  || 0x18800 <= cp && cp <= 0x18cd5
  || 0x18cff <= cp && cp <= 0x18d08
  || 0x1aff0 <= cp && cp <= 0x1aff3
  || 0x1aff5 <= cp && cp <= 0x1affb
  || 0x1affd <= cp && cp <= 0x1affe
  || 0x1b000 <= cp && cp <= 0x1b122
  || cp == 0x1b132
  || 0x1b150 <= cp && cp <= 0x1b152
  || cp == 0x1b155
  || 0x1b164 <= cp && cp <= 0x1b167
  || 0x1b170 <= cp && cp <= 0x1b2fb
  || 0x1d300 <= cp && cp <= 0x1d356
  || 0x1d360 <= cp && cp <= 0x1d376
  || cp == 0x1f200
  || cp == 0x1f202
  || 0x1f210 <= cp && cp <= 0x1f219
  || 0x1f21b <= cp && cp <= 0x1f22e
  || 0x1f230 <= cp && cp <= 0x1f231
  || cp == 0x1f237
  || cp == 0x1f23b
  || 0x1f240 <= cp && cp <= 0x1f248
  || 0x1f260 <= cp && cp <= 0x1f265
  || 0x20000 <= cp && cp <= 0x3fffd;
Reference Source Code for JavaScript
// `isCjk` is true when the numeric code point `cp` (supplied by the
// surrounding code) is inside any of the CJK ranges enumerated below.
// Range tests are grouped explicitly; single code points use `===`.
const isCjk = (0x1100 <= cp && cp <= 0x11ff)
  || cp === 0x20a9
  || (0x2329 <= cp && cp <= 0x232a)
  || (0x2630 <= cp && cp <= 0x2637)
  || (0x268a <= cp && cp <= 0x268f)
  || (0x2e80 <= cp && cp <= 0x2e99)
  || (0x2e9b <= cp && cp <= 0x2ef3)
  || (0x2f00 <= cp && cp <= 0x2fd5)
  || (0x2ff0 <= cp && cp <= 0x303e)
  || (0x3041 <= cp && cp <= 0x3096)
  || (0x3099 <= cp && cp <= 0x30ff)
  || (0x3105 <= cp && cp <= 0x312f)
  || (0x3131 <= cp && cp <= 0x318e)
  || (0x3190 <= cp && cp <= 0x31e5)
  || (0x31ef <= cp && cp <= 0x321e)
  || (0x3220 <= cp && cp <= 0x3247)
  || (0x3250 <= cp && cp <= 0xa48c)
  || (0xa490 <= cp && cp <= 0xa4c6)
  || (0xa960 <= cp && cp <= 0xa97c)
  || (0xac00 <= cp && cp <= 0xd7a3)
  || (0xd7b0 <= cp && cp <= 0xd7c6)
  || (0xd7cb <= cp && cp <= 0xd7fb)
  || (0xf900 <= cp && cp <= 0xfaff)
  || (0xfe10 <= cp && cp <= 0xfe19)
  || (0xfe30 <= cp && cp <= 0xfe52)
  || (0xfe54 <= cp && cp <= 0xfe66)
  || (0xfe68 <= cp && cp <= 0xfe6b)
  || (0xff01 <= cp && cp <= 0xffbe)
  || (0xffc2 <= cp && cp <= 0xffc7)
  || (0xffca <= cp && cp <= 0xffcf)
  || (0xffd2 <= cp && cp <= 0xffd7)
  || (0xffda <= cp && cp <= 0xffdc)
  || (0xffe0 <= cp && cp <= 0xffe6)
  || (0xffe8 <= cp && cp <= 0xffee)
  || (0x16fe0 <= cp && cp <= 0x16fe4)
  || (0x16ff0 <= cp && cp <= 0x16ff1)
  || (0x17000 <= cp && cp <= 0x187f7)
  || (0x18800 <= cp && cp <= 0x18cd5)
  || (0x18cff <= cp && cp <= 0x18d08)
  || (0x1aff0 <= cp && cp <= 0x1aff3)
  || (0x1aff5 <= cp && cp <= 0x1affb)
  || (0x1affd <= cp && cp <= 0x1affe)
  || (0x1b000 <= cp && cp <= 0x1b122)
  || cp === 0x1b132
  || (0x1b150 <= cp && cp <= 0x1b152)
  || cp === 0x1b155
  || (0x1b164 <= cp && cp <= 0x1b167)
  || (0x1b170 <= cp && cp <= 0x1b2fb)
  || (0x1d300 <= cp && cp <= 0x1d356)
  || (0x1d360 <= cp && cp <= 0x1d376)
  || cp === 0x1f200
  || cp === 0x1f202
  || (0x1f210 <= cp && cp <= 0x1f219)
  || (0x1f21b <= cp && cp <= 0x1f22e)
  || (0x1f230 <= cp && cp <= 0x1f231)
  || cp === 0x1f237
  || cp === 0x1f23b
  || (0x1f240 <= cp && cp <= 0x1f248)
  || (0x1f260 <= cp && cp <= 0x1f265)
  || (0x20000 <= cp && cp <= 0x3fffd);

regexp version

// Regular-expression form of the CJK code point check. The pattern is anchored
// with `^` and contains a single character class, so it tests only the first
// code point of the input string. The `u` flag is needed for the `\u{...}`
// escapes used for code points above U+FFFF.
const isCjkRegex = /^[\u1100-\u11ff\u20a9\u2329-\u232a\u2630-\u2637\u268a-\u268f\u2e80-\u2e99\u2e9b-\u2ef3\u2f00-\u2fd5\u2ff0-\u303e\u3041-\u3096\u3099-\u30ff\u3105-\u312f\u3131-\u318e\u3190-\u31e5\u31ef-\u321e\u3220-\u3247\u3250-\ua48c\ua490-\ua4c6\ua960-\ua97c\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufaff\ufe10-\ufe19\ufe30-\ufe52\ufe54-\ufe66\ufe68-\ufe6b\uff01-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc\uffe0-\uffe6\uffe8-\uffee\u{16fe0}-\u{16fe4}\u{16ff0}-\u{16ff1}\u{17000}-\u{187f7}\u{18800}-\u{18cd5}\u{18cff}-\u{18d08}\u{1aff0}-\u{1aff3}\u{1aff5}-\u{1affb}\u{1affd}-\u{1affe}\u{1b000}-\u{1b122}\u{1b132}\u{1b150}-\u{1b152}\u{1b155}\u{1b164}-\u{1b167}\u{1b170}-\u{1b2fb}\u{1d300}-\u{1d356}\u{1d360}-\u{1d376}\u{1f200}\u{1f202}\u{1f210}-\u{1f219}\u{1f21b}-\u{1f22e}\u{1f230}-\u{1f231}\u{1f237}\u{1f23b}\u{1f240}-\u{1f248}\u{1f260}-\u{1f265}\u{20000}-\u{3fffd}]/u;
Reference Source Code for Rust
// `is_cjk` is `true` when `cp` (an integer code point provided by the
// surrounding code) equals one of the listed values or falls in one of the
// inclusive ranges; every other value yields `false`.
let is_cjk = match cp {
    0x1100..=0x11ff
    | 0x20a9
    | 0x2329..=0x232a
    | 0x2630..=0x2637
    | 0x268a..=0x268f
    | 0x2e80..=0x2e99
    | 0x2e9b..=0x2ef3
    | 0x2f00..=0x2fd5
    | 0x2ff0..=0x303e
    | 0x3041..=0x3096
    | 0x3099..=0x30ff
    | 0x3105..=0x312f
    | 0x3131..=0x318e
    | 0x3190..=0x31e5
    | 0x31ef..=0x321e
    | 0x3220..=0x3247
    | 0x3250..=0xa48c
    | 0xa490..=0xa4c6
    | 0xa960..=0xa97c
    | 0xac00..=0xd7a3
    | 0xd7b0..=0xd7c6
    | 0xd7cb..=0xd7fb
    | 0xf900..=0xfaff
    | 0xfe10..=0xfe19
    | 0xfe30..=0xfe52
    | 0xfe54..=0xfe66
    | 0xfe68..=0xfe6b
    | 0xff01..=0xffbe
    | 0xffc2..=0xffc7
    | 0xffca..=0xffcf
    | 0xffd2..=0xffd7
    | 0xffda..=0xffdc
    | 0xffe0..=0xffe6
    | 0xffe8..=0xffee
    | 0x16fe0..=0x16fe4
    | 0x16ff0..=0x16ff1
    | 0x17000..=0x187f7
    | 0x18800..=0x18cd5
    | 0x18cff..=0x18d08
    | 0x1aff0..=0x1aff3
    | 0x1aff5..=0x1affb
    | 0x1affd..=0x1affe
    | 0x1b000..=0x1b122
    | 0x1b132
    | 0x1b150..=0x1b152
    | 0x1b155
    | 0x1b164..=0x1b167
    | 0x1b170..=0x1b2fb
    | 0x1d300..=0x1d356
    | 0x1d360..=0x1d376
    | 0x1f200
    | 0x1f202
    | 0x1f210..=0x1f219
    | 0x1f21b..=0x1f22e
    | 0x1f230..=0x1f231
    | 0x1f237
    | 0x1f23b
    | 0x1f240..=0x1f248
    | 0x1f260..=0x1f265
    | 0x20000..=0x3fffd => true,
    _ => false,
};
Reference Source Code for C# (C# 9 or later)
// isCjk is true when the integer code point `cp` (declared by the surrounding
// code) matches one of the constants or inclusive ranges below. Each range is
// written as a parenthesized pattern so the and/or grouping is explicit.
var isCjk =
    cp is (>= 0x1100 and <= 0x11ff)
    or 0x20a9
    or (>= 0x2329 and <= 0x232a)
    or (>= 0x2630 and <= 0x2637)
    or (>= 0x268a and <= 0x268f)
    or (>= 0x2e80 and <= 0x2e99)
    or (>= 0x2e9b and <= 0x2ef3)
    or (>= 0x2f00 and <= 0x2fd5)
    or (>= 0x2ff0 and <= 0x303e)
    or (>= 0x3041 and <= 0x3096)
    or (>= 0x3099 and <= 0x30ff)
    or (>= 0x3105 and <= 0x312f)
    or (>= 0x3131 and <= 0x318e)
    or (>= 0x3190 and <= 0x31e5)
    or (>= 0x31ef and <= 0x321e)
    or (>= 0x3220 and <= 0x3247)
    or (>= 0x3250 and <= 0xa48c)
    or (>= 0xa490 and <= 0xa4c6)
    or (>= 0xa960 and <= 0xa97c)
    or (>= 0xac00 and <= 0xd7a3)
    or (>= 0xd7b0 and <= 0xd7c6)
    or (>= 0xd7cb and <= 0xd7fb)
    or (>= 0xf900 and <= 0xfaff)
    or (>= 0xfe10 and <= 0xfe19)
    or (>= 0xfe30 and <= 0xfe52)
    or (>= 0xfe54 and <= 0xfe66)
    or (>= 0xfe68 and <= 0xfe6b)
    or (>= 0xff01 and <= 0xffbe)
    or (>= 0xffc2 and <= 0xffc7)
    or (>= 0xffca and <= 0xffcf)
    or (>= 0xffd2 and <= 0xffd7)
    or (>= 0xffda and <= 0xffdc)
    or (>= 0xffe0 and <= 0xffe6)
    or (>= 0xffe8 and <= 0xffee)
    or (>= 0x16fe0 and <= 0x16fe4)
    or (>= 0x16ff0 and <= 0x16ff1)
    or (>= 0x17000 and <= 0x187f7)
    or (>= 0x18800 and <= 0x18cd5)
    or (>= 0x18cff and <= 0x18d08)
    or (>= 0x1aff0 and <= 0x1aff3)
    or (>= 0x1aff5 and <= 0x1affb)
    or (>= 0x1affd and <= 0x1affe)
    or (>= 0x1b000 and <= 0x1b122)
    or 0x1b132
    or (>= 0x1b150 and <= 0x1b152)
    or 0x1b155
    or (>= 0x1b164 and <= 0x1b167)
    or (>= 0x1b170 and <= 0x1b2fb)
    or (>= 0x1d300 and <= 0x1d356)
    or (>= 0x1d360 and <= 0x1d376)
    or 0x1f200
    or 0x1f202
    or (>= 0x1f210 and <= 0x1f219)
    or (>= 0x1f21b and <= 0x1f22e)
    or (>= 0x1f230 and <= 0x1f231)
    or 0x1f237
    or 0x1f23b
    or (>= 0x1f240 and <= 0x1f248)
    or (>= 0x1f260 and <= 0x1f265)
    or (>= 0x20000 and <= 0x3fffd);
Reference Source Code for Python
# True when the code point `cp` (provided by the surrounding code) equals one
# of the single values or lies in one of the inclusive ranges below.
# The expression is wrapped in parentheses instead of backslash continuations.
is_cjk = (
    0x1100 <= cp <= 0x11ff
    or cp == 0x20a9
    or 0x2329 <= cp <= 0x232a
    or 0x2630 <= cp <= 0x2637
    or 0x268a <= cp <= 0x268f
    or 0x2e80 <= cp <= 0x2e99
    or 0x2e9b <= cp <= 0x2ef3
    or 0x2f00 <= cp <= 0x2fd5
    or 0x2ff0 <= cp <= 0x303e
    or 0x3041 <= cp <= 0x3096
    or 0x3099 <= cp <= 0x30ff
    or 0x3105 <= cp <= 0x312f
    or 0x3131 <= cp <= 0x318e
    or 0x3190 <= cp <= 0x31e5
    or 0x31ef <= cp <= 0x321e
    or 0x3220 <= cp <= 0x3247
    or 0x3250 <= cp <= 0xa48c
    or 0xa490 <= cp <= 0xa4c6
    or 0xa960 <= cp <= 0xa97c
    or 0xac00 <= cp <= 0xd7a3
    or 0xd7b0 <= cp <= 0xd7c6
    or 0xd7cb <= cp <= 0xd7fb
    or 0xf900 <= cp <= 0xfaff
    or 0xfe10 <= cp <= 0xfe19
    or 0xfe30 <= cp <= 0xfe52
    or 0xfe54 <= cp <= 0xfe66
    or 0xfe68 <= cp <= 0xfe6b
    or 0xff01 <= cp <= 0xffbe
    or 0xffc2 <= cp <= 0xffc7
    or 0xffca <= cp <= 0xffcf
    or 0xffd2 <= cp <= 0xffd7
    or 0xffda <= cp <= 0xffdc
    or 0xffe0 <= cp <= 0xffe6
    or 0xffe8 <= cp <= 0xffee
    or 0x16fe0 <= cp <= 0x16fe4
    or 0x16ff0 <= cp <= 0x16ff1
    or 0x17000 <= cp <= 0x187f7
    or 0x18800 <= cp <= 0x18cd5
    or 0x18cff <= cp <= 0x18d08
    or 0x1aff0 <= cp <= 0x1aff3
    or 0x1aff5 <= cp <= 0x1affb
    or 0x1affd <= cp <= 0x1affe
    or 0x1b000 <= cp <= 0x1b122
    or cp == 0x1b132
    or 0x1b150 <= cp <= 0x1b152
    or cp == 0x1b155
    or 0x1b164 <= cp <= 0x1b167
    or 0x1b170 <= cp <= 0x1b2fb
    or 0x1d300 <= cp <= 0x1d356
    or 0x1d360 <= cp <= 0x1d376
    or cp == 0x1f200
    or cp == 0x1f202
    or 0x1f210 <= cp <= 0x1f219
    or 0x1f21b <= cp <= 0x1f22e
    or 0x1f230 <= cp <= 0x1f231
    or cp == 0x1f237
    or cp == 0x1f23b
    or 0x1f240 <= cp <= 0x1f248
    or 0x1f260 <= cp <= 0x1f265
    or 0x20000 <= cp <= 0x3fffd
)

SVS following CJK code points

  • U+FE00..U+FE02
  • U+FE0E

EAW is treated as "W" if unassigned (defined by Unicode)

Note

The following result is extracted from https://www.unicode.org/Public/16.0.0/ucd/EastAsianWidth.txt. It is slightly different from https://www.unicode.org/reports/tr11/#Unassigned. U+2FFFE, U+2FFFF, U+3FFFE, and U+3FFFF are missing, but they are "Noncharacter", not "Unassigned" (or "Reserved"). This shows that we do not have to care about whether they are included in the list of CJK code points or not. To simplify the ranges, U+2FFFE and U+2FFFF are included here, so that U+20000–U+2FFFD and U+30000–U+3FFFD merge into the single range U+20000–U+3FFFD.

  • U+3400..U+4DBF (㐀..䶿)
  • U+4E00..U+9FFF (一..鿿)
  • U+F900..U+FAFF (豈..﫿)
  • U+20000..U+3FFFD (𠀀..𿿽)