Skip to content

Commit cb6ed50

Browse files
committed
non-issue : 从csv构建Emoji的Unicode范围
1 parent ad3ff4c commit cb6ed50

8 files changed

+135
-51
lines changed

composer.json

+3-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
}
1414
],
1515
"minimum-stability": "stable",
16-
"require": {},
16+
"require": {
17+
"league/csv": "^8.2"
18+
},
1719
"autoload": {
1820
"psr-4": {
1921
"Hidehalo\\Emoji\\": "src/",

composer.lock

+85-28
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/Features/EmojiParser.php

+33-9
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,13 @@
22

33
namespace Hidehalo\Emoji\Features;
44

5+
use League\Csv\Reader;
56
use Hidehalo\Emoji\Unicode\Emoji;
67

78
class EmojiParser extends UnicodeParser
89
{
910
/* @link http://unicode.org/emoji/charts/full-emoji-list.html */
1011
private $maps = [
11-
[0x00A9, 0x00AE],
12-
[0x200D, 0x2B55],
13-
[0x3030, 0x303D],
14-
[0x3297, 0x3299],
15-
[0xFE0F, 0xFE0F],
16-
[0xFE30, 0xFE4F],
17-
[0x1F004, 0x1F9E6],
1812
[0xE0062, 0xE007F],
1913
];
2014
/**
@@ -24,6 +18,7 @@ class EmojiParser extends UnicodeParser
2418

2519
public function __construct(array $config = [])
{
    // Note: $config is accepted for signature compatibility but is not read here.

    // Extend the hard-coded ranges with the entries loaded from the CSV data,
    // then compile the combined list into the matching pattern.
    $fromCsv = $this->buildMapFromCsv();
    $combined = array_merge($this->maps, $fromCsv);

    $this->maps = $combined;
    $this->pattern = $this->buildRegex($combined);
}
2924

@@ -89,13 +84,42 @@ private function buildRegex($maps)
8984
$pattern = '';
9085
if ($maps) {
9186
foreach ($maps as $range) {
92-
$min = $range[0];
93-
$max = $range[1];
87+
if (is_array($range)) {
88+
$min = $range[0];
89+
$max = $range[1];
90+
} else {
91+
$min = $max = $range;
92+
}
9493
$pattern .= $this->getSymbol($min).'-'.$this->getSymbol($max);
9594
}
9695
$pattern = '/['.$pattern.']/u';
9796
}
9897

9998
return $pattern;
10099
}
100+
101+
/**
 * Build the list of emoji code points / code point ranges from the CSV data file.
 *
 * Reads the "codepoints" column of data/emoji.csv (resolved relative to this
 * source file). Each cell is expected to hold either a single hexadecimal code
 * point ("1F600") or an inclusive range written as "MIN..MAX" ("1F600..1F64F").
 * NOTE(review): cells holding multi-code-point sequences (space separated) are
 * not handled specially here; confirm the data file never contains them.
 *
 * @return array List whose entries are either an int (single code point) or a
 *               two-element array [int $min, int $max] (inclusive range).
 */
private function buildMapFromCsv()
{
    $reader = Reader::createFromPath(__DIR__.'/../../data/emoji.csv');
    $result = $reader->fetchAssoc(['codepoints']);
    $maps = [];
    foreach ($result as $row) {
        $codepoint = trim($row['codepoints']);
        // Skip the header row, and skip blank cells: hexdec('') is 0, which
        // would otherwise add U+0000 to the generated character class.
        if ($codepoint === '' || $codepoint === 'codepoints') {
            continue;
        }
        if ($this->isRange($codepoint)) {
            list($min, $max) = explode('..', $codepoint);
            // hexdec() takes bare hex digits; a "0x" prefix is an invalid
            // character for it and raises a deprecation on PHP >= 7.4.
            $maps[] = [hexdec($min), hexdec($max)];
        } else {
            $maps[] = hexdec($codepoint);
        }
    }

    return $maps;
}
120+
121+
/**
 * Tell whether a "codepoints" cell denotes a range written as "MIN..MAX".
 *
 * @param string $codepoints Raw cell value, e.g. "1F600" or "1F600..1F64F".
 *
 * @return bool True when the value contains the ".." range separator.
 */
private function isRange($codepoints)
{
    // strpos() returns int|false; compare against false explicitly so the
    // method always yields a real boolean (offset 0 must not read as "no match").
    return strpos($codepoints, '..') !== false;
}
101125
}

test/Unit/CoreTest.php

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ class CoreTest extends TestCase
99
{
1010
public function setUp()
1111
{
12-
$this->raw = 'Hello ';
13-
$this->utf8string = 'Hello [:9787]';
14-
$this->htmlncr = 'Hello &#9787;';
12+
$this->raw = 'Hello 😂';
13+
$this->utf8string = 'Hello [:128514]';
14+
$this->htmlncr = 'Hello &#128514;';
1515
}
1616

1717
public function tearDown()

0 commit comments

Comments
 (0)