-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBidi.php
163 lines (140 loc) · 4.65 KB
/
Bidi.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
<?php
/**
* Bidi class file
*
* @package spinitron/precis
* @copyright Copyright (c) 2015 Spinitron LLC
* @license ISC https://opensource.org/licenses/ISC
*/
namespace spinitron\precis;
/**
* Provides static methods to find Unicode Bidi property of characters and apply the IDNA
* Bidi Rule to strings.
*
* @package spinitron\precis
*/
class Bidi
{
    use BidiDataTrait;

    /**
     * Returns the Bidi class of a character.
     *
     * Lookup order: the per-code-point table, then the code point range table, then, only
     * for code points PCRE reports as unassigned, the default ranges and the
     * ignorable/noncharacter default. Anything still unmatched is 'L'.
     *
     * @param string $char a UTF-8 string; only its first character is examined
     *
     * @return string the Bidi class alias (e.g. 'L', 'R', 'ON', etc.) of the character
     */
    public static function getClass($char)
    {
        // Ensure input is only one character.
        $char = mb_substr($char, 0, 1, 'UTF-8');

        // Get the char's Unicode code point as PHP integer.
        $ord = Precis::utf8ord($char);

        // Lookup the individual code points.
        if (isset(static::$codePoints[$ord])) {
            return static::$codePoints[$ord];
        }

        $class = static::searchRanges(static::$codePointRanges, $ord);
        if ($class !== null) {
            return $class;
        }

        // Unassigned code points have General_Category Cn. (PCRE has no "Cu" property; using
        // it makes the pattern fail to compile so this branch would never run.)
        if (preg_match('%\p{Cn}%u', $char)) {
            $class = static::searchRanges(static::$unassignedDefaultRanges, $ord);
            if ($class !== null) {
                return $class;
            }

            // DerivedBidiClass.txt says unassigned Default_Ignorable_Code_Point and
            // Noncharacter_Code_Point (the same as Precis::CC_IGNORABLE) default to BN
            // (Boundary_Neutral).
            if (preg_match('%[' . Precis::CC_IGNORABLE . ']%u', $char)) {
                return 'BN';
            }
        }

        // Everything else defaults to Left_To_Right.
        return 'L';
    }

    /**
     * Searches for a code point within ranges to find Bidi properties.
     *
     * Each range is expected to be a list of [first code point, last code point, class]
     * and the ranges to be a 0-indexed list sorted ascending without overlaps, which is
     * what the binary search relies on.
     *
     * @param array[] $ranges the ranges to search
     * @param int $ord code point to search for as an integer
     *
     * @return string|null the Bidi class alias (e.g. 'L', 'R', 'ON', etc.) of the
     * code point or null if was not found in the given ranges (including when it falls
     * in a gap between two ranges, or when $ranges is empty)
     */
    protected static function searchRanges($ranges, $ord)
    {
        $low = 0;
        $high = count($ranges) - 1;

        // Classic closed-interval binary search. Moving the bounds past $mid on every
        // iteration guarantees termination and never reads outside the array.
        while ($low <= $high) {
            $mid = ($low + $high) >> 1;
            $range = $ranges[$mid];

            if ($ord < $range[0]) {
                $high = $mid - 1;
            } elseif ($ord > $range[1]) {
                $low = $mid + 1;
            } else {
                return $range[2];
            }
        }

        return null;
    }

    /**
     * Applies the RFC 5893 Bidi rule to a string.
     *
     * The numbered comments in the body refer to the six conditions of RFC 5893 section 2.
     *
     * @param string $string the string to test for the Bidi Rule
     *
     * @return bool Whether the string passes all conditions of the Bidi rule or not.
     * An empty string or a string that is not valid UTF-8 does not pass.
     */
    public static function rule($string)
    {
        // A limit of -1 means "no limit" (passing null is deprecated since PHP 8.1).
        $chars = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false for invalid UTF-8, and an empty string has no first
        // character that could satisfy condition 1.
        if (empty($chars)) {
            return false;
        }

        // Classify every character exactly once.
        $classes = [];
        foreach ($chars as $char) {
            $classes[] = static::getClass($char);
        }

        // 1. The first character must be L (LTR label) or R/AL (RTL label).
        $firstClass = $classes[0];
        $isRtl = $firstClass === 'R' || $firstClass === 'AL';
        if (!$isRtl && $firstClass !== 'L') {
            return false;
        }

        // Conditions 3 and 6 constrain the last character that is not part of a trailing
        // run of NSM. In a one-character label that is the first character itself.
        $end = count($classes) - 1;
        while ($end > 0 && $classes[$end] === 'NSM') {
            $end--;
        }
        $endClass = $classes[$end];

        if ($isRtl) {
            // 3. RTL end char.
            if (!in_array($endClass, ['R', 'AL', 'EN', 'AN'], true)) {
                return false;
            }

            // 2. Classes allowed in RTL, collecting state for test 4 on the way.
            $allowed = ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'];
            $hasEn = false;
            $hasAn = false;
            foreach ($classes as $class) {
                if (!in_array($class, $allowed, true)) {
                    return false;
                }
                if ($class === 'EN') {
                    $hasEn = true;
                } elseif ($class === 'AN') {
                    $hasAn = true;
                }
            }

            // 4. EN and AN must not both be present.
            return !($hasEn && $hasAn);
        }

        // 6. LTR end char.
        if (!in_array($endClass, ['L', 'EN'], true)) {
            return false;
        }

        // 5. Classes allowed in LTR.
        $allowed = ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'];
        foreach ($classes as $class) {
            if (!in_array($class, $allowed, true)) {
                return false;
            }
        }

        return true;
    }
}