Skip to content

Commit

Permalink
Update Dictionary Data
Browse files — browse the repository at this point in the history
  • Loading branch information
graphemecluster committed Sep 26, 2024
1 parent 8cd3bfc commit 649d271
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 36 deletions.
50 changes: 36 additions & 14 deletions src/Trie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,44 @@ type Node = Map<string, Node> & { v?: string[]; m?: WeakMap<Trie, string[] | nul

// Root node of the trie; the loop below fills it by parsing `data`
// (`data` is declared outside this view).
const root: Node = new Map();

// NOTE(review): this text looks like a rendered diff whose +/- markers were lost.
// Statements that read from `a` appear to be the previous implementation, and
// statements that read from `s` (with `l` and `d`) appear to be its replacement.
// That would explain the duplicate declarations of `n` and `p` — confirm against the real file.

// --- variant reading from `a` ---
const a = Array.from(data);
const n = [root];
// --- variant reading from `s` ---
// `s`: elements of `data` (presumably a string, so one entry per code point — confirm).
// `n`: stack of trie nodes; the last entry is the node new keys are attached to.
// `l`: stack kept in step with `n`, holding the key depth `d` recorded for each pushed node.
let s = Array.from(data);
let n = [root];
let l = [0];
// Parsing starts at index 1 and stops once the node stack is empty.
// NOTE(review): index 0 is presumably the "{" that opens the root group — verify against trie.txt.
for (let i = 1; n.length;) {
// --- variant reading from `a` ---
// Collect the run of elements with code point >= 256; they form the key path.
const k: string[] = [];
while (a[i].codePointAt(0)! >= 256) k.push(a[i++]);
// Create one fresh child node per key element, chained below the node on top of the stack.
const f = k.reduce((t, c) => {
const u: Node = new Map();
t.set(c, u);
return u;
}, n[n.length - 1]);
// Collect the payload: elements with code point < 123, plus the "|" separator.
let p = "";
while (a[i].codePointAt(0)! < 123 || a[i] === "|") p += a[i++];
// Each "|"-separated piece is decoded by `decodeJyutping` and stored on the last node created.
if (p) f.v = p.split("|").map(decodeJyutping);
// "{" makes that node the current parent; "}" returns to the previous parent.
if (a[i] === "{") i++, n.push(f);
else if (a[i] === "}") i++, n.pop();
// --- variant reading from `s` ---
// Start at the node on top of the stack, together with the depth recorded for it.
let p = n[n.length - 1];
let d = l[l.length - 1];
// Every element with code point >= 256 is a key: create a child node, descend into it,
// and count one more level of depth.
while (s[i].codePointAt(0)! >= 256) {
const u = new Map();
p.set(s[i++], u);
p = u;
d++;
}
// Values attached to the node reached above.
const v: string[] = [];
// Elements with code point < 123 (i.e. below "{") are value data.
while (s[i].codePointAt(0)! < 123) {
const w: string[] = [];
// One value holds `d` counted units. `c` only advances when a pair is NOT followed by "~".
for (let c = 0; c < d;) {
// Two elements form a two-digit base-90 number (each digit is char code - 33);
// that number is passed to `decodeJyutping`.
w.push(decodeJyutping((s[i++].charCodeAt(0) - 33) * 90 + (s[i++].charCodeAt(0) - 33)));
// "~" is consumed without advancing `c`, so another pair is read for the same unit.
// NOTE(review): presumably marks several syllables belonging to one key character — confirm.
if (s[i] === "~") i++;
else c++;
}
// The decoded pieces of one value are joined with single spaces.
v.push(w.join(" "));
}
// Only set `v` on the node when at least one value was read.
if (v.length) p.v = v;
// "{" opens a group: the current node and its depth become the new top of both stacks.
if (s[i] === "{") {
i++;
n.push(p);
l.push(d);
}
// "}" closes a group: drop the top of both stacks.
else if (s[i] === "}") {
i++;
n.pop();
l.pop();
}
}
// Release memory
// (the parse-time stacks and the element array are not referenced again in the visible code)
n = undefined!;
l = undefined!;
s = undefined!;

export class Trie {
get(s: string) {
Expand Down
2 changes: 1 addition & 1 deletion src/trie.txt

Large diffs are not rendered by default.

28 changes: 7 additions & 21 deletions src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,25 +205,11 @@ const nucleus = ["aa", "a", "e", "i", "o", "u"];
const rhyme = ["oe", "oen", "oeng", "oet", "oek", "eoi", "eon", "eot", "yu", "yun", "yut", "m", "ng"];
const coda = ["", "i", "u", "m", "n", "ng", "p", "t", "k"];

/**
 * Decodes a packed string into space-separated Jyutping syllables.
 *
 * The input is consumed two characters at a time via `iteratePairs`. Each pair is
 * read as a two-digit base-90 number (a digit is the character's char code minus 33).
 * That number is split into an index into `onset` (`/ 402`), a final index
 * (`% 402`, then `/ 6`) and a tone digit (`% 6`, plus 1).
 *
 * @param s Packed syllable codes, two characters per syllable.
 * @returns The decoded syllables joined by single spaces.
 */
export function decodeJyutping(s: string) {
	const syllables: string[] = [];
	for (const [high, low] of iteratePairs(s)) {
		const order = (high.charCodeAt(0) - 33) * 90 + (low.charCodeAt(0) - 33);
		const toneDigit = (order % 6) + 1;
		// `| 0` truncates to a 32-bit integer, exactly like `~~`.
		const finalIndex = ((order % 402) / 6) | 0;
		// Final indices below 54 are a nucleus/coda combination; the rest index into `rhyme`.
		const finalPart = finalIndex < 54
			? nucleus[(finalIndex / 9) | 0] + coda[finalIndex % 9]
			: rhyme[finalIndex - 54];
		syllables.push(onset[(order / 402) | 0] + finalPart + toneDigit);
	}
	return syllables.join(" ");
}

/**
 * Yields the values produced by the string's iterator two at a time, as
 * non-overlapping `[first, second]` tuples.
 *
 * If the iterator runs out after an odd number of values, the unpaired last
 * value is dropped and the generator simply finishes.
 */
function* iteratePairs(s: string) {
// Drive the string's own iterator by hand so that two values can be pulled per step.
const it = s[Symbol.iterator]();
for (;;) {
const x = it.next();
// No more input: finish.
if (x.done) return;
const y = it.next();
// `x` has no partner: finish without yielding it.
if (y.done) return;
yield [x.value, y.value] as const;
}
/**
 * Converts a numeric syllable id into its Jyutping spelling.
 *
 * The id is decomposed as follows:
 * - `id % 6`, plus 1, is the tone digit appended at the end;
 * - `(id % 402) / 6`, truncated, is the final index;
 * - `id / 402`, truncated, is the index into `onset`.
 *
 * A final index below 54 selects `nucleus[index / 9] + coda[index % 9]`;
 * any other final index selects `rhyme[index - 54]`.
 *
 * @param id Numeric syllable id.
 * @returns Onset, final and tone digit concatenated into one string.
 */
export function decodeJyutping(id: number) {
	const toneDigit = (id % 6) + 1;
	// `| 0` truncates to a 32-bit integer, exactly like `~~`.
	const finalIndex = ((id % 402) / 6) | 0;
	const initial = onset[(id / 402) | 0];
	const finalPart = finalIndex < 54
		? nucleus[(finalIndex / 9) | 0] + coda[finalIndex % 9]
		: rhyme[finalIndex - 54];
	return initial + finalPart + toneDigit;
}

0 comments on commit 649d271

Please sign in to comment.