Update

SkyEye-FAST · Aug 15, 2024 · 9c7d75e
1 parent afaf607
commit 9c7d75e
Show file tree

Hide file tree

Showing 7 changed files with 87 additions and 108 deletions.
diff --git a/base.py b/base.py
@@ -53,3 +53,5 @@ def load_json(file: str, folder: str = "data") -> Ldata:
 
 rep_zh: Ldata = load_json("rep_zh")  # 连写的中文转写方案替换修正
 finals: Tuple[str, ...] = tuple("aāááàoōóǒòeēéěè")  # 可能的零声母开头
+
+rep_ja_kk: Ldata = load_json("rep_ja_kk")  # 片假名替换修正
diff --git a/converter.py b/converter.py
@@ -5,7 +5,7 @@
 import re
 import time
 import inspect
-from typing import Dict, List, Set
+from typing import List, Set, Tuple, Callable, Optional
 
 from romajitable import to_kana as tk
 from pypinyin import Style, lazy_pinyin, load_phrases_dict
@@ -26,7 +26,6 @@
 # 初始化其他自定义数据
 fixed_zh_u = load_json("fixed_zh_universal")
 tone_to_ipa: Ldata = {"1": "˥", "2": "˧˥", "3": "˨˩˦", "4": "˥˩", "5": ""}  # IPA声调
-rep_ja_kk: Ldata = load_json("rep_ja_kk")  # 片假名替换修正
 manyoganas_dict: Ldata = load_json("manyogana")  # 万叶假名
 
 
@@ -121,32 +120,34 @@ def segment_str(text: str, auto_cut: bool = True) -> List[str]:
     return jieba.lcut(text) if auto_cut else text.split()
 
 
-def to_katakana(text: str) -> str:
+def to_katakana(text: str, rep: Ldata) -> str:
     """
     将字符串中的英文转写为片假名。
 
     Args:
         text (str): 需要转换的字符串
+        rep (Ldata): 需要替换格式的内容
 
     Returns:
         str: 转换结果
     """
 
-    return replace_multiple(tk(text).katakana, rep_ja_kk)
+    return replace_multiple(tk(text).katakana, rep)
 
 
-def to_manyogana(text: str) -> str:
+def to_manyogana(text: str, rep: Ldata) -> str:
     """
     将字符串中的片假名转写为万叶假名。
 
     Args:
         text (str): 需要转换的字符串
+        rep (Ldata): 需要替换格式的内容
 
     Returns:
         str: 转换结果
     """
 
-    return "".join(manyoganas_dict.get(char, char) for char in to_katakana(text))
+    return "".join(manyoganas_dict.get(char, char) for char in to_katakana(text, rep))
 
 
 def to_pinyin(text: str, rep: Ldata, auto_cut: bool = True) -> str:
@@ -314,22 +315,30 @@ def to_xiaojing(text: str, rep: Ldata, auto_cut: bool = True) -> str:
     return replace_multiple(" ".join(output_list), rep)
 
 
-def save_to_json(input_dict: Ldata, config: Dict) -> None:
-    """将生成的语言文件保存至JSON。
+def convert(
+    input_dict: Ldata,
+    func: Callable[[str], str],
+    fix_dict: Optional[Ldata] = None,
+    auto_cut: bool = True,
+    rep: Ldata = rep_zh,
+) -> Tuple[Ldata, float]:
+    """
+    转换语言数据。
 
     Args:
         input_dict (Ldata): 输入的数据
-        config (Dict): 含有配置的字典
+        func (Callable[[str], str]): 生成语言文件所用的函数
+        fix_dict (Optional[Ldata], optional): 语言文件中需要修复的内容. 默认为None
+        auto_cut (bool, optional): 是否自动分词，默认为True
+        rep (Ldata, optional): 需要替换的内容，默认为rep_zh的内容
+
+    Returns:
+        (Ldata, float): 转换结果及耗时
     """
 
     start_time = time.time()
 
-    func = config["func"]
-
-    auto_cut = config.get("auto_cut", True)
-    rep = config.get("rep", rep_zh)
-
-    output_dict = {}
+    output_dict: Ldata = {}
     for k, v in input_dict.items():
         func_signature = inspect.signature(func)
         kwargs = {}
@@ -339,16 +348,33 @@ def save_to_json(input_dict: Ldata, config: Dict) -> None:
             kwargs["rep"] = rep
         output_dict[k] = func(v, **kwargs)
 
-    output_dict.update(fixed_zh_u)
-    if config.get("fixed_dict"):
-        output_dict.update(config["fixed_dict"])
-    file_path = (
-        P / config.get("output_folder", "output") / f"{config['output_file']}.json"
-    )
-    with open(file_path, "w", encoding="utf-8") as j:
-        json.dump(output_dict, j, indent=2, ensure_ascii=False)
+    if rep is rep_zh:
+        output_dict.update(fixed_zh_u)
+
+    if fix_dict:
+        output_dict.update(fix_dict)
+
     elapsed_time = time.time() - start_time
+
+    return output_dict, elapsed_time
+
+
+def save_to_json(
+    input_data: Tuple[Ldata, float],
+    output_file: str,
+    output_folder: str = "output",
+) -> None:
+    """将生成的语言文件保存至JSON。
+
+    Args:
+        input_data (Tuple[Ldata, float]): 输入的数据
+        output_file (str): 保存的文件名，无格式后缀
+        output_folder (str, optional): 保存的文件夹，默认为“output”
+    """
+
+    input_dict, elapsed_time = input_data
+    file_path = P / output_folder / f"{output_file}.json"
+    with open(file_path, "w", encoding="utf-8") as j:
+        json.dump(input_dict, j, indent=2, ensure_ascii=False)
     size = f"{round(file_path.stat().st_size / 1024, 2)} KB"
-    print(
-        f"已生成语言文件“{config['output_file']}.json”，大小{size}，耗时{elapsed_time:.2f} s。"
-    )
+    print(f"已生成语言文件“{output_file}.json”，大小{size}，耗时{elapsed_time:.2f} s。")
diff --git a/data/rep_ja_kk.json b/data/rep_ja_kk.json
@@ -19,5 +19,6 @@
     "サムエル・åベルグ": "サミュエル・オーバーグ",
     "レナ・ライネ": "レナ・レイン",
     "エン_ウス": "en_us",
-    "パラディストルäド": "パラダイスツリー"
+    "パラディストルäド": "パラダイスツリー",
+    "「・フ4・」":"[ F4 ]"
 }
diff --git a/fix_data.py b/fix_data.py
@@ -4,6 +4,7 @@
 from base import load_json
 from converter import (
     save_to_json,
+    convert,
     to_pinyin,
     to_wadegiles,
     to_romatzyh,
@@ -15,52 +16,27 @@
 
 fixed_zh_source = load_json("fixed_zh_source")
 save_to_json(
-    fixed_zh_source,
-    {
-        "output_file": "fixed_zh_py",
-        "func": to_pinyin,
-        "output_folder": "data",
-        "auto_cut": False,
-        "rep": rep,
-    },
+    convert(fixed_zh_source, to_pinyin, auto_cut=False, rep=rep),
+    "fixed_zh_py",
+    "data",
 )
 save_to_json(
-    fixed_zh_source,
-    {
-        "output_file": "fixed_zh_wg",
-        "func": to_wadegiles,
-        "output_folder": "data",
-        "auto_cut": False,
-        "rep": rep,
-    },
+    convert(fixed_zh_source, to_wadegiles, auto_cut=False, rep=rep),
+    "fixed_zh_wg",
+    "data",
 )
 save_to_json(
-    fixed_zh_source,
-    {
-        "output_file": "fixed_zh_gr",
-        "func": to_romatzyh,
-        "output_folder": "data",
-        "auto_cut": False,
-        "rep": rep,
-    },
+    convert(fixed_zh_source, to_romatzyh, auto_cut=False, rep=rep),
+    "fixed_zh_gr",
+    "data",
 )
 save_to_json(
-    fixed_zh_source,
-    {
-        "output_file": "fixed_zh_cy",
-        "func": to_cyrillic,
-        "output_folder": "data",
-        "auto_cut": False,
-        "rep": rep,
-    },
+    convert(fixed_zh_source, to_cyrillic, auto_cut=False, rep=rep),
+    "fixed_zh_cy",
+    "data",
 )
 save_to_json(
-    fixed_zh_source,
-    {
-        "output_file": "fixed_zh_xj",
-        "func": to_xiaojing,
-        "output_folder": "data",
-        "auto_cut": False,
-        "rep": rep,
-    },
+    convert(fixed_zh_source, to_xiaojing, auto_cut=False, rep=rep),
+    "fixed_zh_xj",
+    "data",
 )
diff --git a/output/ja_kk.json b/output/ja_kk.json
@@ -4684,7 +4684,7 @@
   "known_server_link.status": "スタツス",
   "known_server_link.support": "スッポルト",
   "known_server_link.website": "ヱブシテ",
-  "language.code": "zho-Hans_CN",
+  "language.code": "en_us",
   "language.name": "エングリスホ",
   "language.region": "ウニテド・スタテス",
   "lanServer.otherPlayers": "セッチングス・フォル・オトヘル・プライェルス",
@@ -5547,7 +5547,7 @@
   "painting.minecraft.sunflowers.author": "クリストッフェル・ゼッテルストランド",
   "painting.minecraft.sunflowers.title": "スンフロヱルス",
   "painting.minecraft.sunset.author": "クリストッフェル・ゼッテルストランド",
-  "painting.minecraft.sunset.title": "sunset_dense",
+  "painting.minecraft.sunset.title": "スンセト_デンセ",
   "painting.minecraft.tides.author": "クリストッフェル・ゼッテルストランド",
   "painting.minecraft.tides.title": "チデス",
   "painting.minecraft.unpacked.author": "サラホ・ボエヴィング",

diff --git a/output/ja_my.json b/output/ja_my.json
@@ -4684,7 +4684,7 @@
   "known_server_link.status": "須多川須",
   "known_server_link.support": "須川保流止",
   "known_server_link.website": "恵夫之天",
-  "language.code": "zho-Hans_CN",
+  "language.code": "en_us",
   "language.name": "江尓具利須保",
   "language.region": "宇仁天特・須多天須",
   "lanServer.otherPlayers": "世川千尓具須・不於流・於止部流・不良伊江流須",
@@ -5547,7 +5547,7 @@
   "painting.minecraft.sunflowers.author": "久利須止川不江流・是川天流須止良尓特",
   "painting.minecraft.sunflowers.title": "須尓不呂恵流須",
   "painting.minecraft.sunset.author": "久利須止川不江流・是川天流須止良尓特",
-  "painting.minecraft.sunset.title": "sunset_dense",
+  "painting.minecraft.sunset.title": "須尓世止_代尓世",
   "painting.minecraft.tides.author": "久利須止川不江流・是川天流須止良尓特",
   "painting.minecraft.tides.title": "千代須",
   "painting.minecraft.unpacked.author": "散良保・番江无伊尓具",

diff --git a/pack.py b/pack.py
@@ -4,9 +4,10 @@
 import time
 import zipfile as zf
 
-from base import P, data, fixed_zh
+from base import P, data, fixed_zh, rep_ja_kk
 from converter import (
     save_to_json,
+    convert,
     to_bopomofo,
     to_cyrillic,
     to_ipa,
@@ -26,42 +27,15 @@ def main() -> None:
 
     # 生成语言文件
     main_start_time = time.time()
-    save_to_json(
-        data["en_us"],
-        {"output_file": "ja_kk", "func": to_katakana},
-    )
-    save_to_json(
-        data["en_us"],
-        {"output_file": "ja_my", "func": to_manyogana},
-    )
-    save_to_json(
-        data["zh_cn"],
-        {"output_file": "zh_py", "func": to_pinyin, "fixed_dict": fixed_zh["zh_py"]},
-    )
-    save_to_json(
-        data["zh_cn"],
-        {"output_file": "zh_ipa", "func": to_ipa},
-    )
-    save_to_json(
-        data["zh_cn"],
-        {"output_file": "zh_bpmf", "func": to_bopomofo},
-    )
-    save_to_json(
-        data["zh_cn"],
-        {"output_file": "zh_wg", "func": to_wadegiles, "fixed_dict": fixed_zh["zh_wg"]},
-    )
-    save_to_json(
-        data["zh_cn"],
-        {"output_file": "zh_gr", "func": to_romatzyh, "fixed_dict": fixed_zh["zh_gr"]},
-    )
-    save_to_json(
-        data["zh_cn"],
-        {"output_file": "zh_cy", "func": to_cyrillic, "fixed_dict": fixed_zh["zh_cy"]},
-    )
-    save_to_json(
-        data["zh_cn"],
-        {"output_file": "zh_xj", "func": to_xiaojing, "fixed_dict": fixed_zh["zh_xj"]},
-    )
+    save_to_json(convert(data["en_us"], to_katakana, rep=rep_ja_kk), "ja_kk")
+    save_to_json(convert(data["en_us"], to_manyogana, rep=rep_ja_kk), "ja_my")
+    save_to_json(convert(data["zh_cn"], to_pinyin, fixed_zh["zh_py"]), "zh_py")
+    save_to_json(convert(data["zh_cn"], to_ipa), "zh_ipa")
+    save_to_json(convert(data["zh_cn"], to_bopomofo), "zh_bpmf")
+    save_to_json(convert(data["zh_cn"], to_wadegiles, fixed_zh["zh_wg"]), "zh_wg")
+    save_to_json(convert(data["zh_cn"], to_romatzyh, fixed_zh["zh_gr"]), "zh_gr")
+    save_to_json(convert(data["zh_cn"], to_cyrillic, fixed_zh["zh_cy"]), "zh_cy")
+    save_to_json(convert(data["zh_cn"], to_xiaojing, fixed_zh["zh_xj"]), "zh_xj")
     main_elapsed_time = time.time() - main_start_time
     print(f"\n语言文件生成完毕，共耗时{main_elapsed_time:.2f} s。")
diff --git a/base.py b/base.py
@@ -53,3 +53,5 @@ def load_json(file: str, folder: str = "data") -> Ldata:
 
 rep_zh: Ldata = load_json("rep_zh")  # 连写的中文转写方案替换修正
 finals: Tuple[str, ...] = tuple("aāááàoōóǒòeēéěè")  # 可能的零声母开头
+
+rep_ja_kk: Ldata = load_json("rep_ja_kk")  # 片假名替换修正