Update

SkyEye-FAST · Aug 17, 2024 · efb2bef · efb2bef
1 parent a8a0d45
commit efb2bef
Show file tree

Hide file tree

Showing 5 changed files with 6,948 additions and 4 deletions.
diff --git a/base.py b/base.py
@@ -28,6 +28,26 @@ def load_json(file: str, folder: str = "data") -> Ldata:
         return json.load(f)
 
 
+def file_size(p: Path) -> str:
+    """
+    Return the size of a file as a human-readable string.
+
+    Args:
+        p (Path): Path of the file to measure.
+
+    Returns:
+        str: Size in MB when the file is at least 1 MiB, otherwise in KB,
+            rounded to two decimal places.
+    """
+
+    size_in_bytes = p.stat().st_size
+    # ">=" rather than ">" so that a file of exactly 1 MiB is reported as
+    # "1.0 MB" instead of "1024.0 KB".
+    if size_in_bytes >= 1048576:
+        return f"{round(size_in_bytes / 1048576, 2)} MB"
+    return f"{round(size_in_bytes / 1024, 2)} KB"
+
+
 # 读取语言文件
 data: Dict[str, Ldata] = {
     lang_name: load_json(lang_name, "mc_lang/full") for lang_name in ["en_us", "zh_cn"]

diff --git a/converter.py b/converter.py
@@ -13,7 +13,17 @@
 import jieba
 from opencc import OpenCC
 
-from base import P, Ldata, load_json, pinyin_to, gr_values, cy_values, finals, rep_zh
+from base import (
+    P,
+    Ldata,
+    load_json,
+    file_size,
+    pinyin_to,
+    gr_values,
+    cy_values,
+    finals,
+    rep_zh,
+)
 
 # 初始化OpenCC
 opencc_s2c = OpenCC(str(P / "GujiCC" / "opencc" / "s2c.json"))
@@ -124,6 +134,35 @@ def segment_str(text: str, auto_cut: bool = True) -> List[str]:
     return jieba.lcut(text) if auto_cut else text.split()
 
 
+def to_i7h(text: str) -> str:
+    """
+    Abbreviate every word in a string as a numeronym.
+
+    The first and last characters of each word are kept and the characters
+    between them are replaced by their count, e.g. "localization" -> "l10n".
+    Words of length 2 or less are left unchanged.
+
+    Args:
+        text (str): The string to convert.
+
+    Returns:
+        str: The converted string.
+    """
+
+    def abbreviate(match: "re.Match[str]") -> str:
+        """Return the numeronym for a single matched word."""
+        word = match.group()
+        if len(word) > 2:
+            return f"{word[0]}{len(word) - 2}{word[-1]}"
+        return word
+
+    # Rewrite each word at the position where it was matched, in one pass.
+    # Replacing word by word with str.replace(..., 1) could instead hit text
+    # produced by an earlier replacement: "hello h3o" used to come out as
+    # "h1o h3o" rather than "h3o h1o".
+    return re.sub(r"\w+", abbreviate, text)
+
+
 def to_katakana(text: str, rep: Ldata) -> str:
     """
     将字符串中的英文转写为片假名。
@@ -394,5 +433,5 @@ def save_to_json(
     file_path = P / output_folder / f"{output_file}.json"
     with open(file_path, "w", encoding="utf-8") as j:
         json.dump(input_dict, j, indent=2, ensure_ascii=False)
-    size = f"{round(file_path.stat().st_size / 1024, 2)} KB"
+    size = file_size(file_path)
     print(f"已生成语言文件“{output_file}.json”，大小{size}，耗时{elapsed_time:.2f} s。")