starylan 2024-07-12 01:14:53 +08:00
parent 7e3a340b9a
commit 70bfba6e63


@@ -1,9 +1,14 @@
import ast
import code
import glob
import json
import os
from collections import OrderedDict
I18N_JSON_DIR = os.path.join(os.path.dirname(os.path.relpath(__file__)), 'locale')
DEFAULT_LANGUAGE = "zh_CN"
TITLE_LEN = 100
SHOW_KEYS = False  # whether to show per-key information
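# I18N_JSON_DIR is the locale/ directory resolved relative to this script; SHOW_KEYS
# toggles the per-key log lines in update_i18n_json below.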
def extract_i18n_strings(node):
    i18n_strings = []
@@ -22,93 +27,80 @@ def extract_i18n_strings(node):
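    # (lines omitted between the hunks: presumably the AST walk that fills
    #  i18n_strings with the string literals passed to i18n(...) calls)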
    return i18n_strings
# scan the directory for all .py files (recursively)
# for each file, parse the code into an AST
# for each AST, extract the i18n strings
strings = []
for filename in glob.iglob("**/*.py", recursive=True):
    with open(filename, "r", encoding="utf-8") as f:
        code = f.read()
        if "I18nAuto" in code:
            tree = ast.parse(code)
            i18n_strings = extract_i18n_strings(tree)
            print(filename, len(i18n_strings))
            strings.extend(i18n_strings)
code_keys = set(strings)
"""
n_i18n.py
gui_v1.py 26
app.py 16
infer-web.py 147
scan_i18n.py 0
i18n.py 0
lib/train/process_ckpt.py 1
"""
print("Total unique:", len(code_keys))
def scan_i18n_strings():
    strings = []
    print(" Scanning Files and Extracting i18n Strings ".center(TITLE_LEN, "="))
    for filename in glob.iglob("**/*.py", recursive=True):
        with open(filename, "r", encoding="utf-8") as f:
            code = f.read()
            if "I18nAuto" in code:
                tree = ast.parse(code)
                i18n_strings = extract_i18n_strings(tree)
                print(f"{filename.ljust(30)}: {len(i18n_strings)}")
                strings.extend(i18n_strings)
I18N_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'locale')
# "i18n/locale"
DEFAULT_LANGUAGE = "zh_CN"
    code_keys = set(strings)
    print(f"{'Total Unique'.ljust(30)}: {len(code_keys)}")
    return code_keys
standard_file = os.path.join(I18N_FILE_PATH, DEFAULT_LANGUAGE + ".json")
with open(standard_file, "r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)
standard_keys = set(standard_data.keys())
# Define the standard file name
unused_keys = standard_keys - code_keys
print("Unused keys:", len(unused_keys))
for unused_key in unused_keys:
    print("\t", unused_key)
missing_keys = code_keys - standard_keys
print("Missing keys:", len(missing_keys))
for missing_key in missing_keys:
    print("\t", missing_key)
code_keys_dict = OrderedDict()
for s in strings:
    code_keys_dict[s] = s
# write back
with open(standard_file, "w", encoding="utf-8") as f:
    json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True)
    f.write("\n")
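# (old flow: the keys scanned from the code are written straight back into the DEFAULT_LANGUAGE JSON)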
languages_files = [os.path.join(I18N_FILE_PATH, f) for f in os.listdir(I18N_FILE_PATH) if f.endswith(r".json") and f != DEFAULT_LANGUAGE + ".json"]
# print(os.listdir(I18N_FILE_PATH))
# print(languages_files)
for language_file in languages_files:
    print(f"Processing {language_file}".center(100, "="))
    with open(language_file, "r", encoding="utf-8") as f:
        language_data = json.load(f, object_pairs_hook=OrderedDict)
    diff = set(standard_data.keys()) - set(language_data.keys())
    miss = set(language_data.keys()) - set(standard_data.keys())
    for key in diff:
        language_data[key] = "#!" + key
        print(f"Added missing key {key} to {language_file}")
    for key in miss:
        del language_data[key]
        print(f"Removed unused key {key} from {language_file}")
    language_data = OrderedDict(
        sorted(language_data.items(),
               key=lambda x: list(standard_data.keys()).index(x[0])))
    for key, value in language_data.items():
def update_i18n_json(json_file, standard_keys):
    print(f" Process {json_file} ".center(TITLE_LEN, "="))
    # read the JSON file
    with open(json_file, "r", encoding="utf-8") as f:
        json_data = json.load(f, object_pairs_hook=OrderedDict)
    # print the number of JSON entries before processing
    print(f"{'Total Keys (Before)'.ljust(20)}: {len(json_data)}")
    # identify missing keys and fill them in
    miss_keys = set(standard_keys) - set(json_data.keys())
    if len(miss_keys) > 0:
        print(f"{'Missing Keys (+)'.ljust(20)}: {len(miss_keys)}")
        for key in miss_keys:
            if DEFAULT_LANGUAGE in json_file:
                # for the default language, key and value are the same.
                json_data[key] = key
            else:
                # for other languages, set the value to "#!" + key to mark it as untranslated.
                json_data[key] = "#!" + key
            if SHOW_KEYS:
                print(f"{'Added Missing Key'.ljust(20)}: {key}")
    # identify unused keys and delete them
    diff_keys = set(json_data.keys()) - set(standard_keys)
    if len(diff_keys) > 0:
        print(f"{'Unused Keys (-)'.ljust(20)}: {len(diff_keys)}")
        for key in diff_keys:
            del json_data[key]
            if SHOW_KEYS:
                print(f"{'Removed Unused Key'.ljust(20)}: {key}")
    # sort the entries in standard key order
    json_data = OrderedDict(
        sorted(json_data.items(),
               key=lambda x: list(standard_keys).index(x[0])))
    # print the number of JSON entries after processing
    print(f"{'Total Keys (After)'.ljust(20)}: {len(json_data)}")
    # identify keys still awaiting translation
    num_miss_translation = 0
    for key, value in json_data.items():
        if value.startswith("#!"):
print(f"Missing translation for {key} in {language_file}")
with open(language_file, "w", encoding="utf-8") as f:
json.dump(language_data, f, ensure_ascii=False, indent=4, sort_keys=True)
            num_miss_translation += 1
            if SHOW_KEYS:
                print(f"{'Missing Translation'.ljust(20)}: {key}")
    if num_miss_translation > 0:
        print(f"{'Missing Translation'.ljust(20)}: {num_miss_translation}")
    # write the processed result back to the JSON file
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(json_data, f, ensure_ascii=False, indent=4, sort_keys=True)
        f.write("\n")
    print(f" Updated {json_file} ".center(TITLE_LEN, "=") + '\n')
    print(f"Updated {language_file}".center(100, "=") + '\n')
print("Finished")
if __name__ == "__main__":
    code_keys = scan_i18n_strings()
    for json_file in os.listdir(I18N_JSON_DIR):
        if json_file.endswith(r".json"):
            json_file = os.path.join(I18N_JSON_DIR, json_file)
            update_i18n_json(json_file, code_keys)
        else:
            pass
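The diff collapses the body of extract_i18n_strings (old lines 10-21, new lines 15-26). For orientation only, a minimal sketch of such an AST walker, assuming the translation wrapper is called as i18n("...") with plain string literals; this is not necessarily the repository's actual implementation:

import ast

def extract_i18n_strings(node):
    # collect the literal arguments of every i18n("...") call under `node`
    i18n_strings = []
    if (
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id == "i18n"  # assumed wrapper name
    ):
        for arg in node.args:
            if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
                i18n_strings.append(arg.value)
    # recurse so calls nested anywhere in the tree are found as well
    for child in ast.iter_child_nodes(node):
        i18n_strings.extend(extract_i18n_strings(child))
    return i18n_strings

If the real helper uses a different call name, only the isinstance checks above would change; the rest of the script only relies on the function returning the list of extracted strings.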