diff --git a/tools/i18n/locale_diff.py b/tools/i18n/locale_diff.py
deleted file mode 100644
index 674f7dd..0000000
--- a/tools/i18n/locale_diff.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import json
-import os
-from collections import OrderedDict
-
-# Define the standard file name
-standard_file = "locale/zh_CN.json"
-
-# Find all JSON files in the directory
-dir_path = "locale/"
-languages = [
-    os.path.join(dir_path, f)
-    for f in os.listdir(dir_path)
-    if f.endswith(".json") and f != standard_file
-]
-
-# Load the standard file
-with open(standard_file, "r", encoding="utf-8") as f:
-    standard_data = json.load(f, object_pairs_hook=OrderedDict)
-
-# Loop through each language file
-for lang_file in languages:
-    # Load the language file
-    with open(lang_file, "r", encoding="utf-8") as f:
-        lang_data = json.load(f, object_pairs_hook=OrderedDict)
-
-    # Find the difference between the language file and the standard file
-    diff = set(standard_data.keys()) - set(lang_data.keys())
-
-    miss = set(lang_data.keys()) - set(standard_data.keys())
-
-    # Add any missing keys to the language file
-    for key in diff:
-        lang_data[key] = key
-
-    # Del any extra keys to the language file
-    for key in miss:
-        del lang_data[key]
-
-    # Sort the keys of the language file to match the order of the standard file
-    lang_data = OrderedDict(
-        sorted(lang_data.items(), key=lambda x: list(standard_data.keys()).index(x[0]))
-    )
-
-    # Save the updated language file
-    with open(lang_file, "w", encoding="utf-8") as f:
-        json.dump(lang_data, f, ensure_ascii=False, indent=4, sort_keys=True)
-        f.write("\n")
diff --git a/tools/i18n/scan_i18n.py b/tools/i18n/scan_i18n.py
index f3e52cf..4e483a1 100644
--- a/tools/i18n/scan_i18n.py
+++ b/tools/i18n/scan_i18n.py
@@ -1,8 +1,14 @@
 import ast
 import glob
 import json
+import os
 from collections import OrderedDict
 
+I18N_JSON_DIR: str = os.path.join(os.path.dirname(os.path.relpath(__file__)), "locale")
+DEFAULT_LANGUAGE: str = "zh_CN"  # default language
+TITLE_LEN: int = 60  # display width of section titles
+KEY_LEN: int = 30  # display width of key labels
+SHOW_KEYS: bool = False  # whether to print every affected key
 
 def extract_i18n_strings(node):
     i18n_strings = []
@@ -21,55 +27,87 @@ def extract_i18n_strings(node):
 
     return i18n_strings
 
+def scan_i18n_strings():
+    """
+    Scan every .py file below the current directory (recursively),
+    parse each file that mentions I18nAuto into an AST and collect
+    the i18n strings it contains. Returns the set of unique strings.
+    """
+    strings = []
+    print(" Scanning Files and Extracting i18n Strings ".center(TITLE_LEN, "="))
+    for filename in glob.iglob("**/*.py", recursive=True):
+        with open(filename, "r", encoding="utf-8") as f:
+            code = f.read()
+        if "I18nAuto" in code:
+            tree = ast.parse(code, filename=filename)
+            i18n_strings = extract_i18n_strings(tree)
+            print(f"{filename.ljust(KEY_LEN)}: {len(i18n_strings)}")
+            strings.extend(i18n_strings)
 
-# scan the directory for all .py files (recursively)
-# for each file, parse the code into an AST
-# for each AST, extract the i18n strings
+    code_keys = set(strings)
+    print(f"{'Total Unique'.ljust(KEY_LEN)}: {len(code_keys)}")
+    return code_keys
 
-strings = []
-for filename in glob.iglob("**/*.py", recursive=True):
-    with open(filename, "r") as f:
-        code = f.read()
-        if "I18nAuto" in code:
-            tree = ast.parse(code)
-            i18n_strings = extract_i18n_strings(tree)
-            print(filename, len(i18n_strings))
-            strings.extend(i18n_strings)
-code_keys = set(strings)
-"""
-n_i18n.py
-gui_v1.py 26
-app.py 16
-infer-web.py 147
-scan_i18n.py 0
-i18n.py 0
-lib/train/process_ckpt.py 1
-"""
-print()
-print("Total unique:", len(code_keys))
+def update_i18n_json(json_file, standard_keys):
+    """
+    Synchronise one locale JSON file with the keys used in the code.
+    Keys missing from the file are added (value = key for the default
+    language, "#!" + key otherwise), keys absent from standard_keys are
+    removed, and the file is rewritten in place sorted by key.
+    """
+    print(f" Process {json_file} ".center(TITLE_LEN, "="))
+    standard_keys = set(standard_keys)
+    # Match on the file name itself, not on a substring of the whole path.
+    locale_name = os.path.splitext(os.path.basename(json_file))[0]
+    is_default = locale_name == DEFAULT_LANGUAGE
+    # Load the JSON file
+    with open(json_file, "r", encoding="utf-8") as f:
+        json_data = json.load(f, object_pairs_hook=OrderedDict)
+    # Report the number of entries before processing
+    print(f"{'Total Keys'.ljust(KEY_LEN)}: {len(json_data)}")
+    # Find the missing keys and add them
+    miss_keys = sorted(standard_keys - set(json_data))
+    if miss_keys:
+        print(f"{'Missing Keys (+)'.ljust(KEY_LEN)}: {len(miss_keys)}")
+        for key in miss_keys:
+            # The default language maps each key to itself; other languages
+            # get "#!" + key so the entry is flagged as untranslated.
+            json_data[key] = key if is_default else "#!" + key
+            if SHOW_KEYS:
+                print(f"{'Added Missing Key'.ljust(KEY_LEN)}: {key}")
+    # Find the unused keys and remove them
+    diff_keys = sorted(set(json_data) - standard_keys)
+    if diff_keys:
+        print(f"{'Unused Keys (-)'.ljust(KEY_LEN)}: {len(diff_keys)}")
+        for key in diff_keys:
+            del json_data[key]
+            if SHOW_KEYS:
+                print(f"{'Removed Unused Key'.ljust(KEY_LEN)}: {key}")
+    # Order by key, matching what json.dump(sort_keys=True) writes below
+    json_data = OrderedDict(sorted(json_data.items()))
+    # Report the number of entries after processing
+    if miss_keys or diff_keys:
+        print(f"{'Total Keys (After)'.ljust(KEY_LEN)}: {len(json_data)}")
+    # Count the keys that still need a translation
+    num_miss_translation = 0
+    for key, value in json_data.items():
+        if isinstance(value, str) and value.startswith("#!"):
+            num_miss_translation += 1
+            if SHOW_KEYS:
+                print(f"{'Missing Translation'.ljust(KEY_LEN)}: {key}")
+    if num_miss_translation > 0:
+        print(f"\033[31m{'[Failed] Missing Translation'.ljust(KEY_LEN)}: {num_miss_translation}\033[0m")
+    else:
+        print("\033[32m[Passed] All Keys Translated\033[0m")
+    # Write the result back to the JSON file
+    with open(json_file, "w", encoding="utf-8") as f:
+        json.dump(json_data, f, ensure_ascii=False, indent=4, sort_keys=True)
+        f.write("\n")
+    print(f" Updated {json_file} ".center(TITLE_LEN, "=") + '\n')
 
-
-standard_file = "i18n/locale/zh_CN.json"
-with open(standard_file, "r", encoding="utf-8") as f:
-    standard_data = json.load(f, object_pairs_hook=OrderedDict)
-standard_keys = set(standard_data.keys())
-
-# Define the standard file name
-unused_keys = standard_keys - code_keys
-print("Unused keys:", len(unused_keys))
-for unused_key in unused_keys:
-    print("\t", unused_key)
-
-missing_keys = code_keys - standard_keys
-print("Missing keys:", len(missing_keys))
-for missing_key in missing_keys:
-    print("\t", missing_key)
-
-code_keys_dict = OrderedDict()
-for s in strings:
-    code_keys_dict[s] = s
-
-# write back
-with open(standard_file, "w", encoding="utf-8") as f:
-    json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True)
-    f.write("\n")
+if __name__ == "__main__":
+    code_keys = scan_i18n_strings()
+    for json_name in sorted(os.listdir(I18N_JSON_DIR)):
+        if json_name.endswith(".json"):
+            json_path = os.path.join(I18N_JSON_DIR, json_name)
+            update_i18n_json(json_path, code_keys)
\ No newline at end of file