diff --git a/tools/i18n/scan_i18n.py b/tools/i18n/scan_i18n.py
index f3e52cf4..b542c102 100644
--- a/tools/i18n/scan_i18n.py
+++ b/tools/i18n/scan_i18n.py
@@ -1,6 +1,7 @@
 import ast
 import glob
 import json
+import os
 from collections import OrderedDict
 
 
@@ -28,7 +29,7 @@ def extract_i18n_strings(node):
 
 strings = []
 for filename in glob.iglob("**/*.py", recursive=True):
-    with open(filename, "r") as f:
+    with open(filename, "r", encoding="utf-8") as f:
         code = f.read()
         if "I18nAuto" in code:
             tree = ast.parse(code)
@@ -45,11 +46,13 @@ scan_i18n.py 0
 i18n.py 0
 lib/train/process_ckpt.py 1
 """
-print()
 print("Total unique:", len(code_keys))
 
+# Anchored to this script's directory; replaces the CWD-relative "i18n/locale".
+I18N_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'locale')
+DEFAULT_LANGUAGE = "zh_CN"
 
-standard_file = "i18n/locale/zh_CN.json"
+standard_file = os.path.join(I18N_FILE_PATH, DEFAULT_LANGUAGE + ".json")
 with open(standard_file, "r", encoding="utf-8") as f:
     standard_data = json.load(f, object_pairs_hook=OrderedDict)
 standard_keys = set(standard_data.keys())
@@ -73,3 +76,43 @@ for s in strings:
 with open(standard_file, "w", encoding="utf-8") as f:
     json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True)
     f.write("\n")
+
+# Sync every other locale file with the key set just written to the standard
+# file (code_keys_dict), not the stale standard_data that was loaded earlier.
+reference_keys = set(code_keys_dict)
+languages_files = [
+    os.path.join(I18N_FILE_PATH, name)
+    for name in sorted(os.listdir(I18N_FILE_PATH))
+    if name.endswith(".json") and name != DEFAULT_LANGUAGE + ".json"
+]
+
+for language_file in languages_files:
+    print(f"Processing {language_file}".center(100, "="))
+    with open(language_file, "r", encoding="utf-8") as f:
+        language_data = json.load(f, object_pairs_hook=OrderedDict)
+
+    language_keys = set(language_data)
+    diff = reference_keys - language_keys
+    miss = language_keys - reference_keys
+
+    # The "#!" prefix marks an entry that still needs a real translation.
+    for key in sorted(diff):
+        language_data[key] = "#!" + key
+        print(f"Added missing key {key} to {language_file}")
+
+    for key in sorted(miss):
+        del language_data[key]
+        print(f"Removed unused key {key} from {language_file}")
+
+    # Report in sorted key order, the same order sort_keys=True writes to disk.
+    for key, value in sorted(language_data.items()):
+        if isinstance(value, str) and value.startswith("#!"):
+            print(f"Missing translation for {key} in {language_file}")
+
+    with open(language_file, "w", encoding="utf-8") as f:
+        json.dump(language_data, f, ensure_ascii=False, indent=4, sort_keys=True)
+        f.write("\n")
+
+    print(f"Updated {language_file}".center(100, "=") + '\n')
+
+print("Finished")