starylan 2024-07-12 01:14:53 +08:00
parent 7e3a340b9a
commit 70bfba6e63


@@ -1,9 +1,14 @@
import ast
import code
import glob
import json
import os
from collections import OrderedDict
I18N_JSON_DIR = os.path.join(os.path.dirname(os.path.relpath(__file__)), 'locale')
DEFAULT_LANGUAGE = "zh_CN"
TITLE_LEN = 100
SHOW_KEYS = False  # whether to show per-key information
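# I18N_JSON_DIR is the locale/ directory resolved relative to this script; SHOW_KEYS
# toggles the per-key log lines in update_i18n_json below.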
def extract_i18n_strings(node):
    i18n_strings = []
@@ -22,93 +27,80 @@ def extract_i18n_strings(node):
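    # (lines omitted between the hunks: presumably the AST walk that fills
    #  i18n_strings with the string literals passed to i18n(...) calls)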
    return i18n_strings
# scan the directory for all .py files (recursively)
# for each file, parse the code into an AST
# for each AST, extract the i18n strings
strings = []
for filename in glob.iglob("**/*.py", recursive=True):
    with open(filename, "r", encoding="utf-8") as f:
        code = f.read()
        if "I18nAuto" in code:
            tree = ast.parse(code)
            i18n_strings = extract_i18n_strings(tree)
            print(filename, len(i18n_strings))
            strings.extend(i18n_strings)
code_keys = set(strings)
"""
n_i18n.py
gui_v1.py 26
app.py 16
infer-web.py 147
scan_i18n.py 0
i18n.py 0
lib/train/process_ckpt.py 1
"""
print("Total unique:", len(code_keys))
def scan_i18n_strings():
    strings = []
    print(" Scanning Files and Extracting i18n Strings ".center(TITLE_LEN, "="))
    for filename in glob.iglob("**/*.py", recursive=True):
        with open(filename, "r", encoding="utf-8") as f:
            code = f.read()
            if "I18nAuto" in code:
                tree = ast.parse(code)
                i18n_strings = extract_i18n_strings(tree)
                print(f"{filename.ljust(30)}: {len(i18n_strings)}")
                strings.extend(i18n_strings)
I18N_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'locale')
# "i18n/locale"
DEFAULT_LANGUAGE = "zh_CN"
    code_keys = set(strings)
    print(f"{'Total Unique'.ljust(30)}: {len(code_keys)}")
    return code_keys
standard_file = os.path.join(I18N_FILE_PATH, DEFAULT_LANGUAGE + ".json")
with open(standard_file, "r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)
standard_keys = set(standard_data.keys())
# Define the standard file name
unused_keys = standard_keys - code_keys
print("Unused keys:", len(unused_keys))
for unused_key in unused_keys:
    print("\t", unused_key)
missing_keys = code_keys - standard_keys
print("Missing keys:", len(missing_keys))
for missing_key in missing_keys:
    print("\t", missing_key)
code_keys_dict = OrderedDict()
for s in strings:
    code_keys_dict[s] = s
# write back
with open(standard_file, "w", encoding="utf-8") as f:
    json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True)
    f.write("\n")
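# (old flow: the keys scanned from the code are written straight back into the DEFAULT_LANGUAGE JSON)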
languages_files = [os.path.join(I18N_FILE_PATH, f) for f in os.listdir(I18N_FILE_PATH) if f.endswith(r".json") and f != DEFAULT_LANGUAGE + ".json"]
# print(os.listdir(I18N_FILE_PATH))
# print(languages_files)
for language_file in languages_files:
    print(f"Processing {language_file}".center(100, "="))
    with open(language_file, "r", encoding="utf-8") as f:
        language_data = json.load(f, object_pairs_hook=OrderedDict)
    diff = set(standard_data.keys()) - set(language_data.keys())
    miss = set(language_data.keys()) - set(standard_data.keys())
    for key in diff:
        language_data[key] = "#!" + key
        print(f"Added missing key {key} to {language_file}")
    for key in miss:
        del language_data[key]
        print(f"Removed unused key {key} from {language_file}")
    language_data = OrderedDict(
        sorted(language_data.items(),
               key=lambda x: list(standard_data.keys()).index(x[0])))
    for key, value in language_data.items():
def update_i18n_json(json_file, standard_keys):
    print(f" Process {json_file} ".center(TITLE_LEN, "="))
    # read the JSON file
    with open(json_file, "r", encoding="utf-8") as f:
        json_data = json.load(f, object_pairs_hook=OrderedDict)
    # print the number of JSON entries before processing
    print(f"{'Total Keys (Before)'.ljust(20)}: {len(json_data)}")
    # identify missing keys and fill them in
    miss_keys = set(standard_keys) - set(json_data.keys())
    if len(miss_keys) > 0:
        print(f"{'Missing Keys (+)'.ljust(20)}: {len(miss_keys)}")
        for key in miss_keys:
            if DEFAULT_LANGUAGE in json_file:
                # for the default language, key and value are the same.
                json_data[key] = key
            else:
                # for other languages, set the value to "#!" + key to mark it as untranslated.
                json_data[key] = "#!" + key
            if SHOW_KEYS:
                print(f"{'Added Missing Key'.ljust(20)}: {key}")
    # identify unused keys and delete them
    diff_keys = set(json_data.keys()) - set(standard_keys)
    if len(diff_keys) > 0:
        print(f"{'Unused Keys (-)'.ljust(20)}: {len(diff_keys)}")
        for key in diff_keys:
            del json_data[key]
            if SHOW_KEYS:
                print(f"{'Removed Unused Key'.ljust(20)}: {key}")
    # sort the entries in standard key order
    json_data = OrderedDict(
        sorted(json_data.items(),
               key=lambda x: list(standard_keys).index(x[0])))
    # print the number of JSON entries after processing
    print(f"{'Total Keys (After)'.ljust(20)}: {len(json_data)}")
    # identify keys still awaiting translation
    num_miss_translation = 0
    for key, value in json_data.items():
        if value.startswith("#!"):
print(f"Missing translation for {key} in {language_file}")
with open(language_file, "w", encoding="utf-8") as f:
json.dump(language_data, f, ensure_ascii=False, indent=4, sort_keys=True)
            num_miss_translation += 1
            if SHOW_KEYS:
                print(f"{'Missing Translation'.ljust(20)}: {key}")
    if num_miss_translation > 0:
        print(f"{'Missing Translation'.ljust(20)}: {num_miss_translation}")
    # write the processed result back to the JSON file
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(json_data, f, ensure_ascii=False, indent=4, sort_keys=True)
        f.write("\n")
    print(f" Updated {json_file} ".center(TITLE_LEN, "=") + '\n')
    print(f"Updated {language_file}".center(100, "=") + '\n')
print("Finished")
if __name__ == "__main__":
    code_keys = scan_i18n_strings()
    for json_file in os.listdir(I18N_JSON_DIR):
        if json_file.endswith(r".json"):
            json_file = os.path.join(I18N_JSON_DIR, json_file)
            update_i18n_json(json_file, code_keys)
        else:
            pass
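The diff collapses the body of extract_i18n_strings (old lines 10-21, new lines 15-26). For orientation only, a minimal sketch of such an AST walker, assuming the translation wrapper is called as i18n("...") with plain string literals; this is not necessarily the repository's actual implementation:

import ast

def extract_i18n_strings(node):
    # collect the literal arguments of every i18n("...") call under `node`
    i18n_strings = []
    if (
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id == "i18n"  # assumed wrapper name
    ):
        for arg in node.args:
            if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
                i18n_strings.append(arg.value)
    # recurse so calls nested anywhere in the tree are found as well
    for child in ast.iter_child_nodes(node):
        i18n_strings.extend(extract_i18n_strings(child))
    return i18n_strings

If the real helper uses a different call name, only the isinstance checks above would change; the rest of the script only relies on the function returning the list of extracted strings.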