mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2025-04-05 19:41:56 +08:00
Rewrite I18n Scan (#1294)
* Update Scan i18n * rewrite * fix details * remove
This commit is contained in:
parent
d65f0ff8b4
commit
ea83174fe0
@ -1,47 +0,0 @@
|
||||
import json
import os
from collections import OrderedDict

# Reference locale: every other locale file is synchronised against its keys.
standard_file = "locale/zh_CN.json"

# Collect every other JSON locale file in the directory.
dir_path = "locale/"
languages = [
    os.path.join(dir_path, f)
    for f in sorted(os.listdir(dir_path))
    # Compare the joined path, not the bare name: `f` never carries the
    # directory prefix, so comparing it with `standard_file` directly would
    # never exclude the reference file.
    if f.endswith(".json") and os.path.join(dir_path, f) != standard_file
]

# Load the reference file.
with open(standard_file, "r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)

# Bring each language file in line with the reference key set.
for lang_file in languages:
    with open(lang_file, "r", encoding="utf-8") as f:
        lang_data = json.load(f, object_pairs_hook=OrderedDict)

    # Keys the reference has but this file lacks.
    diff = set(standard_data.keys()) - set(lang_data.keys())
    # Keys this file has but the reference does not.
    miss = set(lang_data.keys()) - set(standard_data.keys())

    # Add the missing keys, using the key itself as a placeholder value.
    for key in diff:
        lang_data[key] = key

    # Drop the keys that are no longer part of the reference.
    for key in miss:
        del lang_data[key]

    # No explicit reordering is needed here: json.dump(sort_keys=True) below
    # writes the keys alphabetically whatever the in-memory order is.
    with open(lang_file, "w", encoding="utf-8") as f:
        json.dump(lang_data, f, ensure_ascii=False, indent=4, sort_keys=True)
        f.write("\n")
|
@ -1,8 +1,14 @@
|
||||
import ast
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
from collections import OrderedDict
|
||||
|
||||
# Directory that holds the locale JSON files: the "locale" folder next to this
# script, expressed relative to the current working directory. The value is a
# plain string, hence the `str` annotation.
I18N_JSON_DIR: str = os.path.join(os.path.dirname(os.path.relpath(__file__)), 'locale')
DEFAULT_LANGUAGE: str = "zh_CN"  # default language
TITLE_LEN: int = 60  # display width of titles
KEY_LEN: int = 30  # display width of key names / labels
SHOW_KEYS: bool = False  # whether to print per-key information
|
||||
|
||||
def extract_i18n_strings(node):
|
||||
i18n_strings = []
|
||||
@ -21,55 +27,83 @@ def extract_i18n_strings(node):
|
||||
|
||||
return i18n_strings
|
||||
|
||||
def scan_i18n_strings():
    """
    Collect the i18n strings used by the Python sources below the current directory.

    Scans recursively for ``*.py`` files. Every file whose text mentions
    ``I18nAuto`` is parsed into an AST and passed to ``extract_i18n_strings``.
    A per-file count and the number of unique strings are printed.

    Returns:
        set: the unique values returned by ``extract_i18n_strings`` across
        all scanned files.
    """
    strings = []
    print(" Scanning Files and Extracting i18n Strings ".center(TITLE_LEN, "="))
    # Sorted so the report is reproducible; glob yields in file-system order.
    for filename in sorted(glob.iglob("**/*.py", recursive=True)):
        with open(filename, "r", encoding="utf-8") as f:
            code = f.read()
        # Cheap textual pre-filter: files that never mention I18nAuto are
        # not parsed at all.
        if "I18nAuto" not in code:
            continue
        # Passing the file name makes a SyntaxError point at the right file.
        tree = ast.parse(code, filename=filename)
        i18n_strings = extract_i18n_strings(tree)
        print(f"{filename.ljust(KEY_LEN)}: {len(i18n_strings)}")
        strings.extend(i18n_strings)

    code_keys = set(strings)
    print(f"{'Total Unique'.ljust(KEY_LEN)}: {len(code_keys)}")
    return code_keys
|
||||
|
||||
# Recursively scan every .py file below the current directory, parse the ones
# that mention I18nAuto into an AST, and collect the i18n strings they use.
strings = []
for filename in glob.iglob("**/*.py", recursive=True):
    # Decode explicitly as UTF-8 so the sources are read the same way on
    # every platform instead of depending on the locale's default codec.
    with open(filename, "r", encoding="utf-8") as f:
        code = f.read()
    if "I18nAuto" in code:
        tree = ast.parse(code)
        i18n_strings = extract_i18n_strings(tree)
        print(filename, len(i18n_strings))
        strings.extend(i18n_strings)
code_keys = set(strings)
# NOTE(review): the listing below looks like sample output of the loop above
# (file name followed by the number of strings found); kept for reference.
#   n_i18n.py
#   gui_v1.py 26
#   app.py 16
#   infer-web.py 147
#   scan_i18n.py 0
#   i18n.py 0
#   lib/train/process_ckpt.py 1
print()
print("Total unique:", len(code_keys))
|
||||
def update_i18n_json(json_file, standard_keys):
    """
    Synchronise one locale JSON file with the given set of keys.

    Keys missing from the file are added: the value equals the key when the
    file name contains ``DEFAULT_LANGUAGE``, otherwise it is ``"#!" + key`` to
    flag the entry as untranslated. Keys not present in ``standard_keys`` are
    removed. A summary is printed and the file is rewritten in place with its
    keys sorted alphabetically.

    Args:
        json_file: path of the locale JSON file to update in place.
        standard_keys: iterable of keys the file must contain.
    """
    print(f" Process {json_file} ".center(TITLE_LEN, "="))
    # Read the JSON file.
    with open(json_file, "r", encoding="utf-8") as f:
        json_data = json.load(f, object_pairs_hook=OrderedDict)
    # Report the number of entries before processing.
    len_before = len(json_data)
    print(f"{'Total Keys'.ljust(KEY_LEN)}: {len_before}")

    wanted_keys = set(standard_keys)
    existing_keys = set(json_data)
    # Test only the file name: a substring match on the whole path would also
    # fire when a parent directory happens to contain the language code.
    is_default_language = DEFAULT_LANGUAGE in os.path.basename(json_file)

    # Identify the missing keys and add them (sorted for a stable report).
    miss_keys = sorted(wanted_keys - existing_keys)
    if miss_keys:
        print(f"{'Missing Keys (+)'.ljust(KEY_LEN)}: {len(miss_keys)}")
        for key in miss_keys:
            # Default language: value equals key. Other languages: prefix
            # "#!" marks the entry as not yet translated.
            json_data[key] = key if is_default_language else "#!" + key
            if SHOW_KEYS:
                print(f"{'Added Missing Key'.ljust(KEY_LEN)}: {key}")

    # Identify the superfluous keys and delete them.
    diff_keys = sorted(existing_keys - wanted_keys)
    if diff_keys:
        print(f"{'Unused Keys (-)'.ljust(KEY_LEN)}: {len(diff_keys)}")
        for key in diff_keys:
            del json_data[key]
            if SHOW_KEYS:
                print(f"{'Removed Unused Key'.ljust(KEY_LEN)}: {key}")

    # Order the entries by key. This matches what json.dump(sort_keys=True)
    # writes below and avoids a quadratic lookup against an unordered set.
    json_data = OrderedDict(sorted(json_data.items(), key=lambda item: item[0]))

    # Report the number of entries after processing, if anything changed.
    if miss_keys or diff_keys:
        print(f"{'Total Keys (After)'.ljust(KEY_LEN)}: {len(json_data)}")

    # Identify the keys that still await translation. Non-string values are
    # skipped instead of raising on `.startswith`.
    untranslated = [
        key
        for key, value in json_data.items()
        if isinstance(value, str) and value.startswith("#!")
    ]
    if SHOW_KEYS:
        for key in untranslated:
            print(f"{'Missing Translation'.ljust(KEY_LEN)}: {key}")
    num_miss_translation = len(untranslated)
    if num_miss_translation > 0:
        print(f"\033[31m{'[Failed] Missing Translation'.ljust(KEY_LEN)}: {num_miss_translation}\033[0m")
    else:
        print("\033[32m[Passed] All Keys Translated\033[0m")

    # Write the processed result back to the JSON file.
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(json_data, f, ensure_ascii=False, indent=4, sort_keys=True)
        f.write("\n")
    print(f" Updated {json_file} ".center(TITLE_LEN, "=") + '\n')
|
||||
|
||||
|
||||
# Define the standard file name and load it.
standard_file = "i18n/locale/zh_CN.json"
with open(standard_file, "r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)
standard_keys = set(standard_data.keys())

# Keys present in the standard file but not found in the scanned code.
# Printed in sorted order so the report is identical from run to run
# (plain set iteration order is not stable).
unused_keys = standard_keys - code_keys
print("Unused keys:", len(unused_keys))
for unused_key in sorted(unused_keys):
    print("\t", unused_key)

# Keys found in the scanned code but absent from the standard file.
missing_keys = code_keys - standard_keys
print("Missing keys:", len(missing_keys))
for missing_key in sorted(missing_keys):
    print("\t", missing_key)

# Every scanned string maps to itself in the standard locale.
code_keys_dict = OrderedDict((s, s) for s in strings)

# write back
with open(standard_file, "w", encoding="utf-8") as f:
    json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True)
    f.write("\n")
|
||||
if __name__ == "__main__":
    # Collect the keys used in the code, then synchronise every locale file.
    code_keys = scan_i18n_strings()
    # os.listdir returns entries in arbitrary order; sort them so the locale
    # files are processed (and reported) in a reproducible sequence.
    for json_file in sorted(os.listdir(I18N_JSON_DIR)):
        if json_file.endswith(".json"):
            json_file = os.path.join(I18N_JSON_DIR, json_file)
            update_i18n_json(json_file, code_keys)
|
Loading…
x
Reference in New Issue
Block a user