# Mirror of https://github.com/RVC-Boss/GPT-SoVITS.git (synced 2025-10-07 23:48:48 +08:00)
import ast
import json
import os
from collections import OrderedDict
# Example configuration for scanning the repository root instead:
# locale_path = "./i18n/locale"  # the i18n locale directory
# scan_list = ["./", "GPT_SoVITS/", "tools/"]  # directories to scan
# scan_subfolders = False  # whether to scan subfolders

# Directory holding the locale JSON files; zh_CN.json inside it is the
# reference file that gets regenerated at the bottom of this script.
locale_path = "./Inference/i18n/locale"
# Directories whose .py files are scanned for i18n("...") calls.
scan_list = ["./Inference/"]
# Recurse into subdirectories of each entry in scan_list.
scan_subfolders = True

# Keys whose zh_CN value is a curated Chinese string rather than the key
# itself; every other key maps to itself when the reference file is rebuilt.
special_words_to_keep = {
    "auto": "自动判断",
    "zh": "中文",
    "en": "英文",
    "ja": "日文",
    "all_zh": "只有中文",
    "all_ja": "只有日文",
    "auto_cut": "智能切分",
    "cut0": "仅凭换行切分",
    "cut1": "凑四句一切",
    "cut2": "凑50字一切",
    "cut3": "按中文句号。切",
    "cut4": "按英文句号.切",
    "cut5": "按标点符号切",
}
def extract_i18n_strings(node):
    """Recursively collect string literals passed to ``i18n(...)`` calls.

    Walks the AST rooted at *node* and gathers, in source order, every
    string-literal positional argument of a call whose callee is the bare
    name ``i18n``.

    Args:
        node: any ``ast.AST`` node, typically the tree from ``ast.parse``.

    Returns:
        list[str]: the extracted i18n keys (duplicates preserved).
    """
    i18n_strings = []

    # A direct call to the bare name i18n(...): harvest its literal args.
    if (
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id == "i18n"
    ):
        for arg in node.args:
            # ast.Str was deprecated in 3.8 and removed in 3.12; string
            # literals are ast.Constant nodes whose .value is a str.
            if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
                i18n_strings.append(arg.value)

    # Recurse into children so calls nested anywhere in the tree are found.
    for child_node in ast.iter_child_nodes(node):
        i18n_strings.extend(extract_i18n_strings(child_node))

    return i18n_strings
# Accumulates every i18n key found across all scanned files (duplicates kept).
strings = []


# For each file: parse the code into an AST, then pull out the i18n keys.
def scan_i18n_strings(filename):
    """Scan one .py file and append its i18n keys to the global ``strings``.

    Files that never mention ``I18nAuto`` are skipped entirely, since they
    cannot be using the i18n helper. Prints the per-file hit count.
    """
    with open(filename, "r", encoding="utf-8") as f:
        code = f.read()
        if "I18nAuto" not in code:
            return
        found = extract_i18n_strings(ast.parse(code))
        print(filename, len(found))
        strings.extend(found)
# Walk every configured folder (recursively only when scan_subfolders is set)
# and scan each .py file found.
if scan_subfolders:
    for folder in scan_list:
        for dirpath, _dirnames, filenames in os.walk(folder):
            candidates = (name for name in filenames if name.endswith(".py"))
            for name in candidates:
                scan_i18n_strings(os.path.join(dirpath, name))
else:
    for folder in scan_list:
        for name in os.listdir(folder):
            if not name.endswith(".py"):
                continue
            scan_i18n_strings(os.path.join(folder, name))
# Unique i18n keys discovered in the scanned sources.
code_keys = set(strings)

# Per-file hit counts from a historical run, kept for reference:
"""
n_i18n.py
gui_v1.py 26
app.py 16
infer-web.py 147
scan_i18n.py 0
i18n.py 0
lib/train/process_ckpt.py 1
"""

print()
print(f"Total unique: {len(code_keys)}")
# zh_CN.json is the reference locale; compare its keys against the code.
standard_file = os.path.join(locale_path, "zh_CN.json")
with open(standard_file, "r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)
standard_keys = set(standard_data)

# Keys present in the locale file but never referenced by the code.
unused_keys = standard_keys - code_keys
print("Unused keys:", len(unused_keys))
for key in unused_keys:
    print("\t", key)

# Keys referenced by the code but absent from the locale file.
missing_keys = code_keys - standard_keys
print("Missing keys:", len(missing_keys))
for key in missing_keys:
    print("\t", key)
# Regenerate the reference mapping: each key maps to itself unless it has a
# curated Chinese value in special_words_to_keep.
code_keys_dict = OrderedDict(
    (key, special_words_to_keep.get(key, key)) for key in strings
)

# Rewrite zh_CN.json (sorted keys, UTF-8 kept verbatim, trailing newline).
with open(standard_file, "w", encoding="utf-8") as f:
    json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True)
    f.write("\n")