import srt

def parse_srt_with_lib(content):
    """Parse SRT-formatted text into a list of subtitle objects.

    Args:
        content: The SRT text handed to ``srt.parse``.

    Returns:
        A list holding every item produced by ``srt.parse(content)``,
        in the order the parser produced them.
    """
    # srt.parse's result is materialised so callers get an indexable list.
    return list(srt.parse(content))

def generate_srt_with_lib(subtitles):
    """Serialise subtitle objects back into SRT text.

    Args:
        subtitles: The subtitle objects handed to ``srt.compose``.

    Returns:
        Whatever ``srt.compose(subtitles)`` returns (the composed SRT text).
    """
    return srt.compose(subtitles)

def merge_subtitles_with_lib(subtitles, short_interval, max_interval, max_text_length=30, add_period=True, merge_zero_interval=True):
    """Merge adjacent subtitle cues into longer, sentence-like cues.

    The merge runs in two passes:

    1. (optional) Cues separated by less than ``short_interval`` seconds are
       glued together directly, unless the earlier cue already ends with a
       punctuation mark.
    2. A cue that does not end with a sentence-terminating mark absorbs the
       following cue when the gap between them is at most ``max_interval``
       seconds and the combined text stays below ``max_text_length``.

    Args:
        subtitles: Sequence of subtitle objects exposing ``start`` and ``end``
            (objects with ``total_seconds()``), a string ``content`` and a
            writable ``index`` (e.g. ``srt.Subtitle``). The sequence itself is
            not modified, but the objects in it are updated in place.
        short_interval: Gap threshold in seconds for pass 1 (strictly less).
        max_interval: Largest gap in seconds allowed for a pass 2 merge.
        max_text_length: Upper bound (exclusive) on the merged text length as
            measured by ``count_words_multilang``.
        add_period: Append '。' to every emitted cue that has text but does
            not end with a punctuation mark.
        merge_zero_interval: Enable pass 1.

    Returns:
        A new list of the merged subtitle objects, re-indexed from 1.
    """
    # Sentence-terminating marks: a cue ending in one of these is never
    # extended by pass 2.
    terminal_marks = ("。", "!", "！", "？", "?", "；", ";", "…")
    # Terminal marks plus clause-level marks. This must be a separate object:
    # extending an alias of the terminal list would make the two sets
    # identical, so cues ending in a comma/colon could never be merged.
    any_marks = terminal_marks + ("：", ":", "，", ",", "—")

    def close_cue(cue):
        # Terminate a finished cue with a full stop when requested. Empty
        # text is left alone so that a cue never consists of a bare period.
        if add_period and cue.content and not cue.content.endswith(any_marks):
            cue.content += '。'

    # Work on a shallow copy so pass 1 does not remove items from the
    # caller's list.
    subtitles = list(subtitles)

    # Pass 1: directly merge cues whose gap is very short. Walking backwards
    # keeps the remaining indices valid while items are popped.
    if merge_zero_interval:
        for i in range(len(subtitles) - 1, 0, -1):
            previous, following = subtitles[i - 1], subtitles[i]
            # str.endswith with a tuple is False for empty text, so empty
            # cues do not raise here.
            if previous.content.endswith(any_marks):
                continue
            gap = abs(following.start.total_seconds() - previous.end.total_seconds())
            if gap < short_interval:
                previous.end = following.end
                previous.content += following.content
                subtitles.pop(i)

    # Pass 2: grow the current cue while it is an unfinished sentence, the
    # next cue is close enough, and the merged text stays short enough.
    merged_subtitles = []
    current_subtitle = None
    for subtitle in subtitles:
        if current_subtitle is None:
            current_subtitle = subtitle
            continue
        gap = subtitle.start.total_seconds() - current_subtitle.end.total_seconds()
        if (
            not current_subtitle.content.endswith(terminal_marks)
            and gap <= max_interval
            and count_words_multilang(current_subtitle.content + subtitle.content) < max_text_length
        ):
            current_subtitle.end = subtitle.end
            # Join with a comma unless the text is empty or already ends
            # with a clause-level mark.
            text = current_subtitle.content
            joiner = '，' if text and not text.endswith(any_marks) else ''
            current_subtitle.content = text + joiner + subtitle.content
        else:
            close_cue(current_subtitle)
            merged_subtitles.append(current_subtitle)
            current_subtitle = subtitle

    if current_subtitle is not None:
        # The trailing cue gets the same closing treatment as all the others.
        close_cue(current_subtitle)
        merged_subtitles.append(current_subtitle)

    # Reassign consecutive ids starting at 1 for the merged cues.
    for i, subtitle in enumerate(merged_subtitles, start=1):
        subtitle.index = i
    return merged_subtitles


def count_words_multilang(text):
    """Count the "words" in mixed ASCII / non-ASCII text.

    A run of consecutive non-whitespace ASCII characters counts as one word,
    and every non-ASCII, non-whitespace character counts as one word on its
    own (so each CJK character is one unit). ASCII punctuation attached to an
    ASCII run is part of that run.

    Args:
        text: The string to measure.

    Returns:
        The number of counted units as an int (0 for empty or blank text).
    """
    word_count = 0
    in_word = False  # True while inside a run of ASCII characters

    for char in text:
        if char.isspace():
            # Whitespace ends any ASCII run.
            in_word = False
        elif char.isascii():
            if not in_word:
                # First character of a new ASCII run.
                word_count += 1
                in_word = True
        else:
            # Each non-ASCII character is its own unit. It also ends the
            # current ASCII run, so an ASCII word that follows directly
            # (e.g. "你好world") is counted as a new word.
            word_count += 1
            in_word = False

    return word_count

import pydub, os

def slice_audio_with_lib(audio_path, save_folder, format, subtitles, pre_preserve_time, post_preserve_time, pre_silence_time, post_silence_time, language='auto', character='character'):
    """Cut one audio clip per subtitle and write a ``datamapping.list`` index.

    For each subtitle the clip spans the cue's time range widened by
    ``pre_preserve_time`` / ``post_preserve_time`` seconds, but never past the
    midpoint of the gap to the neighbouring cue, and never before time 0.
    Clips are exported as ``001.<format>``, ``002.<format>``, ... into
    ``save_folder``, and one line ``file|character|language|text`` per clip is
    written to ``save_folder/datamapping.list`` (overwritten on every call).

    Args:
        audio_path: Path of the source audio, loaded with
            ``pydub.AudioSegment.from_file``.
        save_folder: Output directory; created if it does not exist.
        format: Export format, also used as the file extension.
        subtitles: Sequence of objects with ``start``/``end`` (supporting
            ``total_seconds()``) and ``content``.
        pre_preserve_time: Seconds of audio kept before each cue start.
        post_preserve_time: Seconds of audio kept after each cue end.
        pre_silence_time: Accepted but not used by this function.
        post_silence_time: Accepted but not used by this function.
        language: Language tag written to the list file.
        character: Speaker name written to the list file.

    Raises:
        Any exception raised while loading, slicing or exporting the audio,
        or while writing the list file, propagates unchanged.
    """
    if save_folder:
        os.makedirs(save_folder, exist_ok=True)
    list_file = os.path.join(save_folder, 'datamapping.list')
    with open(list_file, 'w', encoding="utf-8") as f:
        if not subtitles:
            return

        # Decode the source once; doing this per subtitle re-reads the whole
        # file on every iteration.
        audio = pydub.AudioSegment.from_file(audio_path)
        last = len(subtitles) - 1

        for i, subtitle in enumerate(subtitles):
            cue_start = subtitle.start.total_seconds()
            cue_end = subtitle.end.total_seconds()
            start = cue_start - pre_preserve_time
            end = cue_end + post_preserve_time
            if i < last:
                # Do not reach past the middle of the gap to the next cue.
                next_start = subtitles[i + 1].start.total_seconds()
                end = min(end, 0.5 * (cue_end + next_start))
            if i > 0:
                # Do not reach before the middle of the gap to the previous cue.
                prev_end = subtitles[i - 1].end.total_seconds()
                start = max(start, 0.5 * (prev_end + cue_start))
            # A negative slice bound does not mean "from the beginning"
            # (NOTE(review): pydub appears to treat it as an offset from the
            # end, like Python slicing - confirm), so clamp to 0.
            start = max(start, 0.0)

            sliced_audio = audio[int(start * 1000):int(end * 1000)]
            file_name = f'{i + 1:03d}.{format}'
            save_path = os.path.join(save_folder, file_name)
            sliced_audio.export(save_path, format=format)
            # NOTE(review): content containing newlines would span several
            # lines of the list file - confirm the consumer tolerates that.
            f.write(f"{file_name}|{character}|{language}|{subtitle.content}\n")