# -*- coding: utf-8 -*-
import re
import random
import logging
from common import const, api
from util import manuscript_util

# Queue of manuscript texts awaiting TTS; get_manuscript_list() appends
# freshly generated texts to the tail and consumes from the head.
manuscript_list = []


def get_manuscript_list() -> list:
    """Return the manuscript texts for the current TTS round.

    The module-level ``manuscript_list`` queue is topped up from the current
    project's manuscript whenever it holds fewer than
    ``const.TTS_ROUND_COUNT`` entries, then up to ``const.TTS_ROUND_COUNT``
    entries are removed from its head and returned.

    Returns:
        A list of texts. It is empty when ``api.get_project()`` returns
        ``None``, and may be shorter than ``const.TTS_ROUND_COUNT`` when the
        project's manuscript produces no usable text.
    """

    project = api.get_project()
    if project is None:
        return []

    # Refill the queue (mutated in place, never rebound) until it can serve
    # one full round.
    while len(manuscript_list) < const.TTS_ROUND_COUNT:
        # Generate a fresh variation of the product manuscript, one text per line.
        product_manuscript_list = manuscript_util.gen_manuscript(project['manuscript']['text']).split('\n')

        # Honour the project's random-order setting.
        if project['manuscript']['is_random']:
            random.shuffle(product_manuscript_list)

        final_manuscript_list = []
        for text in product_manuscript_list:
            text = text.strip()

            # Peel off the trailing '+' markers so punctuation and splitting
            # operate on the text itself; they are re-attached below.
            body = text.rstrip('+')
            plus_char = text[len(body):]
            text = body.rstrip()

            # Skip blank lines and lines that consisted only of '+' markers
            # (the latter used to raise IndexError on text[-1]).
            if not text:
                continue

            # Make sure the text ends with sentence-final punctuation.
            if text[-1] not in ('。', '！', '？'):
                text += '。'

            # Short texts are used as-is; long ones are split into sub-sentences.
            if len(text) < 30:
                pieces = [text]
            else:
                # Fall back to the whole text if the splitter yields nothing,
                # so the '+' markers never attach to a previous line's text.
                pieces = __split_long_text(text) or [text]

            # Restore the trailing '+' markers on the last piece of this line.
            pieces[-1] += plus_char
            final_manuscript_list.extend(pieces)

        # Nothing usable was generated: stop instead of looping forever.
        if not final_manuscript_list:
            logging.warning('产品话术为空，无法补充话术队列')
            break

        # Append to the tail of the queue.
        manuscript_list.extend(final_manuscript_list)
        logging.info(f'话术队列余量不足，补充话术文本：{final_manuscript_list}')

    # Take this round's texts from the head of the queue in one step.
    count = max(const.TTS_ROUND_COUNT, 0)
    tts_text_list = manuscript_list[:count]
    del manuscript_list[:count]

    return tts_text_list


def __split_long_text(text: str, min_target: int = 20, max_limit: int = 25) -> list:
    """Split a long text into chunks built from whole sentences.

    The text is cut after each of the punctuation marks ``。！？：`` and the
    resulting sentences are greedily packed into chunks. A chunk is emitted
    as soon as it reaches ``min_target`` characters, and a sentence that would
    push the current chunk past ``max_limit`` starts a new chunk instead.

    A single sentence longer than ``max_limit`` is emitted unsplit, so a chunk
    may exceed ``max_limit`` in that case.

    Args:
        text: The text to split.
        min_target: Chunk length at which the current chunk is emitted.
        max_limit: Maximum length a chunk may reach by combining sentences.

    Returns:
        The chunks in their original order; an empty list for blank input.
    """

    # The capturing group makes re.split keep the delimiters, so ``parts``
    # alternates between sentence bodies (even indexes) and punctuation (odd).
    parts = re.split(r'([。！？：])', text)

    # Re-attach each punctuation mark to the sentence before it. Iterating up
    # to len(parts) (not len(parts) - 1) keeps trailing text that has no
    # terminating punctuation instead of silently dropping it.
    sentences = []
    for i in range(0, len(parts), 2):
        sentence = ''.join(parts[i:i + 2]).strip()
        if sentence:
            sentences.append(sentence)

    result = []
    current_chunk = []
    current_length = 0  # number of characters in the current chunk

    for sentence in sentences:
        sentence_length = len(sentence)
        if current_length + sentence_length > max_limit:
            # The sentence does not fit: emit what has been collected so far.
            if current_chunk:
                result.append(''.join(current_chunk))
                current_chunk = []
                current_length = 0
            if sentence_length > max_limit:
                # An over-long sentence cannot be combined; emit it unsplit.
                result.append(sentence)
            else:
                current_chunk.append(sentence)
                current_length = sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length
            # Emit the chunk once it is long enough.
            if current_length >= min_target:
                result.append(''.join(current_chunk))
                current_chunk = []
                current_length = 0

    # Emit whatever is left over.
    if current_chunk:
        result.append(''.join(current_chunk))

    return result
