yzx 6 dias atrás
pai
commit
0f95b82c17

+ 2 - 0
src/main/java/com/telerobot/fs/entity/pojo/AsrProvider.java

@@ -3,6 +3,8 @@ package com.telerobot.fs.entity.pojo;
 public class AsrProvider {
     public static final String ALIYUN = "aliyun";
     public static final String ALI = "ali";
+    // Provider key for the Tencent realtime ASR bridge.
+    public static final String TX = "tx";
+    // Provider key for the Tencent MPS websocket ASR bridge.
+    public static final String TX1 = "tx1";
     public static final String FUN_ASR = "funasr";
     public static final String MICROSOFT = "microsoft";
     public static final String CHINA_TELECOM = "chinatelecom";

+ 25 - 0
src/main/java/com/telerobot/fs/robot/AbstractChatRobot.java

@@ -80,6 +80,7 @@ public abstract class AbstractChatRobot implements IChatRobot {
     protected List<JSONObject> llmRoundMessages = new ArrayList<>();
     protected int ttsTextLength = 0;
     protected ArrayBlockingQueue<String> ttsTextCache = new ArrayBlockingQueue<String>(2000);
+    protected volatile boolean directTtsPlaybackSentInRound = false;
     private final  String[] pauseFlags = new String[]{
             "?", "?",
             ",", ",",
@@ -103,6 +104,12 @@ public abstract class AbstractChatRobot implements IChatRobot {
         return TtsProvider.XFYUN.equalsIgnoreCase(ttsProvider);
     }
 
+    /**
+     * Tells whether the xfyun "clone" direct-TTS path applies to this robot.
+     *
+     * <p>All of the following must hold: the TTS provider is xfyun, the account's
+     * {@code ttsModels} equals "clone" (ignoring case and surrounding whitespace), and
+     * the account's {@code voiceCode} is not blank.
+     *
+     * @return {@code true} when every condition holds; {@code false} otherwise,
+     *         including when no account is available
+     */
+    protected boolean useXfyunCloneDirectTts() {
+        // A missing account means the clone settings cannot be read; answer "no" instead of throwing.
+        if (!isXfyunTtsProvider() || getAccount() == null) {
+            return false;
+        }
+        // isNotBlank already rejects null and whitespace-only values, so no extra trim is needed.
+        return "clone".equalsIgnoreCase(StringUtils.trimToEmpty(getAccount().ttsModels))
+                && StringUtils.isNotBlank(getAccount().voiceCode);
+    }
+
     protected boolean shouldFlushStreamingTtsChunk(String speechContent) {
         if (isXfyunTtsProvider()) {
             return false;
@@ -179,6 +186,12 @@ public abstract class AbstractChatRobot implements IChatRobot {
             return;
         }
 
+        if (useXfyunCloneDirectTts()) {
+            directTtsPlaybackSentInRound = true;
+            logger.info("{} mark direct streaming tts for xfyun clone text {} in current round.",
+                    uuid, text);
+        }
+
         if(TtsChannelState.CLOSED.getCode().equals(ttsChannelState.getCode())) {
             EslConnectionUtil.sendExecuteCommand("speak", buildSpeakCommand(text), uuid);
             ttsChannelState = TtsChannelState.TRYING_OPEN;
@@ -201,6 +214,18 @@ public abstract class AbstractChatRobot implements IChatRobot {
         this.ttsVoiceName = voiceName;
     }
 
+    /**
+     * Clears the flag that marks a direct TTS playback as sent in the current round.
+     */
+    @Override
+    public void resetRoundTtsPlaybackState() {
+        this.directTtsPlaybackSentInRound = false;
+    }
+
+    /**
+     * Returns the current value of the direct-TTS-playback flag and then clears it.
+     *
+     * @return {@code true} if the flag was set when it was read, {@code false} otherwise
+     */
+    @Override
+    public boolean consumeDirectTtsPlaybackFlag() {
+        // NOTE(review): the read and the clear are two separate accesses to the volatile
+        // field, so a concurrent write between them would be overwritten - confirm callers.
+        final boolean wasSent = this.directTtsPlaybackSentInRound;
+        this.directTtsPlaybackSentInRound = false;
+        return wasSent;
+    }
+
     private String buildSpeakCommand(String text) {
         if (!TtsProvider.XFYUN.equalsIgnoreCase(ttsProvider)) {
             return String.format("%s|%s|%s", ttsProvider, ttsVoiceName, text);

+ 4 - 0
src/main/java/com/telerobot/fs/robot/IChatRobot.java

@@ -72,4 +72,8 @@ public interface IChatRobot {
     void setTtsProvider(String provider);
 
     void setTtsVoiceName(String voiceName);
+
+    /**
+     * Resets the TTS playback state tracked for the current conversation round.
+     */
+    void resetRoundTtsPlaybackState();
+
+    /**
+     * Reports whether a direct TTS playback was flagged in the current round and clears the flag.
+     *
+     * @return {@code true} if the flag was set when this method was called
+     */
+    boolean consumeDirectTtsPlaybackFlag();
 }

+ 10 - 0
src/main/java/com/telerobot/fs/robot/RobotBase.java

@@ -318,9 +318,19 @@ public abstract class RobotBase implements IEslEventListener {
         if(secsPassedIn6SECS <= 500) {
             waitMills = 3000;
         }
+        if (shouldUseFastVadWaitForXfyun()) {
+            return Math.min(waitMills, 350L);
+        }
         return waitMills < interruptWaitMills ? interruptWaitMills : waitMills;
     }
 
+    /**
+     * Tells whether the shortened VAD wait should be used.
+     *
+     * @return {@code true} when a chat robot with an account is present and the account's
+     *         {@code voiceSource} equals the xfyun TTS provider name (ignoring case and
+     *         surrounding whitespace); {@code false} otherwise
+     */
+    protected boolean shouldUseFastVadWaitForXfyun() {
+        return chatRobot != null
+                && chatRobot.getAccount() != null
+                && TtsProvider.XFYUN.equalsIgnoreCase(
+                        StringUtils.trimToEmpty(chatRobot.getAccount().voiceSource));
+    }
+
     /**
      *  当前esl连接池对象;
      */

+ 13 - 5
src/main/java/com/telerobot/fs/robot/RobotChat.java

@@ -791,10 +791,12 @@ public class RobotChat extends RobotBase {
 
 
                 logger.info("{} send question to chatRobot: {}", getTraceId(), question);
+                chatRobot.resetRoundTtsPlaybackState();
                 aiphoneRes = chatRobot.talkWithAiAgent(question, kbQueryExecuted);
                 while ((aiphoneRes == null || aiphoneRes.getStatus_code() == 0)
                         && Llm_max_try_counter.get() < LLM_MAX_TRY) {
                     logger.error("{} llm api error, retry to send question to chatRobot: {}", getTraceId(), question);
+                    chatRobot.resetRoundTtsPlaybackState();
                     aiphoneRes = chatRobot.talkWithAiAgent(question, kbQueryExecuted);
                     Llm_max_try_counter.incrementAndGet();
                     if (checkCallSession()) {
@@ -859,6 +861,7 @@ public class RobotChat extends RobotBase {
                             interactWithRobot();
                             return;
                         }
+                        aiphoneRes.setBody(body);
                     }
 
                     if (checkCallSession()) {
@@ -866,11 +869,19 @@ public class RobotChat extends RobotBase {
                     }
 
                     if (aiphoneRes.getTransferToAgent() == 1) {
+                        if (shouldKeepAiConversationDuringTransferWait() && transferToAgentExecuted) {
+                            logger.info("{} duplicate transfer-to-agent instruction detected during manual-wait, keep current transfer task and continue reply body={}.",
+                                    getTraceId(),
+                                    aiphoneRes.getBody()
+                            );
+                            aiphoneRes.setTransferToAgent(0);
+                        } else {
                         if(!setTransferState()){
                             return;
                         }
                         doTransferToManualAgent(aiphoneRes);
                         return;
+                        }
                     }
 
                     if (aiphoneRes.getClose_phone() == 1) {
@@ -941,11 +952,8 @@ public class RobotChat extends RobotBase {
 
     private void playResponse(LlmAiphoneRes aiphoneRes){
         String ttsFilePathList = aiphoneRes.getTtsFilePathList();
-        if (useXfyunCloneFilePlayback()
-                && talkRound.longValue() == 1
-                && StringUtils.isEmpty(ttsFilePathList)
-                && StringUtils.isNotEmpty(aiphoneRes.getBody())) {
-            logger.info("{} skip duplicate first-round xfyun clone playback, text={}.",
+        if (useXfyunCloneFilePlayback() && chatRobot.consumeDirectTtsPlaybackFlag()) {
+            logger.info("{} skip xfyun clone file playback because response already streamed directly, text={}.",
                     getTraceId(),
                     aiphoneRes.getBody()
             );

+ 4 - 0
src/main/java/com/telerobot/fs/service/AsrResultListener.java

@@ -176,6 +176,10 @@ public class AsrResultListener implements ApplicationListener<ApplicationReadyEv
                 EslConnectionUtil.sendExecuteCommand("set", "aliyun_tts_token=" + token.getToken(), callMonitorInfo.getUuidAgent());
                 EslConnectionUtil.sendExecuteCommand("set", "aliyun_tts_app_key=" + token.getAppkey(), callMonitorInfo.getUuidAgent());
             }
+        } else if (asrProvider.equalsIgnoreCase(AsrProvider.TX)) {
+            logger.info("{} Use Tencent realtime ASR bridge for call transcription.", uuid);
+        } else if (asrProvider.equalsIgnoreCase(AsrProvider.TX1)) {
+            logger.info("{} Use Tencent MPS websocket ASR bridge for call transcription.", uuid);
         }
         logger.info("{} Try to start real-time voice recognition for call-center manual agent. asrProvider={}",uuid, asrProvider);
         EslConnectionUtil.sendExecuteCommand(String.format("start_%s_asr", asrProvider), "hello", callMonitorInfo.getUuid());