Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Google voice Support #385

Merged
merged 8 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ venv*
config.json
QR.png
nohup.out
tmp
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ cd chatgpt-on-wechat/
```bash
pip3 install itchat-uos==1.5.0.dev0
pip3 install --upgrade openai

如果使用google的语音识别,需要安装SpeechRecognition、pyttsx3,以及依赖的ffmpeg和espeak
pip3 install SpeechRecognition pyttsx3
--在MacOS中安装ffmpeg,brew install ffmpeg espeak
--在Windows中安装ffmpeg,下载ffmpeg.exe
--在Linux中安装ffmpeg,apt-get install ffmpeg espeak
```
注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。

Expand Down Expand Up @@ -112,7 +118,10 @@ cp config-template.json config.json
+ 默认只要被人 @ 就会触发机器人自动回复;另外群聊天中只要检测到以 "@bot" 开头的内容,同样会自动回复(方便自己触发),这对应配置项 `group_chat_prefix`
+ 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay))

**3.其他配置**
**3.语音识别**
+ 配置`speech_recognition=true`开启语音识别

**4.其他配置**

+ `proxy`:由于目前 `openai` 接口国内无法访问,需配置代理客户端的地址,详情参考 [#351](https://github.com/zhayujie/chatgpt-on-wechat/issues/351)
+ 对于图像生成,在满足个人或群组触发条件外,还需要额外的关键词前缀来触发,对应配置 `image_create_prefix `
Expand Down
7 changes: 7 additions & 0 deletions bridge/bridge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from bot import bot_factory
from voice import voice_factory


class Bridge(object):
Expand All @@ -7,3 +8,9 @@ def __init__(self):

def fetch_reply_content(self, query, context):
    """Ask the "chatGPT" bot for a reply.

    :param query: text handed to the bot's ``reply`` method
    :param context: context object passed through to ``reply`` unchanged
    :return: whatever the bot's ``reply`` returns
    """
    chat_bot = bot_factory.create_bot("chatGPT")
    return chat_bot.reply(query, context)

def fetch_voice_to_text(self, voiceFile):
    """Convert a voice file to text with the "google" voice service.

    :param voiceFile: voice file handed to the service's ``voiceToText``
    :return: whatever the service's ``voiceToText`` returns
    """
    voice_service = voice_factory.create_voice("google")
    return voice_service.voiceToText(voiceFile)

def fetch_text_to_voice(self, text):
    """Convert text to voice with the "google" voice service.

    :param text: text handed to the service's ``textToVoice``
    :return: whatever the service's ``textToVoice`` returns
    """
    voice_service = voice_factory.create_voice("google")
    return voice_service.textToVoice(text)
8 changes: 7 additions & 1 deletion channel/channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def startup(self):
"""
raise NotImplementedError

def handle(self, msg):
def handle_text(self, msg):
"""
process received msg
:param msg: message object
Expand All @@ -29,3 +29,9 @@ def send(self, msg, receiver):

def build_reply_content(self, query, context=None):
    """Build a reply for ``query`` by delegating to a new Bridge.

    :param query: text to reply to
    :param context: optional context forwarded to the bridge (default None)
    :return: result of ``Bridge.fetch_reply_content``
    """
    bridge = Bridge()
    return bridge.fetch_reply_content(query, context)

def build_voice_to_text(self, voice_file):
    """Turn a voice file into text by delegating to a new Bridge.

    :param voice_file: voice file forwarded to the bridge
    :return: result of ``Bridge.fetch_voice_to_text``
    """
    bridge = Bridge()
    return bridge.fetch_voice_to_text(voice_file)

def build_text_to_voice(self, text):
    """Turn text into voice by delegating to a new Bridge.

    :param text: text forwarded to the bridge
    :return: result of ``Bridge.fetch_text_to_voice``
    """
    bridge = Bridge()
    return bridge.fetch_text_to_voice(text)
68 changes: 56 additions & 12 deletions channel/wechat/wechat_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
"""
wechat channel
"""

import os
import itchat
import json
from itchat.content import *
Expand All @@ -18,7 +20,7 @@

@itchat.msg_register(TEXT)
def handler_single_msg(msg):
WechatChannel().handle(msg)
WechatChannel().handle_text(msg)
return None


Expand All @@ -28,9 +30,19 @@ def handler_group_msg(msg):
return None


@itchat.msg_register(VOICE)
def handler_single_voice(msg):
    """Callback registered with itchat for VOICE messages.

    Hands the message to ``handle_voice`` on a newly created WechatChannel
    and returns None.
    """
    channel = WechatChannel()
    channel.handle_voice(msg)
    return None


class WechatChannel(Channel):
tmpFilePath = './tmp/'
wanggang1987 marked this conversation as resolved.
Show resolved Hide resolved

def __init__(self):
    """Make sure the temporary directory used for voice files exists."""
    # The message handlers build a new WechatChannel for each message, so a
    # separate "exists" check followed by makedirs can race between two
    # handlers; exist_ok=True makes directory creation safe in that case.
    os.makedirs(self.tmpFilePath, exist_ok=True)

def startup(self):
# login by scan QRCode
Expand All @@ -39,12 +51,27 @@ def startup(self):
# start message listener
itchat.run()

def handle(self, msg):
logger.debug("[WX]receive msg: " + json.dumps(msg, ensure_ascii=False))
def handle_voice(self, msg):
    """Queue a received voice message for processing.

    Does nothing unless the ``speech_recognition`` config entry equals True.

    :param msg: voice message object; its 'FileName' entry is logged
    """
    # Comparison against True is kept exactly as before so that the set of
    # config values that enable the feature does not change.
    if conf().get('speech_recognition') != True:
        return
    # The file name is passed through a %s placeholder: giving logging an
    # extra argument without a placeholder produces a formatting error
    # instead of the intended log line.
    logger.debug("[WX]receive voice msg: %s", msg['FileName'])
    thread_pool.submit(self._do_handle_voice, msg)

def _do_handle_voice(self, msg):
    """Download a voice message, convert it to text and route the text.

    The file is saved under ``tmpFilePath`` using the message's 'FileName',
    converted with ``build_voice_to_text`` and then passed to
    ``_handle_single_msg`` with ``is_voice`` set to True.

    :param msg: voice message object providing 'FileName' and ``download``
    """
    # This method is submitted to thread_pool and the result of submit is
    # never inspected by the caller, so (presumably, as with an executor)
    # an exception raised here would otherwise go unreported. Log it the
    # same way the other worker methods of this class do.
    try:
        fileName = self.tmpFilePath + msg['FileName']
        msg.download(fileName)
        content = super().build_voice_to_text(fileName)
        self._handle_single_msg(msg, content, True)
    except Exception as e:
        logger.exception(e)

def handle_text(self, msg):
    """Log a received text message and route its text.

    :param msg: message object; it is dumped as JSON for the debug log and
        its 'Text' entry is passed on as the content
    """
    dumped = json.dumps(msg, ensure_ascii=False)
    logger.debug("[WX]receive text msg: " + dumped)
    self._handle_single_msg(msg, msg['Text'], False)

def _handle_single_msg(self, msg, content, is_voice):
from_user_id = msg['FromUserName']
to_user_id = msg['ToUserName'] # 接收人id
other_user_id = msg['User']['UserName'] # 对手方id
content = msg['Text']
match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
if "」\n- - - - - - - - - - - - - - -" in content:
logger.debug("[WX]reference query skipped")
Expand All @@ -60,9 +87,10 @@ def handle(self, msg):
if img_match_prefix:
content = content.split(img_match_prefix, 1)[1].strip()
thread_pool.submit(self._do_send_img, content, from_user_id)
else:
thread_pool.submit(self._do_send, content, from_user_id)

elif is_voice:
thread_pool.submit(self._do_send_voice, content, from_user_id)
else :
thread_pool.submit(self._do_send_text, content, from_user_id)
elif to_user_id == other_user_id and match_prefix:
# 自己给好友发送消息
str_list = content.split(match_prefix, 1)
Expand All @@ -72,8 +100,10 @@ def handle(self, msg):
if img_match_prefix:
content = content.split(img_match_prefix, 1)[1].strip()
thread_pool.submit(self._do_send_img, content, to_user_id)
elif is_voice:
thread_pool.submit(self._do_send_voice, content, to_user_id)
else:
thread_pool.submit(self._do_send, content, to_user_id)
thread_pool.submit(self._do_send_text, content, to_user_id)


def handle_group(self, msg):
Expand Down Expand Up @@ -105,10 +135,24 @@ def handle_group(self, msg):
thread_pool.submit(self._do_send_group, content, msg)

def send(self, msg, receiver):
logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))
itchat.send(msg, toUserName=receiver)
logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))

def _do_send(self, query, reply_user_id):
def _do_send_voice(self, query, reply_user_id):
    """Reply to ``query`` with a voice file.

    Builds a text reply, converts it to a voice file and sends that file to
    ``reply_user_id``. An empty query or an empty reply sends nothing. Any
    exception is logged and not re-raised.

    :param query: text to reply to
    :param reply_user_id: user name the file is sent to; also stored in the
        context under 'from_user_id'
    """
    try:
        if not query:
            return
        reply_context = {'from_user_id': reply_user_id}
        answer = super().build_reply_content(query, reply_context)
        if not answer:
            return
        voice_path = super().build_text_to_voice(answer)
        itchat.send_file(voice_path, toUserName=reply_user_id)
        logger.info('[WX] sendFile={}, receiver={}'.format(voice_path, reply_user_id))
    except Exception as err:
        logger.exception(err)

def _do_send_text(self, query, reply_user_id):
try:
if not query:
return
Expand Down Expand Up @@ -138,8 +182,8 @@ def _do_send_img(self, query, reply_user_id):
image_storage.seek(0)

# 图片发送
logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
itchat.send_image(image_storage, reply_user_id)
logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
except Exception as e:
logger.exception(e)

Expand Down
1 change: 1 addition & 0 deletions config-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
"image_create_prefix": ["画", "看", "找"],
"conversation_max_tokens": 1000,
"speech_recognition": false,
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
"expires_in_seconds": 3600
}
50 changes: 50 additions & 0 deletions voice/google/google_voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

"""
google voice service
"""

import subprocess
import time
import speech_recognition
import pyttsx3
from common.log import logger
from voice.voice import Voice


class GoogleVoice(Voice):
    """Voice service using Google speech recognition and pyttsx3 synthesis."""

    # Directory where synthesized reply files are written.
    tmpFilePath = './tmp/'
    # Recognizer and TTS engine are created once and shared by all instances.
    recognizer = speech_recognition.Recognizer()
    engine = pyttsx3.init()

    def __init__(self):
        """Configure the shared TTS engine (rate, volume, voice)."""
        # Speaking rate
        self.engine.setProperty('rate', 125)
        # Volume
        self.engine.setProperty('volume', 1.0)
        # Index 0 is the male voice, 1 the female voice (per the original
        # author; ordering depends on the installed voices, TODO confirm).
        voices = self.engine.getProperty('voices')
        # Only switch voices when a second one exists; otherwise keep the
        # engine default instead of failing with IndexError.
        if len(voices) > 1:
            self.engine.setProperty('voice', voices[1].id)

    def voiceToText(self, voice_file):
        """Convert a voice file to text.

        The file is first converted with ffmpeg to a 16 kHz mono 16-bit PCM
        WAV file next to the original, then sent to Google recognition with
        language 'zh-CN'.

        :param voice_file: path of the audio file to transcribe
        :return: recognized text, or an apology message string when the
            audio is not understood or the service request fails
        """
        import os

        # Swap only the real extension, so inputs that are not ".mp3" still
        # get a distinct output path.
        new_file = os.path.splitext(voice_file)[0] + '.wav'
        # An argument list without a shell keeps spaces or shell
        # metacharacters in the file name from altering the command; "-y"
        # overwrites a leftover output file instead of prompting for input.
        subprocess.call(['ffmpeg', '-y', '-i', voice_file,
                         '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000',
                         new_file])
        with speech_recognition.AudioFile(new_file) as source:
            audio = self.recognizer.record(source)
        try:
            text = self.recognizer.recognize_google(audio, language='zh-CN')
            logger.info(
                '[Google] voiceToText text={} voice file name={}'.format(text, voice_file))
            return text
        except speech_recognition.UnknownValueError:
            return "抱歉,我听不懂。"
        except speech_recognition.RequestError as e:
            return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e)

    def textToVoice(self, text):
        """Synthesize ``text`` into a file under ``tmpFilePath``.

        :param text: text to speak
        :return: path of the generated file; the name is built from the
            current Unix time in whole seconds
        """
        textFile = self.tmpFilePath + '语音回复_' + str(int(time.time())) + '.mp3'
        self.engine.save_to_file(text, textFile)
        self.engine.runAndWait()
        logger.info(
            '[Google] textToVoice text={} voice file name={}'.format(text, textFile))
        return textFile
16 changes: 16 additions & 0 deletions voice/voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
Voice service abstract class
"""

class Voice(object):
    """Interface for voice services; subclasses override both methods."""

    def voiceToText(self, voice_file):
        """
        Turn the given voice file into text via the voice service.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def textToVoice(self, text):
        """
        Turn the given text into voice via the voice service.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()
17 changes: 17 additions & 0 deletions voice/voice_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""
voice factory
"""

def create_voice(voice_type):
    """
    create a voice instance
    :param voice_type: voice type code, 'xfyun' or 'google'
    :return: voice instance
    :raises RuntimeError: if voice_type is not one of the supported codes
    """
    # Implementations are imported lazily so that only the selected
    # service's dependencies need to be installed.
    if voice_type == 'xfyun':
        from voice.xfyun.xfyun_voice import XfyunVoice
        return XfyunVoice()
    elif voice_type == 'google':
        from voice.google.google_voice import GoogleVoice
        return GoogleVoice()
    # Name the offending value so a misconfiguration can be diagnosed.
    raise RuntimeError('unsupported voice type: {!r}'.format(voice_type))
35 changes: 35 additions & 0 deletions voice/xfyun/xfyun_voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

"""
科大讯飞 voice service
"""

from voice.voice import Voice

# iFLYTEK (Xfyun) speech recognition
lfasr_host = 'http://raasr.xfyun.cn/api'
# API endpoint names used in requests
api_prepare = '/prepare'
api_upload = '/upload'
api_merge = '/merge'
api_get_progress = '/getProgress'
api_get_result = '/getResult'
# File slice size: 10 MB
file_piece_sice = 10485760
# ---------------- Configurable transcription parameters ----------------
# The parameters are documented on the official site (https://doc.xfyun.cn/rest_api/%E8%AF%AD%E9%9F%B3%E8%BD%AC%E5%86%99.html); add or modify them in the gene_params method as needed
# Transcription type
lfasr_type = 0
# Whether to enable word segmentation
has_participle = 'false'
has_seperate = 'true'
# Number of alternative candidate words
max_alternatives = 0
# Sub-user identifier
suid = ''

class XfyunVoice(Voice):
    """iFLYTEK (Xfyun) voice service placeholder; no recognition logic yet."""

    def __init__(self):
        """Nothing to initialise."""

    def voiceToText(self, voice_file):
        """Placeholder: performs no recognition and returns None."""
        return None