
Commit

Make recording smooth
vietanhdev committed Oct 6, 2024
1 parent e94cb37 commit ffa3dd4
Showing 2 changed files with 63 additions and 33 deletions.
21 changes: 19 additions & 2 deletions llama_assistant/llama_assistant_app.py
@@ -686,19 +686,36 @@ def start_voice_input(self):
self.mic_button.setStyleSheet(
"""
QPushButton {
background-color: rgba(255, 0, 0, 0.5);
background-color: rgba(240, 150, 20, 0.5);
border: none;
border-radius: 20px;
}
QPushButton:hover {
background-color: rgba(255, 0, 0, 0.6);
background-color: rgba(240, 150, 20, 0.6);
}
"""
)
self.speech_thread = SpeechRecognitionThread()
self.speech_thread.finished.connect(self.on_speech_recognized)
self.speech_thread.error.connect(self.on_speech_error)
self.speech_thread.start()

# Use QTimer to delay the application of the second style
QTimer.singleShot(500, self.update_mic_button_style)

def update_mic_button_style(self):
self.mic_button.setStyleSheet(
"""
QPushButton {
background-color: rgba(255, 0, 0, 0.5);
border: none;
border-radius: 20px;
}
QPushButton:hover {
background-color: rgba(255, 0, 0, 0.6);
}
"""
)

def stop_voice_input(self):
if self.speech_thread and self.speech_thread.isRunning():
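The hunk above replaces the immediate red "recording" style with a brief orange transition style, then switches to red 500 ms later via QTimer.singleShot. Below is a minimal, self-contained sketch of that delayed-restyle pattern, assuming PyQt5 (the binding this app appears to use); the widget and constant names are illustrative, not part of the commit.

    # Hedged sketch: temporary style applied immediately, final style applied after a 500 ms delay.
    # Assumes PyQt5; the names STARTING_STYLE, RECORDING_STYLE, and button are illustrative only.
    import sys
    from PyQt5.QtCore import QTimer
    from PyQt5.QtWidgets import QApplication, QPushButton

    STARTING_STYLE = (
        "QPushButton { background-color: rgba(240, 150, 20, 0.5);"
        " border: none; border-radius: 20px; }"
    )
    RECORDING_STYLE = (
        "QPushButton { background-color: rgba(255, 0, 0, 0.5);"
        " border: none; border-radius: 20px; }"
    )

    app = QApplication(sys.argv)
    button = QPushButton("mic")
    button.setStyleSheet(STARTING_STYLE)  # shown the moment voice input starts
    QTimer.singleShot(500, lambda: button.setStyleSheet(RECORDING_STYLE))  # swapped in after 500 ms
    button.show()
    sys.exit(app.exec_())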
75 changes: 44 additions & 31 deletions llama_assistant/speech_recognition_thread.py
@@ -14,10 +14,12 @@ class SpeechRecognitionThread(QThread):
finished = pyqtSignal(str)
error = pyqtSignal(str)
WHISPER_THREADS = 1
MAX_RECORDING_TIME = 60 # Maximum recording time in seconds

def __init__(self):
super().__init__()
self.stop_listening = False
self.recording = False

# Initialize Whisper model
self.whisper = Whisper("tiny")
@@ -31,46 +31,54 @@ def __init__(self):
self.CHANNELS = 1
self.RATE = 16000
self.CHUNK = 1024
self.RECORD_SECONDS = 2 # Reduced to 2 seconds for more frequent transcription

def wait_for_recording(self):
while not self.recording:
time.sleep(0.1)

def run(self):
self.stop_listening = False
audio = pyaudio.PyAudio()
frames = []
start_time = time.time()

try:
stream = audio.open(format=self.FORMAT, channels=self.CHANNELS,
rate=self.RATE, input=True,
frames_per_buffer=self.CHUNK)

print("Always-on microphone activated. Listening...")

while not self.stop_listening:
frames = []
for _ in range(0, int(self.RATE / self.CHUNK * self.RECORD_SECONDS)):
data = stream.read(self.CHUNK)
frames.append(data)

# Save audio data to temporary file
tmp_filepath = self.tmp_audio_folder / f"temp_audio_{time.time()}.wav"
wf = wave.open(str(tmp_filepath), 'wb')
wf.setnchannels(self.CHANNELS)
wf.setsampwidth(audio.get_sample_size(self.FORMAT))
wf.setframerate(self.RATE)
wf.writeframes(b''.join(frames))
wf.close()

# Transcribe audio
res = self.whisper.transcribe(str(tmp_filepath))
transcription = self.whisper.extract_text(res)
os.remove(tmp_filepath)

if isinstance(transcription, list):
# Remove all "[BLANK_AUDIO]" from the transcription
transcription = " ".join(transcription)
transcription = re.sub(r"\[BLANK_AUDIO\]", "", transcription)

if transcription.strip(): # Only emit if there's non-empty transcription
self.finished.emit(transcription)
self.recording = True

while not self.stop_listening and (time.time() - start_time) < self.MAX_RECORDING_TIME:
data = stream.read(self.CHUNK)
frames.append(data)

self.recording = False
print("Stopped recording. Processing audio...")

# Save audio data to temporary file
tmp_filepath = self.tmp_audio_folder / f"temp_audio_{time.time()}.wav"
wf = wave.open(str(tmp_filepath), 'wb')
wf.setnchannels(self.CHANNELS)
wf.setsampwidth(audio.get_sample_size(self.FORMAT))
wf.setframerate(self.RATE)
wf.writeframes(b''.join(frames))
wf.close()

# Transcribe audio
res = self.whisper.transcribe(str(tmp_filepath))
transcription = self.whisper.extract_text(res)
os.remove(tmp_filepath)

if isinstance(transcription, list):
# Remove all "[BLANK_AUDIO]" from the transcription
transcription = " ".join(transcription)
transcription = re.sub(r"\[BLANK_AUDIO\]", "", transcription)


if transcription.strip(): # Only emit if there's non-empty transcription
self.finished.emit(transcription)

except Exception as e:
self.error.emit(f"An error occurred: {str(e)}")
@@ -123,12 +133,15 @@ def start_recognition(self):

def stop_recognition(self):
self.thread.stop()
self.status_label.setText("Always-on speech recognition stopped")
self.start_button.setEnabled(True)
self.status_label.setText("Processing recorded audio...")
self.start_button.setEnabled(False)
self.stop_button.setEnabled(False)

def on_finished(self, text):
self.transcription_label.setText(f"Transcription: {text}")
self.status_label.setText("Speech recognition completed")
self.start_button.setEnabled(True)
self.stop_button.setEnabled(False)

def on_error(self, error_message):
self.status_label.setText(f"Error: {error_message}")
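Taken together, these changes make the thread record continuously into a single buffer (up to MAX_RECORDING_TIME seconds) and transcribe once after recording stops, instead of transcribing every two seconds. Below is a hedged sketch of how a caller could drive the reworked thread, using only the API visible in this diff (start(), stop(), wait_for_recording(), and the finished/error signals); the handler names are illustrative, and a running Qt event loop is assumed for cross-thread signal delivery.

    # Hedged sketch based only on the API visible in this diff; handler names are illustrative.
    from llama_assistant.speech_recognition_thread import SpeechRecognitionThread

    def on_text(text):
        # Emitted once, after recording stops and Whisper has transcribed the buffered audio.
        print("Transcribed:", text)

    def on_error(message):
        print("Speech recognition error:", message)

    thread = SpeechRecognitionThread()
    thread.finished.connect(on_text)
    thread.error.connect(on_error)

    thread.start()               # audio capture begins on the worker thread
    thread.wait_for_recording()  # blocks until the stream is open and self.recording is True
    # ... user speaks; capture ends on stop() or after MAX_RECORDING_TIME (60 s) ...
    thread.stop()                # assumed to set stop_listening, as called in stop_recognition()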
