Skip to content

Commit

Permalink
Update restore_speech_timestamps()
Browse files Browse the repository at this point in the history
  • Loading branch information
jhj0517 committed Dec 17, 2024
1 parent c0a2a37 commit a9d0d45
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions modules/vad/silero_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,6 @@ def collect_chunks(audio: np.ndarray, chunks: List[dict]) -> np.ndarray:

return np.concatenate([audio[chunk["start"]: chunk["end"]] for chunk in chunks])

def get_chunk_index(self, time: float) -> int:
sample = int(time * self.sampling_rate)
return min(
bisect.bisect(self.chunk_end_sample, sample),
len(self.chunk_end_sample) - 1,
)

@staticmethod
def format_timestamp(
seconds: float,
Expand Down Expand Up @@ -260,8 +253,23 @@ def restore_speech_timestamps(
ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)

for segment in segments:
segment.start = ts_map.get_original_time(segment.start)
segment.end = ts_map.get_original_time(segment.end)
if segment.words:
words = []
for word in segment.words:
# Ensure the word start and end times are resolved to the same chunk.
middle = (word.start + word.end) / 2
chunk_index = ts_map.get_chunk_index(middle)
word.start = ts_map.get_original_time(word.start, chunk_index)
word.end = ts_map.get_original_time(word.end, chunk_index)
words.append(word)

segment.start = words[0].start
segment.end = words[-1].end
segment.words = words

else:
segment.start = ts_map.get_original_time(segment.start)
segment.end = ts_map.get_original_time(segment.end)

return segments

0 comments on commit a9d0d45

Please sign in to comment.