Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transcription events CLT-343 #406

Merged
merged 15 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions Sources/LiveKit/Core/Room+EngineDelegate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,32 @@ extension Room {
}
}
}

/// Handles an incoming transcription packet and forwards its segments to delegates.
///
/// Looks up the transcribed participant and the track publication referenced by the packet.
/// If either lookup fails, or the packet carries no segments, a warning is logged and
/// nothing is dispatched. Otherwise the converted segments are delivered to the room's
/// delegates and then to the participant's delegates.
///
/// - Parameter packet: The transcription packet to process.
func room(didReceiveTranscriptionPacket packet: Livekit_Transcription) {
// Try to find matching Participant.
guard let participant = allParticipants[Participant.Identity(from: packet.transcribedParticipantIdentity)] else {
log("[Transcription] Could not find participant: \(packet.transcribedParticipantIdentity)", .warning)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hiroshihorie you'll need to include support for transcription events coming down on the LocalParticipant as well, to record the results of STT in the agent

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did a quick spike and it works to just change the delegate methods to receive generic Participant and TrackPublication instead of the Remote variants, change these lines up here to the following

        let participant: Participant?
        
        let identity = Participant.Identity(from: packet.transcribedParticipantIdentity)
        if localParticipant.identity == identity {
            participant = localParticipant
        } else {
            participant = _state.read({ $0.remoteParticipants.values.first { $0.identity == identity } })
        }
            
        guard let participant else {
            log("[Transcription] Could not find participant: \(packet.transcribedParticipantIdentity)", .warning)
            return
        }

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you ! will update.

return
}

// Resolve the publication from the participant's state, keyed by the packet's track ID.
guard let publication = participant._state.read({ $0.trackPublications[Track.Sid(from: packet.trackID)] }) else {
log("[Transcription] Could not find publication: \(packet.trackID)", .warning)
return
}

// Convert each received segment into the SDK's `TranscriptionSegment` type.
let segments = packet.segments.map { $0.toLKType() }

// Delegates are never notified with an empty segment list.
guard !segments.isEmpty else {
log("[Transcription] Received segments are empty", .warning)
return
}

// Notify room-level delegates first...
delegates.notify {
$0.room?(self, participant: participant, trackPublication: publication, didReceiveTranscriptionSegments: segments)
}

// ...then the delegates registered on the participant itself.
participant.delegates.notify {
$0.participant?(participant, trackPublication: publication, didReceiveTranscriptionSegments: segments)
}
}
}
1 change: 1 addition & 0 deletions Sources/LiveKit/Core/Room.swift
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,7 @@ extension Room: DataChannelDelegate {
switch dataPacket.value {
case let .speaker(update): engine(self, didUpdateSpeakers: update.speakers)
case let .user(userPacket): engine(self, didReceiveUserPacket: userPacket)
case let .transcription(packet): room(didReceiveTranscriptionPacket: packet)
default: return
}
}
Expand Down
4 changes: 4 additions & 0 deletions Sources/LiveKit/Protocols/ParticipantDelegate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ public protocol ParticipantDelegate: AnyObject {
@objc optional
func participant(_ participant: Participant, trackPublication: TrackPublication, didUpdateIsMuted isMuted: Bool)

/// Received transcription segments.
///
/// - Parameters:
///   - participant: The participant whose speech was transcribed.
///   - trackPublication: The track publication the transcription refers to.
///   - segments: The received transcription segments.
@objc optional
func participant(_ participant: Participant, trackPublication: TrackPublication, didReceiveTranscriptionSegments segments: [TranscriptionSegment])

// MARK: - LocalTrackPublication

/// The ``LocalParticipant`` has published a ``LocalTrackPublication``.
Expand Down
4 changes: 4 additions & 0 deletions Sources/LiveKit/Protocols/RoomDelegate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ public protocol RoomDelegate: AnyObject {
@objc optional
func room(_ room: Room, participant: Participant, didUpdateAttributes attributes: [String: String])

/// Received transcription segments.
///
/// - Parameters:
///   - room: The room that received the transcription.
///   - participant: The participant whose speech was transcribed.
///   - trackPublication: The track publication the transcription refers to.
///   - segments: The received transcription segments.
@objc optional
func room(_ room: Room, participant: Participant, trackPublication: TrackPublication, didReceiveTranscriptionSegments segments: [TranscriptionSegment])

// MARK: - Track Publications

/// The ``LocalParticipant`` has published a ``LocalTrack``.
Expand Down
68 changes: 68 additions & 0 deletions Sources/LiveKit/Types/TranscriptionSegment.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright 2024 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import Foundation

/// A single segment of transcribed text.
///
/// Equality and hashing are based solely on ``id``: two segments with the same
/// identifier compare equal even when their other fields (for example ``text`` or
/// ``isFinal``) differ.
@objc
public class TranscriptionSegment: NSObject {
/// Identifier of the segment; the only field used for equality and hashing.
public let id: String
/// The transcribed text.
public let text: String
/// Language of the text, as reported by the received segment.
public let language: String
/// Start time as reported by the received segment (unit not established here — TODO confirm).
public let startTime: UInt64
/// End time as reported by the received segment (unit not established here — TODO confirm).
public let endTime: UInt64
/// Whether the received segment was flagged as final.
public let isFinal: Bool

/// Creates a segment from its individual field values.
///
/// Not `public`: within this file, instances are created by `Livekit_TranscriptionSegment.toLKType()`.
init(id: String,
text: String,
language: String,
startTime: UInt64,
endTime: UInt64,
isFinal: Bool)
{
self.id = id
self.text = text
self.language = language
self.startTime = startTime
self.endTime = endTime
self.isFinal = isFinal
}

// MARK: - Equal

/// Returns `true` when `object` is a segment with the same ``id``; other fields are not compared.
override public func isEqual(_ object: Any?) -> Bool {
guard let other = object as? Self else { return false }
return id == other.id
}

/// Hash derived from ``id`` only, consistent with ``isEqual(_:)``.
override public var hash: Int {
var hasher = Hasher()
hasher.combine(id)
return hasher.finalize()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hiroshihorie @lukasIO do you two think it's right to identify and hash these based on ID alone? It doesn't guarantee true equality, because a newer version could be received later that should be used instead.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's the intended purpose of the hash?

}
}

// MARK: - Internal

extension Livekit_TranscriptionSegment {
    /// Builds the SDK-facing ``TranscriptionSegment`` from this protobuf message,
    /// copying every field across unchanged.
    func toLKType() -> TranscriptionSegment {
        let converted = TranscriptionSegment(
            id: id,
            text: text,
            language: language,
            startTime: startTime,
            endTime: endTime,
            isFinal: final
        )
        return converted
    }
}