diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift index 283e205ae..1b8087412 100644 --- a/Sources/LiveKit/Convenience/AudioProcessing.swift +++ b/Sources/LiveKit/Convenience/AudioProcessing.swift @@ -96,3 +96,122 @@ public extension Sequence where Iterator.Element == AudioLevel { peak: totalSums.peakSum / Float(count)) } } + +public class AudioVisualizeProcessor { + static let bufferSize = 1024 + + // MARK: - Public + + public let minFrequency: Float + public let maxFrequency: Float + public let minDB: Float + public let maxDB: Float + public let bandsCount: Int + public let isCentered: Bool + public let smoothingFactor: Float + + private var bands: [Float]? + + // MARK: - Private + + private let ringBuffer = RingBuffer(size: AudioVisualizeProcessor.bufferSize) + private let processor: FFTProcessor + + public init(minFrequency: Float = 10, + maxFrequency: Float = 8000, + minDB: Float = -32.0, + maxDB: Float = 32.0, + bandsCount: Int = 100, + isCentered: Bool = false, + smoothingFactor: Float = 0.3) // Smoothing factor for smoother transitions + { + self.minFrequency = minFrequency + self.maxFrequency = maxFrequency + self.minDB = minDB + self.maxDB = maxDB + self.bandsCount = bandsCount + self.isCentered = isCentered + self.smoothingFactor = smoothingFactor + + processor = FFTProcessor(bufferSize: Self.bufferSize) + bands = [Float](repeating: 0.0, count: bandsCount) + } + + public func process(pcmBuffer: AVAudioPCMBuffer) -> [Float]? { + guard let pcmBuffer = pcmBuffer.convert(toCommonFormat: .pcmFormatFloat32) else { return nil } + guard let floatChannelData = pcmBuffer.floatChannelData else { return nil } + + // Get the float array. + let floats = Array(UnsafeBufferPointer(start: floatChannelData[0], count: Int(pcmBuffer.frameLength))) + ringBuffer.write(floats) + + // Get full-size buffer if available, otherwise return + guard let buffer = ringBuffer.read() else { return nil } + + // Process FFT and compute frequency bands + let fftRes = processor.process(buffer: buffer) + let bands = fftRes.computeBands( + minFrequency: minFrequency, + maxFrequency: maxFrequency, + bandsCount: bandsCount, + sampleRate: Float(pcmBuffer.format.sampleRate) + ) + + let headroom = maxDB - minDB + + // Normalize magnitudes (already in decibels) + var normalizedBands = bands.magnitudes.map { magnitude in + let adjustedMagnitude = max(0, magnitude + abs(minDB)) + return min(1.0, adjustedMagnitude / headroom) + } + + // If centering is enabled, rearrange the normalized bands + if isCentered { + normalizedBands.sort(by: >) + normalizedBands = centerBands(normalizedBands) + } + + // Smooth transition using an easing function + self.bands = zip(self.bands ?? [], normalizedBands).map { old, new in + _smoothTransition(from: old, to: new, factor: smoothingFactor) + } + + return self.bands + } + + /// Centers the sorted bands by placing higher values in the middle. + private func centerBands(_ sortedBands: [Float]) -> [Float] { + var centeredBands = [Float](repeating: 0, count: sortedBands.count) + var leftIndex = sortedBands.count / 2 + var rightIndex = leftIndex + + for (index, value) in sortedBands.enumerated() { + if index % 2 == 0 { + // Place value to the right + centeredBands[rightIndex] = value + rightIndex += 1 + } else { + // Place value to the left + leftIndex -= 1 + centeredBands[leftIndex] = value + } + } + + return centeredBands + } + + /// Applies an easing function to smooth the transition. + private func _smoothTransition(from oldValue: Float, to newValue: Float, factor: Float) -> Float { + // Calculate the delta change between the old and new value + let delta = newValue - oldValue + // Apply an ease-in-out cubic easing curve + let easedFactor = _easeInOutCubic(t: factor) + // Calculate and return the smoothed value + return oldValue + delta * easedFactor + } + + /// Easing function: ease-in-out cubic + private func _easeInOutCubic(t: Float) -> Float { + t < 0.5 ? 4 * t * t * t : 1 - pow(-2 * t + 2, 3) / 2 + } +} diff --git a/Sources/LiveKit/Protocols/AudioCustomProcessingDelegate.swift b/Sources/LiveKit/Protocols/AudioCustomProcessingDelegate.swift index d085b3f83..0b84aa0f0 100644 --- a/Sources/LiveKit/Protocols/AudioCustomProcessingDelegate.swift +++ b/Sources/LiveKit/Protocols/AudioCustomProcessingDelegate.swift @@ -39,31 +39,29 @@ public protocol AudioCustomProcessingDelegate { func audioProcessingRelease() } -class AudioCustomProcessingDelegateAdapter: NSObject, LKRTCAudioCustomProcessingDelegate { +class AudioCustomProcessingDelegateAdapter: MulticastDelegate, LKRTCAudioCustomProcessingDelegate { // MARK: - Public public var target: AudioCustomProcessingDelegate? { _state.target } - // MARK: - Internal - - let audioRenderers = MulticastDelegate(label: "AudioRenderer") - // MARK: - Private private struct State { weak var target: AudioCustomProcessingDelegate? } - private var _state: StateSync - - init(target: AudioCustomProcessingDelegate? = nil) { - _state = StateSync(State(target: target)) - } + private var _state = StateSync(State()) public func set(target: AudioCustomProcessingDelegate?) { _state.mutate { $0.target = target } } + init() { + super.init(label: "AudioCustomProcessingDelegateAdapter") + } + + // MARK: - AudioCustomProcessingDelegate + func audioProcessingInitialize(sampleRate sampleRateHz: Int, channels: Int) { target?.audioProcessingInitialize(sampleRate: sampleRateHz, channels: channels) } @@ -73,24 +71,12 @@ class AudioCustomProcessingDelegateAdapter: NSObject, LKRTCAudioCustomProcessing target?.audioProcessingProcess(audioBuffer: lkAudioBuffer) // Convert to pcmBuffer and notify only if an audioRenderer is added. - if audioRenderers.isDelegatesNotEmpty, let pcmBuffer = lkAudioBuffer.toAVAudioPCMBuffer() { - audioRenderers.notify { $0.render(pcmBuffer: pcmBuffer) } + if isDelegatesNotEmpty, let pcmBuffer = lkAudioBuffer.toAVAudioPCMBuffer() { + notify { $0.render(pcmBuffer: pcmBuffer) } } } func audioProcessingRelease() { target?.audioProcessingRelease() } - - // Proxy the equality operators - - override func isEqual(_ object: Any?) -> Bool { - guard let other = object as? AudioCustomProcessingDelegateAdapter else { return false } - return target === other.target - } - - override var hash: Int { - guard let target else { return 0 } - return ObjectIdentifier(target).hashValue - } } diff --git a/Sources/LiveKit/Protocols/AudioRenderer.swift b/Sources/LiveKit/Protocols/AudioRenderer.swift index 535e606aa..7675a00da 100644 --- a/Sources/LiveKit/Protocols/AudioRenderer.swift +++ b/Sources/LiveKit/Protocols/AudioRenderer.swift @@ -29,26 +29,17 @@ public protocol AudioRenderer { func render(pcmBuffer: AVAudioPCMBuffer) } -class AudioRendererAdapter: NSObject, LKRTCAudioRenderer { - private weak var target: AudioRenderer? - private let targetHashValue: Int +class AudioRendererAdapter: MulticastDelegate, LKRTCAudioRenderer { + // + typealias Delegate = AudioRenderer - init(target: AudioRenderer) { - self.target = target - targetHashValue = ObjectIdentifier(target).hashValue + init() { + super.init(label: "AudioRendererAdapter") } - func render(pcmBuffer: AVAudioPCMBuffer) { - target?.render(pcmBuffer: pcmBuffer) - } + // MARK: - LKRTCAudioRenderer - // Proxy the equality operators - override func isEqual(_ object: Any?) -> Bool { - guard let other = object as? AudioRendererAdapter else { return false } - return targetHashValue == other.targetHashValue - } - - override var hash: Int { - targetHashValue + func render(pcmBuffer: AVAudioPCMBuffer) { + notify { $0.render(pcmBuffer: pcmBuffer) } } } diff --git a/Sources/LiveKit/Support/FFTProcessor.swift b/Sources/LiveKit/Support/FFTProcessor.swift new file mode 100755 index 000000000..83ab75cc7 --- /dev/null +++ b/Sources/LiveKit/Support/FFTProcessor.swift @@ -0,0 +1,147 @@ +/* + * Copyright 2024 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Accelerate +import AVFoundation + +extension Float { + var nyquistFrequency: Float { self / 2.0 } +} + +public struct FFTComputeBandsResult { + let count: Int + let magnitudes: [Float] + let frequencies: [Float] +} + +public class FFTResult { + public let magnitudes: [Float] + + init(magnitudes: [Float]) { + self.magnitudes = magnitudes + } + + func computeBands(minFrequency: Float, maxFrequency: Float, bandsCount: Int, sampleRate: Float) -> FFTComputeBandsResult { + let actualMaxFrequency = min(sampleRate.nyquistFrequency, maxFrequency) + var bandMagnitudes = [Float](repeating: 0.0, count: bandsCount) + var bandFrequencies = [Float](repeating: 0.0, count: bandsCount) + + let magLowerRange = _magnitudeIndex(for: minFrequency, sampleRate: sampleRate) + let magUpperRange = _magnitudeIndex(for: actualMaxFrequency, sampleRate: sampleRate) + let ratio = Float(magUpperRange - magLowerRange) / Float(bandsCount) + + return magnitudes.withUnsafeBufferPointer { magnitudesPtr in + for i in 0 ..< bandsCount { + let magsStartIdx = vDSP_Length(floorf(Float(i) * ratio)) + magLowerRange + let magsEndIdx = vDSP_Length(floorf(Float(i + 1) * ratio)) + magLowerRange + + let count = magsEndIdx - magsStartIdx + if count > 0 { + var sum: Float = 0 + vDSP_sve(magnitudesPtr.baseAddress! + Int(magsStartIdx), 1, &sum, count) + bandMagnitudes[i] = sum / Float(count) + } else { + bandMagnitudes[i] = magnitudes[Int(magsStartIdx)] + } + + // Compute average frequency + let bandwidth = sampleRate.nyquistFrequency / Float(magnitudes.count) + bandFrequencies[i] = (bandwidth * Float(magsStartIdx) + bandwidth * Float(magsEndIdx)) / 2 + } + + return FFTComputeBandsResult(count: bandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies) + } + } + + @inline(__always) private func _magnitudeIndex(for frequency: Float, sampleRate: Float) -> vDSP_Length { + vDSP_Length(Float(magnitudes.count) * frequency / sampleRate.nyquistFrequency) + } +} + +class FFTProcessor { + public enum WindowType { + case none + case hanning + case hamming + } + + public let bufferSize: vDSP_Length + public let windowType: WindowType + + private let bufferHalfSize: vDSP_Length + private let bufferLog2Size: vDSP_Length + private var window: [Float] = [] + private var fftSetup: FFTSetup + private var realBuffer: [Float] + private var imaginaryBuffer: [Float] + private var zeroDBReference: Float = 1.0 + + init(bufferSize: Int, windowType: WindowType = .hanning) { + self.bufferSize = vDSP_Length(bufferSize) + self.windowType = windowType + + bufferHalfSize = vDSP_Length(bufferSize / 2) + bufferLog2Size = vDSP_Length(log2f(Float(bufferSize))) + + realBuffer = [Float](repeating: 0.0, count: Int(bufferHalfSize)) + imaginaryBuffer = [Float](repeating: 0.0, count: Int(bufferHalfSize)) + window = [Float](repeating: 1.0, count: Int(bufferSize)) + + fftSetup = vDSP_create_fftsetup(UInt(bufferLog2Size), FFTRadix(FFT_RADIX2))! + + switch windowType { + case .none: + break + case .hanning: + vDSP_hann_window(&window, vDSP_Length(bufferSize), Int32(vDSP_HANN_NORM)) + case .hamming: + vDSP_hamm_window(&window, vDSP_Length(bufferSize), 0) + } + } + + deinit { + vDSP_destroy_fftsetup(fftSetup) + } + + func process(buffer: [Float]) -> FFTResult { + precondition(buffer.count == Int(bufferSize), "Input buffer size mismatch.") + + var windowedBuffer = [Float](repeating: 0.0, count: Int(bufferSize)) + + vDSP_vmul(buffer, 1, window, 1, &windowedBuffer, 1, bufferSize) + + return realBuffer.withUnsafeMutableBufferPointer { realPtr in + imaginaryBuffer.withUnsafeMutableBufferPointer { imagPtr in + var complexBuffer = DSPSplitComplex(realp: realPtr.baseAddress!, imagp: imagPtr.baseAddress!) + + windowedBuffer.withUnsafeBufferPointer { bufferPtr in + let complexPtr = UnsafeRawPointer(bufferPtr.baseAddress!).bindMemory(to: DSPComplex.self, capacity: Int(bufferHalfSize)) + vDSP_ctoz(complexPtr, 2, &complexBuffer, 1, bufferHalfSize) + } + + vDSP_fft_zrip(fftSetup, &complexBuffer, 1, bufferLog2Size, FFTDirection(FFT_FORWARD)) + + var magnitudes = [Float](repeating: 0.0, count: Int(bufferHalfSize)) + vDSP_zvabs(&complexBuffer, 1, &magnitudes, 1, bufferHalfSize) + + // Convert magnitudes to decibels + vDSP_vdbcon(magnitudes, 1, &zeroDBReference, &magnitudes, 1, vDSP_Length(magnitudes.count), 1) + + return FFTResult(magnitudes: magnitudes) + } + } + } +} diff --git a/Sources/LiveKit/Support/RingBuffer.swift b/Sources/LiveKit/Support/RingBuffer.swift new file mode 100644 index 000000000..ddbc1e682 --- /dev/null +++ b/Sources/LiveKit/Support/RingBuffer.swift @@ -0,0 +1,51 @@ +/* + * Copyright 2024 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Foundation + +// Simple ring-buffer used for internal audio processing. Not thread-safe. +class RingBuffer { + private var _isFull = false + private var _buffer: [T] + private var _head: Int = 0 + + init(size: Int) { + _buffer = [T](repeating: 0, count: size) + } + + func write(_ value: T) { + _buffer[_head] = value + _head = (_head + 1) % _buffer.count + if _head == 0 { _isFull = true } + } + + func write(_ sequence: [T]) { + for value in sequence { + write(value) + } + } + + func read() -> [T]? { + guard _isFull else { return nil } + + if _head == 0 { + return _buffer // Return the entire buffer if _head is at the start + } else { + // Return the buffer in the correct order + return Array(_buffer[_head ..< _buffer.count] + _buffer[0 ..< _head]) + } + } +} diff --git a/Sources/LiveKit/Track/AudioManager.swift b/Sources/LiveKit/Track/AudioManager.swift index 29d1b2815..f597d71b3 100644 --- a/Sources/LiveKit/Track/AudioManager.swift +++ b/Sources/LiveKit/Track/AudioManager.swift @@ -156,13 +156,13 @@ public class AudioManager: Loggable { // MARK: - AudioProcessingModule private lazy var capturePostProcessingDelegateAdapter: AudioCustomProcessingDelegateAdapter = { - let adapter = AudioCustomProcessingDelegateAdapter(target: nil) + let adapter = AudioCustomProcessingDelegateAdapter() RTC.audioProcessingModule.capturePostProcessingDelegate = adapter return adapter }() private lazy var renderPreProcessingDelegateAdapter: AudioCustomProcessingDelegateAdapter = { - let adapter = AudioCustomProcessingDelegateAdapter(target: nil) + let adapter = AudioCustomProcessingDelegateAdapter() RTC.audioProcessingModule.renderPreProcessingDelegate = adapter return adapter }() @@ -327,11 +327,11 @@ public extension AudioManager { /// Only ``AudioRenderer/render(pcmBuffer:)`` will be called. /// Usage: `AudioManager.shared.add(localAudioRenderer: localRenderer)` func add(localAudioRenderer delegate: AudioRenderer) { - capturePostProcessingDelegateAdapter.audioRenderers.add(delegate: delegate) + capturePostProcessingDelegateAdapter.add(delegate: delegate) } func remove(localAudioRenderer delegate: AudioRenderer) { - capturePostProcessingDelegateAdapter.audioRenderers.remove(delegate: delegate) + capturePostProcessingDelegateAdapter.remove(delegate: delegate) } } @@ -341,10 +341,10 @@ public extension AudioManager { /// To receive buffer for individual tracks, use ``RemoteAudioTrack/add(audioRenderer:)`` instead. /// Usage: `AudioManager.shared.add(remoteAudioRenderer: localRenderer)` func add(remoteAudioRenderer delegate: AudioRenderer) { - renderPreProcessingDelegateAdapter.audioRenderers.add(delegate: delegate) + renderPreProcessingDelegateAdapter.add(delegate: delegate) } func remove(remoteAudioRenderer delegate: AudioRenderer) { - renderPreProcessingDelegateAdapter.audioRenderers.remove(delegate: delegate) + renderPreProcessingDelegateAdapter.remove(delegate: delegate) } } diff --git a/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift b/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift index 6348e3eb4..94821a347 100644 --- a/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift +++ b/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift @@ -14,6 +14,7 @@ * limitations under the License. */ +import AVFoundation import CoreMedia #if swift(>=5.9) @@ -36,6 +37,8 @@ public class RemoteAudioTrack: Track, RemoteTrack, AudioTrack { } } + private lazy var _adapter = AudioRendererAdapter() + init(name: String, source: Track.Source, track: LKRTCMediaStreamTrack, @@ -48,14 +51,29 @@ public class RemoteAudioTrack: Track, RemoteTrack, AudioTrack { reportStatistics: reportStatistics) } - public func add(audioRenderer: AudioRenderer) { + deinit { + // Directly remove the adapter without unnecessary checks guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } - audioTrack.add(AudioRendererAdapter(target: audioRenderer)) + audioTrack.remove(_adapter) + } + + public func add(audioRenderer: AudioRenderer) { + let wasEmpty = _adapter.countDelegates == 0 + _adapter.add(delegate: audioRenderer) + // Attach adapter only if it wasn't attached before + if wasEmpty { + guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } + audioTrack.add(_adapter) + } } public func remove(audioRenderer: AudioRenderer) { - guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } - audioTrack.remove(AudioRendererAdapter(target: audioRenderer)) + _adapter.remove(delegate: audioRenderer) + // Remove adapter only if there are no more delegates + if _adapter.countDelegates == 0 { + guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } + audioTrack.remove(_adapter) + } } // MARK: - Internal