Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audio visualization helpers #474

Merged
merged 27 commits into from
Oct 27, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
19906ee
Create FloatRingBuffer.swift
hiroshihorie Sep 4, 2024
cc30821
Processor
hiroshihorie Sep 4, 2024
a5ba133
Optimize
hiroshihorie Sep 4, 2024
6c26a4a
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Sep 10, 2024
c09022d
Optimize
hiroshihorie Sep 10, 2024
3492254
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Sep 20, 2024
6ebd537
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Sep 24, 2024
fec59e1
Apply windowType
hiroshihorie Sep 24, 2024
fa2b5d0
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Sep 29, 2024
6588a95
centering, min max db param
hiroshihorie Sep 29, 2024
0a8f253
smoothing 1
hiroshihorie Sep 29, 2024
8e3dbe9
smoothing 2
hiroshihorie Sep 29, 2024
053aa0c
Optimize
hiroshihorie Sep 29, 2024
ebaf87d
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 2, 2024
e5c6df2
Optimize
hiroshihorie Oct 3, 2024
0c41cfa
logarithmic
hiroshihorie Oct 3, 2024
df8f3a1
Generic ring buffer
hiroshihorie Oct 3, 2024
d039b2c
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 7, 2024
ec06595
convert
hiroshihorie Oct 8, 2024
398e365
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 8, 2024
a69566d
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 19, 2024
883a640
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 24, 2024
f84d836
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 24, 2024
b118f59
fix audio processing adapter
hiroshihorie Oct 27, 2024
6935094
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 27, 2024
da40d9f
Merge branch 'hiroshi/audio-visualize' of https://github.com/livekit/…
hiroshihorie Oct 27, 2024
0bb44be
Merge branch 'main' into hiroshi/audio-visualize
hiroshihorie Oct 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 120 additions & 1 deletion Sources/LiveKit/Convenience/AudioProcessing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ public extension LKAudioBuffer {
guard let targetBufferPointer = pcmBuffer.floatChannelData else { return nil }

// Optimized version
var normalizationFactor: Float = 1.0 / 32768.0
let factor = Float(Int16.max)
var normalizationFactor: Float = 1.0 / factor // Or use 32768.0

for i in 0 ..< channels {
vDSP_vsmul(rawBuffer(forChannel: i),
Expand Down Expand Up @@ -98,3 +99,121 @@ public extension Sequence where Iterator.Element == AudioLevel {
peak: totalSums.peakSum / Float(count))
}
}

/// Turns incoming PCM audio into smoothed, normalized frequency-band levels for visualization.
///
/// Samples from channel 0 of each buffer are appended to a ring buffer of `bufferSize` samples.
/// Once that buffer has filled, every call runs an FFT over its contents, folds the spectrum into
/// `bandsCount` bands, maps each band from `minDB ... maxDB` onto `0 ... 1`, and blends the
/// result with the previously returned levels.
public class AudioVisualizeProcessor {
    static let bufferSize = 1024

    // MARK: - Public

    public let minFrequency: Float
    public let maxFrequency: Float
    public let minDB: Float
    public let maxDB: Float
    public let bandsCount: Int
    public let isCentered: Bool
    public let smoothingFactor: Float

    // MARK: - Private

    /// Levels returned by the previous `process` call (all zeros initially).
    private var bands: [Float]
    private let ringBuffer = RingBuffer<Float>(size: AudioVisualizeProcessor.bufferSize)
    private let processor: FFTProcessor

    /// - Parameters:
    ///   - minFrequency: Lower edge of the analysed frequency range.
    ///   - maxFrequency: Upper edge of the analysed frequency range.
    ///   - minDB: Decibel value that maps to a level of 0.
    ///   - maxDB: Decibel value that maps to a level of 1.
    ///   - bandsCount: Number of levels returned by `process(pcmBuffer:)`.
    ///   - isCentered: When `true`, levels are sorted and arranged with the largest in the middle.
    ///   - smoothingFactor: Input to the ease-in-out curve that decides how far each level moves
    ///     towards its new value per call.
    public init(minFrequency: Float = 10,
                maxFrequency: Float = 8000,
                minDB: Float = -32.0,
                maxDB: Float = 32.0,
                bandsCount: Int = 100,
                isCentered: Bool = false,
                smoothingFactor: Float = 0.3) // Smoothing factor for smoother transitions
    {
        self.minFrequency = minFrequency
        self.maxFrequency = maxFrequency
        self.minDB = minDB
        self.maxDB = maxDB
        self.bandsCount = bandsCount
        self.isCentered = isCentered
        self.smoothingFactor = smoothingFactor

        processor = FFTProcessor(bufferSize: Self.bufferSize)
        bands = [Float](repeating: 0.0, count: bandsCount)
    }

    /// Feeds one PCM buffer into the analyser.
    ///
    /// - Parameter pcmBuffer: Audio to analyse; only channel 0 of its float channel data is read.
    /// - Returns: The smoothed band levels, or `nil` when the buffer has no float channel data or
    ///   fewer than `bufferSize` samples have been accumulated so far.
    public func process(pcmBuffer: AVAudioPCMBuffer) -> [Float]? {
        guard let floatChannelData = pcmBuffer.floatChannelData else { return nil }

        // Copy channel 0 into the ring buffer.
        let samples = Array(UnsafeBufferPointer(start: floatChannelData[0], count: Int(pcmBuffer.frameLength)))
        ringBuffer.write(samples)

        // Get full-size buffer if available, otherwise return
        guard let buffer = ringBuffer.read() else { return nil }

        // Process FFT and compute frequency bands
        let fftRes = processor.process(buffer: buffer)
        let computed = fftRes.computeBands(
            minFrequency: minFrequency,
            maxFrequency: maxFrequency,
            bandsCount: bandsCount,
            sampleRate: Float(pcmBuffer.format.sampleRate)
        )

        var normalizedBands = normalize(computed.magnitudes)

        // If centering is enabled, rearrange the normalized bands
        if isCentered {
            normalizedBands = centerBands(normalizedBands.sorted(by: >))
        }

        // The eased factor depends only on `smoothingFactor`, so compute it once per call.
        let easedFactor = _easeInOutCubic(t: smoothingFactor)
        bands = zip(bands, normalizedBands).map { old, new in
            _smoothTransition(from: old, to: new, easedFactor: easedFactor)
        }

        return bands
    }

    /// Maps decibel magnitudes from `minDB ... maxDB` onto `0 ... 1`, clamping values outside it.
    private func normalize(_ magnitudes: [Float]) -> [Float] {
        let headroom = maxDB - minDB

        // A non-positive range cannot be normalized; report silence instead of dividing by
        // zero or flipping the sign of the result.
        guard headroom > 0 else {
            return [Float](repeating: 0, count: magnitudes.count)
        }

        return magnitudes.map { magnitude in
            // Offset by the floor itself (not `abs(minDB)`) so a positive `minDB` also works.
            let adjustedMagnitude = max(0, magnitude - minDB)
            return min(1.0, adjustedMagnitude / headroom)
        }
    }

    /// Centers the sorted bands by placing higher values in the middle.
    private func centerBands(_ sortedBands: [Float]) -> [Float] {
        var centeredBands = [Float](repeating: 0, count: sortedBands.count)
        var leftIndex = sortedBands.count / 2
        var rightIndex = leftIndex

        for (index, value) in sortedBands.enumerated() {
            if index % 2 == 0 {
                // Place value to the right
                centeredBands[rightIndex] = value
                rightIndex += 1
            } else {
                // Place value to the left
                leftIndex -= 1
                centeredBands[leftIndex] = value
            }
        }

        return centeredBands
    }

    /// Moves `oldValue` towards `newValue` by the already-eased fraction `easedFactor`.
    private func _smoothTransition(from oldValue: Float, to newValue: Float, easedFactor: Float) -> Float {
        oldValue + (newValue - oldValue) * easedFactor
    }

    /// Easing function: ease-in-out cubic
    private func _easeInOutCubic(t: Float) -> Float {
        t < 0.5 ? 4 * t * t * t : 1 - pow(-2 * t + 2, 3) / 2
    }
}
25 changes: 8 additions & 17 deletions Sources/LiveKit/Protocols/AudioRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,17 @@ public protocol AudioRenderer {
func render(pcmBuffer: AVAudioPCMBuffer)
}

class AudioRendererAdapter: NSObject, LKRTCAudioRenderer {
private weak var target: AudioRenderer?
private let targetHashValue: Int
class AudioRendererAdapter: MulticastDelegate<AudioRenderer>, LKRTCAudioRenderer {
//
typealias Delegate = AudioRenderer

init(target: AudioRenderer) {
self.target = target
targetHashValue = ObjectIdentifier(target).hashValue
init() {
super.init(label: "AudioRendererAdapter")
}

func render(pcmBuffer: AVAudioPCMBuffer) {
target?.render(pcmBuffer: pcmBuffer)
}
// MARK: - LKRTCAudioRenderer

// Proxy the equality operators
override func isEqual(_ object: Any?) -> Bool {
guard let other = object as? AudioRendererAdapter else { return false }
return targetHashValue == other.targetHashValue
}

override var hash: Int {
targetHashValue
func render(pcmBuffer: AVAudioPCMBuffer) {
notify { $0.render(pcmBuffer: pcmBuffer) }
}
}
147 changes: 147 additions & 0 deletions Sources/LiveKit/Support/FFTProcessor.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* Copyright 2024 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import Accelerate
import AVFoundation

extension Float {
    /// Half of the receiver. When the receiver is a sample rate, this is the highest
    /// frequency that rate can represent (the Nyquist frequency).
    var nyquistFrequency: Float {
        let sampleRate = self
        return sampleRate / 2.0
    }
}

/// Output of `FFTResult.computeBands`: a spectrum folded into a fixed number of frequency bands.
/// `magnitudes` and `frequencies` are parallel arrays with one entry per band.
public struct FFTComputeBandsResult {
/// Number of bands that was requested.
let count: Int
/// Mean of the FFT bin magnitudes that fall into each band.
let magnitudes: [Float]
/// Midpoint frequency of each band, derived from its first and last bin index
/// (presumably Hz, i.e. the unit of the sample rate given to `computeBands` — confirm with callers).
let frequencies: [Float]
}

/// A magnitude spectrum plus a helper that folds it into evenly sized frequency bands.
public class FFTResult {
    /// One magnitude per FFT bin, ordered from the lowest to the highest frequency.
    public let magnitudes: [Float]

    init(magnitudes: [Float]) {
        self.magnitudes = magnitudes
    }

    /// Averages the bins between `minFrequency` and `maxFrequency` into `bandsCount` bands.
    ///
    /// `maxFrequency` is limited to the Nyquist frequency of `sampleRate`, and both frequencies
    /// are clamped to the available bins. Degenerate input (no magnitudes, a non-positive
    /// `bandsCount` or `sampleRate`, or `minFrequency` above `maxFrequency`) produces a
    /// well-formed result instead of trapping.
    ///
    /// - Parameters:
    ///   - minFrequency: Lower edge of the range to analyse.
    ///   - maxFrequency: Upper edge of the range to analyse.
    ///   - bandsCount: Number of bands to produce.
    ///   - sampleRate: Sample rate of the audio the spectrum was computed from.
    /// - Returns: Per-band mean magnitude and midpoint frequency.
    func computeBands(minFrequency: Float, maxFrequency: Float, bandsCount: Int, sampleRate: Float) -> FFTComputeBandsResult {
        let safeBandsCount = max(0, bandsCount)
        var bandMagnitudes = [Float](repeating: 0.0, count: safeBandsCount)
        var bandFrequencies = [Float](repeating: 0.0, count: safeBandsCount)

        // Nothing to fold: return zero-filled bands rather than dividing by zero or
        // indexing into an empty spectrum.
        guard safeBandsCount > 0, !magnitudes.isEmpty, sampleRate > 0 else {
            return FFTComputeBandsResult(count: safeBandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies)
        }

        let binCount = vDSP_Length(magnitudes.count)
        let actualMaxFrequency = min(sampleRate.nyquistFrequency, maxFrequency)

        let magLowerRange = _magnitudeIndex(for: minFrequency, sampleRate: sampleRate)
        // Indices are unsigned, so keep upper >= lower to avoid an underflow trap below.
        let magUpperRange = max(magLowerRange, _magnitudeIndex(for: actualMaxFrequency, sampleRate: sampleRate))
        let ratio = Float(magUpperRange - magLowerRange) / Float(safeBandsCount)

        // Frequency width of a single bin; identical for every band.
        let bandwidth = sampleRate.nyquistFrequency / Float(magnitudes.count)

        return magnitudes.withUnsafeBufferPointer { magnitudesPtr in
            guard let base = magnitudesPtr.baseAddress else {
                return FFTComputeBandsResult(count: safeBandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies)
            }

            for i in 0 ..< safeBandsCount {
                let magsStartIdx = min(vDSP_Length(floorf(Float(i) * ratio)) + magLowerRange, binCount)
                let magsEndIdx = min(vDSP_Length(floorf(Float(i + 1) * ratio)) + magLowerRange, binCount)

                let count = magsEndIdx - magsStartIdx
                if count > 0 {
                    var sum: Float = 0
                    vDSP_sve(base + Int(magsStartIdx), 1, &sum, count)
                    bandMagnitudes[i] = sum / Float(count)
                } else {
                    // Band narrower than one bin: reuse the bin it starts in. The start index may
                    // equal the bin count (range at/above Nyquist), so clamp to the last bin.
                    bandMagnitudes[i] = magnitudesPtr[min(Int(magsStartIdx), magnitudesPtr.count - 1)]
                }

                // Compute average frequency
                bandFrequencies[i] = (bandwidth * Float(magsStartIdx) + bandwidth * Float(magsEndIdx)) / 2
            }

            return FFTComputeBandsResult(count: safeBandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies)
        }
    }

    /// Bin index for `frequency`, clamped to `0 ... magnitudes.count`.
    /// Negative or NaN positions map to 0 so the unsigned conversion can never trap.
    @inline(__always) private func _magnitudeIndex(for frequency: Float, sampleRate: Float) -> vDSP_Length {
        let position = Float(magnitudes.count) * frequency / sampleRate.nyquistFrequency
        guard !position.isNaN, position > 0 else { return 0 }
        guard position < Float(magnitudes.count) else { return vDSP_Length(magnitudes.count) }
        return vDSP_Length(position)
    }
}

/// Runs a windowed real-to-complex FFT (via vDSP) over fixed-size sample buffers and returns
/// the magnitude of each bin in decibels. Instances reuse internal scratch buffers, so a single
/// instance must not be used from several threads at once.
class FFTProcessor {
    /// Window applied to the samples before the transform.
    public enum WindowType {
        case none
        case hanning
        case hamming
    }

    public let bufferSize: vDSP_Length
    public let windowType: WindowType

    private let bufferHalfSize: vDSP_Length
    private let bufferLog2Size: vDSP_Length
    private var window: [Float] = []
    private var fftSetup: FFTSetup
    private var realBuffer: [Float]
    private var imaginaryBuffer: [Float]
    private var zeroDBReference: Float = 1.0

    /// - Parameters:
    ///   - bufferSize: Number of samples per transform. Must be a power of two (>= 2); the
    ///     radix-2 FFT would otherwise cover only part of the buffer and return a spectrum
    ///     that is partly untransformed samples.
    ///   - windowType: Window applied before the transform.
    init(bufferSize: Int, windowType: WindowType = .hanning) {
        precondition(bufferSize >= 2 && bufferSize.nonzeroBitCount == 1,
                     "FFTProcessor bufferSize must be a power of two (>= 2).")

        self.bufferSize = vDSP_Length(bufferSize)
        self.windowType = windowType

        bufferHalfSize = vDSP_Length(bufferSize / 2)
        // Exact log2 for a power of two; avoids float rounding in log2f.
        bufferLog2Size = vDSP_Length(bufferSize.trailingZeroBitCount)

        realBuffer = [Float](repeating: 0.0, count: bufferSize / 2)
        imaginaryBuffer = [Float](repeating: 0.0, count: bufferSize / 2)
        window = [Float](repeating: 1.0, count: bufferSize)

        guard let setup = vDSP_create_fftsetup(bufferLog2Size, FFTRadix(FFT_RADIX2)) else {
            preconditionFailure("vDSP_create_fftsetup failed for log2n = \(bufferLog2Size).")
        }
        fftSetup = setup

        switch windowType {
        case .none:
            // Keep the all-ones window, i.e. no windowing.
            break
        case .hanning:
            vDSP_hann_window(&window, vDSP_Length(bufferSize), Int32(vDSP_HANN_NORM))
        case .hamming:
            vDSP_hamm_window(&window, vDSP_Length(bufferSize), 0)
        }
    }

    deinit {
        vDSP_destroy_fftsetup(fftSetup)
    }

    /// Transforms one buffer of samples.
    ///
    /// - Parameter buffer: Exactly `bufferSize` samples; any other length is a precondition failure.
    /// - Returns: `bufferSize / 2` bin magnitudes converted to decibels relative to 1.0.
    func process(buffer: [Float]) -> FFTResult {
        precondition(buffer.count == Int(bufferSize), "Input buffer size mismatch.")

        var windowedBuffer = [Float](repeating: 0.0, count: Int(bufferSize))

        vDSP_vmul(buffer, 1, window, 1, &windowedBuffer, 1, bufferSize)

        return realBuffer.withUnsafeMutableBufferPointer { realPtr in
            imaginaryBuffer.withUnsafeMutableBufferPointer { imagPtr in
                guard let realBase = realPtr.baseAddress, let imagBase = imagPtr.baseAddress else {
                    preconditionFailure("FFT scratch buffers must not be empty.")
                }
                var complexBuffer = DSPSplitComplex(realp: realBase, imagp: imagBase)

                // Pack the real samples as interleaved complex pairs into split-complex form.
                // `withMemoryRebound` reinterprets the Float storage only for the duration of
                // the call, unlike `bindMemory`, which would leave it bound to DSPComplex.
                windowedBuffer.withUnsafeBufferPointer { bufferPtr in
                    guard let samplesBase = bufferPtr.baseAddress else { return }
                    samplesBase.withMemoryRebound(to: DSPComplex.self, capacity: Int(bufferHalfSize)) { complexPtr in
                        vDSP_ctoz(complexPtr, 2, &complexBuffer, 1, bufferHalfSize)
                    }
                }

                vDSP_fft_zrip(fftSetup, &complexBuffer, 1, bufferLog2Size, FFTDirection(FFT_FORWARD))

                var magnitudes = [Float](repeating: 0.0, count: Int(bufferHalfSize))
                vDSP_zvabs(&complexBuffer, 1, &magnitudes, 1, bufferHalfSize)

                // Convert magnitudes to decibels in place through one pointer, instead of
                // passing the same array as both an immutable and an inout argument.
                magnitudes.withUnsafeMutableBufferPointer { magnitudesPtr in
                    guard let magnitudesBase = magnitudesPtr.baseAddress else { return }
                    vDSP_vdbcon(magnitudesBase, 1, &zeroDBReference, magnitudesBase, 1, vDSP_Length(magnitudesPtr.count), 1)
                }

                return FFTResult(magnitudes: magnitudes)
            }
        }
    }
}
51 changes: 51 additions & 0 deletions Sources/LiveKit/Support/RingBuffer.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright 2024 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import Foundation

/// Simple fixed-capacity ring buffer used for internal audio processing. Not thread-safe.
///
/// `read()` returns `nil` until `size` values have been written. After that it returns the most
/// recent `size` values, oldest first. Reading does not consume anything.
class RingBuffer<T: Numeric> {
    private var _isFull = false
    private var _buffer: [T]
    /// Index the next written value goes to; also the position of the oldest value once full.
    private var _head: Int = 0

    /// - Parameter size: Capacity in elements. A non-positive size creates a buffer that
    ///   ignores writes and never becomes readable, instead of trapping.
    init(size: Int) {
        _buffer = [T](repeating: 0, count: max(0, size))
    }

    /// Stores `value`, overwriting the oldest element once the buffer has wrapped.
    func write(_ value: T) {
        // Zero-capacity buffer: there is no slot to write to.
        guard !_buffer.isEmpty else { return }

        _buffer[_head] = value
        _head += 1
        if _head == _buffer.count {
            _head = 0
            _isFull = true
        }
    }

    /// Stores every element of `sequence` in order.
    func write(_ sequence: [T]) {
        for value in sequence {
            write(value)
        }
    }

    /// Returns the buffered values from oldest to newest, or `nil` if the buffer has not filled yet.
    func read() -> [T]? {
        guard _isFull else { return nil }

        // Head at the start means the storage is already in chronological order.
        guard _head != 0 else { return _buffer }

        return Array(_buffer[_head...] + _buffer[..<_head])
    }
}
Loading
Loading