NavidromeApp/iOS/Views/Visualizer/OfflineAudioAnalyzer.swift

198 lines
7.7 KiB
Swift
Raw Normal View History

import Foundation
import AVFoundation
import Accelerate
/// Analyzes a whole audio file offline (faster than real time) and produces one
/// frequency-band frame per visualizer frame, so the result can be cached and
/// replayed in sync with playback.
///
/// Only the first channel of the file's processing format is analyzed.
actor OfflineAudioAnalyzer {
    static let shared = OfflineAudioAnalyzer()

    /// Progress callback; receives the completed fraction (0.0 to 1.0).
    typealias ProgressCallback = @Sendable (Float) -> Void

    /// FFT length in samples. Must be a power of two (radix-2 FFT).
    private static let fftSize = 1024

    /// Number of audio frames requested from the file per read.
    private static let readChunkFrames: AVAudioFrameCount = 4096

    /// Analyze an audio file and return one array of band amplitudes per visualizer frame.
    ///
    /// Visualizer frame `k` is computed from the `fftSize` samples starting at audio sample
    /// `floor(k * sampleRate / fps)`. This holds for any `fps`, including rates whose hop is
    /// shorter than the FFT window (windows then overlap) or not a whole number of samples.
    /// Windows that run past the end of the file are zero-padded.
    ///
    /// - Parameters:
    ///   - url: Audio file to read.
    ///   - pointsCount: Number of bands per frame (must be >= 0).
    ///   - fps: Visualizer frames per second; must be finite, > 0 and not above the sample rate.
    ///   - cutoff: Highest FFT bin used for the bands (clamped to the available bins).
    ///   - eqBoostFactor: Extra gain applied progressively toward the highest band.
    ///   - progress: Optional callback, invoked every 50 frames and once with `1.0` at the end.
    /// - Returns: One `[Float]` of `pointsCount` amplitudes per visualizer frame. Amplitudes
    ///   are non-negative and nominally in 0.0-1.0, but they are not clamped.
    /// - Throws: Any error from opening or reading the file, or an `NSError` in the
    ///   `"OfflineAnalyzer"` domain (1: buffer allocation, 2: FFT setup, 3: invalid
    ///   parameters, 4: no float sample data).
    func analyze(
        url: URL,
        pointsCount: Int = 20,
        fps: Double = 30.0,
        cutoff: Int = 90,
        eqBoostFactor: Float = 3.5,
        progress: ProgressCallback? = nil
    ) throws -> [[Float]] {
        let file = try AVAudioFile(forReading: url)
        let format = file.processingFormat
        let sampleRate = format.sampleRate
        let totalFrames = file.length

        // Reject inputs that would otherwise trap in the integer conversions below.
        guard pointsCount >= 0,
              fps.isFinite, fps > 0,
              sampleRate.isFinite, sampleRate > 0,
              sampleRate / fps >= 1 else {
            throw Self.makeError(code: 3, "Invalid analysis parameters")
        }

        let fftSize = Self.fftSize
        // fftSize is a power of two, so its trailing zero count is exactly log2(fftSize).
        let log2n = vDSP_Length(fftSize.trailingZeroBitCount)

        guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: Self.readChunkFrames) else {
            throw Self.makeError(code: 1, "Failed to create buffer")
        }
        guard let fftSetup = vDSP_create_fftsetup(log2n, FFTRadix(kFFTRadix2)) else {
            throw Self.makeError(code: 2, "Failed to create FFT setup")
        }
        defer { vDSP_destroy_fftsetup(fftSetup) }

        var window = [Float](repeating: 0, count: fftSize)
        vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))

        var visualizerData: [[Float]] = []
        let estimatedVisFrames = (Double(max(totalFrames, 0)) * fps / sampleRate).rounded(.up)
        if estimatedVisFrames < Double(Int32.max) {
            visualizerData.reserveCapacity(Int(estimatedVisFrames))
        }

        // Samples read from the file but not yet discarded; `pending[0]` is the audio
        // sample at absolute index `pendingStart`.
        var pending: [Float] = []
        pending.reserveCapacity(Int(Self.readChunkFrames) + fftSize)
        var pendingStart: Int64 = 0
        var frameIndex = 0
        // Absolute sample index at which the next visualizer frame's window begins.
        var nextFrameStart: Int64 = 0

        // Emits every frame whose full window is buffered. At end of file it also emits
        // the remaining frames, whose short windows get zero-padded.
        func emitAvailableFrames(atEndOfFile: Bool) {
            while true {
                // Discard samples that lie before the next window.
                let stale = Int(min(Int64(pending.count), nextFrameStart - pendingStart))
                if stale > 0 {
                    pending.removeFirst(stale)
                    pendingStart += Int64(stale)
                }

                let available = min(pending.count, fftSize)
                guard available == fftSize || (atEndOfFile && available > 0) else { return }

                let bands = pending.withUnsafeBufferPointer { all in
                    Self.makeBandFrame(
                        samples: UnsafeBufferPointer(rebasing: all[..<available]),
                        window: window,
                        fftSetup: fftSetup,
                        log2n: log2n,
                        pointsCount: pointsCount,
                        cutoff: cutoff,
                        eqBoostFactor: eqBoostFactor
                    )
                }
                visualizerData.append(bands)

                frameIndex += 1
                // Multiply before dividing so whole-number hops stay exact.
                nextFrameStart = Int64((Double(frameIndex) * sampleRate / fps).rounded(.down))

                // Report progress every 50 frames.
                if frameIndex % 50 == 0, let progress = progress {
                    progress(Float(file.framePosition) / Float(totalFrames))
                }
            }
        }

        while file.framePosition < totalFrames {
            // Clamp in Int64 first: the remaining count may not fit in AVAudioFrameCount.
            let remaining = totalFrames - file.framePosition
            let framesToRead = AVAudioFrameCount(min(Int64(Self.readChunkFrames), remaining))
            buffer.frameLength = 0
            try file.read(into: buffer, frameCount: framesToRead)

            let framesRead = Int(buffer.frameLength)
            // A read that makes no progress would loop forever; stop instead.
            guard framesRead > 0 else { break }
            guard let channelData = buffer.floatChannelData?[0] else {
                throw Self.makeError(code: 4, "Audio buffer has no float channel data")
            }

            pending.append(contentsOf: UnsafeBufferPointer(start: channelData, count: framesRead))
            emitAvailableFrames(atEndOfFile: false)
        }
        emitAvailableFrames(atEndOfFile: true)

        progress?(1.0)
        return visualizerData
    }

    /// Builds an `NSError` in the analyzer's error domain.
    private static func makeError(code: Int, _ message: String) -> NSError {
        NSError(domain: "OfflineAnalyzer", code: code, userInfo: [NSLocalizedDescriptionKey: message])
    }

    /// Computes one frame of band amplitudes from raw audio samples.
    ///
    /// - Parameters:
    ///   - samples: Up to `window.count` samples; a shorter input is zero-padded.
    ///   - window: Window coefficients; its length is the FFT size.
    ///   - fftSetup: vDSP setup created for at least `log2n`.
    ///   - log2n: Base-2 logarithm of the FFT size.
    ///   - pointsCount: Number of bands to produce.
    ///   - cutoff: Highest FFT bin considered (clamped to the available bins).
    ///   - eqBoostFactor: Extra gain applied progressively toward the highest band.
    /// - Returns: `pointsCount` non-negative band amplitudes.
    private static func makeBandFrame(
        samples: UnsafeBufferPointer<Float>,
        window: [Float],
        fftSetup: FFTSetup,
        log2n: vDSP_Length,
        pointsCount: Int,
        cutoff: Int,
        eqBoostFactor: Float
    ) -> [Float] {
        let fftSize = window.count
        let halfSize = fftSize / 2
        let sampleCount = min(samples.count, fftSize)

        // 1. Apply the window. Elements past `sampleCount` stay zero (zero-padding).
        var windowed = [Float](repeating: 0, count: fftSize)
        if sampleCount > 0, let source = samples.baseAddress {
            vDSP_vmul(source, 1, window, 1, &windowed, 1, vDSP_Length(sampleCount))
        }

        // 2. Real-to-complex FFT and squared magnitudes.
        var realp = [Float](repeating: 0, count: halfSize)
        var imagp = [Float](repeating: 0, count: halfSize)
        var magnitudes = [Float](repeating: 0, count: halfSize)
        realp.withUnsafeMutableBufferPointer { realBuf in
            imagp.withUnsafeMutableBufferPointer { imagBuf in
                guard let realBase = realBuf.baseAddress, let imagBase = imagBuf.baseAddress else { return }
                var split = DSPSplitComplex(realp: realBase, imagp: imagBase)
                windowed.withUnsafeBufferPointer { input in
                    guard let inputBase = input.baseAddress else { return }
                    // View the real samples as interleaved pairs so vDSP_ctoz can
                    // de-interleave them into the split-complex layout.
                    inputBase.withMemoryRebound(to: DSPComplex.self, capacity: halfSize) { pairs in
                        vDSP_ctoz(pairs, 2, &split, 1, vDSP_Length(halfSize))
                    }
                }
                vDSP_fft_zrip(fftSetup, &split, 1, log2n, FFTDirection(FFT_FORWARD))
                vDSP_zvmags(&split, 1, &magnitudes, 1, vDSP_Length(halfSize))
            }
        }

        // 3. Normalize the squared magnitudes by fftSize^2, then take the square root
        //    to get an amplitude.
        let scale = 1.0 / (Float(fftSize) * Float(fftSize))
        for bin in magnitudes.indices {
            magnitudes[bin] = (magnitudes[bin] * scale).squareRoot()
        }

        // 4. Logarithmic binning with EQ boost.
        var framePoints = [Float](repeating: 0, count: pointsCount)
        let maxUsefulBin = min(halfSize - 1, cutoff)
        // Bands always start at bin 1 or higher (bin 0 is skipped), so with no bin
        // above 0 available every band stays at zero.
        guard maxUsefulBin >= 1 else { return framePoints }

        for i in 0..<pointsCount {
            let normalizedIndex = Float(i + 1) / Float(pointsCount)
            let logIndex = log10(normalizedIndex * 9.0 + 1.0)
            let centerBin = logIndex * Float(maxUsefulBin)
            let binWidth = max(1.0, Float(maxUsefulBin) / Float(pointsCount) * logIndex)

            let startBin = min(maxUsefulBin, max(1, Int(centerBin - binWidth / 2)))
            // Never let the band end before it starts: an empty closed range traps.
            let endBin = max(startBin, min(maxUsefulBin, Int(centerBin + binWidth / 2)))

            let band = magnitudes[startBin...endBin]
            let average = band.reduce(0, +) / Float(band.count)
            let eqBoost: Float = 1.0 + (Float(i) / Float(pointsCount)) * eqBoostFactor
            framePoints[i] = average * eqBoost
        }
        return framePoints
    }
}