import Foundation
import AVFoundation
import Accelerate

/// Analyzes an entire audio file offline and produces one frame of frequency-band
/// amplitudes per visualizer frame, so the result can be cached and later looked up
/// by playback time (`frameIndex ≈ time * fps`).
actor OfflineAudioAnalyzer {
    static let shared = OfflineAudioAnalyzer()

    /// Progress callback (0.0 to 1.0)
    typealias ProgressCallback = @Sendable (Float) -> Void

    /// Domain of every `NSError` created by the analyzer itself.
    private static let errorDomain = "OfflineAnalyzer"

    /// FFT window length in samples. Must be a power of two for `vDSP_fft_zrip`.
    private static let fftSize = 1024

    /// Number of audio frames requested from the file per read.
    private static let readChunkFrames: AVAudioFrameCount = 16_384

    /// Analyze an audio file and return an array of FFT frames.
    ///
    /// Visual frame `k` is computed from the `fftSize` samples of channel 0 that start at
    /// audio frame `k * sampleRate / fps` (rounded down). Windows that run past the end of
    /// the file are zero-padded. Only the first channel is analyzed.
    ///
    /// - Parameters:
    ///   - url: Audio file to read.
    ///   - pointsCount: Number of frequency bands per visual frame. Must not be negative.
    ///   - fps: Visual frames per second of audio. Must be positive, finite and not larger
    ///     than the file's sample rate.
    ///   - cutoff: Highest FFT bin index that contributes to the bands.
    ///   - eqBoostFactor: Extra gain applied progressively towards the higher bands.
    ///   - progress: Called every 50 visual frames with the completed fraction, and once
    ///     with `1.0` when the analysis is finished.
    /// - Returns: One array of `pointsCount` band amplitudes per visual frame. Values are
    ///   not clamped.
    /// - Throws: Errors from `AVAudioFile`, or an `NSError` in the `OfflineAnalyzer` domain
    ///   (code 1: buffer allocation failed, code 2: FFT setup failed, code 3: invalid argument).
    func analyze(
        url: URL,
        pointsCount: Int = 20,
        fps: Double = 30.0,
        cutoff: Int = 90,
        eqBoostFactor: Float = 3.5,
        progress: ProgressCallback? = nil
    ) throws -> [[Float]] {
        // Reject arguments that would otherwise trap in integer conversions below.
        guard pointsCount >= 0 else {
            throw Self.makeError(code: 3, "pointsCount must not be negative")
        }
        guard fps.isFinite, fps > 0 else {
            throw Self.makeError(code: 3, "fps must be a positive, finite number")
        }

        let file = try AVAudioFile(forReading: url)
        let format = file.processingFormat
        let totalFrames = file.length

        // Audio frames per visual frame. Kept fractional so that rounding does not
        // accumulate into drift between the visual frames and the audio.
        let hop = format.sampleRate / fps
        guard hop.isFinite, hop >= 1 else {
            throw Self.makeError(code: 3, "fps must not exceed the audio sample rate")
        }

        let fftSize = Self.fftSize
        let halfSize = fftSize / 2

        guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: Self.readChunkFrames) else {
            throw Self.makeError(code: 1, "Failed to create buffer")
        }

        let log2n = vDSP_Length(log2(Double(fftSize)))
        guard let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2)) else {
            throw Self.makeError(code: 2, "Failed to create FFT setup")
        }
        defer { vDSP_destroy_fftsetup(fftSetup) }

        var window = [Float](repeating: 0, count: fftSize)
        vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))

        var visualizerData: [[Float]] = []
        if totalFrames > 0 {
            visualizerData.reserveCapacity(Int((Double(totalFrames) / hop).rounded(.up)))
        }

        // Absolute audio frame at which visual frame `index` starts. Clamped to the file
        // length so the conversion to an integer can never overflow.
        let lastFrame = Double(max(totalFrames, 0))
        func windowStart(_ index: Int) -> AVAudioFramePosition {
            let exact = Double(index) * hop
            return AVAudioFramePosition(min(exact, lastFrame))
        }

        // Channel-0 samples that have been read but may still be needed by a window.
        // `pendingStart` is the absolute audio frame of `pending[0]`; this assumes a
        // freshly opened file starts reading at frame 0.
        var pending: [Float] = []
        pending.reserveCapacity(Int(Self.readChunkFrames) + fftSize)
        var pendingStart: AVAudioFramePosition = 0
        var frameIndex = 0
        var reachedEnd = false

        while !reachedEnd {
            // Read the next chunk sequentially. Windows are cut from `pending`, so the
            // read size is independent of the hop and no seeking is required.
            let remaining = totalFrames - file.framePosition
            if remaining > 0 {
                let wanted = min(AVAudioFramePosition(Self.readChunkFrames), remaining)
                buffer.frameLength = 0
                try file.read(into: buffer, frameCount: AVAudioFrameCount(wanted))

                let framesRead = Int(buffer.frameLength)
                if framesRead > 0, let channelData = buffer.floatChannelData?[0] {
                    pending.append(contentsOf: UnsafeBufferPointer(start: channelData, count: framesRead))
                } else {
                    // No progress is possible; stop instead of spinning.
                    reachedEnd = true
                }
            } else {
                reachedEnd = true
            }

            // Emit every visual frame whose window is fully buffered. Once the file is
            // exhausted, also emit the remaining partial windows (zero-padded).
            while true {
                let start = windowStart(frameIndex)
                let available = pendingStart + AVAudioFramePosition(pending.count) - start
                guard available > 0 else { break }
                guard available >= AVAudioFramePosition(fftSize) || reachedEnd else { break }

                let offset = Int(start - pendingStart)
                let sampleCount = Int(min(available, AVAudioFramePosition(fftSize)))
                let frame = pending.withUnsafeBufferPointer { samples -> [Float] in
                    guard let base = samples.baseAddress else {
                        return [Float](repeating: 0, count: pointsCount)
                    }
                    return processFFTFrame(
                        channelData: base + offset,
                        frameCount: sampleCount,
                        fftSize: fftSize,
                        halfSize: halfSize,
                        window: window,
                        fftSetup: fftSetup,
                        pointsCount: pointsCount,
                        cutoff: cutoff,
                        eqBoostFactor: eqBoostFactor
                    )
                }
                visualizerData.append(frame)
                frameIndex += 1

                // Report progress every 50 frames
                if frameIndex % 50 == 0, let progress = progress {
                    progress(Float(Double(start) / Double(totalFrames)))
                }
            }

            // Drop samples that lie before the next window start; they are never needed again.
            let obsolete = max(0, windowStart(frameIndex) - pendingStart)
            let dropCount = Int(min(AVAudioFramePosition(pending.count), obsolete))
            if dropCount > 0 {
                pending.removeFirst(dropCount)
                pendingStart += AVAudioFramePosition(dropCount)
            }
        }

        progress?(1.0)
        return visualizerData
    }

    /// Builds an `NSError` in the analyzer's error domain.
    private static func makeError(code: Int, _ message: String) -> NSError {
        NSError(domain: errorDomain, code: code, userInfo: [NSLocalizedDescriptionKey: message])
    }

    /// Process a single FFT frame from raw audio samples.
    ///
    /// - Parameters:
    ///   - channelData: Pointer to at least `frameCount` readable samples.
    ///   - frameCount: Number of valid samples; fewer than `fftSize` are zero-padded.
    ///   - fftSize: FFT length; expected to match the size `fftSetup` was created for.
    ///   - halfSize: `fftSize / 2`, the number of complex bins produced.
    ///   - window: Window coefficients with at least `fftSize` elements.
    ///   - fftSetup: vDSP setup created for `log2(fftSize)`.
    ///   - pointsCount: Number of output bands.
    ///   - cutoff: Highest FFT bin index used for the bands.
    ///   - eqBoostFactor: Extra gain applied progressively towards the higher bands.
    /// - Returns: `pointsCount` band amplitudes (empty when `pointsCount <= 0`, all zeros
    ///   when no bin above DC is usable).
    private func processFFTFrame(
        channelData: UnsafePointer<Float>,
        frameCount: Int,
        fftSize: Int,
        halfSize: Int,
        window: [Float],
        fftSetup: FFTSetup,
        pointsCount: Int,
        cutoff: Int,
        eqBoostFactor: Float
    ) -> [Float] {
        guard pointsCount > 0 else { return [] }
        var framePoints = [Float](repeating: 0, count: pointsCount)

        // 1. Apply the window. Samples beyond `sampleCount` stay zero (zero-padding).
        let sampleCount = max(0, min(frameCount, fftSize))
        var windowed = [Float](repeating: 0, count: fftSize)
        if sampleCount > 0 {
            vDSP_vmul(channelData, 1, window, 1, &windowed, 1, vDSP_Length(sampleCount))
        }

        // 2. Real-to-complex FFT and squared magnitudes.
        var realPart = [Float](repeating: 0, count: halfSize)
        var imagPart = [Float](repeating: 0, count: halfSize)
        var magnitudes = [Float](repeating: 0, count: halfSize)
        let log2n = vDSP_Length(log2(Double(fftSize)))

        realPart.withUnsafeMutableBufferPointer { realBuffer in
            imagPart.withUnsafeMutableBufferPointer { imagBuffer in
                guard let realBase = realBuffer.baseAddress,
                      let imagBase = imagBuffer.baseAddress else { return }
                var split = DSPSplitComplex(realp: realBase, imagp: imagBase)

                // Reinterpret the real samples as interleaved pairs and split them into
                // even/odd arrays, the input layout `vDSP_fft_zrip` expects.
                windowed.withUnsafeBytes { raw in
                    guard let interleaved = raw.bindMemory(to: DSPComplex.self).baseAddress else { return }
                    vDSP_ctoz(interleaved, 2, &split, 1, vDSP_Length(halfSize))
                }

                vDSP_fft_zrip(fftSetup, &split, 1, log2n, FFTDirection(FFT_FORWARD))
                vDSP_zvmags(&split, 1, &magnitudes, 1, vDSP_Length(halfSize))
            }
        }

        // 3. Normalize the squared magnitudes by fftSize², then take the square root to
        //    get amplitudes.
        let fftSizeF = Float(fftSize)
        var scale: Float = 1.0 / (fftSizeF * fftSizeF)
        magnitudes.withUnsafeMutableBufferPointer { buffer in
            guard let base = buffer.baseAddress else { return }
            vDSP_vsmul(base, 1, &scale, base, 1, vDSP_Length(halfSize))
        }
        for index in magnitudes.indices {
            magnitudes[index] = magnitudes[index].squareRoot()
        }

        // 4. Logarithmic binning with EQ boost. Bin 0 (DC) is never used.
        let maxUsefulBin = min(halfSize - 1, cutoff)
        guard maxUsefulBin >= 1 else { return framePoints }

        for i in 0..<pointsCount {
            let normalizedIndex = Float(i + 1) / Float(pointsCount)
            let logIndex = log10(normalizedIndex * 9.0 + 1.0)
            let centerBin = logIndex * Float(maxUsefulBin)
            let binWidth = max(1.0, Float(maxUsefulBin) / Float(pointsCount) * logIndex)

            // Clamp so that 1 <= startBin <= endBin <= maxUsefulBin. Without the clamp a
            // band centered below bin 1 would form an invalid (descending) range.
            let startBin = min(maxUsefulBin, max(1, Int(centerBin - binWidth / 2)))
            let endBin = max(startBin, min(maxUsefulBin, Int(centerBin + binWidth / 2)))

            let band = magnitudes[startBin...endBin]
            let average = band.reduce(0, +) / Float(band.count)
            let eqBoost: Float = 1.0 + (Float(i) / Float(pointsCount)) * eqBoostFactor
            framePoints[i] = average * eqBoost
        }

        return framePoints
    }
}
|