257 lines
11 KiB
Swift
257 lines
11 KiB
Swift
import Foundation
|
|
import AVFoundation
|
|
import Accelerate
|
|
|
|
/// Processes an entire audio file faster than real-time, producing per-frame FFT data
/// that can be cached and played back in sync with the audio.
/// Also optionally extracts SmartDJ profile data (silence boundaries + LUFS) in the same pass.
///
/// Only the first channel of the file's processing format is analysed.
actor OfflineAudioAnalyzer {
    static let shared = OfflineAudioAnalyzer()

    /// Receives analysis progress as a fraction in `0...1`.
    typealias ProgressCallback = @Sendable (Float) -> Void

    // MARK: - Combined Analysis Result

    /// Everything produced by a single pass over the file.
    struct CombinedResult {
        /// One array of `pointsCount` band magnitudes per visualiser frame.
        let visFrames: [[Float]]
        /// Leading silence end in seconds, or `nil` when none was detected or the
        /// detection fell outside the plausible range (see `analyzeWithSmartDJ`).
        let silenceEnd: Double?
        /// Trailing silence start in seconds, or `nil` when none was detected or the
        /// detection fell outside the plausible range (see `analyzeWithSmartDJ`).
        let silenceStart: Double?
        /// Approximate integrated loudness (mean square expressed in dBFS), or `nil`
        /// when SmartDJ extraction was skipped or the signal was digital silence.
        let loudnessLUFS: Double?
    }

    // MARK: - Visualizer-only (legacy entry point)

    /// Produces only the visualiser frames, skipping the SmartDJ computation.
    ///
    /// See `analyzeWithSmartDJ` for the meaning of each parameter and the errors thrown.
    func analyze(
        url: URL,
        pointsCount: Int = 20,
        fps: Double = 30.0,
        cutoff: Int = 90,
        eqBoostFactor: Float = 3.5,
        progress: ProgressCallback? = nil
    ) throws -> [[Float]] {
        try analyzeWithSmartDJ(
            url: url,
            pointsCount: pointsCount,
            fps: fps,
            cutoff: cutoff,
            eqBoostFactor: eqBoostFactor,
            extractSmartDJ: false,
            progress: progress
        ).visFrames
    }

    // MARK: - Combined pass: vis frames + SmartDJ profile in one file read

    /// Reads the file once, producing visualiser frames AND silence/loudness data.
    /// Set `extractSmartDJ: false` to skip the SmartDJ computation and save time.
    ///
    /// Visualiser frame `k` is computed from the FFT window that starts at audio frame
    /// `round(k * sampleRate / fps)`, so frame `k` always corresponds to playback time
    /// `k / fps`, including when the hop is shorter than the FFT window or is not a
    /// whole number of audio frames.
    ///
    /// - Parameters:
    ///   - url: Audio file to analyse.
    ///   - pointsCount: Number of bands per visualiser frame. Must be non-negative.
    ///   - fps: Visualiser frames per second of audio. Must be positive and no greater
    ///     than the file's sample rate.
    ///   - cutoff: Highest FFT bin used for banding (clamped to `1...fftSize/2 - 1`).
    ///   - eqBoostFactor: Extra gain applied progressively to higher bands; band `i` is
    ///     multiplied by `1 + (i / pointsCount) * eqBoostFactor`.
    ///   - extractSmartDJ: When `true`, also computes silence boundaries and loudness.
    ///   - progress: Optional callback invoked every 50 frames and once with `1.0` at the end.
    /// - Returns: The visualiser frames plus (optionally) the SmartDJ profile values.
    /// - Throws: Any error from opening or reading the file, or an `NSError` in domain
    ///   `"OfflineAnalyzer"` (code 1: buffer allocation failed, code 2: FFT setup failed,
    ///   code 3: invalid `fps` / `pointsCount`).
    func analyzeWithSmartDJ(
        url: URL,
        pointsCount: Int = 20,
        fps: Double = 30.0,
        cutoff: Int = 90,
        eqBoostFactor: Float = 3.5,
        extractSmartDJ: Bool = true,
        progress: ProgressCallback? = nil
    ) throws -> CombinedResult {
        let file = try AVAudioFile(forReading: url)
        let format = file.processingFormat
        let sampleRate = format.sampleRate
        let totalFrames = file.length
        let durationSec = Double(totalFrames) / sampleRate

        // Audio frames per visualiser frame. Kept fractional so that frame start
        // positions do not drift when sampleRate / fps is not a whole number.
        let hop = sampleRate / fps
        guard pointsCount >= 0,
              fps.isFinite, fps > 0,
              hop.isFinite, hop >= 1,
              hop < Double(AVAudioFrameCount.max)
        else {
            // These inputs previously trapped (invalid integer conversion, negative
            // array count) before any frame was produced.
            throw Self.makeError(code: 3, message: "Invalid analysis parameters")
        }

        let fftSize = 1024
        let halfSize = fftSize / 2
        let log2n = vDSP_Length(log2(Double(fftSize)))

        // One read must cover both a full FFT window and a full hop.
        let bufferCapacity = AVAudioFrameCount(max(fftSize, Int(hop.rounded(.up))))
        guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: bufferCapacity) else {
            throw Self.makeError(code: 1, message: "Failed to create buffer")
        }

        guard let fftSetup = vDSP_create_fftsetup(log2n, FFTRadix(kFFTRadix2)) else {
            throw Self.makeError(code: 2, message: "Failed to create FFT setup")
        }
        defer { vDSP_destroy_fftsetup(fftSetup) }

        var visualizerData: [[Float]] = []
        visualizerData.reserveCapacity(Int(Double(totalFrames) / hop) + 1)

        var window = [Float](repeating: 0, count: fftSize)
        vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))

        // SmartDJ state
        let silenceThreshold: Float = 0.008          // RMS below this = silence
        var leadingSilenceEndSec: Double? = nil      // first non-silent moment
        var trailingSilenceStartSec: Double? = nil   // last non-silent moment
        var sumSquares: Double = 0.0
        var sampleCount: Int64 = 0

        var frameIndex = 0
        var chunkStart: AVAudioFramePosition = 0

        while chunkStart < totalFrames {
            let nextChunkStart = AVAudioFramePosition((Double(frameIndex + 1) * hop).rounded())
            let hopFrames = nextChunkStart - chunkStart

            // Clamp in 64-bit space before narrowing: converting the remaining frame
            // count straight to AVAudioFrameCount (UInt32) traps on very long files.
            let framesToRead = AVAudioFrameCount(
                min(max(AVAudioFramePosition(fftSize), hopFrames), totalFrames - chunkStart)
            )

            // When the hop is shorter than the FFT window, consecutive windows overlap
            // and the previous read has moved past the next window's start.
            if file.framePosition != chunkStart {
                file.framePosition = chunkStart
            }
            buffer.frameLength = 0
            try file.read(into: buffer, frameCount: framesToRead)

            let actualFrames = Int(buffer.frameLength)
            guard actualFrames > 0, let channelData = buffer.floatChannelData?[0] else { break }

            // ── SmartDJ: level statistics over this frame's own (non-overlapping) hop ──
            if extractSmartDJ {
                let statFrames = min(actualFrames, Int(hopFrames))
                var meanSquare: Float = 0
                vDSP_measqv(channelData, 1, &meanSquare, vDSP_Length(statFrames))

                if meanSquare.squareRoot() > silenceThreshold {
                    if leadingSilenceEndSec == nil {
                        leadingSilenceEndSec = Double(chunkStart) / sampleRate
                    }
                    trailingSilenceStartSec = Double(chunkStart + AVAudioFramePosition(statFrames)) / sampleRate
                }

                // Accumulate for integrated loudness.
                sumSquares += Double(meanSquare) * Double(statFrames)
                sampleCount += Int64(statFrames)
            }

            // ── Visualiser FFT frame (zero-padded when fewer than fftSize samples remain) ──
            visualizerData.append(processFFTFrame(
                channelData: channelData,
                frameCount: min(actualFrames, fftSize),
                fftSize: fftSize,
                halfSize: halfSize,
                window: window,
                fftSetup: fftSetup,
                log2n: log2n,
                pointsCount: pointsCount,
                cutoff: cutoff,
                eqBoostFactor: eqBoostFactor
            ))

            frameIndex += 1
            if frameIndex % 50 == 0 {
                progress?(Float(min(nextChunkStart, totalFrames)) / Float(totalFrames))
            }
            chunkStart = nextChunkStart
        }

        progress?(1.0)

        // ── Compute approximate integrated LUFS ──────────────────────────────
        // Uses mean square → dBFS as a simplified approximation of BS.1770.
        // Not true K-weighted LUFS but accurate enough for volume normalisation.
        var loudnessLUFS: Double? = nil
        if extractSmartDJ && sampleCount > 0 {
            let meanSquare = sumSquares / Double(sampleCount)
            if meanSquare > 0 {
                loudnessLUFS = 20.0 * log10(sqrt(meanSquare))
            }
        }

        // Guard silence detections: must be within plausible range.
        // Leading silence must end after 50 ms and within the first quarter of the file.
        var safeLeading: Double? = nil
        if let t = leadingSilenceEndSec, t > 0.05, t < durationSec * 0.25 {
            safeLeading = t
        }
        // Trailing silence must start in the second half and at least 0.5 s before the end.
        var safeTrailing: Double? = nil
        if let t = trailingSilenceStartSec, t < durationSec - 0.5, t > durationSec * 0.5 {
            safeTrailing = t
        }

        return CombinedResult(
            visFrames: visualizerData,
            silenceEnd: safeLeading,
            silenceStart: safeTrailing,
            loudnessLUFS: loudnessLUFS
        )
    }

    /// Builds the `NSError` values thrown by the analyser.
    private static func makeError(code: Int, message: String) -> NSError {
        NSError(domain: "OfflineAnalyzer", code: code, userInfo: [NSLocalizedDescriptionKey: message])
    }

    /// Process a single FFT frame from raw audio samples.
    ///
    /// - Parameters:
    ///   - channelData: Pointer to at least `frameCount` samples.
    ///   - frameCount: Number of valid samples; anything short of `fftSize` is zero-padded.
    ///   - fftSize: FFT length (must match `fftSetup`, `log2n` and `window.count`).
    ///   - halfSize: `fftSize / 2`, the number of magnitude bins produced.
    ///   - window: Window coefficients of length `fftSize`.
    ///   - fftSetup: vDSP setup created for `log2n`.
    ///   - log2n: Base-2 logarithm of `fftSize`.
    ///   - pointsCount: Number of output bands (non-negative).
    ///   - cutoff: Highest FFT bin used for banding.
    ///   - eqBoostFactor: Progressive gain applied to higher bands.
    /// - Returns: `pointsCount` band magnitudes.
    private func processFFTFrame(
        channelData: UnsafePointer<Float>,
        frameCount: Int,
        fftSize: Int,
        halfSize: Int,
        window: [Float],
        fftSetup: FFTSetup,
        log2n: vDSP_Length,
        pointsCount: Int,
        cutoff: Int,
        eqBoostFactor: Float
    ) -> [Float] {
        let validSamples = max(0, min(frameCount, fftSize))

        // 1. Apply the window; samples past `validSamples` stay zero (zero-padding).
        var windowed = [Float](repeating: 0, count: fftSize)
        if validSamples > 0 {
            vDSP_vmul(channelData, 1, window, 1, &windowed, 1, vDSP_Length(validSamples))
        }

        // 2. FFT → squared magnitudes, 3. normalise by fftSize².
        var realp = [Float](repeating: 0, count: halfSize)
        var imagp = [Float](repeating: 0, count: halfSize)
        var magnitudes = [Float](repeating: 0, count: halfSize)

        realp.withUnsafeMutableBufferPointer { realBuf in
            imagp.withUnsafeMutableBufferPointer { imagBuf in
                magnitudes.withUnsafeMutableBufferPointer { magBuf in
                    guard let realBase = realBuf.baseAddress,
                          let imagBase = imagBuf.baseAddress,
                          let magBase = magBuf.baseAddress
                    else { return }

                    var split = DSPSplitComplex(realp: realBase, imagp: imagBase)

                    // View the interleaved samples as complex pairs only for the duration
                    // of the call instead of permanently rebinding the array's memory.
                    windowed.withUnsafeBufferPointer { samples in
                        guard let base = samples.baseAddress else { return }
                        base.withMemoryRebound(to: DSPComplex.self, capacity: halfSize) { pairs in
                            vDSP_ctoz(pairs, 2, &split, 1, vDSP_Length(halfSize))
                        }
                    }

                    vDSP_fft_zrip(fftSetup, &split, 1, log2n, FFTDirection(FFT_FORWARD))
                    vDSP_zvmags(&split, 1, magBase, 1, vDSP_Length(halfSize))

                    // Scale in place through a single pointer rather than passing the
                    // same array as both an input and an inout argument.
                    var scale: Float = 1.0 / (Float(fftSize) * Float(fftSize))
                    vDSP_vsmul(magBase, 1, &scale, magBase, 1, vDSP_Length(halfSize))
                }
            }
        }

        // sqrt for perceptual amplitude
        for bin in magnitudes.indices {
            magnitudes[bin] = magnitudes[bin].squareRoot()
        }

        // 4. Logarithmic binning with EQ boost
        var framePoints = [Float](repeating: 0, count: pointsCount)
        // Never below bin 1: bin 0 is skipped by the banding below, and a non-positive
        // cutoff would otherwise yield an empty (trapping) range.
        let maxUsefulBin = max(1, min(halfSize - 1, cutoff))

        for i in 0..<pointsCount {
            let normalizedIndex = Float(i + 1) / Float(pointsCount)
            let logIndex = log10(normalizedIndex * 9.0 + 1.0)
            let centerBin = logIndex * Float(maxUsefulBin)
            let binWidth = max(1.0, Float(maxUsefulBin) / Float(pointsCount) * logIndex)

            let startBin = max(1, Int(centerBin - binWidth / 2))
            // Keep the range non-empty: for a small cutoff or many bands the upper
            // edge can truncate below startBin, and `startBin...endBin` would trap.
            let endBin = max(startBin, min(maxUsefulBin, Int(centerBin + binWidth / 2)))

            var sum: Float = 0
            var countInBand = 0
            for j in startBin...endBin where j < magnitudes.count {
                sum += magnitudes[j]
                countInBand += 1
            }

            let average = countInBand > 0 ? (sum / Float(countInBand)) : 0
            let eqBoost: Float = 1.0 + (Float(i) / Float(pointsCount)) * eqBoostFactor
            framePoints[i] = average * eqBoost
        }

        return framePoints
    }
}
|