import Foundation
import AVFoundation
import Accelerate

/// Processes an entire audio file faster than real-time, producing per-frame level data
/// that can be cached and played back in sync with the audio.
/// Also optionally extracts SmartDJ profile data (silence boundaries + LUFS) in the same pass.
actor OfflineAudioAnalyzer {
    static let shared = OfflineAudioAnalyzer()

    typealias ProgressCallback = @Sendable (Float) -> Void

    // MARK: - Combined Analysis Result

    /// Everything a single analysis pass over one file produces.
    struct CombinedResult {
        /// Per-frame visualizer levels, one inner array per vis frame.
        let visFrames: [[Float]]
        /// Leading silence end in seconds.
        let silenceEnd: Double?
        /// Trailing silence start in seconds.
        let silenceStart: Double?
        /// Approximate integrated loudness.
        let loudnessLUFS: Double?
    }

    // MARK: - Visualizer-only (legacy entry point)

    /// Analyzes the file at `url` and returns only the visualizer frames.
    ///
    /// Thin wrapper over `analyzeWithSmartDJ` that passes `extractSmartDJ: false`
    /// and discards every field of the combined result except `visFrames`.
    ///
    /// - Parameters:
    ///   - url: Audio file to analyze.
    ///   - pointsCount: Forwarded unchanged to `analyzeWithSmartDJ`.
    ///   - fps: Forwarded unchanged to `analyzeWithSmartDJ`.
    ///   - cutoff: Forwarded unchanged to `analyzeWithSmartDJ`.
    ///   - progress: Optional progress callback, forwarded unchanged.
    /// - Returns: The `visFrames` of the combined result.
    /// - Throws: Whatever `analyzeWithSmartDJ` throws.
    func analyze(
        url: URL,
        pointsCount: Int = 20,
        fps: Double = 30.0,
        cutoff: Int = 90,
        progress: ProgressCallback? = nil
    ) throws -> [[Float]] {
        try analyzeWithSmartDJ(
            url: url,
            pointsCount: pointsCount,
            fps: fps,
            cutoff: cutoff,
            extractSmartDJ: false,
            progress: progress
        ).visFrames
    }

    // MARK: - Combined pass: vis frames + SmartDJ profile in one file read

    func analyzeWithSmartDJ(
        url: URL,
        pointsCount: Int = 20,
        fps: Double = 30.0,
        cutoff: Int = 90,
        extractSmartDJ: Bool = true,
        progress: ProgressCallback?
= nil ) throws -> CombinedResult { let file = try AVAudioFile(forReading: url) let format = file.processingFormat let sampleRate = format.sampleRate let totalFrames = file.length let durationSec = Double(totalFrames) / sampleRate // FFT parameters — always 1024 regardless of fps let fftSize = 1024 let halfSize = fftSize / 2 let log2n = vDSP_Length(log2(Double(fftSize))) guard let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2)) else { throw NSError(domain: "OfflineAnalyzer", code: 2, userInfo: [NSLocalizedDescriptionKey: "Failed to create FFT setup"]) } defer { vDSP_destroy_fftsetup(fftSetup) } // Hann window var window = [Float](repeating: 0, count: fftSize) vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM)) // How many AUDIO samples between each vis frame. // This is independent of fftSize — the FFT window always uses fftSize // samples but hops forward by hopSize each frame, giving proper overlap // when fps is high (hopSize < fftSize) without skipping samples when fps // is low (hopSize > fftSize). let hopSize = Int(max(1, sampleRate / fps)) // Read in chunks large enough to hold at least one full FFT window. // Using 4× hopSize so we get several vis frames per disk read. 
let readChunkSamples = max(fftSize * 2, hopSize * 4) guard let readBuffer = AVAudioPCMBuffer( pcmFormat: format, frameCapacity: AVAudioFrameCount(readChunkSamples) ) else { throw NSError(domain: "OfflineAnalyzer", code: 1, userInfo: [NSLocalizedDescriptionKey: "Failed to create buffer"]) } // Ring buffer: always holds the last `fftSize` samples plus one chunk ahead let ringCapacity = readChunkSamples + fftSize var ring = [Float](repeating: 0, count: ringCapacity) var ringHead = 0 // next write position (mod ringCapacity) var totalSamplesInRing = 0 // total samples ever written to ring let estimatedFrames = Int(durationSec * fps) + 1 var rawFrames: [[Float]] = [] rawFrames.reserveCapacity(estimatedFrames) // SmartDJ accumulators let silenceThreshold: Float = 0.008 var leadingSilenceEndSec: Double? = nil var trailingSilenceStartSec: Double? = nil var sumSquares: Double = 0 var sampleCountLUFS: Int64 = 0 // Sliding window state var nextFrameSample = 0 // the audio sample index at which to take the next vis frame while file.framePosition < totalFrames { // Cooperatively cancel if the app backgrounded mid-analysis try Task.checkCancellation() let toRead = min(AVAudioFrameCount(readChunkSamples), AVAudioFrameCount(totalFrames - file.framePosition)) readBuffer.frameLength = 0 try file.read(into: readBuffer, frameCount: toRead) let chunkStart = Int(file.framePosition) - Int(readBuffer.frameLength) let chunkLen = Int(readBuffer.frameLength) guard chunkLen > 0, let ch = readBuffer.floatChannelData?[0] else { continue } // Write chunk into ring buffer for i in 0.. 
silenceThreshold { if leadingSilenceEndSec == nil { leadingSilenceEndSec = chunkSec } trailingSilenceStartSec = chunkSec + Double(chunkLen) / sampleRate } var sumSq: Float = 0 vDSP_measqv(ch, 1, &sumSq, vDSP_Length(chunkLen)) sumSquares += Double(sumSq) * Double(chunkLen) sampleCountLUFS += Int64(chunkLen) } // Generate vis frames for all frame positions inside this chunk let chunkEnd = chunkStart + chunkLen while nextFrameSample < chunkEnd { // Check cancellation every frame — the inner loop is the hot path try Task.checkCancellation() // We need fftSize samples ending at nextFrameSample + fftSize/2 // (centre the FFT window on the frame position for better transient response) let windowStart = nextFrameSample - fftSize / 2 let windowEnd = windowStart + fftSize // Skip if we don't have enough samples yet guard windowEnd <= Int(file.framePosition) else { break } guard windowStart >= 0 else { nextFrameSample += hopSize continue } // Extract fftSize samples from ring buffer // The ring buffer contains samples [totalSamplesInRing-ringCapacity ... totalSamplesInRing] // (clamped to what we've written so far) let ringTail = totalSamplesInRing - ringCapacity guard windowStart >= ringTail else { nextFrameSample += hopSize continue } var windowSamples = [Float](repeating: 0, count: fftSize) for j in 0.. 0 { let meanSquare = sumSquares / Double(sampleCountLUFS) if meanSquare > 0 { loudnessLUFS = 20.0 * log10(sqrt(meanSquare)) } } // ── Silence guard ──────────────────────────────────────────────────── let safeLeading: Double? = { guard let t = leadingSilenceEndSec, t > 0.05, t < durationSec * 0.25 else { return nil } return t }() let safeTrailing: Double? 
= { guard let t = trailingSilenceStartSec, t < durationSec - 0.5, t > durationSec * 0.5 else { return nil } return t }() return CombinedResult( visFrames: smoothed, silenceEnd: safeLeading, silenceStart: safeTrailing, loudnessLUFS: loudnessLUFS ) } // MARK: - FFT Frame private func computeFFTFrame( samples: [Float], fftSize: Int, halfSize: Int, window: [Float], fftSetup: FFTSetup, pointsCount: Int, cutoff: Int ) -> [Float] { // Apply Hann window var windowed = [Float](repeating: 0, count: fftSize) vDSP_vmul(samples, 1, window, 1, &windowed, 1, vDSP_Length(fftSize)) // FFT var realp = [Float](repeating: 0, count: halfSize) var imagp = [Float](repeating: 0, count: halfSize) var magnitudes = [Float](repeating: 0, count: halfSize) realp.withUnsafeMutableBufferPointer { rb in imagp.withUnsafeMutableBufferPointer { ib in var sc = DSPSplitComplex(realp: rb.baseAddress!, imagp: ib.baseAddress!) windowed.withUnsafeBytes { raw in vDSP_ctoz(raw.bindMemory(to: DSPComplex.self).baseAddress!, 2, &sc, 1, vDSP_Length(halfSize)) } let log2n = vDSP_Length(log2(Double(fftSize))) vDSP_fft_zrip(fftSetup, &sc, 1, log2n, FFTDirection(FFT_FORWARD)) vDSP_zvmags(&sc, 1, &magnitudes, 1, vDSP_Length(halfSize)) } } // Normalize: divide by N², then sqrt for perceptual amplitude let n2 = Float(fftSize) * Float(fftSize) var scale = 1.0 / n2 vDSP_vsmul(magnitudes, 1, &scale, &magnitudes, 1, vDSP_Length(halfSize)) for i in 0.. 0 ? sum / Float(count) : 0 } return frame } // MARK: - Post-processing /// Normalize all frames so the 95th-percentile peak maps to 0.8. /// This keeps loud transients visible without clipping, and ensures a quiet /// song fills the visualizer at the same apparent height as a loud one. private func normalizeFrames(_ frames: [[Float]]) -> [[Float]] { guard !frames.isEmpty else { return frames } // Collect all non-zero values to find the 95th percentile var allValues: [Float] = [] allValues.reserveCapacity(frames.count * (frames.first?.count ?? 
1)) for frame in frames { for v in frame where v > 0 { allValues.append(v) } } guard !allValues.isEmpty else { return frames } allValues.sort() let p95idx = min(Int(Float(allValues.count) * 0.95), allValues.count - 1) let p95 = allValues[p95idx] guard p95 > 0 else { return frames } let scale = 0.8 / p95 return frames.map { frame in frame.map { min(1.0, $0 * scale) } } } /// Bake temporal smoothing into the frames so pre-analyzed playback /// looks identical to the live FFT path (which smooths in updateDisplayLevels). private func smoothFrames(_ frames: [[Float]], viscosity: Float) -> [[Float]] { guard frames.count > 1 else { return frames } var result = frames var prev = frames[0] for i in 1..