// NavidromeApp/iOS/Views/Visualizer/OfflineAudioAnalyzer.swift

import Foundation
import AVFoundation
import Accelerate

/// Processes an entire audio file faster than real-time, producing per-frame FFT data
/// that can be cached and played back in sync with the audio.
/// Also optionally extracts SmartDJ profile data (silence boundaries + LUFS) in the same pass.
actor OfflineAudioAnalyzer {
    static let shared = OfflineAudioAnalyzer()

    /// Receives analysis progress as a fraction in `0...1`.
    typealias ProgressCallback = @Sendable (Float) -> Void

    /// Error domain used for every `NSError` thrown by the analyzer itself.
    private static let errorDomain = "OfflineAnalyzer"

    /// Builds an `NSError` in the analyzer's domain with a localized description.
    private static func makeError(code: Int, _ message: String) -> NSError {
        NSError(domain: errorDomain, code: code, userInfo: [NSLocalizedDescriptionKey: message])
    }

    // MARK: - Combined Analysis Result

    /// Output of a single analysis pass over one audio file.
    struct CombinedResult {
        /// One array of `pointsCount` band levels per visualiser frame, in playback order.
        let visFrames: [[Float]]
        /// Leading silence end in seconds, or `nil` when none was detected / plausible.
        let silenceEnd: Double?
        /// Trailing silence start in seconds, or `nil` when none was detected / plausible.
        let silenceStart: Double?
        /// Approximate integrated loudness (mean-square level in dB), or `nil` when not computed.
        let loudnessLUFS: Double?
    }

    // MARK: - Visualizer-only (legacy entry point)

    /// Produces only the visualiser frames for `url`, skipping the SmartDJ computation.
    ///
    /// - Parameters:
    ///   - url: Audio file to analyze.
    ///   - pointsCount: Number of band levels per visualiser frame.
    ///   - fps: Visualiser frames per second of audio. Must be finite and positive.
    ///   - cutoff: Highest FFT bin index used for the bands.
    ///   - eqBoostFactor: Extra gain applied progressively towards the higher bands.
    ///   - progress: Optional callback receiving a fraction in `0...1`.
    /// - Returns: One array of `pointsCount` levels per visualiser frame.
    /// - Throws: Errors from opening/reading the file, or an `NSError` in the
    ///   `"OfflineAnalyzer"` domain for setup failures and invalid `fps`.
    func analyze(
        url: URL,
        pointsCount: Int = 20,
        fps: Double = 30.0,
        cutoff: Int = 90,
        eqBoostFactor: Float = 3.5,
        progress: ProgressCallback? = nil
    ) throws -> [[Float]] {
        try analyzeWithSmartDJ(url: url, pointsCount: pointsCount, fps: fps,
                               cutoff: cutoff, eqBoostFactor: eqBoostFactor,
                               extractSmartDJ: false, progress: progress).visFrames
    }

    // MARK: - Combined pass: vis frames + SmartDJ profile in one file read

    /// Reads the file once, producing visualiser frames AND silence/loudness data.
    /// Set `extractSmartDJ: false` to skip the SmartDJ computation and save time.
    ///
    /// Only the first channel of the file's processing format is analyzed.
    /// One visualiser frame is emitted every `sampleRate / fps` samples, including
    /// when that hop is shorter than the 1024-sample FFT window (windows then overlap).
    ///
    /// - Parameters:
    ///   - url: Audio file to analyze.
    ///   - pointsCount: Number of band levels per visualiser frame.
    ///   - fps: Visualiser frames per second of audio. Must be finite and positive.
    ///   - cutoff: Highest FFT bin index used for the bands.
    ///   - eqBoostFactor: Extra gain applied progressively towards the higher bands.
    ///   - extractSmartDJ: When `true`, also computes silence boundaries and loudness.
    ///   - progress: Optional callback receiving a fraction in `0...1`.
    /// - Returns: The visualiser frames plus the optional SmartDJ measurements.
    /// - Throws: Errors from opening/reading the file, or an `NSError` in the
    ///   `"OfflineAnalyzer"` domain (1: buffer creation, 2: FFT setup, 3: invalid `fps`).
    func analyzeWithSmartDJ(
        url: URL,
        pointsCount: Int = 20,
        fps: Double = 30.0,
        cutoff: Int = 90,
        eqBoostFactor: Float = 3.5,
        extractSmartDJ: Bool = true,
        progress: ProgressCallback? = nil
    ) throws -> CombinedResult {

        // A zero, negative or non-finite rate cannot be turned into a frame hop.
        guard fps.isFinite, fps > 0 else {
            throw Self.makeError(code: 3, "Invalid fps")
        }

        let file = try AVAudioFile(forReading: url)
        let format = file.processingFormat
        let sampleRate = format.sampleRate
        let totalFrames = file.length
        let durationSec = Double(totalFrames) / sampleRate

        let fftSize = 1024
        let halfSize = fftSize / 2

        // Audio samples between consecutive visualiser frames. Clamped to at least 1 so the
        // frame loops always advance, and to the AVAudioFrameCount range so the conversion
        // cannot trap for extreme fps values.
        let rawHop = (sampleRate / fps).rounded(.down)
        let hopFrames = AVAudioFrameCount(min(max(rawHop, 1), Double(AVAudioFrameCount.max)))
        let hop = Int(hopFrames)
        let bufferSize = max(AVAudioFrameCount(fftSize), hopFrames)

        guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: bufferSize) else {
            throw Self.makeError(code: 1, "Failed to create buffer")
        }

        let log2n = vDSP_Length(log2(Double(fftSize)))
        guard let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2)) else {
            throw Self.makeError(code: 2, "Failed to create FFT setup")
        }
        defer { vDSP_destroy_fftsetup(fftSetup) }

        var visualizerData: [[Float]] = []
        visualizerData.reserveCapacity(Int(max(0, totalFrames) / AVAudioFramePosition(hop)) + 1)

        var window = [Float](repeating: 0, count: fftSize)
        vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))

        // SmartDJ state
        let silenceThreshold: Float = 0.008  // RMS below this = silence
        var leadingSilenceEndSec: Double? = nil     // first non-silent moment
        var trailingSilenceStartSec: Double? = nil  // last non-silent moment
        var sumSquares: Double = 0.0
        var sampleCount: Int64 = 0

        // Samples read but not yet fully consumed by the frame loop. Carrying them across
        // chunk reads lets frames start every `hop` samples even when hop < fftSize.
        var pending: [Float] = []
        pending.reserveCapacity(Int(bufferSize) + fftSize)
        var cursor = 0       // index in `pending` where the next frame starts
        var frameIndex = 0

        while file.framePosition < totalFrames {
            let chunkStartFrame = file.framePosition
            // Compare in 64-bit first: the remaining length may exceed AVAudioFrameCount.
            let framesToRead = AVAudioFrameCount(
                min(AVAudioFramePosition(bufferSize), totalFrames - chunkStartFrame))
            buffer.frameLength = 0
            try file.read(into: buffer, frameCount: framesToRead)

            guard let channelData = buffer.floatChannelData?[0] else { continue }
            let actualFrames = Int(buffer.frameLength)
            // An empty read means no further progress is possible.
            guard actualFrames > 0 else { break }

            // ── SmartDJ: level per chunk ────────────────────────────────────
            if extractSmartDJ {
                // Mean square serves both the RMS silence test and the loudness sum.
                var meanSquare: Float = 0
                vDSP_measqv(channelData, 1, &meanSquare, vDSP_Length(actualFrames))
                if meanSquare.squareRoot() > silenceThreshold {
                    if leadingSilenceEndSec == nil {
                        leadingSilenceEndSec = Double(chunkStartFrame) / sampleRate
                    }
                    trailingSilenceStartSec =
                        Double(chunkStartFrame + AVAudioFramePosition(actualFrames)) / sampleRate
                }
                sumSquares += Double(meanSquare) * Double(actualFrames)
                sampleCount += Int64(actualFrames)
            }

            // ── Visualiser FFT frames ────────────────────────────────────────
            pending.append(contentsOf: UnsafeBufferPointer(start: UnsafePointer(channelData),
                                                           count: actualFrames))
            pending.withUnsafeBufferPointer { samples in
                guard let base = samples.baseAddress else { return }
                while cursor + fftSize <= samples.count {
                    visualizerData.append(processFFTFrame(
                        channelData: base + cursor,
                        frameCount: fftSize, fftSize: fftSize, halfSize: halfSize,
                        window: window, fftSetup: fftSetup, pointsCount: pointsCount,
                        cutoff: cutoff, eqBoostFactor: eqBoostFactor))
                    cursor += hop
                    frameIndex += 1
                    if frameIndex % 50 == 0 {
                        progress?(Float(file.framePosition) / Float(totalFrames))
                    }
                }
            }

            // Drop samples no later frame can touch. `cursor` may point past the end when
            // hop > fftSize; the remainder is then skipped from the next chunk.
            let consumed = min(cursor, pending.count)
            pending.removeFirst(consumed)
            cursor -= consumed
        }

        // Tail: fewer than fftSize samples remain, so emit zero-padded frames.
        pending.withUnsafeBufferPointer { samples in
            guard let base = samples.baseAddress else { return }
            while cursor < samples.count {
                visualizerData.append(processFFTFrame(
                    channelData: base + cursor, frameCount: samples.count - cursor,
                    fftSize: fftSize, halfSize: halfSize, window: window,
                    fftSetup: fftSetup, pointsCount: pointsCount,
                    cutoff: cutoff, eqBoostFactor: eqBoostFactor))
                cursor += hop
            }
        }

        progress?(1.0)

        // ── Compute approximate integrated LUFS ──────────────────────────────
        // Uses mean square → dBFS as a simplified approximation of BS.1770.
        // Not true K-weighted LUFS but accurate enough for volume normalisation.
        var loudnessLUFS: Double? = nil
        if extractSmartDJ && sampleCount > 0 {
            let meanSquare = sumSquares / Double(sampleCount)
            if meanSquare > 0 {
                // 10·log10(ms) is the same value as 20·log10(sqrt(ms)).
                loudnessLUFS = 10.0 * log10(meanSquare)
            }
        }

        // Guard silence detections: must be within plausible range
        let safeLeading: Double? = {
            guard let t = leadingSilenceEndSec, t > 0.05, t < durationSec * 0.25 else { return nil }
            return t
        }()
        let safeTrailing: Double? = {
            guard let t = trailingSilenceStartSec, t < durationSec - 0.5, t > durationSec * 0.5 else { return nil }
            return t
        }()

        return CombinedResult(
            visFrames: visualizerData,
            silenceEnd: safeLeading,
            silenceStart: safeTrailing,
            loudnessLUFS: loudnessLUFS
        )
    }

    /// Process a single FFT frame from raw audio samples.
    ///
    /// - Parameters:
    ///   - channelData: Pointer to at least `min(frameCount, fftSize)` readable samples.
    ///   - frameCount: Number of valid samples; shorter frames are zero-padded to `fftSize`.
    ///   - fftSize: FFT length; must match `window.count` and the size `fftSetup` was built for.
    ///   - halfSize: `fftSize / 2`, the number of magnitude bins produced.
    ///   - window: Window coefficients multiplied into the samples before the FFT.
    ///   - fftSetup: vDSP FFT setup created by the caller.
    ///   - pointsCount: Number of band levels to return.
    ///   - cutoff: Highest FFT bin index used for the bands.
    ///   - eqBoostFactor: Extra gain applied progressively towards the higher bands.
    /// - Returns: `pointsCount` band levels (empty when `pointsCount <= 0`).
    private func processFFTFrame(
        channelData: UnsafePointer<Float>,
        frameCount: Int,
        fftSize: Int,
        halfSize: Int,
        window: [Float],
        fftSetup: FFTSetup,
        pointsCount: Int,
        cutoff: Int,
        eqBoostFactor: Float
    ) -> [Float] {
        let n = max(0, min(frameCount, fftSize))

        // 1. Apply the window to the valid samples; the rest stays zero (zero-padding).
        var windowed = [Float](repeating: 0, count: fftSize)
        if n > 0 {
            vDSP_vmul(channelData, 1, window, 1, &windowed, 1, vDSP_Length(n))
        }

        // 2. FFT
        var realp = [Float](repeating: 0, count: halfSize)
        var imagp = [Float](repeating: 0, count: halfSize)
        var magnitudes = [Float](repeating: 0, count: halfSize)

        realp.withUnsafeMutableBufferPointer { realBuf in
            imagp.withUnsafeMutableBufferPointer { imagBuf in
                guard let realBase = realBuf.baseAddress, let imagBase = imagBuf.baseAddress else { return }
                var splitComplex = DSPSplitComplex(realp: realBase, imagp: imagBase)

                // Reinterpret the real samples as interleaved pairs and split them into the
                // even/odd layout that the in-place real FFT expects.
                windowed.withUnsafeBytes { raw in
                    guard let interleaved = raw.bindMemory(to: DSPComplex.self).baseAddress else { return }
                    vDSP_ctoz(interleaved, 2, &splitComplex, 1, vDSP_Length(halfSize))
                }

                vDSP_fft_zrip(fftSetup, &splitComplex, 1, vDSP_Length(log2(Double(fftSize))), FFTDirection(FFT_FORWARD))
                vDSP_zvmags(&splitComplex, 1, &magnitudes, 1, vDSP_Length(halfSize))
            }
        }

        // 3. Normalize the squared magnitudes, then take sqrt for perceptual amplitude.
        let fftSizeF = Float(fftSize)
        var scale: Float = 1.0 / (fftSizeF * fftSizeF)
        magnitudes.withUnsafeMutableBufferPointer { mags in
            guard let base = mags.baseAddress else { return }
            vDSP_vsmul(base, 1, &scale, base, 1, vDSP_Length(mags.count))
            for i in mags.indices {
                mags[i] = mags[i].squareRoot()
            }
        }

        // 4. Logarithmic binning with EQ boost
        return OfflineAudioAnalyzer.logBandLevels(
            magnitudes: magnitudes, pointsCount: pointsCount,
            cutoff: cutoff, eqBoostFactor: eqBoostFactor)
    }

    /// Collapses FFT magnitudes into `pointsCount` logarithmically spaced band levels.
    ///
    /// Band centres follow `log10(1 + 9·(i+1)/pointsCount)` scaled to the highest usable
    /// bin. Each band is the average of the bins it covers (bin 0 is never used),
    /// multiplied by `1 + (i / pointsCount) · eqBoostFactor`.
    ///
    /// - Parameters:
    ///   - magnitudes: Amplitude per FFT bin.
    ///   - pointsCount: Number of bands to produce.
    ///   - cutoff: Highest bin index to use; clamped to `magnitudes.count - 1`.
    ///   - eqBoostFactor: Extra gain applied progressively towards the higher bands.
    /// - Returns: `pointsCount` levels; empty when `pointsCount <= 0`, all zeros when no bin
    ///   at index 1 or above is usable.
    static func logBandLevels(
        magnitudes: [Float],
        pointsCount: Int,
        cutoff: Int,
        eqBoostFactor: Float
    ) -> [Float] {
        guard pointsCount > 0 else { return [] }

        var bands = [Float](repeating: 0, count: pointsCount)
        let maxUsefulBin = min(magnitudes.count - 1, cutoff)
        guard maxUsefulBin >= 1 else { return bands }

        for i in 0..<pointsCount {
            let normalizedIndex = Float(i + 1) / Float(pointsCount)
            let logIndex = log10(normalizedIndex * 9.0 + 1.0)
            let centerBin = logIndex * Float(maxUsefulBin)
            let binWidth = max(1.0, Float(maxUsefulBin) / Float(pointsCount) * logIndex)

            let startBin = min(maxUsefulBin, max(1, Int(centerBin - binWidth / 2)))
            // A band narrower than one bin (small cutoff or many points) would otherwise end
            // before it starts and trap when forming the range; sample its nearest bin instead.
            let endBin = max(startBin, min(maxUsefulBin, Int(centerBin + binWidth / 2)))

            let sum = magnitudes[startBin...endBin].reduce(0, +)
            let average = sum / Float(endBin - startBin + 1)
            let eqBoost: Float = 1.0 + (Float(i) / Float(pointsCount)) * eqBoostFactor
            bands[i] = average * eqBoost
        }

        return bands
    }
}