More Fixes and Radio improvements

This commit is contained in:
Dallas Groot 2026-04-09 23:39:52 -07:00
parent 9bf94c90b7
commit b1101a6ea3
4 changed files with 357 additions and 242 deletions

View file

@ -305,7 +305,9 @@ class AudioPlayer: NSObject, ObservableObject {
#endif
}
/// Play a radio stream from a direct URL
/// Play a radio stream from a direct URL.
/// Automatically starts the stream buffer so timeshift is available immediately.
/// Destroys any existing buffer first to prevent audio bleed between stations.
func playRadio(song: Song, streamURL: URL) {
alog("Radio: \(song.title)\(streamURL.absoluteString)")
currentSong = song
@ -313,10 +315,12 @@ class AudioPlayer: NSObject, ObservableObject {
queueIndex = 0
isRadioStream = true
radioStreamURL = streamURL
#if os(iOS)
// Resolve playlist URLs (.pls, .m3u, .asx) to actual stream URLs, then play
// Buffering is NOT started automatically user taps LIVE to enable it
// Always destroy the previous buffer cleanly before starting a new station.
// Prevents stale bytes / state from the old station bleeding into the new one.
RadioStreamBuffer.shared.stopBuffering()
if RadioStreamBuffer.isPlaylistURL(streamURL) {
alog("Radio: resolving playlist URL...")
Task {
@ -324,10 +328,18 @@ class AudioPlayer: NSObject, ObservableObject {
await MainActor.run {
self.radioStreamURL = resolved
self.playWithAVPlayer(resolved)
// Auto-start buffer immediately after resolution (non-HLS only)
// isHLSStream is set inside startBuffering if the URL is M3U8
RadioStreamBuffer.shared.startBuffering(
url: resolved,
stationName: song.title
)
}
}
} else {
playWithAVPlayer(streamURL)
// Auto-start buffer — startBuffering will mark isHLSStream=true and bail if it's HLS
RadioStreamBuffer.shared.startBuffering(url: streamURL, stationName: song.title)
}
#else
playWithAVPlayer(streamURL)
@ -337,6 +349,19 @@ class AudioPlayer: NSObject, ObservableObject {
/// Whether current playback is a radio stream (for buffer/scrub behavior)
@Published var isRadioStream = false
private(set) var radioStreamURL: URL?
#if os(iOS)
/// Toggle recording of the current radio stream on or off.
///
/// Stopping an active recording is always allowed. Starting a recording is a
/// no-op for HLS streams or while the stream buffer is not active; that
/// contract is enforced here rather than left to `startRecording()`.
func toggleRecording() {
    let buffer = RadioStreamBuffer.shared
    if buffer.isRecording {
        // The value returned by stopRecording() is intentionally discarded.
        _ = buffer.stopRecording()
        return
    }
    // Honour the documented contract: nothing to record without an active,
    // non-HLS buffer.
    guard buffer.isBuffering, !buffer.isHLSStream else { return }
    buffer.startRecording()
}
#endif
#if os(iOS)
/// When true, AVPlayer is playing a snapshot file rather than the live stream

View file

@ -201,7 +201,6 @@ struct NowPlayingView: View {
@State private var showEllipsisMenu = false
@State private var isStarred = false
@State private var availablePlaylists: [Playlist] = []
@State private var isRecording = false
@State private var shazamResult: String?
@State private var shazamArtworkURL: URL?
@State private var shazamPreviewURL: URL?
@ -746,19 +745,24 @@ struct NowPlayingView: View {
}
// MARK: - Progress Bar
private var progressBar: some View {
VStack(spacing: 6) {
if isRadio {
// Radio always shows LIVE indicator, buffer bar when available
if isRecordedRadio {
// Feature 4: recorded radio uses the standard seek bar, not the buffer bar
normalProgressBar
} else if isLiveRadio {
radioProgressBar
} else {
// Normal song progress bar
normalProgressBar
}
}
.padding(.horizontal, isLandscape ? 20 : 30)
}
// Convenience flags shared by the progress bar and the transport controls.
/// True when the current item is radio content but the player is not on a live radio stream.
private var isRecordedRadio: Bool {
    guard isRadio else { return false }
    return !audioPlayer.isRadioStream
}
/// True when the current item is radio content and the player is on a live radio stream.
private var isLiveRadio: Bool {
    guard isRadio else { return false }
    return audioPlayer.isRadioStream
}
private var normalProgressBar: some View {
VStack(spacing: 6) {
@ -792,34 +796,38 @@ struct NowPlayingView: View {
private var radioProgressBar: some View {
VStack(spacing: 6) {
if radioBuffer.isBuffering {
if radioBuffer.isHLSStream {
// HLS: fully greyed, no scrubber thumb, no time labels
Capsule()
.fill(Color.white.opacity(0.08))
.frame(height: 4)
HStack {
Text("--:--")
.font(.system(size: 11, weight: .medium))
.foregroundColor(.gray.opacity(0.35))
Spacer()
Text("--:--")
.font(.system(size: 11))
.foregroundColor(.gray.opacity(0.35))
}
} else if radioBuffer.isBuffering {
GeometryReader { geo in
let bufferSec = radioBuffer.estimatedBufferSeconds
let maxSec = radioBuffer.maxBufferDuration
// fillPct: how much of the 20-min bar is filled
let fillPct = min(max(bufferSec / maxSec, 0), 1.0)
// Playhead position:
// - When live (not in snapshot): at the live edge = fillPct
// - When in snapshot: player.currentTime / maxSec
// - When dragging: use dragPosition
let playheadPct: CGFloat = {
if isDraggingSlider { return dragPosition }
if audioPlayer.isPlayingFromBuffer {
// playbackTime tracks audioPlayer.currentTime via @Published binding
return CGFloat(min(playbackTime / maxSec, fillPct))
}
return fillPct // live edge
return fillPct
}()
ZStack(alignment: .leading) {
// Track background
Capsule().fill(Color.white.opacity(0.1)).frame(height: 4)
// Buffer fill
Capsule().fill(accentPink.opacity(0.3))
.frame(width: geo.size.width * fillPct, height: 4)
// Playhead dot
Circle()
.fill(radioBuffer.isLive ? accentPink : Color.white)
.frame(width: isDraggingSlider ? 16 : 12,
@ -829,19 +837,15 @@ struct NowPlayingView: View {
}
.contentShape(Rectangle())
.gesture(
// minimumDistance: 8 prevents a simple tap from triggering a seek
DragGesture(minimumDistance: 8)
.onChanged { v in
isDraggingSlider = true
// Clamp drag to the filled (available) region only
let pct = min(max(v.location.x / geo.size.width, 0), fillPct)
dragPosition = pct
}
.onEnded { v in
isDraggingSlider = false
let pct = min(max(v.location.x / geo.size.width, 0), fillPct)
// Convert bar fraction buffer position in seconds
// (pct * maxSec gives position within the 20-min timeline)
let seekPos = pct * maxSec
audioPlayer.radioSeekBack(to: seekPos)
}
@ -849,10 +853,8 @@ struct NowPlayingView: View {
}
.frame(height: 24)
// Time labels
HStack {
if isDraggingSlider {
// Show how far behind live the drag position would be
let dragPosSec = dragPosition * radioBuffer.maxBufferDuration
let behindLive = max(0, radioBuffer.estimatedBufferSeconds - dragPosSec)
Text(behindLive < 1 ? "LIVE" : "-" + formatTime(behindLive))
@ -863,32 +865,26 @@ struct NowPlayingView: View {
.font(.system(size: 11, weight: .semibold))
.foregroundColor(accentPink)
} else {
// How far behind live we are = bufferSec - current snapshot position
let behindLive = max(0, radioBuffer.estimatedBufferSeconds - playbackTime)
Text("-" + formatTime(behindLive))
.font(.system(size: 11, weight: .medium))
.foregroundColor(.gray)
}
Spacer()
Text(formatTime(radioBuffer.bufferedDuration))
.font(.system(size: 11))
.foregroundColor(.gray)
}
} else {
// No buffer active greyed static bar
// Buffer not yet ready — greyed static bar (brief state on auto-start)
Capsule()
.fill(Color.white.opacity(0.1))
.frame(height: 4)
HStack {
Text("LIVE")
Text("Buffering...")
.font(.system(size: 11, weight: .medium))
.foregroundColor(.gray.opacity(0.5))
Spacer()
Text("Enable buffer to seek")
.font(.system(size: 11))
.foregroundColor(.gray.opacity(0.4))
}
}
}
@ -899,12 +895,19 @@ struct NowPlayingView: View {
private var transportControls: some View {
let iconSize: CGFloat = isLandscape ? 24 : 30
let playSize: CGFloat = isLandscape ? 34 : 42
let isRecordedRadio = isRadio && !audioPlayer.isRadioStream
let isLiveRadio = isRadio && audioPlayer.isRadioStream
let showTimeshiftControls = isLiveRadio && radioBuffer.isBuffering
// Dynamic disable states (Feature 2)
let bufferSec = radioBuffer.estimatedBufferSeconds
let currentPos: TimeInterval = audioPlayer.isPlayingFromBuffer ? playbackTime : bufferSec
let secondsBehindLive = bufferSec - currentPos
let canRewind30 = !radioBuffer.isHLSStream && bufferSec >= 30
let canForward15 = !radioBuffer.isHLSStream && secondsBehindLive >= 15
// Feature 4: recorded radio also gets timeshift transport (30s/15s), not prev/next
let showTimeshiftLeft = (isLiveRadio && radioBuffer.isBuffering) || isRecordedRadio
let showTimeshiftRight = showTimeshiftLeft
return VStack(spacing: 4) {
// LIVE indicator above transport only for live radio
if isLiveRadio {
liveIndicator.padding(.bottom, 4)
}
@ -912,18 +915,25 @@ struct NowPlayingView: View {
HStack(spacing: 0) {
Spacer()
if isRecordedRadio {
Button(action: { audioPlayer.seek(to: max(0, audioPlayer.currentTime - 5)) }) {
Image(systemName: "gobackward.5").font(.system(size: iconSize)).foregroundColor(.white)
}.frame(width: 60, height: 50)
} else if showTimeshiftControls {
// 30s rewind
Button(action: { audioPlayer.radioSkip(by: -30) }) {
Image(systemName: "gobackward.30").font(.system(size: iconSize)).foregroundColor(.white)
}.frame(width: 60, height: 50)
// Left button
if showTimeshiftLeft {
Button(action: {
if isRecordedRadio {
audioPlayer.seek(to: max(0, audioPlayer.currentTime - 30))
} else {
audioPlayer.radioSkip(by: -30)
}
}) {
Image(systemName: "gobackward.30")
.font(.system(size: iconSize))
.foregroundColor(canRewind30 || isRecordedRadio ? .white : .gray.opacity(0.35))
}
.frame(width: 60, height: 50)
.disabled(isLiveRadio && !canRewind30)
} else if !isLiveRadio {
Button(action: { audioPlayer.previous() }) {
Image(systemName: "backward.fill").font(.system(size: iconSize)).foregroundColor(.white)
Image(systemName: "backward.fill")
.font(.system(size: iconSize)).foregroundColor(.white)
}.frame(width: 60, height: 50)
} else {
Color.clear.frame(width: 60, height: 50)
@ -938,21 +948,25 @@ struct NowPlayingView: View {
Spacer()
if isRecordedRadio {
Button(action: { audioPlayer.seek(to: min(audioPlayer.duration, audioPlayer.currentTime + 5)) }) {
Image(systemName: "goforward.5").font(.system(size: iconSize)).foregroundColor(.white)
}.frame(width: 60, height: 50)
} else if showTimeshiftControls {
// 15s forward (capped at live edge)
Button(action: { audioPlayer.radioSkip(by: 15) }) {
Image(systemName: "goforward.15").font(.system(size: iconSize))
.foregroundColor(radioBuffer.isLive ? .gray.opacity(0.4) : .white)
// Right button
if showTimeshiftRight {
Button(action: {
if isRecordedRadio {
audioPlayer.seek(to: min(audioPlayer.duration, audioPlayer.currentTime + 15))
} else {
audioPlayer.radioSkip(by: 15)
}
}) {
Image(systemName: "goforward.15")
.font(.system(size: iconSize))
.foregroundColor(canForward15 || isRecordedRadio ? .white : .gray.opacity(0.35))
}
.frame(width: 60, height: 50)
.disabled(radioBuffer.isLive)
.disabled(isLiveRadio && !canForward15)
} else if !isLiveRadio {
Button(action: { audioPlayer.next() }) {
Image(systemName: "forward.fill").font(.system(size: iconSize)).foregroundColor(.white)
Image(systemName: "forward.fill")
.font(.system(size: iconSize)).foregroundColor(.white)
}.frame(width: 60, height: 50)
} else {
Color.clear.frame(width: 60, height: 50)
@ -963,68 +977,63 @@ struct NowPlayingView: View {
}
}
/// LIVE button four visual states:
/// hls (HLS stream) grey pill with slash icon, non-tappable
/// idle (no buffer) grey pill, tap to start 20-min buffer
/// buffering + at live edge solid red pill (content is live)
/// buffering + scrubbed back red outline with slow pulse glow, tap = go live
/// REC button — repurposed from LIVE now that buffering is automatic.
/// • HLS stream → greyed "REC" pill with slash icon, non-tappable
/// • Not buffering yet → greyed pill, disabled (brief transient while auto-buffer starts)
/// • Buffering, idle → grey "REC" pill, tap to start recording
/// • Recording active → solid red pill with pulsing glow, shows elapsed time, tap to stop
@State private var livePulse = false
private var liveIndicator: some View {
let isHLS = radioBuffer.isHLSStream
let buffering = radioBuffer.isBuffering
let atLive = radioBuffer.isLive
let recording = radioBuffer.isRecording
return Button(action: {
guard !isHLS else { return }
if buffering && !atLive {
audioPlayer.radioGoLive()
} else if !buffering {
if let url = audioPlayer.radioStreamURL {
RadioStreamBuffer.shared.startBuffering(
url: url,
stationName: audioPlayer.currentSong?.title ?? "Radio"
)
}
}
guard !isHLS, buffering else { return }
audioPlayer.toggleRecording()
}) {
HStack(spacing: 5) {
if isHLS {
Image(systemName: "slash.circle")
.font(.system(size: 9, weight: .bold))
.foregroundColor(.gray.opacity(0.5))
} else {
} else if recording {
Circle()
.fill(buffering ? Color.red : Color.gray.opacity(0.4))
.fill(Color.red)
.frame(width: 7, height: 7)
} else {
Image(systemName: "record.circle")
.font(.system(size: 9, weight: .bold))
.foregroundColor(buffering ? .white : .gray.opacity(0.4))
}
Text("LIVE")
Text(recording ? radioBuffer.recordingTimeFormatted : "REC")
.font(.system(size: 12, weight: .bold))
.foregroundColor(isHLS ? .gray.opacity(0.4) : buffering ? .white : .gray)
.foregroundColor(isHLS ? .gray.opacity(0.4) : recording ? .white : buffering ? .white.opacity(0.7) : .gray)
}
.padding(.horizontal, 14)
.padding(.vertical, 6)
.background(
RoundedRectangle(cornerRadius: 6)
.fill(isHLS
? Color.white.opacity(0.04)
: buffering && atLive ? Color.red.opacity(0.85) : Color.white.opacity(0.06)
.fill(recording
? Color.red.opacity(0.85)
: isHLS || !buffering
? Color.white.opacity(0.04)
: Color.white.opacity(0.06)
)
)
.overlay(
RoundedRectangle(cornerRadius: 6)
.stroke(
isHLS ? Color.white.opacity(0.1)
: buffering && !atLive ? Color.red.opacity(0.9) : Color.clear,
recording ? Color.red.opacity(0.9) : Color.clear,
lineWidth: 1.5
)
)
.shadow(
color: !isHLS && buffering && !atLive ? Color.red.opacity(livePulse ? 0.7 : 0.2) : .clear,
color: recording ? Color.red.opacity(livePulse ? 0.7 : 0.2) : .clear,
radius: livePulse ? 14 : 6
)
.animation(.easeInOut(duration: 1.6).repeatForever(autoreverses: true), value: livePulse)
// Tooltip below for HLS
.overlay(alignment: .bottom) {
if isHLS {
Text("HLS — no timeshift")
@ -1034,9 +1043,9 @@ struct NowPlayingView: View {
}
}
}
.disabled(isHLS)
.disabled(isHLS || !buffering)
.onAppear { livePulse = true }
.onChange(of: buffering) { _, val in livePulse = val }
.onChange(of: recording) { _, val in livePulse = val }
}
// MARK: - Bottom Controls

View file

@ -88,7 +88,7 @@ class VisualizerSettings: ObservableObject {
// Advanced
viscosity = { let v = d.double(forKey: "vis_viscosity"); return v > 0 ? v : 0.25 }()
frequencyCutoff = { let v = d.integer(forKey: "vis_freq_cutoff"); return v > 0 ? v : 80 }()
baseMultiplier = { let v = d.double(forKey: "vis_base_mult"); return v > 0 ? v : 40.0 }()
baseMultiplier = { let v = d.double(forKey: "vis_base_mult"); return v > 0 ? v : 25.0 }()
depthOffset = { let v = d.double(forKey: "vis_depth_offset"); return v > 0 ? v : 15.0 }()
depthOpacity = { let v = d.double(forKey: "vis_depth_opacity"); return v > 0 ? v : 0.2 }()
idleAmplitude = { let v = d.double(forKey: "vis_idle_amp"); return v > 0 ? v : 0.03 }()
@ -266,7 +266,9 @@ struct MitsuhaVisualizerView: View {
let logIndex = log10(normalizedIndex * 9.0 + 1.0)
let centerBin = logIndex * Float(maxUsefulBin)
let binWidth = max(1.0, Float(maxUsefulBin) / Float(count) * logIndex)
// Bass bins are wider average more FFT bins for heavy, smooth low-end response
// matching original Mitsuha behaviour where bass rolls and treble reacts sharply
let binWidth = max(1.0, Float(maxUsefulBin) / Float(count))
let startBin = max(1, Int(centerBin - binWidth / 2))
let endBin = min(maxUsefulBin, Int(centerBin + binWidth / 2))
@ -279,9 +281,7 @@ struct MitsuhaVisualizerView: View {
}
let averageInBand = countInBand > 0 ? (sum / Float(countInBand)) : 0
let eqBoost: Float = 1.0 + (Float(i) / Float(count)) * 3.5
targetLevels[i] = min(1.0, averageInBand * Float(settings.baseMultiplier) * sens * eqBoost)
targetLevels[i] = min(1.0, averageInBand * Float(settings.baseMultiplier) * sens)
}
}
@ -295,14 +295,20 @@ struct MitsuhaVisualizerView: View {
// Dynamic Gain / Peak Follower
let frameMax = targetLevels.max() ?? 0.0
if frameMax > box.peakFollower {
box.peakFollower = box.peakFollower + (frameMax - box.peakFollower) * 0.3
} else {
box.peakFollower = max(box.peakFollower * 0.997, 0.005)
}
if settings.dynamicGainEnabled {
let normFactor = Float(1.0) / max(box.peakFollower, 0.005)
if frameMax > box.peakFollower {
// Fast attack catch loud transients in 3 frames
box.peakFollower = box.peakFollower + (frameMax - box.peakFollower) * 0.3
} else {
// Exponential decay: the peak halves every second (decayPerSec = 0.5, assuming dt is in seconds) — fast enough to track quiet passages
let decayPerSec: Float = 0.5
let decayThisFrame = pow(decayPerSec, dt)
box.peakFollower = max(box.peakFollower * decayThisFrame, 0.01)
}
let normFactor = Float(1.0) / max(box.peakFollower, 0.01)
targetLevels = targetLevels.map { min($0 * normFactor, 1.0) }
} else {
box.peakFollower = max(box.peakFollower, frameMax)
}
if box.displayLevels.count != count {
@ -893,11 +899,13 @@ struct VisualizerSettingsView: View {
private func resetDefaults() {
settings.enabled = true; settings.nowPlayingEnabled = true; settings.miniPlayerEnabled = true
settings.style = .wave; settings.numberOfPoints = 10; settings.sensitivity = 1.5
settings.fps = 60; settings.realAudioAnalysis = true; settings.dynamicGainEnabled = true
settings.fps = 60; settings.realAudioAnalysis = true; settings.dynamicGainEnabled = false
settings.waveOffsetTop = 0
settings.barSpacing = 5; settings.barCornerRadius = 0; settings.lineThickness = 5
settings.colorMode = .dynamic; settings.alpha = 0.6; settings.viscosity = 0.25
settings.frequencyCutoff = 80; settings.baseMultiplier = 40.0
// baseMultiplier reduced from 40 → 25 after removing eqBoost (no longer need
// to compensate for the treble under-amplification the boost was masking)
settings.frequencyCutoff = 80; settings.baseMultiplier = 25.0
settings.depthOffset = 15.0; settings.depthOpacity = 0.2; settings.idleAmplitude = 0.03
settings.waveStrokeThickness = 1.5; settings.nowPlayingHeightPct = 0.50; settings.miniPlayerHeight = 48.0
settings.miniOpacity = 0.5; settings.miniAmplitude = 0.7; settings.miniIdleAmplitude = 0.03

View file

@ -2,7 +2,7 @@ import Foundation
import AVFoundation
import Accelerate
/// Processes an entire audio file faster than real-time, producing per-frame FFT data
/// Processes an entire audio file faster than real-time, producing per-frame level data
/// that can be cached and played back in sync with the audio.
/// Also optionally extracts SmartDJ profile data (silence boundaries + LUFS) in the same pass.
actor OfflineAudioAnalyzer {
@ -26,25 +26,20 @@ actor OfflineAudioAnalyzer {
pointsCount: Int = 20,
fps: Double = 30.0,
cutoff: Int = 90,
eqBoostFactor: Float = 3.5,
progress: ProgressCallback? = nil
) throws -> [[Float]] {
let r = try analyzeWithSmartDJ(url: url, pointsCount: pointsCount, fps: fps,
cutoff: cutoff, eqBoostFactor: eqBoostFactor,
extractSmartDJ: false, progress: progress)
cutoff: cutoff, extractSmartDJ: false, progress: progress)
return r.visFrames
}
// MARK: - Combined pass: vis frames + SmartDJ profile in one file read
/// Reads the file once, producing visualiser frames AND silence/loudness data.
/// Set `extractSmartDJ: false` to skip the SmartDJ computation and save time.
func analyzeWithSmartDJ(
url: URL,
pointsCount: Int = 20,
fps: Double = 30.0,
cutoff: Int = 90,
eqBoostFactor: Float = 3.5,
extractSmartDJ: Bool = true,
progress: ProgressCallback? = nil
) throws -> CombinedResult {
@ -55,203 +50,281 @@ actor OfflineAudioAnalyzer {
let totalFrames = file.length
let durationSec = Double(totalFrames) / sampleRate
let audioFramesPerVisFrame = AVAudioFrameCount(sampleRate / fps)
// FFT parameters always 1024 regardless of fps
let fftSize = 1024
let bufferSize = max(AVAudioFrameCount(fftSize), audioFramesPerVisFrame)
guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: bufferSize) else {
throw NSError(domain: "OfflineAnalyzer", code: 1, userInfo: [NSLocalizedDescriptionKey: "Failed to create buffer"])
}
let halfSize = fftSize / 2
let log2n = vDSP_Length(log2(Double(fftSize)))
guard let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2)) else {
throw NSError(domain: "OfflineAnalyzer", code: 2, userInfo: [NSLocalizedDescriptionKey: "Failed to create FFT setup"])
throw NSError(domain: "OfflineAnalyzer", code: 2,
userInfo: [NSLocalizedDescriptionKey: "Failed to create FFT setup"])
}
defer { vDSP_destroy_fftsetup(fftSetup) }
let halfSize = fftSize / 2
var visualizerData: [[Float]] = []
let estimatedVisFrames = Int(Double(totalFrames) / Double(audioFramesPerVisFrame))
visualizerData.reserveCapacity(estimatedVisFrames)
// Hann window
var window = [Float](repeating: 0, count: fftSize)
vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))
// SmartDJ state
let silenceThreshold: Float = 0.008 // RMS below this = silence
var leadingSilenceEndSec: Double? = nil // first non-silent moment
var trailingSilenceStartSec: Double? = nil // last non-silent moment
var sumSquares: Double = 0.0
var sampleCount: Int64 = 0
var filePositionSec: Double { Double(file.framePosition) / sampleRate }
// How many AUDIO samples between each vis frame.
// This is independent of fftSize — the FFT window always uses fftSize
// samples but hops forward by hopSize each frame, giving proper overlap
// when fps is high (hopSize < fftSize) without skipping samples when fps
// is low (hopSize > fftSize).
let hopSize = Int(max(1, sampleRate / fps))
var frameIndex = 0
// Read in chunks large enough to hold at least one full FFT window.
// Using 4× hopSize so we get several vis frames per disk read.
let readChunkSamples = max(fftSize * 2, hopSize * 4)
guard let readBuffer = AVAudioPCMBuffer(
pcmFormat: format,
frameCapacity: AVAudioFrameCount(readChunkSamples)
) else {
throw NSError(domain: "OfflineAnalyzer", code: 1,
userInfo: [NSLocalizedDescriptionKey: "Failed to create buffer"])
}
// Ring buffer: always holds the last `fftSize` samples plus one chunk ahead
let ringCapacity = readChunkSamples + fftSize
var ring = [Float](repeating: 0, count: ringCapacity)
var ringHead = 0 // next write position (mod ringCapacity)
var totalSamplesInRing = 0 // total samples ever written to ring
let estimatedFrames = Int(durationSec * fps) + 1
var rawFrames: [[Float]] = []
rawFrames.reserveCapacity(estimatedFrames)
// SmartDJ accumulators
let silenceThreshold: Float = 0.008
var leadingSilenceEndSec: Double? = nil
var trailingSilenceStartSec: Double? = nil
var sumSquares: Double = 0
var sampleCountLUFS: Int64 = 0
// Sliding window state
var nextFrameSample = 0 // the audio sample index at which to take the next vis frame
while file.framePosition < totalFrames {
let framesToRead = min(bufferSize, AVAudioFrameCount(totalFrames - file.framePosition))
buffer.frameLength = 0
try file.read(into: buffer, frameCount: framesToRead)
let toRead = min(AVAudioFrameCount(readChunkSamples),
AVAudioFrameCount(totalFrames - file.framePosition))
readBuffer.frameLength = 0
try file.read(into: readBuffer, frameCount: toRead)
let chunkStart = Int(file.framePosition) - Int(readBuffer.frameLength)
let chunkLen = Int(readBuffer.frameLength)
guard chunkLen > 0, let ch = readBuffer.floatChannelData?[0] else { continue }
guard let channelData = buffer.floatChannelData?[0] else { continue }
let actualFrames = Int(buffer.frameLength)
let chunkStartSec = filePositionSec - Double(actualFrames) / sampleRate
// Write chunk into ring buffer
for i in 0..<chunkLen {
ring[ringHead] = ch[i]
ringHead = (ringHead + 1) % ringCapacity
}
totalSamplesInRing += chunkLen
// SmartDJ: RMS per chunk
if extractSmartDJ && actualFrames > 0 {
// SmartDJ RMS & loudness over this chunk
if extractSmartDJ {
var rms: Float = 0
vDSP_rmsqv(channelData, 1, &rms, vDSP_Length(actualFrames))
vDSP_rmsqv(ch, 1, &rms, vDSP_Length(chunkLen))
let chunkSec = Double(chunkStart) / sampleRate
if rms > silenceThreshold {
if leadingSilenceEndSec == nil {
leadingSilenceEndSec = chunkStartSec
}
trailingSilenceStartSec = chunkStartSec + Double(actualFrames) / sampleRate
if leadingSilenceEndSec == nil { leadingSilenceEndSec = chunkSec }
trailingSilenceStartSec = chunkSec + Double(chunkLen) / sampleRate
}
// Accumulate for integrated loudness
var sumSq: Float = 0
vDSP_measqv(channelData, 1, &sumSq, vDSP_Length(actualFrames))
sumSquares += Double(sumSq) * Double(actualFrames)
sampleCount += Int64(actualFrames)
vDSP_measqv(ch, 1, &sumSq, vDSP_Length(chunkLen))
sumSquares += Double(sumSq) * Double(chunkLen)
sampleCountLUFS += Int64(chunkLen)
}
// Visualiser FFT frames
guard actualFrames >= fftSize else {
if actualFrames > 0 {
visualizerData.append(processFFTFrame(
channelData: channelData, frameCount: actualFrames,
fftSize: fftSize, halfSize: halfSize, window: window,
fftSetup: fftSetup, pointsCount: pointsCount,
cutoff: cutoff, eqBoostFactor: eqBoostFactor))
// Generate vis frames for all frame positions inside this chunk
let chunkEnd = chunkStart + chunkLen
while nextFrameSample < chunkEnd {
// We need fftSize samples ending at nextFrameSample + fftSize/2
// (centre the FFT window on the frame position for better transient response)
let windowStart = nextFrameSample - fftSize / 2
let windowEnd = windowStart + fftSize
// Skip if we don't have enough samples yet
guard windowEnd <= Int(file.framePosition) else { break }
guard windowStart >= 0 else {
nextFrameSample += hopSize
continue
}
break
}
var sampleOffset = 0
while sampleOffset + fftSize <= actualFrames {
visualizerData.append(processFFTFrame(
channelData: channelData.advanced(by: sampleOffset),
frameCount: fftSize, fftSize: fftSize, halfSize: halfSize,
window: window, fftSetup: fftSetup, pointsCount: pointsCount,
cutoff: cutoff, eqBoostFactor: eqBoostFactor))
sampleOffset += Int(audioFramesPerVisFrame)
frameIndex += 1
if frameIndex % 50 == 0 {
progress?(Float(file.framePosition) / Float(totalFrames))
// Extract fftSize samples from ring buffer
// The ring buffer contains samples [totalSamplesInRing-ringCapacity ... totalSamplesInRing]
// (clamped to what we've written so far)
let ringTail = totalSamplesInRing - ringCapacity
guard windowStart >= ringTail else {
nextFrameSample += hopSize
continue
}
var windowSamples = [Float](repeating: 0, count: fftSize)
for j in 0..<fftSize {
let absIdx = windowStart + j
let ringIdx = (ringHead - (totalSamplesInRing - absIdx) + ringCapacity * 1000) % ringCapacity
windowSamples[j] = ring[ringIdx]
}
let frame = computeFFTFrame(
samples: windowSamples,
fftSize: fftSize, halfSize: halfSize,
window: window, fftSetup: fftSetup,
pointsCount: pointsCount, cutoff: cutoff
)
rawFrames.append(frame)
nextFrameSample += hopSize
if rawFrames.count % 100 == 0 {
progress?(Float(nextFrameSample) / Float(totalFrames))
}
}
}
progress?(1.0)
// Compute approximate integrated LUFS
// Uses mean square dBFS as a simplified approximation of BS.1770.
// Not true K-weighted LUFS but accurate enough for volume normalisation.
// Normalize frames to 0–1 using the 95th-percentile peak.
// This ensures consistent amplitude across songs regardless of mastering
// level, and makes pre-analyzed playback feel identical to the live FFT
// path which also normalises against a peak follower.
let normalized = normalizeFrames(rawFrames)
// Apply per-frame temporal smoothing
// The live path smooths in updateDisplayLevels. Pre-analyzed frames need
// the same treatment baked in so playback isn't jittery.
let viscosity: Float = 0.25 // matches default VisualizerSettings.viscosity
let smoothed = smoothFrames(normalized, viscosity: viscosity)
// LUFS
var loudnessLUFS: Double? = nil
if extractSmartDJ && sampleCount > 0 {
let meanSquare = sumSquares / Double(sampleCount)
if extractSmartDJ && sampleCountLUFS > 0 {
let meanSquare = sumSquares / Double(sampleCountLUFS)
if meanSquare > 0 {
let lufs = 20.0 * log10(sqrt(meanSquare))
loudnessLUFS = lufs
loudnessLUFS = 20.0 * log10(sqrt(meanSquare))
}
}
// Guard silence detections: must be within plausible range
// Silence guard
let safeLeading: Double? = {
guard let t = leadingSilenceEndSec, t > 0.05, t < durationSec * 0.25 else { return nil }
return t
}()
let safeTrailing: Double? = {
guard let t = trailingSilenceStartSec, t < durationSec - 0.5, t > durationSec * 0.5 else { return nil }
guard let t = trailingSilenceStartSec,
t < durationSec - 0.5, t > durationSec * 0.5 else { return nil }
return t
}()
return CombinedResult(
visFrames: visualizerData,
visFrames: smoothed,
silenceEnd: safeLeading,
silenceStart: safeTrailing,
loudnessLUFS: loudnessLUFS
)
}
/// Process a single FFT frame from raw audio samples
private func processFFTFrame(
channelData: UnsafePointer<Float>,
frameCount: Int,
// MARK: - FFT Frame
private func computeFFTFrame(
samples: [Float],
fftSize: Int,
halfSize: Int,
window: [Float],
fftSetup: FFTSetup,
pointsCount: Int,
cutoff: Int,
eqBoostFactor: Float
cutoff: Int
) -> [Float] {
let n = min(frameCount, fftSize)
// 1. Apply Hann window
// Apply Hann window
var windowed = [Float](repeating: 0, count: fftSize)
if n < fftSize {
// Zero-pad if short
for i in 0..<n { windowed[i] = channelData[i] * window[i] }
} else {
vDSP_vmul(channelData, 1, window, 1, &windowed, 1, vDSP_Length(fftSize))
}
// 2. FFT
vDSP_vmul(samples, 1, window, 1, &windowed, 1, vDSP_Length(fftSize))
// FFT
var realp = [Float](repeating: 0, count: halfSize)
var imagp = [Float](repeating: 0, count: halfSize)
var magnitudes = [Float](repeating: 0, count: halfSize)
realp.withUnsafeMutableBufferPointer { realpBuf in
imagp.withUnsafeMutableBufferPointer { imagpBuf in
var splitComplex = DSPSplitComplex(
realp: realpBuf.baseAddress!,
imagp: imagpBuf.baseAddress!
)
realp.withUnsafeMutableBufferPointer { rb in
imagp.withUnsafeMutableBufferPointer { ib in
var sc = DSPSplitComplex(realp: rb.baseAddress!, imagp: ib.baseAddress!)
windowed.withUnsafeBytes { raw in
let ptr = raw.bindMemory(to: DSPComplex.self).baseAddress!
vDSP_ctoz(ptr, 2, &splitComplex, 1, vDSP_Length(halfSize))
vDSP_ctoz(raw.bindMemory(to: DSPComplex.self).baseAddress!,
2, &sc, 1, vDSP_Length(halfSize))
}
vDSP_fft_zrip(fftSetup, &splitComplex, 1, vDSP_Length(log2(Double(fftSize))), FFTDirection(FFT_FORWARD))
vDSP_zvmags(&splitComplex, 1, &magnitudes, 1, vDSP_Length(halfSize))
let log2n = vDSP_Length(log2(Double(fftSize)))
vDSP_fft_zrip(fftSetup, &sc, 1, log2n, FFTDirection(FFT_FORWARD))
vDSP_zvmags(&sc, 1, &magnitudes, 1, vDSP_Length(halfSize))
}
}
// 3. Normalize
let fftSizeF = Float(fftSize)
var scale: Float = 1.0 / (fftSizeF * fftSizeF)
// Normalize: divide by N², then sqrt for perceptual amplitude
let n2 = Float(fftSize) * Float(fftSize)
var scale = 1.0 / n2
vDSP_vsmul(magnitudes, 1, &scale, &magnitudes, 1, vDSP_Length(halfSize))
// sqrt for perceptual amplitude
for i in 0..<halfSize {
magnitudes[i] = sqrt(magnitudes[i])
}
// 4. Logarithmic binning with EQ boost
var framePoints = [Float](repeating: 0, count: pointsCount)
let maxUsefulBin = min(halfSize - 1, cutoff)
for i in 0..<halfSize { magnitudes[i] = sqrt(magnitudes[i]) }
// Log-spaced binning uniform bin width, matching the fixed live-path binning.
// No eqBoost: frequency bands are weighted equally, matching the original Mitsuha behaviour.
var frame = [Float](repeating: 0, count: pointsCount)
let maxBin = min(halfSize - 1, cutoff)
let uniformBinWidth = max(1, maxBin / pointsCount)
for i in 0..<pointsCount {
let normalizedIndex = Float(i + 1) / Float(pointsCount)
let logIndex = log10(normalizedIndex * 9.0 + 1.0)
let centerBin = logIndex * Float(maxUsefulBin)
let binWidth = max(1.0, Float(maxUsefulBin) / Float(pointsCount) * logIndex)
let startBin = max(1, Int(centerBin - binWidth / 2))
let endBin = min(maxUsefulBin, Int(centerBin + binWidth / 2))
let nIdx = Float(i + 1) / Float(pointsCount)
let logIdx = log10(nIdx * 9.0 + 1.0) // 01 log-spaced
let centerBin = Int(logIdx * Float(maxBin))
let lo = max(1, centerBin - uniformBinWidth / 2)
let hi = min(maxBin, centerBin + uniformBinWidth / 2)
var sum: Float = 0
var countInBand = 0
for j in startBin...endBin where j < magnitudes.count {
var count = 0
for j in lo...hi where j < magnitudes.count {
sum += magnitudes[j]
countInBand += 1
count += 1
}
let average = countInBand > 0 ? (sum / Float(countInBand)) : 0
let eqBoost: Float = 1.0 + (Float(i) / Float(pointsCount)) * eqBoostFactor
framePoints[i] = average * eqBoost
frame[i] = count > 0 ? sum / Float(count) : 0
}
return framePoints
return frame
}
// MARK: - Post-processing
/// Rescales every frame so the 95th-percentile level maps to 0.8.
///
/// Only strictly positive levels take part in the percentile search. Each
/// scaled level is clamped to at most 1.0.
///
/// - Parameter frames: Per-frame level arrays.
/// - Returns: The rescaled frames, or `frames` unchanged when it is empty or
///   contains no positive level.
private func normalizeFrames(_ frames: [[Float]]) -> [[Float]] {
    // Flatten, keep positive levels only, and order them for the percentile lookup.
    let positives = frames.joined().filter { $0 > 0 }.sorted()
    guard let lastIndex = positives.indices.last else { return frames }

    // Index of the 95th percentile, clamped to the last valid element.
    let rank = Int(Float(positives.count) * 0.95)
    let reference = positives[min(rank, lastIndex)]
    guard reference > 0 else { return frames }

    let gain = 0.8 / reference
    return frames.map { levels in
        levels.map { level in min(1.0, level * gain) }
    }
}
/// Applies exponential temporal smoothing across consecutive frames.
///
/// The first frame is kept as-is. Every later frame moves from the previous
/// smoothed frame toward its own raw values by the fraction `viscosity`.
/// Each output frame has the length of the first frame; positions the current
/// raw frame does not cover are left at zero.
///
/// - Parameters:
///   - frames: Raw per-frame level arrays.
///   - viscosity: Blend factor applied to the difference between the raw frame
///     and the previous smoothed frame.
/// - Returns: The smoothed frames, or `frames` unchanged when it has fewer
///   than two entries.
private func smoothFrames(_ frames: [[Float]], viscosity: Float) -> [[Float]] {
    guard let first = frames.first, frames.count > 1 else { return frames }

    var output: [[Float]] = [first]
    output.reserveCapacity(frames.count)
    var previous = first

    for target in frames.dropFirst() {
        let overlap = min(previous.count, target.count)
        var blended = [Float](repeating: 0, count: previous.count)
        for k in 0..<overlap {
            blended[k] = previous[k] + (target[k] - previous[k]) * viscosity
        }
        output.append(blended)
        previous = blended
    }
    return output
}
}