package utils
import (
"image"
"math"
"strings"
"sync"
"github.com/madelynnblue/go-dsp/window"
)
// hannCache memoizes Hann windows keyed by their length, so repeated requests
// for the same window size reuse one slice instead of recomputing it.
// hannCacheMu guards hannCache.
var (
	hannCacheMu sync.RWMutex
	hannCache   = make(map[int][]float64)
)
// getCachedHannWindow returns a Hann window of the given size, computing it
// with window.Hann on first use and serving it from hannCache afterwards.
//
// The returned slice is the cached one and is handed to every caller asking
// for the same size, so it must be treated as read-only.
func getCachedHannWindow(size int) []float64 {
	// Fast path: most calls hit the cache and only need the shared read lock.
	hannCacheMu.RLock()
	cached, found := hannCache[size]
	hannCacheMu.RUnlock()
	if found {
		return cached
	}

	// Slow path: take the write lock and look again, because another
	// goroutine may have stored the window between the two lock sections.
	hannCacheMu.Lock()
	defer hannCacheMu.Unlock()
	cached, found = hannCache[size]
	if !found {
		cached = window.Hann(size)
		hannCache[size] = cached
	}
	return cached
}
// DefaultMaxSampleRate is the highest sample rate fed to the spectrogram
// generator: GenerateSegmentSpectrogram resamples audio whose rate exceeds
// this value down to it before analysis.
// NOTE(review): presumably expressed in Hz (samples per second) — confirm.
const DefaultMaxSampleRate = 16000
// SpectrogramConfig holds the framing parameters used by GenerateSpectrogram.
type SpectrogramConfig struct {
	// WindowSize is the number of samples in each analysis frame; it is also
	// the length of the Hann window applied to the frame.
	WindowSize int
	// HopSize is the number of samples between the starts of two consecutive
	// frames.
	HopSize int
	// SampleRate is the sample rate of the audio being analysed.
	// NOTE(review): presumably in Hz, and it is not read by
	// GenerateSpectrogram itself — confirm which callers rely on it.
	SampleRate int
}
// DefaultSpectrogramConfig returns the standard framing for audio at the
// given sample rate: 512-sample windows advanced by 256 samples, so that
// consecutive frames overlap by half a window.
func DefaultSpectrogramConfig(sampleRate int) SpectrogramConfig {
	var cfg SpectrogramConfig
	cfg.SampleRate = sampleRate
	cfg.WindowSize = 512
	cfg.HopSize = 256
	return cfg
}
// GenerateSpectrogram computes a power spectrogram of samples and returns it
// as 8-bit intensities.
//
// The signal is cut into frames of cfg.WindowSize samples whose starts are
// cfg.HopSize samples apart; trailing samples that do not fill a whole frame
// are ignored. Each frame is multiplied by a Hann window and handed to
// PowerSpectrumFFT, and cfg.WindowSize/2+1 power values per frame are
// collected. normalizeFlat then converts the powers to decibels and scales
// them to the 0-255 range.
//
// The result is indexed [row][frame] and has cfg.WindowSize/2+1 rows. Because
// normalizeFlat flips the rows, row 0 holds the highest-index frequency bin.
//
// It returns nil when cfg.WindowSize or cfg.HopSize is not positive, or when
// samples is shorter than one window.
func GenerateSpectrogram(samples []float64, cfg SpectrogramConfig) [][]uint8 {
	// A zero hop would divide by zero below (a zero-value config triggers
	// exactly that), and a non-positive window leaves nothing to transform.
	if cfg.WindowSize <= 0 || cfg.HopSize <= 0 {
		return nil
	}
	if len(samples) < cfg.WindowSize {
		return nil
	}

	hannWindow := getCachedHannWindow(cfg.WindowSize)

	// With the guards above there is always at least one full frame.
	numFrames := (len(samples)-cfg.WindowSize)/cfg.HopSize + 1
	numFreqBins := cfg.WindowSize/2 + 1

	// powerFlat is laid out bin-major: the value for (bin, frame) lives at
	// bin*numFrames+frame, which is the rows x cols layout normalizeFlat reads.
	powerFlat := make([]float64, numFreqBins*numFrames)

	// Scratch buffers are allocated once and reused for every frame.
	frameData := make([]float64, cfg.WindowSize)
	scratch := make([]complex128, cfg.WindowSize)
	framePower := make([]float64, numFreqBins)

	for frame := range numFrames {
		start := frame * cfg.HopSize
		for i, sample := range samples[start : start+cfg.WindowSize] {
			frameData[i] = sample * hannWindow[i]
		}
		// NOTE(review): PowerSpectrumFFT is assumed to fill framePower with
		// one power value per bin, using scratch as working storage — confirm
		// against its definition.
		PowerSpectrumFFT(frameData, framePower, scratch)
		for bin := range numFreqBins {
			powerFlat[bin*numFrames+frame] = framePower[bin]
		}
	}
	return normalizeFlat(powerFlat, numFreqBins, numFrames)
}
// normalizeFlat converts a flat rows x cols matrix of power values (row-major,
// element (r, c) at r*cols+c) into 8-bit intensities on a decibel scale.
//
// Each value is converted with 10*log10. Values that are zero or negative are
// first replaced by the smallest positive value found in power, or by 1e-20
// when there is none. The decibel range is then mapped linearly so that the
// minimum becomes 0 and the maximum lands at the top of the uint8 range
// (fractions are truncated).
//
// The output rows are in reverse order: result[0] is built from the last
// source row. All rows share one backing array.
//
// power is overwritten in place with the decibel values. The function returns
// nil when rows or cols is zero.
func normalizeFlat(power []float64, rows, cols int) [][]uint8 {
	if rows == 0 || cols == 0 {
		return nil
	}

	// Pass 1: find the smallest strictly positive power; it stands in for
	// non-positive entries so the logarithm below is always defined.
	floor := math.MaxFloat64
	for i := range power {
		if p := power[i]; p > 0 && p < floor {
			floor = p
		}
	}
	if floor == math.MaxFloat64 {
		floor = 1e-20
	}

	// Pass 2: convert to decibels in place while tracking the extremes.
	lo, hi := math.MaxFloat64, -math.MaxFloat64
	for i := range power {
		p := power[i]
		if p <= 0 {
			p = floor
		}
		db := 10.0 * math.Log10(p)
		power[i] = db
		if db < lo {
			lo = db
		}
		if db > hi {
			hi = db
		}
	}

	// A constant input has no range; use 1 so the scale stays finite and
	// every pixel comes out as 0.
	span := hi - lo
	if span == 0 {
		span = 1
	}
	scale := 255.0 / span

	// Pass 3: scale to bytes, writing the source rows bottom-up.
	pixels := make([]uint8, rows*cols)
	out := make([][]uint8, rows)
	for dst := range out {
		row := pixels[dst*cols : (dst+1)*cols]
		base := (rows - 1 - dst) * cols
		for j := range row {
			row[j] = uint8((power[base+j] - lo) * scale)
		}
		out[dst] = row
	}
	return out
}
// ExtractSegmentSamples returns the part of samples lying between startSec and
// endSec. Each time is turned into a sample index by multiplying it by
// sampleRate and truncating towards zero.
//
// Positions before the beginning clamp to 0 and positions past the end clamp
// to len(samples). The clamping is done on the floating-point position before
// it is converted to int: converting an out-of-range float to an integer is
// implementation-dependent in Go, so a very large or infinite endSec could
// otherwise turn into a negative index. A NaN position is treated as 0.
//
// The result is a sub-slice that shares memory with samples, not a copy. It
// is nil when the clamped range is empty or inverted.
func ExtractSegmentSamples(samples []float64, sampleRate int, startSec, endSec float64) []float64 {
	total := len(samples)
	rate := float64(sampleRate)

	// toIndex maps a time in seconds to a sample index within [0, total].
	toIndex := func(sec float64) int {
		pos := sec * rate
		switch {
		case pos >= float64(total):
			return total
		case pos > 0:
			return int(pos)
		default: // negative, zero or NaN
			return 0
		}
	}

	startIdx, endIdx := toIndex(startSec), toIndex(endSec)
	if startIdx >= endIdx {
		return nil
	}
	return samples[startIdx:endIdx]
}
// GenerateSegmentSpectrogram renders the spectrogram of one time segment of a
// recording as a square image.
//
// dataFilePath is the path of the ".data" file; the audio is read from the
// same path with that suffix removed. The samples between startTime and
// endTime are loaded with ReadWAVSegmentSamples and, when the file's sample
// rate exceeds DefaultMaxSampleRate, resampled down to it. The spectrogram is
// computed with DefaultSpectrogramConfig, rendered either through
// ApplyL4Colormap as an RGB image (color == true) or as a grayscale image,
// and finally resized to imgSize x imgSize after imgSize has gone through
// ClampImageSize.
//
// An error is returned only when reading the samples fails. The function
// returns (nil, nil) when the segment holds no samples, when
// GenerateSpectrogram produces nothing, or when no image could be created,
// so callers must check the image for nil even when err is nil.
func GenerateSegmentSpectrogram(dataFilePath string, startTime, endTime float64, color bool, imgSize int) (image.Image, error) {
	audioPath := strings.TrimSuffix(dataFilePath, ".data")

	pcm, rate, err := ReadWAVSegmentSamples(audioPath, startTime, endTime)
	switch {
	case err != nil:
		return nil, err
	case len(pcm) == 0:
		return nil, nil
	}

	// Cap the analysis rate so high-rate recordings are analysed at
	// DefaultMaxSampleRate.
	if rate > DefaultMaxSampleRate {
		pcm = ResampleRate(pcm, rate, DefaultMaxSampleRate)
		rate = DefaultMaxSampleRate
	}

	grid := GenerateSpectrogram(pcm, DefaultSpectrogramConfig(rate))
	if grid == nil {
		return nil, nil
	}

	var rendered image.Image
	if !color {
		rendered = CreateGrayscaleImage(grid)
	} else {
		rendered = CreateRGBImage(ApplyL4Colormap(grid))
	}
	if rendered == nil {
		return nil, nil
	}

	side := ClampImageSize(imgSize)
	return ResizeImage(rendered, side, side), nil
}