```go
// ReadWAVSamples reads audio samples from a WAV file and returns them as float64.
// Mono files: returns single channel.
// Stereo files: returns left channel only.
// Samples are normalized to the range -1.0 to 1.0.
func ReadWAVSamples(filepath string) ([]float64, int, error) {
```
```go
// ReadWAVSegmentSamples reads a specific time range of audio samples from a WAV file.
// If startSec < 0, it starts from 0.
// If endSec <= 0 or endSec > duration, it reads to the end.
func ReadWAVSegmentSamples(filepath string, startSec, endSec float64) ([]float64, int, error) {
```
```go
// Read audio data
if _, err := file.Seek(dataOffset, 0); err != nil {
    return nil, 0, fmt.Errorf("failed to seek to data: %w", err)
}
```
```go
bytesPerSample := bitsPerSample / 8
blockAlign := bytesPerSample * channels
startOffset := int64(0)
var readSize int64
if startSec > 0 {
    startSample := int64(startSec * float64(sampleRate))
    startOffset = startSample * int64(blockAlign)
    if startOffset > dataSize {
        startOffset = dataSize
    }
}
```
```go
audioData := make([]byte, dataSize)
```
```go
if endSec > 0 {
    endSample := int64(endSec * float64(sampleRate))
    endOffset := endSample * int64(blockAlign)
    if endOffset > dataSize {
        endOffset = dataSize
    }
    if endOffset > startOffset {
        readSize = endOffset - startOffset
    } else {
        readSize = 0
    }
} else {
    readSize = dataSize - startOffset
}
if readSize == 0 {
    return []float64{}, sampleRate, nil
}
if _, err := file.Seek(dataOffset+startOffset, io.SeekStart); err != nil {
    return nil, 0, fmt.Errorf("failed to seek to data segment: %w", err)
}
audioData := make([]byte, readSize)
```
```go
// ReadWAVSamples reads audio samples from a WAV file and returns them as float64.
// Mono files: returns single channel.
// Stereo files: returns left channel only.
// Samples are normalized to the range -1.0 to 1.0.
func ReadWAVSamples(filepath string) ([]float64, int, error) {
    return ReadWAVSegmentSamples(filepath, 0, 0)
}
```
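For clarity, a quick usage sketch (assuming ReadWAVSegmentSamples is in scope; the file path and the 2.0 to 5.0 second window are hypothetical):

```go
// Hypothetical caller: read only seconds 2.0-5.0 of a recording.
samples, sampleRate, err := ReadWAVSegmentSamples("recordings/example.wav", 2.0, 5.0)
if err != nil {
    log.Fatalf("segment read failed: %v", err)
}
fmt.Printf("read %d samples at %d Hz\n", len(samples), sampleRate)
```

Passing 0 for both bounds reads the entire file, which is why ReadWAVSamples can simply delegate with (filepath, 0, 0).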
```go
    }
    return nil
}

// GetDatasetType returns the type of a dataset
// Returns: (type, exists, error)
func GetDatasetType(db *sql.DB, datasetID string) (string, bool, error) {
    var datasetType string
    err := db.QueryRow("SELECT type FROM dataset WHERE id = ?", datasetID).Scan(&datasetType)
    if err == sql.ErrNoRows {
        return "", false, nil
    }
    if err != nil {
        return "", false, err
    }
    return datasetType, true, nil
}

// ValidateDatasetTypeForImport checks that a dataset is 'structured' type for file imports
// Returns error if dataset doesn't exist or is not 'structured'
func ValidateDatasetTypeForImport(db *sql.DB, datasetID string) error {
    datasetType, exists, err := GetDatasetType(db, datasetID)
    if err != nil {
        return fmt.Errorf("failed to query dataset type: %w", err)
    }
    if !exists {
        return fmt.Errorf("dataset not found: %s", datasetID)
    }
    if datasetType != "structured" {
        return fmt.Errorf("dataset '%s' is type '%s' - file imports only support 'structured' datasets", datasetID, datasetType)
    }
    return nil
}

// ValidateDatasetTypeUnstructured checks that a dataset is 'unstructured' type
// Returns error if dataset doesn't exist or is not 'unstructured'
func ValidateDatasetTypeUnstructured(db *sql.DB, datasetID string) error {
    datasetType, exists, err := GetDatasetType(db, datasetID)
    if err != nil {
        return fmt.Errorf("failed to query dataset type: %w", err)
    }
    if !exists {
        return fmt.Errorf("dataset not found: %s", datasetID)
    }
    if datasetType != "unstructured" {
        return fmt.Errorf("dataset '%s' is type '%s' - this command only supports 'unstructured' datasets", datasetID, datasetType)
    }
    return nil
}

// ValidateLocationBelongsToDataset checks that a location belongs to a specific dataset
// Returns error if location doesn't exist or belongs to a different dataset
func ValidateLocationBelongsToDataset(db *sql.DB, locationID, datasetID string) error {
    var locationDatasetID string
    err := db.QueryRow("SELECT dataset_id FROM location WHERE id = ? AND active = true", locationID).Scan(&locationDatasetID)
    if err == sql.ErrNoRows {
        return fmt.Errorf("location not found or inactive: %s", locationID)
    }
    if err != nil {
        return fmt.Errorf("failed to query location: %w", err)
    }
    if locationDatasetID != datasetID {
        return fmt.Errorf("location %s does not belong to dataset %s", locationID, datasetID)
    }
    return nil
}
```
query := `SELECT label FROM species WHERE label IN (` + Placeholders(len(speciesLabels)) + `) AND active = true`
query := `SELECT label FROM species WHERE label IN (` + db.Placeholders(len(speciesLabels)) + `) AND active = true`
query := `SELECT id, name FROM filter WHERE name IN (` + utils.Placeholders(len(names)) + `) AND active = true`
query := `SELECT id, name FROM filter WHERE name IN (` + db.Placeholders(len(names)) + `) AND active = true`
query := `SELECT id, label FROM species WHERE label IN (` + utils.Placeholders(len(dbSpeciesList)) + `) AND active = true`
query := `SELECT id, label FROM species WHERE label IN (` + db.Placeholders(len(dbSpeciesList)) + `) AND active = true`
Here is a brief report based on a comprehensive review of the utils directory.

### 1. Duplicated Functionality

- Audio Processing (Float64 to 16-bit PCM): wav_writer.go and audio_player.go contain identical, duplicated logic for converting float64 arrays into int16 LittleEndian bytes (including the [-1.0, 1.0] bounds clamping and * 32767 scaling). A consolidation sketch follows this report.
- Timestamp Resolution Strategy: cluster_import.go (batchProcessFiles) manually reimplements the entire fallback chain for timestamp resolution (AudioMoth → Filename → File Modification Time). This exact logic is already neatly abstracted in file_import.go as ResolveTimestamp().
- Directory Scanning: cluster_import.go implements its own scanClusterFiles to find .wav files, while data_file.go implements FindDataFiles to find .data files. These could be consolidated into a generic, reusable directory walker (also sketched after this report).

### 2. Untested Code

Overall statement coverage for utils is at 46.5%. The following areas are completely untested (0% coverage):

- Files with 0% coverage:
  - audio_player.go (Audio playback and context management)
  - config.go (JSON config file loading/parsing)
  - cluster_import.go (The core batch import logic and database transactions)
  - spectrogram.go (FFT operations, windowing, and image rendering)
  - wav_writer.go (WAV encoding logic)
- Untested Critical Functions in otherwise tested files:
  - file_import.go: ProcessSingleFile, CheckDuplicateHash
  - mapping.go: ValidateMappingAgainstDB, Classify, ValidateCoversSpecies, Classes, Placeholders
  - validation.go: GetDatasetType, ValidateDatasetTypeForImport, ValidateDatasetTypeUnstructured, ValidateLocationBelongsToDataset

### 3. Inconsistent Standards

- Memory Inefficiency in Spectrograms: In spectrogram.go, GenerateSegmentSpectrogram loads the entire WAV file into memory using ReadWAVSamples(wavPath) before calling ExtractSegmentSamples. If a user requests a 3-second segment from a 500MB continuous recording, the process will unnecessarily allocate the whole file.
- Path Construction: In data_file.go (FindDataFiles), paths are constructed using string concatenation (folder+"/"+name) instead of standard library utilities (filepath.Join), which is handled correctly elsewhere in the codebase.
- Separation of Concerns (DB vs Utils): Pure utility files are tightly coupled with the database. For example, validation.go mixes pure string/numeric assertions (ValidateShortID) with stateful database queries (e.g. ValidateLocationBelongsToDataset). Similarly, mapping.go contains ValidateMappingAgainstDB. These queries belong in a db package or should rely on injected interfaces rather than hardcoding *sql.DB dependencies into utils/.
- Misplaced Helpers: The SQL utility function Placeholders(n int) is declared inside mapping.go rather than in a dedicated database or query utility file.
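To address the first duplicated-functionality item, here is a minimal consolidation sketch; the helper name Float64ToPCM16Bytes and its placement in utils are assumptions, not existing code:

```go
package utils

import (
    "encoding/binary"
    "math"
)

// Float64ToPCM16Bytes converts normalized float64 samples into 16-bit
// little-endian PCM bytes, clamping to [-1.0, 1.0] and scaling by 32767,
// mirroring the logic currently duplicated in wav_writer.go and
// audio_player.go. (Hypothetical shared helper.)
func Float64ToPCM16Bytes(samples []float64) []byte {
    out := make([]byte, 2*len(samples))
    for i, s := range samples {
        // Clamp to the valid normalized range before scaling.
        s = math.Max(-1.0, math.Min(1.0, s))
        binary.LittleEndian.PutUint16(out[2*i:], uint16(int16(s*32767)))
    }
    return out
}
```

Both wav_writer.go and audio_player.go could then call this helper instead of carrying their own copies.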
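Likewise, a sketch of the generic directory walker suggested above, built on filepath.WalkDir; FindFilesByExt is a hypothetical name:

```go
package utils

import (
    "io/fs"
    "path/filepath"
    "strings"
)

// FindFilesByExt walks root and returns every regular file whose
// extension matches ext (e.g. ".wav" or ".data"). Hypothetical
// consolidation of scanClusterFiles and FindDataFiles.
func FindFilesByExt(root, ext string) ([]string, error) {
    var files []string
    err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
        if err != nil {
            return err
        }
        if !d.IsDir() && strings.EqualFold(filepath.Ext(path), ext) {
            files = append(files, path)
        }
        return nil
    })
    return files, err
}
```

Using filepath.WalkDir would also sidestep the manual folder+"/"+name concatenation flagged in section 3, since the walker produces joined paths itself.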
package dbimport ("database/sql""fmt")// GetDatasetType returns the type of a dataset// Returns: (type, exists, error)func GetDatasetType(database *sql.DB, datasetID string) (string, bool, error) {var datasetType stringerr := database.QueryRow("SELECT type FROM dataset WHERE id = ?", datasetID).Scan(&datasetType)if err == sql.ErrNoRows {return "", false, nil}if err != nil {return "", false, err}return datasetType, true, nil}// ValidateDatasetTypeForImport checks that a dataset is 'structured' type for file imports// Returns error if dataset doesn't exist or is not 'structured'func ValidateDatasetTypeForImport(database *sql.DB, datasetID string) error {datasetType, exists, err := GetDatasetType(database, datasetID)if err != nil {return fmt.Errorf("failed to query dataset type: %w", err)}if !exists {return fmt.Errorf("dataset not found: %s", datasetID)}if datasetType != "structured" {return fmt.Errorf("dataset '%s' is type '%s' - file imports only support 'structured' datasets", datasetID, datasetType)}return nil}// ValidateDatasetTypeUnstructured checks that a dataset is 'unstructured' type// Returns error if dataset doesn't exist or is not 'unstructured'func ValidateDatasetTypeUnstructured(database *sql.DB, datasetID string) error {datasetType, exists, err := GetDatasetType(database, datasetID)if err != nil {return fmt.Errorf("failed to query dataset type: %w", err)}if !exists {return fmt.Errorf("dataset not found: %s", datasetID)}if datasetType != "unstructured" {return fmt.Errorf("dataset '%s' is type '%s' - this command only supports 'unstructured' datasets", datasetID, datasetType)}return nil}// ValidateLocationBelongsToDataset checks that a location belongs to a specific dataset// Returns error if location doesn't exist or belongs to a different datasetfunc ValidateLocationBelongsToDataset(database *sql.DB, locationID, datasetID string) error {var locationDatasetID stringerr := database.QueryRow("SELECT dataset_id FROM location WHERE id = ? AND active = true", locationID).Scan(&locationDatasetID)if err == sql.ErrNoRows {return fmt.Errorf("location not found or inactive: %s", locationID)}if err != nil {return fmt.Errorf("failed to query location: %w", err)}if locationDatasetID != datasetID {return fmt.Errorf("location %s does not belong to dataset %s", locationID, datasetID)}return nil}
package dbimport "strings"// Placeholders generates SQL placeholder string for IN clausesfunc Placeholders(n int) string {if n == 0 {return ""}ph := make([]string, n)for i := range ph {ph[i] = "?"}return strings.Join(ph, ", ")}