if species != "" && label.Species != species {
	continue
}
if callType == CallTypeNone {
	if label.CallType != "" {
		continue
	}
} else if callType != "" && label.CallType != callType {
	continue
}
if certainty >= 0 && label.Certainty != certainty {
	continue
}
return true

	}
	return false
}

// labelMatchesFilters checks if a single label matches all filter criteria.
func labelMatchesFilters(label *Label, filter, species, callType string, certainty int) bool {
	if filter != "" && label.Filter != filter {
		return false
	}
	if species != "" && label.Species != species {
		return false
	}
	if callType == CallTypeNone {
		if label.CallType != "" {
			return false
		}
	} else if callType != "" && label.CallType != callType {
		return false
	}
	if certainty >= 0 && label.Certainty != certainty {
		return false

}

// Get WAV path
wavPath := strings.TrimSuffix(df.FilePath, ".data")
// Get basename without path and extension
basename := wavPath[strings.LastIndex(wavPath, "/")+1:]
basename = strings.TrimSuffix(basename, ".wav")
// Calculate integer times for filename
startInt := int(seg.StartTime)
endInt := int(seg.EndTime)
if seg.EndTime > float64(endInt) {
	endInt++ // ceil

return fmt.Errorf("failed to get working directory: %w", err)}baseName := fmt.Sprintf("%s_%s_%d_%d", prefix, basename, startInt, endInt)pngPath := filepath.Join(cwd, baseName+".png")wavOutPath := filepath.Join(cwd, baseName+".wav")// Check if files already existif _, err := os.Stat(pngPath); err == nil {return fmt.Errorf("file already exists: %s", pngPath)}if _, err := os.Stat(wavOutPath); err == nil {return fmt.Errorf("file already exists: %s", wavOutPath)
return err
// Generate spectrogram (224px, color)
config := utils.DefaultSpectrogramConfig(outputSampleRate)
spectrogram := utils.GenerateSpectrogram(segSamples, config)
if spectrogram == nil {
	return fmt.Errorf("failed to generate spectrogram")

// Generate spectrogram image
resized, err := generateClipSpectrogram(segSamples, outputSampleRate)
if err != nil {
	return err

colorData := utils.ApplyL4Colormap(spectrogram)
img := utils.CreateRGBImage(colorData)
if img == nil {
	return fmt.Errorf("failed to create image")

// Write output files
if err := writeClipPNG(resized, pngPath); err != nil {
	return err
}
if err := utils.WriteWAVFile(wavOutPath, segSamples, outputSampleRate); err != nil {
	return fmt.Errorf("failed to write WAV: %w", err)

}
return nil
}

// buildClipPaths constructs output file paths for a clip and checks they don't already exist.
func buildClipPaths(df *utils.DataFile, seg *utils.Segment, prefix string) (pngPath, wavOutPath string, err error) {
	wavPath := strings.TrimSuffix(df.FilePath, ".data")
	basename := wavPath[strings.LastIndex(wavPath, "/")+1:]
	basename = strings.TrimSuffix(basename, ".wav")
	startInt := int(seg.StartTime)
	endInt := int(seg.EndTime)
	if seg.EndTime > float64(endInt) {
		endInt++
	}
	cwd, err := os.Getwd()
	if err != nil {
		return "", "", fmt.Errorf("failed to get working directory: %w", err)

// Write WAV
if err := utils.WriteWAVFile(wavOutPath, segSamples, outputSampleRate); err != nil {
	return fmt.Errorf("failed to write WAV: %w", err)

	baseName := fmt.Sprintf("%s_%s_%d_%d", prefix, basename, startInt, endInt)
	pngPath = filepath.Join(cwd, baseName+".png")
	wavOutPath = filepath.Join(cwd, baseName+".wav")
	if _, err := os.Stat(pngPath); err == nil {
		return "", "", fmt.Errorf("file already exists: %s", pngPath)
	}
	if _, err := os.Stat(wavOutPath); err == nil {
		return "", "", fmt.Errorf("file already exists: %s", wavOutPath)
	}
	return pngPath, wavOutPath, nil
}

// generateClipSpectrogram generates a 224px color spectrogram image from audio samples.
func generateClipSpectrogram(segSamples []float64, sampleRate int) (image.Image, error) {
	config := utils.DefaultSpectrogramConfig(sampleRate)
	spectrogram := utils.GenerateSpectrogram(segSamples, config)
	if spectrogram == nil {
		return nil, fmt.Errorf("failed to generate spectrogram")
	}
	colorData := utils.ApplyL4Colormap(spectrogram)
	img := utils.CreateRGBImage(colorData)
	if img == nil {
		return nil, fmt.Errorf("failed to create image")

return output, err
	return err
}
return utils.ValidatePositive(*input.SleepSeconds, "sleep_seconds")
}

// findExistingPattern checks if an active pattern with the given record/sleep times exists.
func findExistingPattern(ctx context.Context, tx *db.LoggedTx, recordSeconds, sleepSeconds int) (db.CyclicRecordingPattern, bool, error) {
	var existingID string
	err := tx.QueryRowContext(ctx,
		"SELECT id FROM cyclic_recording_pattern WHERE record_s = ? AND sleep_s = ? AND active = true",
		recordSeconds, sleepSeconds,
	).Scan(&existingID)
	if err == sql.ErrNoRows {
		return db.CyclicRecordingPattern{}, false, nil
	}
	if err != nil {
		return db.CyclicRecordingPattern{}, false, err
	}
	var pattern db.CyclicRecordingPattern
	err = tx.QueryRowContext(ctx,
		"SELECT id, record_s, sleep_s, created_at, last_modified, active FROM cyclic_recording_pattern WHERE id = ?",
		existingID,
	).Scan(&pattern.ID, &pattern.RecordS, &pattern.SleepS, &pattern.CreatedAt, &pattern.LastModified, &pattern.Active)
	if err != nil {
		return db.CyclicRecordingPattern{}, false, fmt.Errorf("failed to fetch existing pattern: %w", err)
	}
	return pattern, true, nil
}

// validateUpdatePatternInput validates fields for pattern update.
func validateUpdatePatternInput(input PatternInput) error {
	if err := utils.ValidateShortID(*input.ID, "pattern_id"); err != nil {
		return err
	}
	if input.RecordSeconds != nil {
		if err := utils.ValidatePositive(*input.RecordSeconds, "record_seconds"); err != nil {
			return err
		}
	}

if err := utils.ValidatePositive(*input.SleepSeconds, "sleep_seconds"); err != nil {
if input.SleepSeconds != nil {
	if err := utils.ValidateNonNegative(*input.SleepSeconds, "sleep_seconds"); err != nil {
		return err
	}
}
return nil
}

func createPattern(ctx context.Context, input PatternInput) (PatternOutput, error) {
	var output PatternOutput
	if err := validateCreatePatternInput(input); err != nil {

var existingID string
err = tx.QueryRowContext(ctx,
	"SELECT id FROM cyclic_recording_pattern WHERE record_s = ? AND sleep_s = ? AND active = true",
	*input.RecordSeconds, *input.SleepSeconds,
).Scan(&existingID)
if err == nil {
	// Pattern already exists, return it instead of creating duplicate
	var pattern db.CyclicRecordingPattern
	err = tx.QueryRowContext(ctx,
		"SELECT id, record_s, sleep_s, created_at, last_modified, active FROM cyclic_recording_pattern WHERE id = ?",
		existingID,
	).Scan(&pattern.ID, &pattern.RecordS, &pattern.SleepS, &pattern.CreatedAt, &pattern.LastModified, &pattern.Active)
	if err != nil {
		return output, fmt.Errorf("failed to fetch existing pattern: %w", err)
	}

if existing, found, err := findExistingPattern(ctx, tx, *input.RecordSeconds, *input.SleepSeconds); err != nil {
	return output, fmt.Errorf("failed to check for existing pattern: %w", err)
} else if found {

// Validate fields if provided
if input.RecordSeconds != nil {
	if err := utils.ValidatePositive(*input.RecordSeconds, "record_seconds"); err != nil {
		return output, err
	}
}
if input.SleepSeconds != nil {
	if err := utils.ValidateNonNegative(*input.SleepSeconds, "sleep_seconds"); err != nil {
		return output, err
	}
}

package tools

import (
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"sync/atomic"
)

// parallelResult is the common interface for birda/raven worker results.
type parallelResult interface {
	filePath() string
	getCalls() []ClusteredCall
	wasWritten() bool
	wasSkipped() bool
	getError() error
}

// aggregateStats holds the collected results from a parallel fan-out/fan-in.
type aggregateStats struct {
	calls            []ClusteredCall
	speciesCount     map[string]int
	dataFilesWritten int
	dataFilesSkipped int
	filesProcessed   int
	filesDeleted     int
	firstErr         error
}

// aggregateResults collects results from a channel of parallelResult values,
// handling error tracking, species counting, optional file deletion, and
// progress reporting. Returns the aggregated stats.
func aggregateResults(
	results <-chan parallelResult,
	total int,
	processed *atomic.Int32,
	deleteFiles bool,
	progressHandler func(int, int, string),
) aggregateStats {
	var stats aggregateStats
	stats.speciesCount = make(map[string]int)
	for result := range results {
		if err := result.getError(); err != nil && stats.firstErr == nil {
			stats.firstErr = err
		}
		if result.wasWritten() {
			stats.dataFilesWritten++
		}
		if result.wasSkipped() {
			stats.dataFilesSkipped++
		}
		for _, call := range result.getCalls() {
			stats.calls = append(stats.calls, call)
			stats.speciesCount[call.EbirdCode]++
		}
		stats.filesProcessed++
		stats.maybeDeleteFile(deleteFiles, result)
		if progressHandler != nil {
			current := int(processed.Add(1))
			progressHandler(current, total, filepath.Base(result.filePath()))
		}
	}
	return stats
}

// maybeDeleteFile deletes the source file if requested and it was successfully processed.
func (s *aggregateStats) maybeDeleteFile(deleteFiles bool, result parallelResult) {
	if !deleteFiles || !result.wasWritten() {
		return
	}
	if err := os.Remove(result.filePath()); err != nil {
		if s.firstErr == nil {
			s.firstErr = fmt.Errorf("failed to delete %s: %w", result.filePath(), err)
		}
	} else {
		s.filesDeleted++
	}
}

// sortCallsByFileAndTime sorts calls by filename, then start time.
func sortCallsByFileAndTime(calls []ClusteredCall) {
	sort.Slice(calls, func(i, j int) bool {
		if calls[i].File != calls[j].File {
			return calls[i].File < calls[j].File
		}
		return calls[i].StartTime < calls[j].StartTime
	})
}

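// Usage sketch (not part of the diff): Go channels are not covariant, so a
// chan ravenResult is not assignable to <-chan parallelResult. The simplest
// wiring is to declare the fan-in channel as chan parallelResult and have
// workers send their concrete results into it. processRavenFile and
// ravenFiles below are hypothetical stand-ins for the real worker and its
// work list; requires "sync" in addition to the imports above.
var processed atomic.Int32
ravenFiles := []string{"sel01.txt", "sel02.txt"} // hypothetical
results := make(chan parallelResult)
var wg sync.WaitGroup
for _, f := range ravenFiles {
	wg.Add(1)
	go func(f string) {
		defer wg.Done()
		results <- processRavenFile(f) // ravenResult satisfies parallelResult
	}(f)
}
go func() {
	wg.Wait()
	close(results) // lets aggregateResults' range loop terminate
}()
stats := aggregateResults(results, len(ravenFiles), &processed, false, nil)
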
}

// validateClusterCyclicPattern validates the cyclic recording pattern if provided.
func validateClusterCyclicPattern(database *sql.DB, input ClusterInput) error {
	if input.CyclicRecordingPatternID == nil {
		return nil
	}
	trimmed := strings.TrimSpace(*input.CyclicRecordingPatternID)
	if trimmed == "" {
		return nil
	}
	return validateCyclicPattern(database, trimmed)

func updateCluster(ctx context.Context, input ClusterInput) (ClusterOutput, error) {
	var output ClusterOutput

// validateClusterUpdateInput validates cluster ID, fields, and cyclic pattern for update.
func validateClusterUpdateInput(input ClusterInput) (string, error) {

if input.CyclicRecordingPatternID != nil && strings.TrimSpace(*input.CyclicRecordingPatternID) != "" {
	if err := utils.ValidateShortID(*input.CyclicRecordingPatternID, "cyclic_recording_pattern_id"); err != nil {
		return output, err

if input.CyclicRecordingPatternID != nil {
	trimmed := strings.TrimSpace(*input.CyclicRecordingPatternID)
	if trimmed != "" {
		if err := utils.ValidateShortID(trimmed, "cyclic_recording_pattern_id"); err != nil {
			return "", err
		}

if input.CyclicRecordingPatternID != nil {
	trimmedPatternID := strings.TrimSpace(*input.CyclicRecordingPatternID)
	if trimmedPatternID != "" {
		if err := validateCyclicPattern(database, trimmedPatternID); err != nil {
			return output, err
		}
	}

if err := validateClusterCyclicPattern(database, input); err != nil {
	return output, err

func (r ravenResult) filePath() string          { return r.ravenFile }
func (r ravenResult) getCalls() []ClusteredCall { return r.calls }
func (r ravenResult) wasWritten() bool          { return r.written }
func (r ravenResult) wasSkipped() bool          { return r.skipped }
func (r ravenResult) getError() error           { return r.err }

speciesCount := make(map[string]int)
var allCalls []ClusteredCall
dataFilesWritten := 0
dataFilesSkipped := 0
filesProcessed := 0
filesDeleted := 0
var firstErr error
for result := range results {
	if result.err != nil && firstErr == nil {
		firstErr = result.err
	}
	if result.written {
		dataFilesWritten++
	}
	if result.skipped {
		dataFilesSkipped++
	}
	for _, call := range result.calls {
		allCalls = append(allCalls, call)
		speciesCount[call.EbirdCode]++
	}
	filesProcessed++
	// Delete if requested and successfully processed
	if input.Delete && result.written {
		if err := os.Remove(result.ravenFile); err != nil {
			if firstErr == nil {
				firstErr = fmt.Errorf("failed to delete %s: %w", result.ravenFile, err)
			}
		} else {
			filesDeleted++
		}
	}

stats := aggregateResults(results, total, &processed, input.Delete, input.ProgressHandler)
	if input.ProgressHandler != nil {
		current := int(processed.Add(1))
		input.ProgressHandler(current, total, filepath.Base(result.ravenFile))
	}
}
if firstErr != nil {
	errMsg := firstErr.Error()

if stats.firstErr != nil {
	errMsg := stats.firstErr.Error()

// Sort all calls by file, then start time
sort.Slice(allCalls, func(i, j int) bool {
	if allCalls[i].File != allCalls[j].File {
		return allCalls[i].File < allCalls[j].File
	}
	return allCalls[i].StartTime < allCalls[j].StartTime
})

sortCallsByFileAndTime(stats.calls)
output.Calls = allCalls
output.TotalCalls = len(allCalls)
output.SpeciesCount = speciesCount
output.DataFilesWritten = dataFilesWritten
output.DataFilesSkipped = dataFilesSkipped
output.FilesProcessed = filesProcessed
output.FilesDeleted = filesDeleted

output.Calls = stats.calls
output.TotalCalls = len(stats.calls)
output.SpeciesCount = stats.speciesCount
output.DataFilesWritten = stats.dataFilesWritten
output.DataFilesSkipped = stats.dataFilesSkipped
output.FilesProcessed = stats.filesProcessed
output.FilesDeleted = stats.filesDeleted

func (r birdaResult) filePath() string          { return r.birdaFile }
func (r birdaResult) getCalls() []ClusteredCall { return r.calls }
func (r birdaResult) wasWritten() bool          { return r.written }
func (r birdaResult) wasSkipped() bool          { return r.skipped }
func (r birdaResult) getError() error           { return r.err }

speciesCount := make(map[string]int)
var allCalls []ClusteredCall
dataFilesWritten := 0
dataFilesSkipped := 0
filesProcessed := 0
filesDeleted := 0
var firstErr error
for result := range results {
	if result.err != nil && firstErr == nil {
		firstErr = result.err
	}
	if result.written {
		dataFilesWritten++
	}
	if result.skipped {
		dataFilesSkipped++
	}
	for _, call := range result.calls {
		allCalls = append(allCalls, call)
		speciesCount[call.EbirdCode]++
	}
	filesProcessed++
	// Delete if requested and successfully processed
	if input.Delete && result.written {
		if err := os.Remove(result.birdaFile); err != nil {
			if firstErr == nil {
				firstErr = fmt.Errorf("failed to delete %s: %w", result.birdaFile, err)
			}
		} else {
			filesDeleted++
		}
	}

stats := aggregateResults(results, total, &processed, input.Delete, input.ProgressHandler)
	if input.ProgressHandler != nil {
		current := int(processed.Add(1))
		input.ProgressHandler(current, total, filepath.Base(result.birdaFile))
	}
}
if firstErr != nil {
	errMsg := firstErr.Error()

if stats.firstErr != nil {
	errMsg := stats.firstErr.Error()

// Sort all calls by file, then start time
sort.Slice(allCalls, func(i, j int) bool {
	if allCalls[i].File != allCalls[j].File {
		return allCalls[i].File < allCalls[j].File
	}
	return allCalls[i].StartTime < allCalls[j].StartTime
})

sortCallsByFileAndTime(stats.calls)
output.Calls = allCalls
output.TotalCalls = len(allCalls)
output.SpeciesCount = speciesCount
output.DataFilesWritten = dataFilesWritten
output.DataFilesSkipped = dataFilesSkipped
output.FilesProcessed = filesProcessed
output.FilesDeleted = filesDeleted

output.Calls = stats.calls
output.TotalCalls = len(stats.calls)
output.SpeciesCount = stats.speciesCount
output.DataFilesWritten = stats.dataFilesWritten
output.DataFilesSkipped = stats.dataFilesSkipped
output.FilesProcessed = stats.filesProcessed
output.FilesDeleted = stats.filesDeleted

func DetectAnomalies(input DetectAnomaliesInput) (DetectAnomaliesOutput, error) {
	folder := filepath.Clean(input.Folder)
	output := DetectAnomaliesOutput{
		Folder: folder,
		Models: input.Models,
	}

// validateAnomalyInput validates the input parameters for DetectAnomalies.
func validateAnomalyInput(input DetectAnomaliesInput) error {

output.Error = fmt.Sprintf("not a directory: %s", input.Folder)return output, fmt.Errorf("%s", output.Error)
return fmt.Errorf("not a directory: %s", input.Folder)}return nil}func DetectAnomalies(input DetectAnomaliesInput) (DetectAnomaliesOutput, error) {folder := filepath.Clean(input.Folder)output := DetectAnomaliesOutput{Folder: folder,Models: input.Models,
dataFiles, err := parseAndSortDataFiles(config)
if err != nil {
	return nil, err
}
kept, cachedSegs, timeFiltered := filterDataFiles(dataFiles, config)
if config.Sample > 0 && config.Sample < 100 {
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	kept, cachedSegs = applySampling(kept, cachedSegs, config.Sample, rng)
}
return buildClassifyState(config, kept, cachedSegs, timeFiltered)
}

// parseAndSortDataFiles finds, parses, and sorts .data files from the config.
func parseAndSortDataFiles(config ClassifyConfig) ([]*utils.DataFile, error) {

// Compute filtered segments once, remove files with no matches
	return dataFiles, nil
}

// filterDataFiles applies segment filters to each data file, returning kept files and their segments.
func filterDataFiles(dataFiles []*utils.DataFile, config ClassifyConfig) ([]*utils.DataFile, [][]*utils.Segment, int) {

// Handle --goto: find file by basename and set initial position
if config.Goto != "" {
	found := false
	for i, df := range state.DataFiles {
		base := df.FilePath[strings.LastIndex(df.FilePath, "/")+1:]
		if base == config.Goto {
			state.FileIdx = i
			found = true
			break
		}
	}
	if !found {
		return nil, fmt.Errorf("goto file not found (or has no matching segments): %s", config.Goto)

if config.Goto == "" {return state, nil}for i, df := range state.DataFiles {base := df.FilePath[strings.LastIndex(df.FilePath, "/")+1:]if base == config.Goto {state.FileIdx = ireturn state, nil
}

// failOutput sets error details and processing time on the output before returning.
func (o *BulkFileImportOutput) failOutput(errs []string, startTime time.Time) {
	o.Errors = errs
	o.ProcessingTime = time.Since(startTime).String()
}

// BulkFileImport imports WAV files across multiple locations using a CSV specification.

readDB, err := db.OpenReadOnlyDB(dbPath)
if err != nil {
	logger.Log("ERROR: Failed to open database: %v", err)
	output.Errors = []string{fmt.Sprintf("failed to open database: %v", err)}
	output.ProcessingTime = time.Since(startTime).String()
	return output, fmt.Errorf("failed to open database: %w", err)
}
locationErrors := bulkValidateLocationsBelongToDataset(readDB, locations, input.DatasetID)
readDB.Close()
if len(locationErrors) > 0 {
	for _, locErr := range locationErrors {
		logger.Log("ERROR: %s", locErr)
	}
	output.Errors = locationErrors
	output.ProcessingTime = time.Since(startTime).String()
	return output, fmt.Errorf("location validation failed: %d location(s) do not belong to dataset %s", len(locationErrors), input.DatasetID)

if err := bulkValidateLocations(logger, locations, input.DatasetID); err != nil {
	output.failOutput([]string{err.Error()}, startTime)
	return output, err

for i, loc := range locations {
	logger.Log("[%d/%d] Processing location: %s", i+1, len(locations), loc.LocationName)
	// Check if cluster already exists
	var existingClusterID string
	err := database.QueryRow(`
		SELECT id FROM cluster
		WHERE location_id = ? AND name = ? AND active = true`,
		loc.LocationID, loc.DateRange).Scan(&existingClusterID)

	var clusterID string
	if err == sql.ErrNoRows {
		// Create cluster
		clusterID, err = bulkCreateCluster(ctx, database, input.DatasetID, loc.LocationID, loc.DateRange, loc.SampleRate)
		if err != nil {
			errMsg := fmt.Sprintf("Failed to create cluster for location %s: %v", loc.LocationName, err)
			logger.Log("ERROR: %s", errMsg)
			output.Errors = append(output.Errors, errMsg)
			output.ProcessingTime = time.Since(startTime).String()
			return output, fmt.Errorf("failed to create cluster: %w", err)
		}
		logger.Log(" Created cluster: %s", clusterID)
		output.ClustersCreated++
	} else if err != nil {
		errMsg := fmt.Sprintf("Failed to check cluster for location %s: %v", loc.LocationName, err)
		logger.Log("ERROR: %s", errMsg)
		output.Errors = append(output.Errors, errMsg)
		output.ProcessingTime = time.Since(startTime).String()
		return output, fmt.Errorf("failed to check cluster: %w", err)
	} else {
		clusterID = existingClusterID
		logger.Log(" Using existing cluster: %s", clusterID)
		output.ClustersExisting++
	}
	compositeKey := loc.LocationID + "|" + loc.DateRange
	clusterIDMap[compositeKey] = clusterID

clusterIDMap, created, existing, err := bulkCreateClusters(ctx, database, logger, locations, input.DatasetID)
if err != nil {
	output.failOutput(output.Errors, startTime)
	return output, err

totalImported := 0
totalDuplicates := 0
totalErrors := 0
totalScanned := 0
for i, loc := range locations {
	compositeKey := loc.LocationID + "|" + loc.DateRange
	clusterID, ok := clusterIDMap[compositeKey]
	if !ok {
		continue // Should not happen, but safety check
	}
	logger.Log("[%d/%d] Importing files for: %s", i+1, len(locations), loc.LocationName)
	logger.Log(" Directory: %s", loc.DirectoryPath)
	// Check if directory exists
	if _, err := os.Stat(loc.DirectoryPath); os.IsNotExist(err) {
		logger.Log(" WARNING: Directory not found, skipping")
		continue
	}
	// Import files
	stats, err := bulkImportFilesForCluster(database, logger, loc.DirectoryPath, input.DatasetID, loc.LocationID, clusterID)
	if err != nil {
		errMsg := fmt.Sprintf("Failed to import files for location %s: %v", loc.LocationName, err)
		logger.Log("ERROR: %s", errMsg)
		output.Errors = append(output.Errors, errMsg)
		output.TotalFilesScanned = totalScanned
		output.FilesImported = totalImported
		output.FilesDuplicate = totalDuplicates
		output.FilesError = totalErrors
		output.ProcessingTime = time.Since(startTime).String()
		return output, fmt.Errorf("failed to import files: %w", err)
	}
	logger.Log(" Scanned: %d files", stats.TotalFiles)
	logger.Log(" Imported: %d, Duplicates: %d", stats.ImportedFiles, stats.DuplicateFiles)
	if stats.ErrorFiles > 0 {
		logger.Log(" Errors: %d files", stats.ErrorFiles)
	}

fileStats, errs := bulkImportAllFiles(database, logger, locations, clusterIDMap, input.DatasetID)
output.TotalFilesScanned = fileStats.TotalFiles
output.FilesImported = fileStats.ImportedFiles
output.FilesDuplicate = fileStats.DuplicateFiles
output.FilesError = fileStats.ErrorFiles
output.Errors = append(output.Errors, errs...)

	totalScanned += stats.TotalFiles
	totalImported += stats.ImportedFiles
	totalDuplicates += stats.DuplicateFiles
	totalErrors += stats.ErrorFiles

if len(errs) > 0 {
	output.ProcessingTime = time.Since(startTime).String()
	return output, fmt.Errorf("failed to import files: %s", errs[0])

logger.Log("Total files scanned: %d", totalScanned)logger.Log("Files imported: %d", totalImported)logger.Log("Duplicates skipped: %d", totalDuplicates)logger.Log("Errors: %d", totalErrors)
logger.Log("Total files scanned: %d", fileStats.TotalFiles)logger.Log("Files imported: %d", fileStats.ImportedFiles)logger.Log("Duplicates skipped: %d", fileStats.DuplicateFiles)logger.Log("Errors: %d", fileStats.ErrorFiles)
}

// bulkValidateLocations validates that all location_ids in the CSV belong to the dataset.
// Returns an error if validation fails.
func bulkValidateLocations(logger *progressLogger, locations []bulkLocationData, datasetID string) error {
	readDB, err := db.OpenReadOnlyDB(dbPath)
	if err != nil {
		logger.Log("ERROR: Failed to open database: %v", err)
		return fmt.Errorf("failed to open database: %w", err)
	}
	locationErrors := bulkValidateLocationsBelongToDataset(readDB, locations, datasetID)
	readDB.Close()
	if len(locationErrors) > 0 {
		for _, locErr := range locationErrors {
			logger.Log("ERROR: %s", locErr)
		}
		return fmt.Errorf("location validation failed: %d location(s) do not belong to dataset %s", len(locationErrors), datasetID)
	}
	return nil

// bulkCreateClusters creates or validates clusters for all locations.
// Returns the cluster ID map, counts of created/existing clusters, and any error.
func bulkCreateClusters(ctx context.Context, database *sql.DB, logger *progressLogger, locations []bulkLocationData, datasetID string) (map[string]string, int, int, error) {
	clusterIDMap := make(map[string]string)
	created := 0
	existing := 0
	for i, loc := range locations {
		logger.Log("[%d/%d] Processing location: %s", i+1, len(locations), loc.LocationName)
		var existingClusterID string
		err := database.QueryRow(`
			SELECT id FROM cluster
			WHERE location_id = ? AND name = ? AND active = true`,
			loc.LocationID, loc.DateRange).Scan(&existingClusterID)

// bulkReadCSV reads and parses the CSV file
		var clusterID string
		if err == sql.ErrNoRows {
			clusterID, err = bulkCreateCluster(ctx, database, datasetID, loc.LocationID, loc.DateRange, loc.SampleRate)
			if err != nil {
				logger.Log("ERROR: Failed to create cluster for location %s: %v", loc.LocationName, err)
				return nil, 0, 0, fmt.Errorf("failed to create cluster: %w", err)
			}
			logger.Log(" Created cluster: %s", clusterID)
			created++
		} else if err != nil {
			logger.Log("ERROR: Failed to check cluster for location %s: %v", loc.LocationName, err)
			return nil, 0, 0, fmt.Errorf("failed to check cluster: %w", err)
		} else {
			clusterID = existingClusterID
			logger.Log(" Using existing cluster: %s", clusterID)
			existing++
		}
		compositeKey := loc.LocationID + "|" + loc.DateRange
		clusterIDMap[compositeKey] = clusterID
	}
	return clusterIDMap, created, existing, nil
}

// bulkImportAllFiles imports files for all locations using the cluster ID map.
// Returns aggregate stats and any error messages.
func bulkImportAllFiles(database *sql.DB, logger *progressLogger, locations []bulkLocationData, clusterIDMap map[string]string, datasetID string) (bulkImportStats, []string) {
	var total bulkImportStats
	var errs []string
	for i, loc := range locations {
		compositeKey := loc.LocationID + "|" + loc.DateRange
		clusterID, ok := clusterIDMap[compositeKey]
		if !ok {
			continue
		}
		logger.Log("[%d/%d] Importing files for: %s", i+1, len(locations), loc.LocationName)
		logger.Log(" Directory: %s", loc.DirectoryPath)
		if _, err := os.Stat(loc.DirectoryPath); os.IsNotExist(err) {
			logger.Log(" WARNING: Directory not found, skipping")
			continue
		}
		stats, err := bulkImportFilesForCluster(database, logger, loc.DirectoryPath, datasetID, loc.LocationID, clusterID)
		if err != nil {
			errMsg := fmt.Sprintf("Failed to import files for location %s: %v", loc.LocationName, err)
			logger.Log("ERROR: %s", errMsg)
			return total, []string{errMsg}
		}
		logger.Log(" Scanned: %d files", stats.TotalFiles)
		logger.Log(" Imported: %d, Duplicates: %d", stats.ImportedFiles, stats.DuplicateFiles)
		if stats.ErrorFiles > 0 {
			logger.Log(" Errors: %d files", stats.ErrorFiles)
		}
		total.TotalFiles += stats.TotalFiles
		total.ImportedFiles += stats.ImportedFiles
		total.DuplicateFiles += stats.DuplicateFiles
		total.ErrorFiles += stats.ErrorFiles
	}
	return total, errs
}

// Parse optional floats
tools.SetDBPath(*dbPath)
defer initEventLog(*dbPath)()
input := parseLocationUpdateInput(fs, dbPath, id, name, lat, lon, tz, description)
output, err := tools.CreateOrUpdateLocation(context.Background(), input)
if err != nil {
	fmt.Fprintf(os.Stderr, "Error: %v\n", err)
	os.Exit(1)
}
printJSON(output)
}

// parseLocationUpdateInput builds a LocationInput from parsed flags, handling optional float parsing.
func parseLocationUpdateInput(fs *flag.FlagSet, dbPath, id, name, lat, lon, tz, description *string) tools.LocationInput {

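// Sketch (hypothetical, not from the diff): the "Parse optional floats" step
// inside parseLocationUpdateInput presumably turns optional string flags such
// as lat/lon into *float64 fields. One plausible helper:
func optionalFloat(s string) (*float64, error) {
	if s == "" {
		return nil, nil // flag not provided
	}
	v, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return nil, fmt.Errorf("invalid float %q: %w", s, err)
	}
	return &v, nil
}
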
## [2026-05-05] Reduce cyclomatic complexity across codebase, add cyclop linter gate

Added `cyclop` linter to `.golangci.yml` with `max-complexity: 15` (CI gate)
and `package-average: 8.0`. Test files, `main()`, and `RunCalls()` dispatch
switches are excluded (cyclop overcounts trivial switch dispatches).

Refactored the following functions from complexity 15-19 down to ≤10:

- **parseClipArgs** (19→~3): Replaced manual switch/case arg parser with
  `flag.FlagSet`, consistent with all other `cmd/` functions.
- **callsFromBirdaParallel** (17→~8): Extracted `aggregateResults()` and
  `sortCallsByFileAndTime()` into `tools/parallel_aggregate.go`.
- **callsFromRavenParallel** (17→~8): Same pattern, shared via the
  `parallelResult` interface on both `birdaResult` and `ravenResult`.
- **BulkFileImport** (17→~8): Extracted `bulkCreateClusters()`,
  `bulkImportAllFiles()`, `bulkValidateLocations()`, and the `failOutput()` helper.
- **saveClip** (16→~7): Extracted `buildClipPaths()`, `generateClipSpectrogram()`,
  and `writeClipPNG()`.
- **RunLocationUpdate** (15→~8): Extracted `parseLocationUpdateInput()`.
- **DetectAnomalies** (15→~8): Extracted `validateAnomalyInput()`.
- **LoadDataFiles** (15→~6): Extracted `parseAndSortDataFiles()`,
  `filterDataFiles()`, and `buildClassifyState()`.
- **updateCluster** (17→~10): Extracted `validateClusterUpdateInput()` and
  `validateClusterCyclicPattern()`.
- **IsNight** (18→~10): Extracted `populateSunTimes()`.
- **createPattern/updatePattern** (16-18→~10): Extracted
  `validateCreatePatternInput()`, `validateUpdatePatternInput()`, and
  `findExistingPattern()`.
- **SegmentMatchesFilters** (16→~6): Extracted `labelMatchesFilters()`.

Also removed the now-unnecessary `clipArgParser` type and its three methods.
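To illustrate the `flag.FlagSet` pattern that replaced the hand-rolled parser in `parseClipArgs`, a minimal sketch (flag names are hypothetical; the entry does not list the real flags):

```go
// Hypothetical flags, shown only to illustrate the flag.FlagSet pattern.
fs := flag.NewFlagSet("clip", flag.ContinueOnError)
prefix := fs.String("prefix", "clip", "output filename prefix")
start := fs.Float64("start", 0, "segment start time (seconds)")
if err := fs.Parse(args); err != nil {
	return err
}
fmt.Println(*prefix, *start)
```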
        - cyclop
    # Dispatch switches: cyclomatic complexity overcounts these; each case
    # is trivially independent, not a branching hazard.
    - path: main\.go
      linters:
        - cyclop
    - path: cmd/calls\.go
      linters:
        - cyclop
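# For completeness, a sketch of the settings side of the gate described in
# the changelog entry above (values taken from that entry; exact placement
# within this repo's .golangci.yml is assumed):
linters-settings:
  cyclop:
    max-complexity: 15   # hard per-function CI gate
    package-average: 8.0 # softer package-wide average
    skip-tests: true     # test files excluded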