2P27XV3DGJCRA4SNJENCJYZLPR2XWZMTY7CGYYSJOY4UMDVVO25AC mappedSpeciesSet, mappedCalltypes := collectMappedLabels(mapping, dataCalltypes)// Validate species exist in DBif err := validateMappedSpecies(queryer, mappedSpeciesSet, &result); err != nil {return result, err}// Validate calltypes exist in DBif err := validateMappedCalltypes(queryer, mappedCalltypes, &result); err != nil {return result, err}return result, nil}// collectMappedLabels builds sets of mapped species and calltype labelsfunc collectMappedLabels(mapping MappingFile, dataCalltypes map[string]map[string]bool) (map[string]bool, map[string]map[string]string) {
// Validate species exist in DB
return mappedSpeciesSet, mappedCalltypes}// validateMappedSpecies checks that all mapped species exist in the databasefunc validateMappedSpecies(queryer DB, mappedSpeciesSet map[string]bool, result *MappingValidationResult) error {
if len(speciesLabels) > 0 {query := `SELECT label FROM species WHERE label IN (` + db.Placeholders(len(speciesLabels)) + `) AND active = true`args := make([]any, len(speciesLabels))for i, s := range speciesLabels {args[i] = s}
query := `SELECT label FROM species WHERE label IN (` + db.Placeholders(len(speciesLabels)) + `) AND active = true`args := make([]any, len(speciesLabels))for i, s := range speciesLabels {args[i] = s}
rows, err := queryer.Query(query, args...)if err != nil {return result, fmt.Errorf("failed to query species: %w", err)}defer rows.Close()
rows, err := queryer.Query(query, args...)if err != nil {return fmt.Errorf("failed to query species: %w", err)}defer rows.Close()
foundSpecies := make(map[string]bool)for rows.Next() {var label stringif err := rows.Scan(&label); err == nil {foundSpecies[label] = true}
foundSpecies := make(map[string]bool)for rows.Next() {var label stringif err := rows.Scan(&label); err == nil {foundSpecies[label] = true
if input.DatasetID == nil || strings.TrimSpace(*input.DatasetID) == "" {return output, fmt.Errorf("dataset_id is required when creating a cluster")}if input.LocationID == nil || strings.TrimSpace(*input.LocationID) == "" {return output, fmt.Errorf("location_id is required when creating a cluster")}if input.Name == nil || strings.TrimSpace(*input.Name) == "" {return output, fmt.Errorf("name is required when creating a cluster")}if input.SampleRate == nil {return output, fmt.Errorf("sample_rate is required when creating a cluster")}// Validate ID formatsif err := utils.ValidateShortID(*input.DatasetID, "dataset_id"); err != nil {
if err := validateCreateClusterFields(input); err != nil {
var datasetExists, datasetActive boolvar datasetName stringerr = tx.QueryRowContext(ctx,"SELECT EXISTS(SELECT 1 FROM dataset WHERE id = ?), COALESCE((SELECT active FROM dataset WHERE id = ?), false), COALESCE((SELECT name FROM dataset WHERE id = ?), '')",*input.DatasetID, *input.DatasetID, *input.DatasetID,).Scan(&datasetExists, &datasetActive, &datasetName)
datasetName, err := verifyDatasetForCluster(ctx, tx, *input.DatasetID)
return output, fmt.Errorf("failed to verify dataset: %w", err)}if !datasetExists {return output, fmt.Errorf("dataset with ID '%s' does not exist", *input.DatasetID)}if !datasetActive {return output, fmt.Errorf("dataset '%s' (ID: %s) is not active", datasetName, *input.DatasetID)
return output, err
var locationExists, locationActive boolvar locationName stringvar locationDatasetID stringerr = tx.QueryRowContext(ctx,"SELECT EXISTS(SELECT 1 FROM location WHERE id = ?), COALESCE((SELECT active FROM location WHERE id = ?), false), COALESCE((SELECT name FROM location WHERE id = ?), ''), COALESCE((SELECT dataset_id FROM location WHERE id = ?), '')",*input.LocationID, *input.LocationID, *input.LocationID, *input.LocationID,).Scan(&locationExists, &locationActive, &locationName, &locationDatasetID)
locationName, err := verifyLocationForCluster(ctx, tx, *input.LocationID, *input.DatasetID, datasetName)
if !locationExists {return output, fmt.Errorf("location with ID '%s' does not exist", *input.LocationID)}if !locationActive {return output, fmt.Errorf("location '%s' (ID: %s) is not active", locationName, *input.LocationID)}if locationDatasetID != *input.DatasetID {return output, fmt.Errorf("location '%s' (ID: %s) does not belong to dataset '%s' (ID: %s) - it belongs to dataset ID '%s'",locationName, *input.LocationID, datasetName, *input.DatasetID, locationDatasetID)}
var patternExists, patternActive boolerr = tx.QueryRowContext(ctx,"SELECT EXISTS(SELECT 1 FROM cyclic_recording_pattern WHERE id = ?), COALESCE((SELECT active FROM cyclic_recording_pattern WHERE id = ?), false)",*input.CyclicRecordingPatternID, *input.CyclicRecordingPatternID,).Scan(&patternExists, &patternActive)if err != nil {return output, fmt.Errorf("failed to verify cyclic recording pattern: %w", err)
if err := verifyPatternExists(ctx, tx, *input.CyclicRecordingPatternID); err != nil {return output, err
if !patternExists {return output, fmt.Errorf("cyclic recording pattern with ID '%s' does not exist", *input.CyclicRecordingPatternID)}if !patternActive {return output, fmt.Errorf("cyclic recording pattern with ID '%s' is not active", *input.CyclicRecordingPatternID)}
var existingID stringerr = tx.QueryRowContext(ctx,"SELECT id FROM cluster WHERE location_id = ? AND name = ? AND active = true",*input.LocationID, *input.Name,).Scan(&existingID)
existing, err := findExistingClusterInLocation(ctx, tx, *input.LocationID, *input.Name)
// Cluster with this name already exists in location - return existing (consistent duplicate handling)var cluster db.Clustererr = tx.QueryRowContext(ctx,"SELECT id, dataset_id, location_id, name, description, created_at, last_modified, active, cyclic_recording_pattern_id, sample_rate FROM cluster WHERE id = ?",existingID,).Scan(&cluster.ID, &cluster.DatasetID, &cluster.LocationID, &cluster.Name, &cluster.Description,&cluster.CreatedAt, &cluster.LastModified, &cluster.Active, &cluster.CyclicRecordingPatternID, &cluster.SampleRate)if err != nil {return output, fmt.Errorf("failed to fetch existing cluster: %w", err)}
// Cluster with this name already exists - return existing
output.Cluster = clusteroutput.Message = fmt.Sprintf("Cluster '%s' already exists in location '%s' (ID: %s) - returning existing cluster", cluster.Name, locationName, cluster.ID)
output.Cluster = existingoutput.Message = fmt.Sprintf("Cluster '%s' already exists in location '%s' (ID: %s) - returning existing cluster", existing.Name, locationName, existing.ID)
var cluster db.Clustererr = tx.QueryRowContext(ctx,"SELECT id, dataset_id, location_id, name, description, created_at, last_modified, active, cyclic_recording_pattern_id, sample_rate FROM cluster WHERE id = ?",id,).Scan(&cluster.ID, &cluster.DatasetID, &cluster.LocationID, &cluster.Name, &cluster.Description,&cluster.CreatedAt, &cluster.LastModified, &cluster.Active, &cluster.CyclicRecordingPatternID, &cluster.SampleRate)
cluster, err := fetchClusterByID(ctx, tx, id)
// validateCreateClusterFields validates required fields for creating a clusterfunc validateCreateClusterFields(input ClusterInput) error {if input.DatasetID == nil || strings.TrimSpace(*input.DatasetID) == "" {return fmt.Errorf("dataset_id is required when creating a cluster")}if input.LocationID == nil || strings.TrimSpace(*input.LocationID) == "" {return fmt.Errorf("location_id is required when creating a cluster")}if input.Name == nil || strings.TrimSpace(*input.Name) == "" {return fmt.Errorf("name is required when creating a cluster")}if input.SampleRate == nil {return fmt.Errorf("sample_rate is required when creating a cluster")}return nil}// validateCreateClusterIDs validates ID format fieldsfunc validateCreateClusterIDs(input ClusterInput) error {if err := utils.ValidateShortID(*input.DatasetID, "dataset_id"); err != nil {return err}return utils.ValidateShortID(*input.LocationID, "location_id")}// verifyDatasetForCluster verifies dataset exists and is active within a transactionfunc verifyDatasetForCluster(ctx context.Context, tx *db.LoggedTx, datasetID string) (string, error) {var exists, active boolvar name stringerr := tx.QueryRowContext(ctx,"SELECT EXISTS(SELECT 1 FROM dataset WHERE id = ?), COALESCE((SELECT active FROM dataset WHERE id = ?), false), COALESCE((SELECT name FROM dataset WHERE id = ?), '')",datasetID, datasetID, datasetID,).Scan(&exists, &active, &name)if err != nil {return "", fmt.Errorf("failed to verify dataset: %w", err)}if !exists {return "", fmt.Errorf("dataset with ID '%s' does not exist", datasetID)}if !active {return "", fmt.Errorf("dataset '%s' (ID: %s) is not active", name, datasetID)}return name, nil}// verifyLocationForCluster verifies location exists, is active, and belongs to the datasetfunc verifyLocationForCluster(ctx context.Context, tx *db.LoggedTx, locationID, datasetID, datasetName string) (string, error) {var exists, active boolvar name, locDatasetID stringerr := tx.QueryRowContext(ctx,"SELECT EXISTS(SELECT 1 FROM 
location WHERE id = ?), COALESCE((SELECT active FROM location WHERE id = ?), false), COALESCE((SELECT name FROM location WHERE id = ?), ''), COALESCE((SELECT dataset_id FROM location WHERE id = ?), '')",locationID, locationID, locationID, locationID,).Scan(&exists, &active, &name, &locDatasetID)if err != nil {return "", fmt.Errorf("failed to verify location: %w", err)}if !exists {return "", fmt.Errorf("location with ID '%s' does not exist", locationID)}if !active {return "", fmt.Errorf("location '%s' (ID: %s) is not active", name, locationID)}if locDatasetID != datasetID {return "", fmt.Errorf("location '%s' (ID: %s) does not belong to dataset '%s' (ID: %s) - it belongs to dataset ID '%s'",name, locationID, datasetName, datasetID, locDatasetID)}return name, nil}// verifyPatternExists verifies a cyclic recording pattern exists and is activefunc verifyPatternExists(ctx context.Context, tx *db.LoggedTx, patternID string) error {var exists, active boolerr := tx.QueryRowContext(ctx,"SELECT EXISTS(SELECT 1 FROM cyclic_recording_pattern WHERE id = ?), COALESCE((SELECT active FROM cyclic_recording_pattern WHERE id = ?), false)",patternID, patternID,).Scan(&exists, &active)if err != nil {return fmt.Errorf("failed to verify cyclic recording pattern: %w", err)}if !exists {return fmt.Errorf("cyclic recording pattern with ID '%s' does not exist", patternID)}if !active {return fmt.Errorf("cyclic recording pattern with ID '%s' is not active", patternID)}return nil}// findExistingClusterInLocation checks for an existing cluster with the same name in a locationfunc findExistingClusterInLocation(ctx context.Context, tx *db.LoggedTx, locationID, name string) (db.Cluster, error) {var existingID stringerr := tx.QueryRowContext(ctx,"SELECT id FROM cluster WHERE location_id = ? AND name = ? 
AND active = true",locationID, name,).Scan(&existingID)if err != nil {return db.Cluster{}, err}return fetchClusterByID(ctx, tx, existingID)}// fetchClusterByID fetches a cluster row by IDfunc fetchClusterByID(ctx context.Context, tx *db.LoggedTx, id string) (db.Cluster, error) {var c db.Clustererr := tx.QueryRowContext(ctx,"SELECT id, dataset_id, location_id, name, description, created_at, last_modified, active, cyclic_recording_pattern_id, sample_rate FROM cluster WHERE id = ?",id,).Scan(&c.ID, &c.DatasetID, &c.LocationID, &c.Name, &c.Description,&c.CreatedAt, &c.LastModified, &c.Active, &c.CyclicRecordingPatternID, &c.SampleRate)return c, err}
// Process each file
// Count segments for totalif input.Brief {for _, fs := range output.Filters {output.TotalSegments += fs.Segments}} else {output.TotalSegments = len(output.Segments)}finaliseSummary(&output, operatorSet, reviewerSet, input.Brief)return output, nil}// summariseFiles processes all data files, populating output statsfunc summariseFiles(filePaths []string, input CallsSummariseInput, output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool) {
// Filter labels if --filter is specifiedvar filteredLabels []*utils.Labelfor _, l := range seg.Labels {if input.Filter == "" || l.Filter == input.Filter {filteredLabels = append(filteredLabels, l)}}// Skip segments with no matching labels when filter is active
filteredLabels := filterLabels(seg.Labels, input.Filter)
for _, l := range filteredLabels {labelSummary := LabelSummary{Filter: l.Filter,Certainty: l.Certainty,Species: l.Species,}if l.CallType != "" {labelSummary.CallType = l.CallType}if l.Comment != "" {labelSummary.Comment = l.Comment}if l.Bookmark {labelSummary.Bookmark = true}labels = append(labels, labelSummary)}
output.Segments = append(output.Segments, SegmentSummary{File: relPath,StartTime: seg.StartTime,EndTime: seg.EndTime,Labels: buildLabelSummaries(filteredLabels),})
// Update filter stats and review status (using filtered labels)for _, l := range filteredLabels {// Update filter statsfs, exists := output.Filters[l.Filter]if !exists {fs = FilterStats{Segments: 0,Species: make(map[string]int),Calltypes: make(map[string]map[string]int),}}fs.Segments++fs.Species[l.Species]++
// trackMeta records operator and reviewer from file metadatafunc trackMeta(meta *utils.DataMeta, operatorSet, reviewerSet map[string]bool) {if meta == nil {return}if meta.Operator != "" {operatorSet[meta.Operator] = true}if meta.Reviewer != "" {reviewerSet[meta.Reviewer] = true}}
// Track calltypes if presentif l.CallType != "" {if fs.Calltypes[l.Species] == nil {fs.Calltypes[l.Species] = make(map[string]int)}fs.Calltypes[l.Species][l.CallType]++}output.Filters[l.Filter] = fs
// filterLabels returns labels matching the filter, or all labels if filter is emptyfunc filterLabels(labels []*utils.Label, filter string) []*utils.Label {if filter == "" {return labels}var filtered []*utils.Labelfor _, l := range labels {if l.Filter == filter {filtered = append(filtered, l)}}return filtered}
// Update review statusswitch l.Certainty {case 100:output.ReviewStatus.Confirmed++case 0:output.ReviewStatus.DontKnow++default:output.ReviewStatus.Unreviewed++}
// buildLabelSummaries converts labels to label summariesfunc buildLabelSummaries(labels []*utils.Label) []LabelSummary {var summaries []LabelSummaryfor _, l := range labels {ls := LabelSummary{Filter: l.Filter,Certainty: l.Certainty,Species: l.Species,}if l.CallType != "" {ls.CallType = l.CallType}if l.Comment != "" {ls.Comment = l.Comment}if l.Bookmark {ls.Bookmark = true}summaries = append(summaries, ls)}return summaries}
if l.CallType != "" {output.ReviewStatus.WithCallType++}if l.Comment != "" {output.ReviewStatus.WithComments++}if l.Bookmark {output.ReviewStatus.Bookmarked++}}
// updateStatsFromLabels updates filter stats and review status from a set of labelsfunc updateStatsFromLabels(labels []*utils.Label, output *CallsSummariseOutput) {for _, l := range labels {updateFilterStats(l, output)updateReviewStatus(l, output)}}
// Create segment summary only if not briefif !input.Brief {segSummary := SegmentSummary{File: relPath,StartTime: seg.StartTime,EndTime: seg.EndTime,Labels: labels,}output.Segments = append(output.Segments, segSummary)}
// updateFilterStats increments filter-level statistics for a single labelfunc updateFilterStats(l *utils.Label, output *CallsSummariseOutput) {fs, exists := output.Filters[l.Filter]if !exists {fs = FilterStats{Segments: 0,Species: make(map[string]int),Calltypes: make(map[string]map[string]int),
// Count segments for totalif input.Brief {// Recount from filter stats since we didn't track segmentsfor _, fs := range output.Filters {output.TotalSegments += fs.Segments
if l.CallType != "" {if fs.Calltypes[l.Species] == nil {fs.Calltypes[l.Species] = make(map[string]int)
} else {output.TotalSegments = len(output.Segments)
fs.Calltypes[l.Species][l.CallType]++}output.Filters[l.Filter] = fs}// updateReviewStatus increments review status counters for a single labelfunc updateReviewStatus(l *utils.Label, output *CallsSummariseOutput) {switch l.Certainty {case 100:output.ReviewStatus.Confirmed++case 0:output.ReviewStatus.DontKnow++default:output.ReviewStatus.Unreviewed++}if l.CallType != "" {output.ReviewStatus.WithCallType++}if l.Comment != "" {output.ReviewStatus.WithComments++}if l.Bookmark {output.ReviewStatus.Bookmarked++
// Clean up empty calltypes maps (omitempty doesn't work on non-nil empty maps)
// finaliseSummary sorts output, cleans empty maps, and converts sets to sorted slicesfunc finaliseSummary(output *CallsSummariseOutput, operatorSet, reviewerSet map[string]bool, brief bool) {// Clean up empty calltypes maps
if input.File == "" {output.Error = "--file is required"
if err := validatePropagateInput(&output, input); err != nil {return output, err}df, err := utils.ParseDataFile(input.File)if err != nil {output.Error = fmt.Sprintf("parse %s: %v", input.File, err)
if input.FromFilter == "" {output.Error = "--from is required"return output, fmt.Errorf("%s", output.Error)
// Fast path: skip files that don't contain both filters at all.if !hasBothFilters(df, input.FromFilter, input.ToFilter) {output.FiltersMissing = truereturn output, nil
if input.ToFilter == "" {output.Error = "--to is required"return output, fmt.Errorf("%s", output.Error)
sources := collectPropagateSources(df, input.FromFilter, input.Species)propagateTargets(df, sources, input, &output)if output.Propagated > 0 {df.Meta.Reviewer = "Skraak"if err := df.Write(input.File); err != nil {output.Error = fmt.Sprintf("write %s: %v", input.File, err)return output, fmt.Errorf("%s", output.Error)}}return output, nil}// validatePropagateInput checks required fields and file existencefunc validatePropagateInput(output *CallsPropagateOutput, input CallsPropagateInput) error {checks := []struct {val stringmsg string}{{input.File, "--file is required"},{input.FromFilter, "--from is required"},{input.ToFilter, "--to is required"},{input.Species, "--species is required"},
df, err := utils.ParseDataFile(input.File)if err != nil {output.Error = fmt.Sprintf("parse %s: %v", input.File, err)return output, fmt.Errorf("%s", output.Error)}// Fast path: skip files that don't contain both filters at all.
// hasBothFilters checks whether the data file contains both from and to filtersfunc hasBothFilters(df *utils.DataFile, fromFilter, toFilter string) bool {
type sourceRef struct {seg *utils.Segmentlabel *utils.Label}
// sourceRef pairs a segment with its matching source labeltype sourceRef struct {seg *utils.Segmentlabel *utils.Label}// collectPropagateSources gathers verified source labels (certainty==100) for the given filter/speciesfunc collectPropagateSources(df *utils.DataFile, fromFilter, species string) []sourceRef {
choices := make([]PropagateSourceChoice, 0, len(overlaps))for _, s := range overlaps {choices = append(choices, PropagateSourceChoice{Start: s.seg.StartTime,End: s.seg.EndTime,Species: s.label.Species,CallType: s.label.CallType,})}output.Conflicts = append(output.Conflicts, PropagateConflict{TargetStart: tSeg.StartTime,TargetEnd: tSeg.EndTime,TargetCallType: toLabel.CallType,SourceChoices: choices,})
output.Conflicts = append(output.Conflicts, buildConflictRecord(tSeg, toLabel, overlaps))
change := PropagateChange{TargetStart: tSeg.StartTime,TargetEnd: tSeg.EndTime,PrevSpecies: toLabel.Species,PrevCallType: toLabel.CallType,PrevCertainty: toLabel.Certainty,NewSpecies: input.Species,NewCallType: agreedCallType,NewCertainty: 90,
applyPropagation(toLabel, input.Species, agreedCallType, tSeg, output)}}// findUpdatableTargetLabel finds a target label with certainty 70 or 0 for the given filterfunc findUpdatableTargetLabel(labels []*utils.Label, toFilter string) *utils.Label {for _, lbl := range labels {if lbl.Filter == toFilter && (lbl.Certainty == 70 || lbl.Certainty == 0) {return lbl}}return nil}// findOverlappingSources returns sources whose segments overlap with the target segmentfunc findOverlappingSources(sources []sourceRef, tSeg *utils.Segment) []sourceRef {var overlaps []sourceReffor _, s := range sources {if s.seg.StartTime < tSeg.EndTime && tSeg.StartTime < s.seg.EndTime {overlaps = append(overlaps, s)
toLabel.Species = input.SpeciestoLabel.CallType = agreedCallTypetoLabel.Certainty = 90changed = true
// resolveCallType checks if all overlapping sources agree on a call type.// Returns the agreed call type and whether there is a conflict.func resolveCallType(overlaps []sourceRef) (string, bool) {agreedCallType := overlaps[0].label.CallTypefor _, s := range overlaps[1:] {if s.label.CallType != agreedCallType {return "", true}}return agreedCallType, false}
output.Propagated++output.Changes = append(output.Changes, change)
// buildConflictRecord creates a PropagateConflict from overlapping disagreeing sourcesfunc buildConflictRecord(tSeg *utils.Segment, toLabel *utils.Label, overlaps []sourceRef) PropagateConflict {choices := make([]PropagateSourceChoice, 0, len(overlaps))for _, s := range overlaps {choices = append(choices, PropagateSourceChoice{Start: s.seg.StartTime,End: s.seg.EndTime,Species: s.label.Species,CallType: s.label.CallType,})}return PropagateConflict{TargetStart: tSeg.StartTime,TargetEnd: tSeg.EndTime,TargetCallType: toLabel.CallType,SourceChoices: choices,
if changed {df.Meta.Reviewer = "Skraak"if err := df.Write(input.File); err != nil {output.Error = fmt.Sprintf("write %s: %v", input.File, err)return output, fmt.Errorf("%s", output.Error)}
// applyPropagation updates the target label and records the changefunc applyPropagation(toLabel *utils.Label, species, callType string, tSeg *utils.Segment, output *CallsPropagateOutput) {change := PropagateChange{TargetStart: tSeg.StartTime,TargetEnd: tSeg.EndTime,PrevSpecies: toLabel.Species,PrevCallType: toLabel.CallType,PrevCertainty: toLabel.Certainty,NewSpecies: species,NewCallType: callType,NewCertainty: 90,
// processRavenFileCached processes a single Raven selection file using a DirCache for WAV lookupfunc processRavenFileCached(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {// Open filefile, err := os.Open(ravenFile)if err != nil {return nil, false, false, fmt.Errorf("failed to open file: %w", err)}defer func() { _ = file.Close() }()// Read header and selections (tab-separated)scanner := bufio.NewScanner(file)
// Read header lineif !scanner.Scan() {return nil, false, false, fmt.Errorf("empty file")}header := strings.Split(scanner.Text(), "\t")// Find column indicesbeginTimeIdx := -1endTimeIdx := -1lowFreqIdx := -1highFreqIdx := -1speciesIdx := -1
// ravenColumnIndices records where each recognised column appears in a
// Raven selection-table header. A value of -1 marks a column that was
// not found in the header.
type ravenColumnIndices struct {
	beginTimeIdx int
	endTimeIdx   int
	lowFreqIdx   int
	highFreqIdx  int
	speciesIdx   int
}
if beginTimeIdx == -1 || endTimeIdx == -1 || speciesIdx == -1 {return nil, false, false, fmt.Errorf("missing required columns in Raven file")
if idx.beginTimeIdx == -1 || idx.endTimeIdx == -1 || idx.speciesIdx == -1 {return idx, fmt.Errorf("missing required columns in Raven file")
var sel RavenSelectionif _, err := fmt.Sscanf(fields[beginTimeIdx], "%f", &sel.StartTime); err != nil {return nil, false, false, fmt.Errorf("failed to parse begin time %q: %w", fields[beginTimeIdx], err)
sel, err := parseRavenRow(fields, idx)if err != nil {return nil, err
if _, err := fmt.Sscanf(fields[endTimeIdx], "%f", &sel.EndTime); err != nil {return nil, false, false, fmt.Errorf("failed to parse end time %q: %w", fields[endTimeIdx], err)}if lowFreqIdx >= 0 && lowFreqIdx < len(fields) {if _, err := fmt.Sscanf(fields[lowFreqIdx], "%f", &sel.FreqLow); err != nil {return nil, false, false, fmt.Errorf("failed to parse low freq %q: %w", fields[lowFreqIdx], err)}}if highFreqIdx >= 0 && highFreqIdx < len(fields) {if _, err := fmt.Sscanf(fields[highFreqIdx], "%f", &sel.FreqHigh); err != nil {return nil, false, false, fmt.Errorf("failed to parse high freq %q: %w", fields[highFreqIdx], err)}}sel.Species = fields[speciesIdx]
if len(selections) == 0 {return nil, false, true, nil // No selections, skip
// parseRavenRow parses a single tab-separated row into a RavenSelectionfunc parseRavenRow(fields []string, idx ravenColumnIndices) (RavenSelection, error) {var sel RavenSelectionif _, err := fmt.Sscanf(fields[idx.beginTimeIdx], "%f", &sel.StartTime); err != nil {return sel, fmt.Errorf("failed to parse begin time %q: %w", fields[idx.beginTimeIdx], err)}if _, err := fmt.Sscanf(fields[idx.endTimeIdx], "%f", &sel.EndTime); err != nil {return sel, fmt.Errorf("failed to parse end time %q: %w", fields[idx.endTimeIdx], err)}if idx.lowFreqIdx >= 0 && idx.lowFreqIdx < len(fields) {if _, err := fmt.Sscanf(fields[idx.lowFreqIdx], "%f", &sel.FreqLow); err != nil {return sel, fmt.Errorf("failed to parse low freq %q: %w", fields[idx.lowFreqIdx], err)}}if idx.highFreqIdx >= 0 && idx.highFreqIdx < len(fields) {if _, err := fmt.Sscanf(fields[idx.highFreqIdx], "%f", &sel.FreqHigh); err != nil {return sel, fmt.Errorf("failed to parse high freq %q: %w", fields[idx.highFreqIdx], err)}
}return nameWithoutSuffix}// processRavenFileCached processes a single Raven selection file using a DirCache for WAV lookupfunc processRavenFileCached(ravenFile string, cache *DirCache) ([]ClusteredCall, bool, bool, error) {file, err := os.Open(ravenFile)if err != nil {return nil, false, false, fmt.Errorf("failed to open file: %w", err)
// Find WAV file using DirCache (O(1) lookup instead of O(N) directory scan)var wavPath stringif cache != nil {wavPath = cache.FindWAV(nameWithoutSuffix)} else {wavPath = findWAVFile(filepath.Dir(ravenFile), nameWithoutSuffix)
if !scanner.Scan() {return nil, false, false, fmt.Errorf("empty file")}header := strings.Split(scanner.Text(), "\t")idx, err := parseRavenHeader(header)if err != nil {return nil, false, false, err}selections, err := parseRavenSelections(scanner, idx)if err != nil {return nil, false, false, err}if len(selections) == 0 {return nil, false, true, nil
return output, fmt.Errorf("%s", errMsg)
return output, err}output.ClipDuration = clipDurationgapMultiplier := CLUSTER_GAP_MULTIPLIERif input.GapMultiplier > 0 {gapMultiplier = input.GapMultiplier}minDetections := MIN_DETECTIONS_PER_CLUSTERif input.MinDetections >= 0 {minDetections = input.MinDetections}gapThreshold := float64(gapMultiplier) * clipDurationoutput.GapThreshold = gapThresholdallCalls, speciesCount := clusterDetections(detections, clipDuration, gapThreshold, minDetections)output.Calls = allCallsoutput.TotalCalls = len(allCalls)output.SpeciesCount = speciesCountif input.WriteDotData {dataFilesWritten, dataFilesSkipped, err := writeDotFiles(input.CSVPath, filter, allCalls, input.ProgressHandler)if err != nil {errMsg := fmt.Sprintf("Error writing .data files: %v", err)output.Error = &errMsgreturn output, fmt.Errorf("%s", errMsg)}output.DataFilesWritten = dataFilesWrittenoutput.DataFilesSkipped = dataFilesSkipped}return output, nil}// readPredCSV opens and reads a predictions CSV, returning column mappings, detections, and clip durationfunc readPredCSV(csvPath string) (predCSVColumns, map[predFileSpeciesKey][]float64, float64, error) {file, err := os.Open(csvPath)if err != nil {return predCSVColumns{}, nil, 0, fmt.Errorf("failed to open CSV file: %w", err)
errMsg := fmt.Sprintf("Failed to read CSV header: %v", err)output.Error = &errMsgreturn output, fmt.Errorf("%s", errMsg)
return predCSVColumns{}, nil, 0, fmt.Errorf("failed to read CSV header: %w", err)}cols, err := findPredCSVColumns(header)if err != nil {return predCSVColumns{}, nil, 0, err
// Find column indicesfileIdx := -1startTimeIdx := -1endTimeIdx := -1var ebirdCodes []stringvar ebirdIdx []int
detections, clipDuration, err := readPredCSVRows(reader, cols)if err != nil {return predCSVColumns{}, nil, 0, err}return cols, detections, clipDuration, nil}
// Columns to ignore (not ebird codes)ignoredColumns := map[string]bool{"NotKiwi": true,"0.0": true,
// predCSVColumns holds the column indices for a predictions CSVtype predCSVColumns struct {fileIdx intstartTimeIdx intendTimeIdx intebirdCodes []stringebirdIdx []int}// findPredCSVColumns parses the CSV header to find column indicesfunc findPredCSVColumns(header []string) (predCSVColumns, error) {cols := predCSVColumns{fileIdx: -1,startTimeIdx: -1,endTimeIdx: -1,
if fileIdx == -1 || startTimeIdx == -1 || endTimeIdx == -1 {errMsg := "CSV must have 'file', 'start_time', and 'end_time' columns"output.Error = &errMsgreturn output, fmt.Errorf("%s", errMsg)
if cols.fileIdx == -1 || cols.startTimeIdx == -1 || cols.endTimeIdx == -1 {return cols, fmt.Errorf("CSV must have 'file', 'start_time', and 'end_time' columns")
if len(ebirdCodes) == 0 {errMsg := "CSV must have at least one ebird code column"output.Error = &errMsgreturn output, fmt.Errorf("%s", errMsg)
if len(cols.ebirdCodes) == 0 {return cols, fmt.Errorf("CSV must have at least one ebird code column")
// Read all rows and organize by (file, ebird_code) -> start_times// Using maps for efficient groupingtype FileEbirdKey struct {File stringEbirdCode string}detections := make(map[FileEbirdKey][]float64)
// readPredCSVRows reads all CSV data rows and returns detections grouped by file+species, plus clip durationfunc readPredCSVRows(reader *csv.Reader, cols predCSVColumns) (map[predFileSpeciesKey][]float64, float64, error) {detections := make(map[predFileSpeciesKey][]float64)
if err != nil && err != io.EOF {errMsg := fmt.Sprintf("Failed to read first CSV row: %v", err)output.Error = &errMsgreturn output, fmt.Errorf("%s", errMsg)
if err == io.EOF {return detections, 0, nil}if err != nil {return nil, 0, fmt.Errorf("failed to read first CSV row: %w", err)
if err != io.EOF {startTime, _ := strconv.ParseFloat(record[startTimeIdx], 64)endTime, _ := strconv.ParseFloat(record[endTimeIdx], 64)clipDuration = endTime - startTimeoutput.ClipDuration = clipDuration
startTime, _ := strconv.ParseFloat(record[cols.startTimeIdx], 64)endTime, _ := strconv.ParseFloat(record[cols.endTimeIdx], 64)clipDuration = endTime - startTimeaddDetectionsFromRow(record, cols, startTime, detections)
// Process first rowfileName := record[fileIdx]for i, idx := range ebirdIdx {if record[idx] == "1" {key := FileEbirdKey{File: fileName, EbirdCode: ebirdCodes[i]}detections[key] = append(detections[key], startTime)}
for {record, err := reader.Read()if err == io.EOF {break}if err != nil {return nil, 0, fmt.Errorf("failed to read CSV row: %w", err)
// Read remaining rowsfor {record, err := reader.Read()if err == io.EOF {break}if err != nil {errMsg := fmt.Sprintf("Failed to read CSV row: %v", err)output.Error = &errMsgreturn output, fmt.Errorf("%s", errMsg)}
startTime, _ = strconv.ParseFloat(record[cols.startTimeIdx], 64)addDetectionsFromRow(record, cols, startTime, detections)}
for i, idx := range ebirdIdx {if record[idx] == "1" {key := FileEbirdKey{File: fileName, EbirdCode: ebirdCodes[i]}detections[key] = append(detections[key], startTime)}}
// addDetectionsFromRow adds positive detections from a single CSV rowfunc addDetectionsFromRow(record []string, cols predCSVColumns, startTime float64, detections map[predFileSpeciesKey][]float64) {fileName := record[cols.fileIdx]for i, idx := range cols.ebirdIdx {if record[idx] == "1" {key := predFileSpeciesKey{File: fileName, EbirdCode: cols.ebirdCodes[i]}detections[key] = append(detections[key], startTime)
// Cluster detections by (file, ebird_code)
// clusterDetections groups detections into clusters and produces sorted ClusteredCallsfunc clusterDetections(detections map[predFileSpeciesKey][]float64, clipDuration, gapThreshold float64, minDetections int) ([]ClusteredCall, map[string]int) {
// Write .data files if requestedif input.WriteDotData {dataFilesWritten, dataFilesSkipped, err := writeDotFiles(input.CSVPath, filter, allCalls, input.ProgressHandler)if err != nil {// Return error - this includes clobber protection and parse errorserrMsg := fmt.Sprintf("Error writing .data files: %v", err)output.Error = &errMsgreturn output, fmt.Errorf("%s", errMsg)}output.DataFilesWritten = dataFilesWrittenoutput.DataFilesSkipped = dataFilesSkipped}return output, nil
return allCalls, speciesCount
// runCallsPushCertainty promotes certainty=90 segments to certainty=100 for a filtered set.//// JSON output schema://// {// "segments_updated": int, // Number of segments promoted from 90→100// "files_updated": int, // Number of .data files modified// "time_filtered_count": int // Files skipped by --night/--day filter// }func runCallsPushCertainty(args []string) {var folder, file, filter, species, timezone stringvar night, day boolvar lat, lng float64var latSet, lngSet bool
// pushCertaintyFlags holds the parsed CLI flags for push-certainty.
type pushCertaintyFlags struct {
	folder   string // --folder: directory to process (empty if not given)
	file     string // --file: single file to process (empty if not given)
	filter   string // --filter value — semantics defined by the calls package; TODO confirm
	species  string // --species: species label filter — presumably an ebird code; verify against caller
	timezone string // --timezone — presumably used for the --night/--day time filter; TODO confirm
	night    bool   // --night: skip files outside night-time (see time_filtered_count in JSON output)
	day      bool   // --day: skip files outside day-time
	lat      float64 // --lat value; only meaningful when latSet is true
	lng      float64 // --lng value; only meaningful when lngSet is true
	latSet   bool    // true when --lat was supplied on the command line
	lngSet   bool    // true when --lng was supplied on the command line
}
if i+1 >= len(args) {fmt.Fprintf(os.Stderr, "Error: --lat requires a value\n")os.Exit(1)}v, err := strconv.ParseFloat(args[i+1], 64)if err != nil {fmt.Fprintf(os.Stderr, "Error: --lat must be a number\n")os.Exit(1)}lat = vlatSet = truei += 2
f.lat = requireFloat(arg, args, &i)f.latSet = true
if i+1 >= len(args) {fmt.Fprintf(os.Stderr, "Error: --lng requires a value\n")os.Exit(1)}v, err := strconv.ParseFloat(args[i+1], 64)if err != nil {fmt.Fprintf(os.Stderr, "Error: --lng must be a number\n")os.Exit(1)}lng = vlngSet = truei += 2
f.lng = requireFloat(arg, args, &i)f.lngSet = true
if folder == "" && file == "" {
// requireValue returns the next argument after a flag, or exits with an error.
// On success it advances *i past both the flag and its value.
func requireValue(flag string, args []string, i *int) string {
	if *i+1 >= len(args) {
		fmt.Fprintf(os.Stderr, "Error: %s requires a value\n", flag)
		os.Exit(1)
	}
	v := args[*i+1]
	// Skip over the flag itself and its value.
	*i += 2
	return v
}

// requireFloat parses the next argument as a float64, or exits with an error.
// Delegates missing-value handling (and index advancement) to requireValue.
func requireFloat(flag string, args []string, i *int) float64 {
	s := requireValue(flag, args, i)
	v, err := strconv.ParseFloat(s, 64)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %s must be a number\n", flag)
		os.Exit(1)
	}
	return v
}

// validatePushCertaintyFlags checks flag combinations and exits on error.
func validatePushCertaintyFlags(f pushCertaintyFlags) {
	if f.folder == "" && f.file == "" {
		// NOTE(review): this function is truncated in this chunk — the body of
		// this branch (and any further flag-combination checks) is not visible
		// here; confirm against the full source before relying on it.
// runCallsPushCertainty promotes certainty=90 segments to certainty=100 for a filtered set.//// JSON output schema://// {// "segments_updated": int, // Number of segments promoted from 90→100// "files_updated": int, // Number of .data files modified// "time_filtered_count": int // Files skipped by --night/--day filter// }func runCallsPushCertainty(args []string) {f := parsePushCertaintyArgs(args)validatePushCertaintyFlags(f)
}

// nextUniqueValue returns the next argument after the flag, or exits if the
// flag was already set (current is non-empty), so each such flag may be
// specified at most once. Value retrieval is delegated to p.nextValue.
func (p *clipArgParser) nextUniqueValue(flag, current string) string {
	if current != "" {
		fmt.Fprintf(os.Stderr, "Error: %s can only be specified once\n", flag)
		os.Exit(1)
	}
	return p.nextValue(flag)
}
// RunCallsClip handles the "calls clip" subcommand//// JSON output schema://// {// "files_processed": int, // .data files processed// "segments_clipped": int, // Segments that generated clips// "night_skipped": int, // Segments skipped (--night, omitted if 0)// "day_skipped": int, // Segments skipped (--day, omitted if 0)// "output_files": [string], // Paths to generated clip files (.wav/.png)// "errors": [string] // Error messages (omitted if empty)// }func RunCallsClip(args []string) {var file, folder, output, prefix, filter, species, timezone stringvar size, certainty intvar color, wavOnly, night, day boolvar lat, lng float64var latSet, lngSet bool
// Default to -1 (no certainty filter)certainty = -1
// clipFlags holds the parsed CLI flags for calls clip.
type clipFlags struct {
	file      string // --file: single .data file to clip (empty if not given)
	folder    string // --folder: directory of .data files (empty if not given)
	output    string // --output: destination for generated clips — TODO confirm
	prefix    string // --prefix: prefix applied to output filenames — TODO confirm
	filter    string // --filter value — semantics defined by the calls package; TODO confirm
	species   string // --species: species label filter — presumably an ebird code; verify
	timezone  string // --timezone — presumably used for the --night/--day filter; TODO confirm
	size      int    // --size value — presumably clip/spectrogram size; verify against caller
	certainty int    // certainty filter; -1 means "no certainty filter" (the parser's default)
	color     bool   // --color — presumably colorized spectrogram output; verify
	wavOnly   bool   // presumably emit only .wav clips, skipping .png output — verify
	night     bool   // --night: skip segments outside night-time (see night_skipped in JSON output)
	day       bool   // --day: skip segments outside day-time (see day_skipped in JSON output)
	lat       float64 // --lat value; only meaningful when latSet is true
	lng       float64 // --lng value; only meaningful when lngSet is true
	latSet    bool    // true when --lat was supplied on the command line
	lngSet    bool    // true when --lng was supplied on the command line
}
}// RunCallsClip handles the "calls clip" subcommand//// JSON output schema://// {// "files_processed": int, // .data files processed// "segments_clipped": int, // Segments that generated clips// "night_skipped": int, // Segments skipped (--night, omitted if 0)// "day_skipped": int, // Segments skipped (--day, omitted if 0)// "output_files": [string], // Paths to generated clip files (.wav/.png)// "errors": [string] // Error messages (omitted if empty)// }func RunCallsClip(args []string) {f := parseClipArgs(args)validateClipFlags(f)
File: file,Folder: folder,Output: output,Prefix: prefix,Filter: filter,Species: species,Certainty: certainty,Size: size,Color: color,WavOnly: wavOnly,Night: night,Day: day,Lat: lat,Lng: lng,Timezone: timezone,
File: f.file,Folder: f.folder,Output: f.output,Prefix: f.prefix,Filter: f.filter,Species: f.species,Certainty: f.certainty,Size: f.size,Color: f.color,WavOnly: f.wavOnly,Night: f.night,Day: f.day,Lat: f.lat,Lng: f.lng,Timezone: f.timezone,
## [2026-05-04] Reduce cyclomatic complexity of 8 functions over gocyclo 30

Refactored 8 functions that exceeded cyclomatic complexity of 30 by extracting
helper functions with clear responsibilities:

1. **`CallsPropagate` (39→6)**: Extracted `validatePropagateInput`, `hasBothFilters`,
   `collectPropagateSources`, `propagateTargets`, `findUpdatableTargetLabel`,
   `findOverlappingSources`, `resolveCallType`, `buildConflictRecord`, `applyPropagation`.
2. **`CallsSummarise` (38→5)**: Extracted `summariseFiles`, `trackMeta`, `filterLabels`,
   `buildLabelSummaries`, `updateStatsFromLabels`, `updateFilterStats`,
   `updateReviewStatus`, `finaliseSummary`.
3. **`runCallsPushCertainty` (35→7)**: Extracted `parsePushCertaintyArgs`,
   `requireValue`, `requireFloat`, `validatePushCertaintyFlags`.
4. **`RunCallsClip` (35→2)**: Extracted `parseClipArgs`, `validateClipFlags`,
   `nextUniqueValue` on `clipArgParser`.
5. **`createCluster` (34→19)**: Extracted `validateCreateClusterFields`,
   `validateCreateClusterIDs`, `verifyDatasetForCluster`, `verifyLocationForCluster`,
   `verifyPatternExists`, `findExistingClusterInLocation`, `fetchClusterByID`.
6. **`ValidateMappingAgainstDB` (32→5)**: Extracted `collectMappedLabels`,
   `validateMappedSpecies`, `validateMappedCalltypes`.
7. **`CallsFromPreds` (32→8)**: Extracted `readPredCSV`, `findPredCSVColumns`,
   `readPredCSVRows`, `addDetectionsFromRow`, `clusterDetections`.
8. **`processRavenFileCached` (31→10)**: Extracted `parseRavenHeader`,
   `parseRavenSelections`, `parseRavenRow`, `deriveWAVBaseName`, `resolveWAVPath`.