package tools
import (
"fmt"
"os"
"path/filepath"
"skraak/utils"
)
// DetectAnomaliesInput holds the parameters accepted by DetectAnomalies.
type DetectAnomaliesInput struct {
	// Folder is the directory that is searched for .data files.
	Folder string
	// Models lists the label filter names to compare. Validation requires at
	// least two distinct values; the first one is used as the anchor model.
	Models []string
	// Species optionally restricts the comparison to anchor labels whose
	// species, or "species+calltype" key, is listed. Empty means no restriction.
	Species []string
}
// DetectAnomaliesOutput is the JSON-serialisable result of DetectAnomalies.
type DetectAnomaliesOutput struct {
	// Folder is the cleaned form of the requested folder path.
	Folder string `json:"folder"`
	// Models echoes the model names that were compared.
	Models []string `json:"models"`
	// FilesExamined counts the .data files that were parsed successfully.
	FilesExamined int `json:"files_examined"`
	// FilesWithAllModels is incremented for every examined file whose
	// per-file detection returned a non-nil result.
	FilesWithAllModels int `json:"files_with_all_models"`
	// AnomaliesTotal is the length of Anomalies.
	AnomaliesTotal int `json:"anomalies_total"`
	// LabelMismatches counts anomalies whose Type is "label_mismatch".
	LabelMismatches int `json:"label_mismatches"`
	// CertaintyMismatches counts all remaining anomalies.
	CertaintyMismatches int `json:"certainty_mismatches"`
	// Anomalies lists every detected anomaly; omitted from JSON when empty.
	Anomalies []Anomaly `json:"anomalies,omitempty"`
	// Error carries the failure message when the run aborted; omitted when empty.
	Error string `json:"error,omitempty"`
}
type Anomaly struct {
File string `json:"file"`
Type string `json:"type"` Segments []AnomalySegment `json:"segments"`
}
// AnomalySegment is the view of a single model's segment and label that is
// reported as part of an Anomaly.
type AnomalySegment struct {
	// Model is the name of the model that produced the label.
	Model string `json:"model"`
	// Start is copied from the segment's StartTime.
	Start float64 `json:"start"`
	// End is copied from the segment's EndTime.
	End float64 `json:"end"`
	// Species is the species named by the label.
	Species string `json:"species"`
	// CallType is the label's call type; omitted from JSON when empty.
	CallType string `json:"calltype,omitempty"`
	// Certainty is the certainty value recorded on the label.
	Certainty int `json:"certainty"`
}
// validateAnomalyInput checks that input is usable before any file is read.
//
// It returns an error when fewer than two models are given, when a model name
// is repeated, when input.Folder does not exist or cannot be inspected, or
// when it is not a directory. A nil return means every check passed.
func validateAnomalyInput(input DetectAnomaliesInput) error {
	if len(input.Models) < 2 {
		return fmt.Errorf("at least 2 --model values required")
	}

	// A set turns the duplicate check into a single pass over the models.
	seen := make(map[string]struct{}, len(input.Models))
	for _, model := range input.Models {
		if _, dup := seen[model]; dup {
			return fmt.Errorf("duplicate --model values are not allowed")
		}
		seen[model] = struct{}{}
	}

	info, err := os.Stat(input.Folder)
	if err != nil {
		if os.IsNotExist(err) {
			return fmt.Errorf("folder not found: %s", input.Folder)
		}
		// Permission problems, invalid paths and similar failures are not
		// "not found"; report the real cause and keep it in the error chain.
		return fmt.Errorf("cannot access folder %s: %w", input.Folder, err)
	}
	if !info.IsDir() {
		return fmt.Errorf("not a directory: %s", input.Folder)
	}
	return nil
}
// DetectAnomalies scans every .data file found under input.Folder and reports
// where the requested models disagree about overlapping segments.
//
// The input is validated first. On a validation or file-listing failure the
// returned output has its Error field set and the same failure is returned as
// the error value. Files that cannot be parsed are skipped and do not count
// towards FilesExamined. On success the error is nil and the output carries
// the counters and the collected anomalies.
func DetectAnomalies(input DetectAnomaliesInput) (DetectAnomaliesOutput, error) {
	folder := filepath.Clean(input.Folder)
	output := DetectAnomaliesOutput{
		Folder: folder,
		Models: input.Models,
	}

	if err := validateAnomalyInput(input); err != nil {
		output.Error = err.Error()
		return output, err
	}

	files, err := utils.FindDataFiles(folder)
	if err != nil {
		// Wrap with %w so callers can still inspect the underlying cause; the
		// message text is unchanged.
		err = fmt.Errorf("list .data files: %w", err)
		output.Error = err.Error()
		return output, err
	}

	scopeSet := make(map[string]bool, len(input.Species))
	for _, s := range input.Species {
		scopeSet[s] = true
	}

	for _, path := range files {
		df, err := utils.ParseDataFile(path)
		if err != nil {
			// Best effort: an unreadable file must not abort the whole scan.
			continue
		}
		output.FilesExamined++

		anomalies := detectAnomaliesInFile(df, path, input.Models, scopeSet)
		// A nil result is the per-file "skip" signal; it is deliberately
		// distinguished from an empty, non-nil result.
		if anomalies == nil {
			continue
		}
		output.FilesWithAllModels++

		for _, a := range anomalies {
			switch a.Type {
			case "label_mismatch":
				output.LabelMismatches++
			default:
				output.CertaintyMismatches++
			}
		}
		output.Anomalies = append(output.Anomalies, anomalies...)
	}

	output.AnomaliesTotal = len(output.Anomalies)
	return output, nil
}
// labeledSeg pairs a segment with one of the labels attached to it.
type labeledSeg struct {
	seg   *utils.Segment // segment the label belongs to
	label *utils.Label   // the label of seg being considered
}
// detectAnomaliesInFile compares the models' labels within a single data file.
//
// Each in-scope segment of the first model is used as an anchor. When every
// other model has at least one overlapping segment, the anchor and the first
// overlapping segment of each other model are compared, and any disagreement
// is recorded as an Anomaly for path.
//
// The result is nil when models is empty or when at least one model has no
// labelled segment in the file, i.e. the file cannot be compared at all.
// Otherwise it is non-nil, and empty when the models never disagree, so that
// callers can tell "not comparable" apart from "compared and clean".
func detectAnomaliesInFile(df *utils.DataFile, path string, models []string, scope map[string]bool) []Anomaly {
	if len(models) == 0 {
		return nil
	}

	modelSegs := collectModelSegments(df, models)
	for _, model := range models {
		if len(modelSegs[model]) == 0 {
			return nil
		}
	}

	// Start from an empty, non-nil slice: nil is reserved for the
	// "not comparable" case above.
	anomalies := []Anomaly{}
	for _, anchor := range modelSegs[models[0]] {
		if !inScope(anchor, scope) {
			continue
		}
		matches := findOverlappingMatches(anchor, models, modelSegs)
		if matches == nil {
			// Some model has nothing overlapping this anchor; nothing to compare.
			continue
		}
		group := buildComparisonGroup(anchor, models, matches)
		if a := checkGroupAnomaly(group, path, models); a != nil {
			anomalies = append(anomalies, *a)
		}
	}
	return anomalies
}
// collectModelSegments groups the file's labelled segments by model name.
//
// Every label whose Filter equals one of models contributes one entry, pairing
// the label with its segment, under that model's key. Models without any
// matching label have no key in the returned map.
func collectModelSegments(df *utils.DataFile, models []string) map[string][]labeledSeg {
	// Set of requested model names for direct membership checks.
	wanted := make(map[string]struct{}, len(models))
	for _, name := range models {
		wanted[name] = struct{}{}
	}

	byModel := make(map[string][]labeledSeg, len(models))
	for _, segment := range df.Segments {
		for _, label := range segment.Labels {
			if _, ok := wanted[label.Filter]; !ok {
				continue
			}
			byModel[label.Filter] = append(byModel[label.Filter], labeledSeg{seg: segment, label: label})
		}
	}
	return byModel
}
// inScope reports whether anchor passes the species filter.
//
// An empty scope accepts everything. Otherwise the anchor is accepted when
// scope contains its species, or, for labels with a call type, the combined
// "species+calltype" key.
func inScope(anchor labeledSeg, scope map[string]bool) bool {
	if len(scope) == 0 {
		return true
	}

	label := anchor.label
	if scope[label.Species] {
		return true
	}
	if label.CallType == "" {
		// Without a call type the only possible key is the species itself.
		return false
	}
	return scope[label.Species+"+"+label.CallType]
}
// findOverlappingMatches collects, for every model after the first, the
// segments that overlap the anchor's segment.
//
// It returns nil as soon as one of those models has no overlapping segment;
// otherwise the map holds a non-empty slice for each of them.
func findOverlappingMatches(anchor labeledSeg, models []string, modelSegs map[string][]labeledSeg) map[string][]labeledSeg {
	found := make(map[string][]labeledSeg, len(models)-1)
	for _, name := range models[1:] {
		var hits []labeledSeg
		for _, other := range modelSegs[name] {
			if overlaps(anchor.seg, other.seg) {
				hits = append(hits, other)
			}
		}
		if len(hits) == 0 {
			return nil
		}
		found[name] = append(found[name], hits...)
	}
	return found
}
// buildComparisonGroup returns the anchor followed by the first match of each
// remaining model, in the order of models.
func buildComparisonGroup(anchor labeledSeg, models []string, matches map[string][]labeledSeg) []labeledSeg {
	rest := models[1:]
	members := make([]labeledSeg, 0, len(rest)+1)
	members = append(members, anchor)
	for i := range rest {
		// Only the first overlapping segment of each model takes part.
		members = append(members, matches[rest[i]][0])
	}
	return members
}
// checkGroupAnomaly compares every member of group against its first element.
//
// A difference in species or call type yields a "label_mismatch" anomaly and
// takes priority; otherwise a difference in certainty yields a
// "certainty_mismatch" anomaly. It returns nil when all members agree.
func checkGroupAnomaly(group []labeledSeg, path string, models []string) *Anomaly {
	ref := group[0].label
	species, callType, certainty := ref.Species, ref.CallType, ref.Certainty

	labelsDiffer, certaintyDiffers := false, false
	for _, member := range group[1:] {
		if member.label.Species != species || member.label.CallType != callType {
			// A label mismatch wins outright, so there is no need to look further.
			labelsDiffer = true
			break
		}
		if member.label.Certainty != certainty {
			certaintyDiffers = true
		}
	}

	var kind string
	switch {
	case labelsDiffer:
		kind = "label_mismatch"
	case certaintyDiffers:
		kind = "certainty_mismatch"
	default:
		return nil
	}
	return &Anomaly{File: path, Type: kind, Segments: buildAnomalySegs(group, models)}
}
// buildAnomalySegs converts group into its reportable form, one AnomalySegment
// per member. The i-th member is attributed to models[i].
func buildAnomalySegs(group []labeledSeg, models []string) []AnomalySegment {
	out := make([]AnomalySegment, 0, len(group))
	for idx := range group {
		segment, label := group[idx].seg, group[idx].label
		out = append(out, AnomalySegment{
			Model:     models[idx],
			Start:     segment.StartTime,
			End:       segment.EndTime,
			Species:   label.Species,
			CallType:  label.CallType,
			Certainty: label.Certainty,
		})
	}
	return out
}
// overlaps reports whether the time ranges of a and b intersect. The
// comparisons are strict, so ranges that merely touch at an endpoint do not
// count as overlapping.
func overlaps(a, b *utils.Segment) bool {
	aStartsBeforeBEnds := a.StartTime < b.EndTime
	bStartsBeforeAEnds := b.StartTime < a.EndTime
	return aStartsBeforeBEnds && bStartsBeforeAEnds
}