package tools
import (
"encoding/csv"
"os"
"path/filepath"
"strings"
"testing"
"skraak/utils"
)
// writeDataFile writes df to the file name inside dir using df.Write and
// aborts the calling test if that write returns an error.
func writeDataFile(t *testing.T, dir, name string, df *utils.DataFile) {
	t.Helper()

	target := filepath.Join(dir, name)
	err := df.Write(target)
	if err == nil {
		return
	}
	t.Fatalf("write .data file %s: %v", name, err)
}
// writeMapping stores the given JSON text as "mapping.json" inside dir with
// mode 0644, aborting the calling test if the file cannot be written.
func writeMapping(t *testing.T, dir, json string) {
	t.Helper()

	target := filepath.Join(dir, "mapping.json")
	err := os.WriteFile(target, []byte(json), 0644)
	if err == nil {
		return
	}
	t.Fatalf("write mapping.json: %v", err)
}
// parseCSV opens the CSV file at path and returns its first record (the
// header) together with all remaining records. Any failure to open or read
// the file aborts the calling test.
//
// The header and the rows are read through the same csv.Reader, so with the
// reader's default settings every row must have as many fields as the header;
// otherwise reading the rows fails.
func parseCSV(t *testing.T, path string) ([]string, [][]string) {
	t.Helper()

	file, openErr := os.Open(path)
	if openErr != nil {
		t.Fatalf("open CSV %s: %v", path, openErr)
	}
	defer file.Close()

	reader := csv.NewReader(file)

	header, headErr := reader.Read()
	if headErr != nil {
		t.Fatalf("read header: %v", headErr)
	}

	body, bodyErr := reader.ReadAll()
	if bodyErr != nil {
		t.Fatalf("read rows: %v", bodyErr)
	}

	return header, body
}
// clipLabels invokes CallsClipLabels on dir using the defaults shared by the
// tests in this file: mapping.json and clip_labels.csv both located in dir,
// ClipDuration 5, ClipOverlap 0, MinLabelOverlap 0.25 and FinalClip "full".
//
// Each function in extra is applied, in order, to the input before the call so
// a test can override individual fields. If CallsClipLabels returns an error
// the calling test is aborted; otherwise its output is returned.
func clipLabels(t *testing.T, dir string, extra ...func(*CallsClipLabelsInput)) CallsClipLabelsOutput {
	t.Helper()

	var input CallsClipLabelsInput
	input.Folder = dir
	input.MappingPath = filepath.Join(dir, "mapping.json")
	input.OutputPath = filepath.Join(dir, "clip_labels.csv")
	input.ClipDuration = 5
	input.ClipOverlap = 0
	input.MinLabelOverlap = 0.25
	input.FinalClip = "full"

	for i := range extra {
		extra[i](&input)
	}

	result, err := CallsClipLabels(input)
	if err != nil {
		t.Fatalf("CallsClipLabels: %v", err)
	}
	return result
}
// TestClipLabels_RealClassTrue writes a recording of Duration 20 with a single
// Kiwi segment spanning 3-8 and verifies that the generated CSV has a fourth
// column named "Kiwi" whose value is "True" for the clips starting at 0.0 and
// 5.0 and "False" for the clips starting at 10.0 and 15.0. It also checks that
// PerClassTrueCount reports two Kiwi clips.
func TestClipLabels_RealClassTrue(t *testing.T) {
	dir := t.TempDir()
	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 20},
		Segments: []*utils.Segment{
			{
				StartTime: 3, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

	out := clipLabels(t, dir)

	header, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	if len(header) != 4 || header[3] != "Kiwi" {
		t.Fatalf("header = %v, want [..., Kiwi]", header)
	}

	const kiwiCol = 3
	// Expected Kiwi value keyed by the clip start time found in column 1.
	want := map[string]string{
		"0.0":  "True",
		"5.0":  "True",
		"10.0": "False",
		"15.0": "False",
	}
	seen := make(map[string]bool, len(want))
	for i, row := range rows {
		expected, ok := want[row[1]]
		if !ok {
			continue
		}
		seen[row[1]] = true
		if row[kiwiCol] != expected {
			t.Errorf("row %d (start=%s): Kiwi=%s, want %s", i, row[1], row[kiwiCol], expected)
		}
	}
	// Without this check the loop above passes vacuously when the CSV is empty
	// or none of its rows carries one of the expected start times.
	for _, start := range []string{"0.0", "5.0", "10.0", "15.0"} {
		if !seen[start] {
			t.Errorf("no CSV row with start=%s (got %d rows)", start, len(rows))
		}
	}

	if out.PerClassTrueCount["Kiwi"] != 2 {
		t.Errorf("PerClassTrueCount[Kiwi] = %d, want 2", out.PerClassTrueCount["Kiwi"])
	}
}
func TestClipLabels_GapClipsAllFalse(t *testing.T) {
dir := t.TempDir()
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 15},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
out := clipLabels(t, dir)
if out.ClipsAllFalseGap != 2 {
t.Errorf("ClipsAllFalseGap = %d, want 2", out.ClipsAllFalseGap)
}
if out.PerClassTrueCount["Kiwi"] != 1 {
t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
}
if out.RowsWritten != 3 {
t.Errorf("RowsWritten = %d, want 3", out.RowsWritten)
}
}
// TestClipLabels_NegativeOverridesPositive overlaps a Kiwi segment (0-8) with
// a "Not" segment (0-4) that the mapping translates to __NEGATIVE__. It
// expects exactly one clip to be counted as negative, the first CSV row to
// have Kiwi "False" (the negative label wins) and the second row to have
// Kiwi "True".
func TestClipLabels_NegativeOverridesPositive(t *testing.T) {
	dir := t.TempDir()
	writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 10},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 8, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
			{
				StartTime: 0, EndTime: 4, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Not", Certainty: 100, Filter: "f1"}},
			},
		},
	})
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)

	out := clipLabels(t, dir)
	if out.ClipsNegative != 1 {
		t.Errorf("ClipsNegative = %d, want 1", out.ClipsNegative)
	}

	header, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))
	// Guard the indexing below so a malformed or short CSV produces a readable
	// failure instead of an index-out-of-range panic. parseCSV reads header
	// and rows with one csv.Reader, so rows share the header's field count.
	if len(header) < 4 {
		t.Fatalf("header = %v, want at least 4 columns", header)
	}
	if len(rows) < 2 {
		t.Fatalf("got %d CSV rows, want at least 2", len(rows))
	}

	if rows[0][3] != "False" {
		t.Errorf("clip 0-5 Kiwi = %s, want False (overridden by __NEGATIVE__)", rows[0][3])
	}
	if rows[1][3] != "True" {
		t.Errorf("clip 5-10 Kiwi = %s, want True", rows[1][3])
	}
}
func TestClipLabels_IgnoreExcludesClip(t *testing.T) {
dir := t.TempDir()
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 15},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Don't Know", Certainty: 0, Filter: "f1"}},
},
{
StartTime: 6, EndTime: 10, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Don't Know":{"species":"__IGNORE__"}}`)
out := clipLabels(t, dir)
if out.ClipsIgnored != 1 {
t.Errorf("ClipsIgnored = %d, want 1", out.ClipsIgnored)
}
if out.SegmentsIgnored != 1 {
t.Errorf("SegmentsIgnored = %d, want 1", out.SegmentsIgnored)
}
if out.RowsWritten != 2 {
t.Errorf("RowsWritten = %d, want 2", out.RowsWritten)
}
}
func TestClipLabels_FilterRestrictsLabels(t *testing.T) {
dir := t.TempDir()
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 10},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{
{Species: "Kiwi", Certainty: 100, Filter: "wanted"},
{Species: "Not", Certainty: 100, Filter: "unwanted"},
},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"},"Not":{"species":"__NEGATIVE__"}}`)
out := clipLabels(t, dir, func(in *CallsClipLabelsInput) { in.Filter = "wanted" })
if out.ClipsNegative != 0 {
t.Errorf("ClipsNegative = %d, want 0 (Not filter excluded)", out.ClipsNegative)
}
if out.PerClassTrueCount["Kiwi"] != 1 {
t.Errorf("PerClassTrueCount[Kiwi] = %d, want 1", out.PerClassTrueCount["Kiwi"])
}
}
func TestClipLabels_MappingCoverageError(t *testing.T) {
dir := t.TempDir()
writeDataFile(t, dir, "rec.wav.data", &utils.DataFile{
Meta: &utils.DataMeta{Duration: 10},
Segments: []*utils.Segment{
{
StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
Labels: []*utils.Label{{Species: "Mystery", Certainty: 100, Filter: "f1"}},
},
},
})
writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)
input := CallsClipLabelsInput{
Folder: dir,
MappingPath: filepath.Join(dir, "mapping.json"),
OutputPath: filepath.Join(dir, "clip_labels.csv"),
ClipDuration: 5,
ClipOverlap: 0,
MinLabelOverlap: 0.25,
FinalClip: "full",
}
_, err := CallsClipLabels(input)
if err == nil {
t.Fatal("expected error for missing species in mapping")
}
if !strings.Contains(err.Error(), "Mystery") {
t.Errorf("error should mention missing species, got: %v", err)
}
}
// TestClipLabels_AppendMode runs CallsClipLabels twice over the same folder
// and output file. The first run must write one row; the second run, using
// identical input, must fail with an error whose message contains "duplicate".
func TestClipLabels_AppendMode(t *testing.T) {
	dir := t.TempDir()
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

	kiwi := &utils.Segment{
		StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
		Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
	}
	recording := &utils.DataFile{
		Meta:     &utils.DataMeta{Duration: 5},
		Segments: []*utils.Segment{kiwi},
	}
	writeDataFile(t, dir, "a.wav.data", recording)

	first := clipLabels(t, dir)
	if got := first.RowsWritten; got != 1 {
		t.Fatalf("first run: RowsWritten = %d, want 1", got)
	}

	// Second run is made directly so the expected error can be inspected.
	var again CallsClipLabelsInput
	again.Folder = dir
	again.MappingPath = filepath.Join(dir, "mapping.json")
	again.OutputPath = filepath.Join(dir, "clip_labels.csv")
	again.ClipDuration = 5
	again.ClipOverlap = 0
	again.MinLabelOverlap = 0.25
	again.FinalClip = "full"

	_, err := CallsClipLabels(again)
	switch {
	case err == nil:
		t.Fatal("expected duplicate error on second run with same folder")
	case !strings.Contains(err.Error(), "duplicate"):
		t.Errorf("error should mention duplicate, got: %v", err)
	}
}
// TestClipLabels_MultipleFiles places two .data files in one folder
// (a.wav.data with Duration 10 and b.wav.data with Duration 5, each holding a
// Kiwi segment spanning 0-5). It expects both files to be parsed, three rows
// to be written in total, and the first CSV column to contain two distinct
// values.
func TestClipLabels_MultipleFiles(t *testing.T) {
	dir := t.TempDir()
	writeMapping(t, dir, `{"Kiwi":{"species":"Kiwi"}}`)

	recordingA := &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 10},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	}
	writeDataFile(t, dir, "a.wav.data", recordingA)

	recordingB := &utils.DataFile{
		Meta: &utils.DataMeta{Duration: 5},
		Segments: []*utils.Segment{
			{
				StartTime: 0, EndTime: 5, FreqLow: 100, FreqHigh: 5000,
				Labels: []*utils.Label{{Species: "Kiwi", Certainty: 100, Filter: "f1"}},
			},
		},
	}
	writeDataFile(t, dir, "b.wav.data", recordingB)

	out := clipLabels(t, dir)
	if got := out.DataFilesParsed; got != 2 {
		t.Errorf("DataFilesParsed = %d, want 2", got)
	}
	if got := out.RowsWritten; got != 3 {
		t.Errorf("RowsWritten = %d, want 3", got)
	}

	_, rows := parseCSV(t, filepath.Join(dir, "clip_labels.csv"))

	// Count rows per value of the first CSV column.
	perFile := make(map[string]int)
	for i := range rows {
		perFile[rows[i][0]]++
	}
	if n := len(perFile); n != 2 {
		t.Errorf("expected 2 distinct files in CSV, got %d", n)
	}
}