package tools
import (
"os"
"path/filepath"
"testing"
)
// writeDataFileContent writes content to the file called name inside dir,
// creating it (or replacing any previous contents). A write failure aborts
// the calling test immediately.
func writeDataFileContent(t *testing.T, dir, name, content string) {
	t.Helper()
	target := filepath.Join(dir, name)
	writeErr := os.WriteFile(target, []byte(content), 0644)
	if writeErr != nil {
		t.Fatal(writeErr)
	}
}
// mustLoadDataFiles calls LoadDataFiles with config and hands back the
// resulting state. If loading reports an error the calling test is aborted
// via t.Fatal, so callers never see a failed load.
func mustLoadDataFiles(t *testing.T, config ClassifyConfig) *ClassifyState {
	t.Helper()
	loaded, loadErr := LoadDataFiles(config)
	if loadErr != nil {
		t.Fatal(loadErr)
	}
	return loaded
}
// assertFileSegCounts reports a (non-fatal) test error when state does not
// hold exactly wantFiles data files or when state.TotalSegments() differs
// from wantSegs. label prefixes each failure message so the failing scenario
// can be identified.
func assertFileSegCounts(t *testing.T, state *ClassifyState, wantFiles, wantSegs int, label string) {
	t.Helper()
	if gotFiles := len(state.DataFiles); gotFiles != wantFiles {
		t.Errorf("%s: expected %d files, got %d", label, wantFiles, gotFiles)
	}
	if gotSegs := state.TotalSegments(); gotSegs != wantSegs {
		t.Errorf("%s: expected %d segments total, got %d", label, wantSegs, gotSegs)
	}
}
// Fixture contents for single-segment data files. Each value is a JSON array
// holding an object ({"Operator": "test"}) followed by one segment entry whose
// label list contains a single species with certainty 90.
// NOTE(review): the four leading numbers in the segment entry are presumably
// time and frequency bounds — confirm against the data-file parser.
const (
	// kiwiSeg is a data file whose only segment is labelled species "Kiwi".
	kiwiSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]]]`
	// tomtitSeg is a data file whose only segment is labelled species "Tomtit".
	tomtitSeg = `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Tomtit", "certainty": 90}]]]`
)
// TestLoadDataFilesFiltersFilesWithNoMatchingSegments loads a folder holding
// two Kiwi files and one Tomtit file under several species filters and checks
// the number of files and segments reported for each filter.
func TestLoadDataFilesFiltersFilesWithNoMatchingSegments(t *testing.T) {
	tempDir := t.TempDir()
	writeDataFileContent(t, tempDir, "file1.data", kiwiSeg)
	writeDataFileContent(t, tempDir, "file2.data", tomtitSeg)
	writeDataFileContent(t, tempDir, "file3.data", kiwiSeg)

	scenarios := []struct {
		name      string // subtest name
		species   string // species filter; "" leaves the field at its zero value
		wantFiles int
		wantSegs  int
		label     string // prefix used in failure messages
	}{
		{name: "no_filter", species: "", wantFiles: 3, wantSegs: 3, label: "No filter"},
		{name: "species_kiwi", species: "Kiwi", wantFiles: 2, wantSegs: 2, label: "Species=Kiwi"},
		{name: "species_tomtit", species: "Tomtit", wantFiles: 1, wantSegs: 1, label: "Species=Tomtit"},
		{name: "species_nonexistent", species: "NonExistent", wantFiles: 0, wantSegs: 0, label: "Species=NonExistent"},
	}

	// Subtests run sequentially (no t.Parallel), in the order listed above.
	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			cfg := ClassifyConfig{Folder: tempDir, Species: sc.species, Certainty: -1}
			state := mustLoadDataFiles(t, cfg)
			assertFileSegCounts(t, state, sc.wantFiles, sc.wantSegs, sc.label)
		})
	}
}
// TestLoadDataFilesWithMixedSegments loads a single file containing two Kiwi
// segments and one Tomtit segment with a Species=Kiwi filter. It checks that
// the file is kept, that TotalSegments counts only the two Kiwi segments, and
// that the DataFile itself still holds all three segments.
func TestLoadDataFilesWithMixedSegments(t *testing.T) {
	tempDir := t.TempDir()
	file := `[
{"Operator": "test"},
[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],
[10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]],
[20, 30, 100, 1000, [{"species": "Kiwi", "certainty": 95}]]
]`
	writeDataFileContent(t, tempDir, "mixed.data", file)
	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})

	// Fatal rather than Errorf: the checks below index DataFiles[0], which
	// would panic (instead of failing cleanly) if no file had been loaded.
	if len(state.DataFiles) != 1 {
		t.Fatalf("Expected 1 file, got %d", len(state.DataFiles))
	}
	// The filtered view exposes only the Kiwi segments...
	if got := state.TotalSegments(); got != 2 {
		t.Errorf("Species=Kiwi: expected 2 segments, got %d", got)
	}
	// ...while the underlying DataFile keeps every segment from the file.
	if got := len(state.DataFiles[0].Segments); got != 3 {
		t.Errorf("DataFile should have 3 segments internally, got %d", got)
	}
}
// TestFilteringDoesNotModifyOriginalSegments loads a file with one Kiwi and
// one Tomtit segment under a Species=Kiwi filter and checks that the loaded
// DataFile's Segments slice still contains both segments, in file order.
func TestFilteringDoesNotModifyOriginalSegments(t *testing.T) {
	tempDir := t.TempDir()
	file := `[
{"Operator": "test"},
[0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 90}]],
[10, 20, 100, 1000, [{"species": "Tomtit", "certainty": 80}]]
]`
	writeDataFileContent(t, tempDir, "test.data", file)
	state := mustLoadDataFiles(t, ClassifyConfig{Folder: tempDir, Species: "Kiwi", Certainty: -1})

	// Guard the index below: without it an empty result would panic rather
	// than produce a readable test failure.
	if len(state.DataFiles) == 0 {
		t.Fatal("Expected at least 1 file, got 0")
	}

	originalSegments := state.DataFiles[0].Segments
	if len(originalSegments) != 2 {
		t.Errorf("Original should have 2 segments, got %d", len(originalSegments))
	}

	// Collect the first label's species of every labelled segment.
	species := make([]string, 0, len(originalSegments))
	for _, seg := range originalSegments {
		if len(seg.Labels) > 0 {
			species = append(species, seg.Labels[0].Species)
		}
	}
	if len(species) != 2 || species[0] != "Kiwi" || species[1] != "Tomtit" {
		t.Errorf("Original segments should have both species, got %v", species)
	}
}
// TestLoadDataFilesCertaintyPruning writes two single-segment Kiwi files with
// certainty 70 and 100, loads them with Certainty: 100, and expects exactly
// one file and one segment to be reported, with a non-nil current segment.
func TestLoadDataFilesCertaintyPruning(t *testing.T) {
	tempDir := t.TempDir()

	fixtures := []struct{ name, content string }{
		{"file1.data", `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 70}]]]`},
		{"file2.data", `[{"Operator": "test"}, [0, 10, 100, 1000, [{"species": "Kiwi", "certainty": 100}]]]`},
	}
	for _, fx := range fixtures {
		writeDataFileContent(t, tempDir, fx.name, fx.content)
	}

	cfg := ClassifyConfig{Folder: tempDir, Certainty: 100}
	state := mustLoadDataFiles(t, cfg)
	assertFileSegCounts(t, state, 1, 1, "Certainty=100")

	if state.CurrentSegment() == nil {
		t.Error("CurrentSegment should not be nil after pruning")
	}
}