package tools
import (
"fmt"
"math/rand"
"os"
"path/filepath"
"slices"
"sort"
"strings"
"time"
"skraak/utils"
)
// KeyBinding associates a key with the species and call type that are
// applied when that key is matched by ClassifyState.ParseKeyBuffer.
type KeyBinding struct {
	Key      string // key text compared against the typed key
	Species  string // species copied into the resulting BindingResult
	CallType string // call type copied into the resulting BindingResult
}
type ClassifyConfig struct {
Folder string
File string
Filter string
Species string CallType string Certainty int Sample int Goto string Reviewer string
Color bool
ImageSize int Sixel bool
ITerm bool
Bindings []KeyBinding
SecondaryBindings map[string]map[string]string
Night bool
Day bool
Lat float64
Lng float64
Timezone string
}
type ClassifyState struct {
Config ClassifyConfig
DataFiles []*utils.DataFile
filteredSegs [][]*utils.Segment totalSegs int FileIdx int
SegmentIdx int
Dirty bool
Player *utils.AudioPlayer
PlaybackSpeed float64 TimeFilteredCount int }
// BindingResult is the species / call type pair produced by a matched key
// binding and consumed by ClassifyState.ApplyBinding.
type BindingResult struct {
	// Species is the species to write to the label.
	Species string
	// CallType is the call type to write to the label.
	CallType string
}
// findDataFilePaths returns the data file paths to load. A non-empty
// config.File is returned as the only entry; otherwise config.Folder is
// searched with utils.FindDataFiles and any error is wrapped.
func findDataFilePaths(config ClassifyConfig) ([]string, error) {
	if single := config.File; single != "" {
		return []string{single}, nil
	}

	found, err := utils.FindDataFiles(config.Folder)
	if err == nil {
		return found, nil
	}
	return nil, fmt.Errorf("find data files: %w", err)
}
// filterDataFileSegments applies the label filter and, when config.Night or
// config.Day is set, the time-of-day filter to a single data file.
//
// It returns the segments to keep, whether the file should be kept at all,
// and how many files (0 or 1) were dropped by the time-of-day filter.
//
// A nil result from the label filter drops the file. With no label filter
// configured the file's own Segments slice is used as is, so only a nil
// slice (not an empty non-nil one) causes the file to be dropped.
func filterDataFileSegments(df *utils.DataFile, config ClassifyConfig) ([]*utils.Segment, bool, int) {
	matched := filterSegmentsByLabel(df.Segments, config)
	if matched == nil {
		return nil, false, 0
	}

	wantsTimeFilter := config.Night || config.Day
	if wantsTimeFilter {
		if ok, dropped := filterByTimeOfDay(df.FilePath, config); !ok {
			return nil, false, dropped
		}
	}
	return matched, true, 0
}
// filterSegmentsByLabel returns the segments accepted by
// Segment.SegmentMatchesFilters for the configured filter, species, call
// type and certainty.
//
// When Filter and Species are empty and Certainty is negative, no filtering
// is done and the input slice is returned unchanged. Otherwise the result is
// nil when nothing matches.
func filterSegmentsByLabel(segments []*utils.Segment, config ClassifyConfig) []*utils.Segment {
	noFilter := config.Filter == "" && config.Species == "" && config.Certainty < 0
	if noFilter {
		return segments
	}

	// Left nil until the first match: filterDataFileSegments treats a nil
	// result as "no matching segments".
	var matched []*utils.Segment
	for i := range segments {
		candidate := segments[i]
		if !candidate.SegmentMatchesFilters(config.Filter, config.Species, config.CallType, config.Certainty) {
			continue
		}
		matched = append(matched, candidate)
	}
	return matched
}
// filterByTimeOfDay reports whether the recording belonging to dataFilePath
// passes the configured night/day filter. The path handed to IsNight is the
// data file path with its ".data" suffix removed and cleaned.
//
// The second result is 1 when the file is rejected (including when IsNight
// fails, in which case a warning is written to stderr) and 0 when it is kept.
func filterByTimeOfDay(dataFilePath string, config ClassifyConfig) (bool, int) {
	wavPath := strings.TrimSuffix(dataFilePath, ".data")
	wavPath = filepath.Clean(wavPath)

	input := IsNightInput{
		FilePath: wavPath,
		Lat:      config.Lat,
		Lng:      config.Lng,
		Timezone: config.Timezone,
	}
	result, err := IsNight(input)

	switch {
	case err != nil:
		fmt.Fprintf(os.Stderr, "warning: skipping %s (isnight error: %v)\n", wavPath, err)
		return false, 1
	case config.Night && !result.SolarNight:
		return false, 1
	case config.Day && !result.DiurnalActive:
		return false, 1
	}
	return true, 0
}
// LoadDataFiles builds a ClassifyState for config: it parses and sorts the
// data files, applies the label and time-of-day filters, optionally keeps a
// random sample of the matching segments (only when config.Sample is
// strictly between 0 and 100), and positions the cursor per config.Goto.
func LoadDataFiles(config ClassifyConfig) (*ClassifyState, error) {
	parsed, err := parseAndSortDataFiles(config)
	if err != nil {
		return nil, err
	}

	files, segs, skippedByTime := filterDataFiles(parsed, config)

	if pct := config.Sample; pct > 0 && pct < 100 {
		// Seeded from the clock, so each run draws a different sample.
		seed := time.Now().UnixNano()
		files, segs = applySampling(files, segs, pct, rand.New(rand.NewSource(seed)))
	}

	return buildClassifyState(config, files, segs, skippedByTime)
}
// parseAndSortDataFiles locates the data files selected by config, parses
// each one, and returns the successfully parsed files sorted by FilePath.
//
// Files that fail to parse are skipped (best effort), but a warning naming
// the file and the error is written to stderr so that files do not vanish
// from the review queue without explanation. An error is returned when no
// files are found or when none of them parses.
func parseAndSortDataFiles(config ClassifyConfig) ([]*utils.DataFile, error) {
	filePaths, err := findDataFilePaths(config)
	if err != nil {
		return nil, err
	}
	if len(filePaths) == 0 {
		return nil, fmt.Errorf("no .data files found")
	}

	dataFiles := make([]*utils.DataFile, 0, len(filePaths))
	for _, path := range filePaths {
		df, err := utils.ParseDataFile(path)
		if err != nil {
			// Same warning shape as the time-of-day filter uses.
			fmt.Fprintf(os.Stderr, "warning: skipping %s (parse error: %v)\n", path, err)
			continue
		}
		dataFiles = append(dataFiles, df)
	}
	if len(dataFiles) == 0 {
		return nil, fmt.Errorf("no valid .data files")
	}

	sort.Slice(dataFiles, func(i, j int) bool {
		return dataFiles[i].FilePath < dataFiles[j].FilePath
	})
	return dataFiles, nil
}
// filterDataFiles runs filterDataFileSegments over every data file. It
// returns the files that were kept, their filtered segments at matching
// indices, and the total number of files rejected by the time-of-day filter.
func filterDataFiles(dataFiles []*utils.DataFile, config ClassifyConfig) ([]*utils.DataFile, [][]*utils.Segment, int) {
	var (
		survivors     []*utils.DataFile
		survivorSegs  [][]*utils.Segment
		droppedByTime int
	)
	for i := range dataFiles {
		file := dataFiles[i]
		segs, ok, dropped := filterDataFileSegments(file, config)
		droppedByTime += dropped
		if ok {
			survivors = append(survivors, file)
			survivorSegs = append(survivorSegs, segs)
		}
	}
	return survivors, survivorSegs, droppedByTime
}
// buildClassifyState assembles a ClassifyState from the filtered files and
// their segments, recording the total segment count and the number of files
// dropped by the time-of-day filter.
//
// When config.Goto is set, the cursor is placed on the first file whose base
// name equals it; an error is returned if no loaded file matches. The base
// name is taken with filepath.Base so that the comparison also works with
// the platform's path separator rather than only with "/".
func buildClassifyState(config ClassifyConfig, dataFiles []*utils.DataFile, filteredSegs [][]*utils.Segment, timeFiltered int) (*ClassifyState, error) {
	total := 0
	for _, segs := range filteredSegs {
		total += len(segs)
	}

	state := &ClassifyState{
		Config:            config,
		DataFiles:         dataFiles,
		filteredSegs:      filteredSegs,
		totalSegs:         total,
		TimeFilteredCount: timeFiltered,
	}
	if config.Goto == "" {
		return state, nil
	}

	for i, df := range state.DataFiles {
		if filepath.Base(df.FilePath) == config.Goto {
			state.FileIdx = i
			return state, nil
		}
	}
	return nil, fmt.Errorf("goto file not found (or has no matching segments): %s", config.Goto)
}
// applySampling keeps a random sample of roughly sample percent of all
// segments across cachedSegs (at least one, at most all of them), chosen by
// shuffling with rng. The surviving segments keep their original file and
// in-file order, and files left without any sampled segment are removed
// from both results.
//
// kept and cachedSegs must be index-aligned. When there are no segments at
// all, nil slices are returned instead of panicking.
func applySampling(kept []*utils.DataFile, cachedSegs [][]*utils.Segment, sample int, rng *rand.Rand) ([]*utils.DataFile, [][]*utils.Segment) {
	type segRef struct{ fileIdx, segIdx int }

	total := 0
	for _, segs := range cachedSegs {
		total += len(segs)
	}
	if total == 0 {
		// Nothing to sample; every file would be pruned below anyway.
		return nil, nil
	}

	flat := make([]segRef, 0, total)
	for fi, segs := range cachedSegs {
		for si := range segs {
			flat = append(flat, segRef{fi, si})
		}
	}

	// At least one segment, and never more than exist (guards sample > 100).
	targetCount := min(max(len(flat)*sample/100, 1), len(flat))
	rng.Shuffle(len(flat), func(i, j int) { flat[i], flat[j] = flat[j], flat[i] })
	selected := flat[:targetCount]

	// Restore file/segment order so the sample is reviewed in sequence.
	sort.Slice(selected, func(i, j int) bool {
		if selected[i].fileIdx != selected[j].fileIdx {
			return selected[i].fileIdx < selected[j].fileIdx
		}
		return selected[i].segIdx < selected[j].segIdx
	})

	newCached := make([][]*utils.Segment, len(cachedSegs))
	for _, ref := range selected {
		newCached[ref.fileIdx] = append(newCached[ref.fileIdx], cachedSegs[ref.fileIdx][ref.segIdx])
	}

	var newKept []*utils.DataFile
	var finalCached [][]*utils.Segment
	for i, segs := range newCached {
		if len(segs) > 0 {
			newKept = append(newKept, kept[i])
			finalCached = append(finalCached, segs)
		}
	}
	return newKept, finalCached
}
// FilteredSegs returns the per-file filtered segment lists. The slice is
// returned directly, not copied.
func (s *ClassifyState) FilteredSegs() [][]*utils.Segment {
	return s.filteredSegs
}
// CurrentFile returns the data file at FileIdx, or nil when FileIdx is past
// the end of DataFiles. A negative FileIdx is not guarded against.
func (s *ClassifyState) CurrentFile() *utils.DataFile {
	if s.FileIdx < len(s.DataFiles) {
		return s.DataFiles[s.FileIdx]
	}
	return nil
}
// CurrentSegment returns the filtered segment under the cursor, or nil when
// FileIdx or SegmentIdx is past the end of its list. Negative indices are
// not guarded against.
func (s *ClassifyState) CurrentSegment() *utils.Segment {
	if s.FileIdx >= len(s.filteredSegs) {
		return nil
	}
	if inFile := s.filteredSegs[s.FileIdx]; s.SegmentIdx < len(inFile) {
		return inFile[s.SegmentIdx]
	}
	return nil
}
// TotalSegments returns the stored total number of filtered segments.
func (s *ClassifyState) TotalSegments() int {
	return s.totalSegs
}
// CurrentSegmentNumber returns the 1-based position of the current segment
// counted across all files: the segments of every file before FileIdx plus
// SegmentIdx plus one.
func (s *ClassifyState) CurrentSegmentNumber() int {
	position := s.SegmentIdx + 1
	for fileIdx := 0; fileIdx < s.FileIdx; fileIdx++ {
		position += len(s.filteredSegs[fileIdx])
	}
	return position
}
// NextSegment advances the cursor by one segment, moving to the first
// segment of the next file when the current file is exhausted. It returns
// false, leaving the cursor unchanged, when there is nowhere to advance to.
func (s *ClassifyState) NextSegment() bool {
	if s.FileIdx >= len(s.filteredSegs) {
		return false
	}

	switch {
	case s.SegmentIdx+1 < len(s.filteredSegs[s.FileIdx]):
		s.SegmentIdx++
	case s.FileIdx+1 < len(s.DataFiles):
		s.FileIdx++
		s.SegmentIdx = 0
	default:
		return false
	}
	return true
}
// PrevSegment moves the cursor back by one segment, stepping to the last
// segment of the previous file when already on a file's first segment. It
// returns false, leaving the cursor unchanged, at the very first position.
func (s *ClassifyState) PrevSegment() bool {
	switch {
	case s.SegmentIdx > 0:
		s.SegmentIdx--
	case s.FileIdx > 0:
		s.FileIdx--
		// Clamp to 0 so a file with no filtered segments yields index 0.
		s.SegmentIdx = max(len(s.filteredSegs[s.FileIdx])-1, 0)
	default:
		return false
	}
	return true
}
// ParseKeyBuffer looks key up in Config.Bindings and returns the species and
// call type of the first binding whose Key equals it, or nil when no binding
// matches.
func (s *ClassifyState) ParseKeyBuffer(key string) *BindingResult {
	bindings := s.Config.Bindings
	for i := range bindings {
		if bindings[i].Key != key {
			continue
		}
		return &BindingResult{
			Species:  bindings[i].Species,
			CallType: bindings[i].CallType,
		}
	}
	return nil
}
// SetComment stores comment on the current segment's first label for the
// configured filter and returns the comment it replaced. When the segment
// has no such label, a "Don't Know" label with certainty 0 carrying the
// comment is appended and "" is returned.
//
// The reviewer is recorded on the file and the state is marked dirty. With
// no current segment or file nothing happens and "" is returned.
func (s *ClassifyState) SetComment(comment string) string {
	seg, df := s.CurrentSegment(), s.CurrentFile()
	if seg == nil || df == nil {
		return ""
	}
	df.Meta.Reviewer = s.Config.Reviewer

	existing := seg.GetFilterLabels(s.Config.Filter)
	if len(existing) > 0 {
		previous := existing[0].Comment
		existing[0].Comment = comment
		s.Dirty = true
		return previous
	}

	seg.Labels = append(seg.Labels, &utils.Label{
		Species:   "Don't Know",
		Certainty: 0,
		Filter:    s.Config.Filter,
		Comment:   comment,
	})
	s.Dirty = true
	return ""
}
// GetCurrentComment returns the comment of the current segment's first
// label for the configured filter, or "" when there is no current segment
// or no such label.
func (s *ClassifyState) GetCurrentComment() string {
	seg := s.CurrentSegment()
	if seg == nil {
		return ""
	}
	if labels := seg.GetFilterLabels(s.Config.Filter); len(labels) > 0 {
		return labels[0].Comment
	}
	return ""
}
// ApplyBinding writes result's species and call type to the current
// segment's label for the configured filter. Certainty is set to 100,
// except for the species "Don't Know", which gets 0.
//
// If the segment has no label for the filter one is appended; if it has
// several, the first is updated and the others are removed. The segment's
// labels are then sorted by species, the reviewer is recorded on the file,
// and the state is marked dirty.
//
// A nil result (as returned by ParseKeyBuffer for an unbound key) and a
// missing current segment or file are no-ops.
func (s *ClassifyState) ApplyBinding(result *BindingResult) {
	if result == nil {
		return
	}
	seg := s.CurrentSegment()
	if seg == nil {
		return
	}
	df := s.CurrentFile()
	if df == nil {
		return
	}
	df.Meta.Reviewer = s.Config.Reviewer

	certainty := 100
	if result.Species == "Don't Know" {
		certainty = 0
	}

	filterLabels := seg.GetFilterLabels(s.Config.Filter)
	if len(filterLabels) == 0 {
		seg.Labels = append(seg.Labels, &utils.Label{
			Species:   result.Species,
			Certainty: certainty,
			Filter:    s.Config.Filter,
			CallType:  result.CallType,
		})
	} else {
		primary := filterLabels[0]
		primary.Species = result.Species
		primary.Certainty = certainty
		primary.CallType = result.CallType

		// Drop every additional label for this filter, keeping the first.
		if extras := filterLabels[1:]; len(extras) > 0 {
			var remaining []*utils.Label
			for _, l := range seg.Labels {
				if !slices.Contains(extras, l) {
					remaining = append(remaining, l)
				}
			}
			seg.Labels = remaining
		}
	}

	// Stable sort: labels with the same species keep their relative order
	// instead of being shuffled on every edit.
	sort.SliceStable(seg.Labels, func(i, j int) bool {
		return seg.Labels[i].Species < seg.Labels[j].Species
	})
	s.Dirty = true
}
// ApplyCallTypeOnly sets the call type of the current segment's first label
// for the configured filter, records the reviewer on the file and marks the
// state dirty. It does nothing when there is no current segment, no current
// file, or no label for the filter.
func (s *ClassifyState) ApplyCallTypeOnly(callType string) {
	seg, df := s.CurrentSegment(), s.CurrentFile()
	if seg == nil || df == nil {
		return
	}

	labels := seg.GetFilterLabels(s.Config.Filter)
	if len(labels) == 0 {
		return
	}

	df.Meta.Reviewer = s.Config.Reviewer
	labels[0].CallType = callType
	s.Dirty = true
}
// HasSecondary reports whether Config.SecondaryBindings holds at least one
// entry under primaryKey.
func (s *ClassifyState) HasSecondary(primaryKey string) bool {
	secondary, found := s.Config.SecondaryBindings[primaryKey]
	return found && len(secondary) != 0
}
// ConfirmLabel raises the certainty of the current segment's first label
// for the configured filter to 100 and reports whether it changed anything.
//
// Labels whose certainty is already 100, or is 0, are left untouched and
// false is returned; the same goes for a missing segment, label or file.
// On success the reviewer is recorded and the state is marked dirty.
func (s *ClassifyState) ConfirmLabel() bool {
	seg := s.CurrentSegment()
	if seg == nil {
		return false
	}
	labels := seg.GetFilterLabels(s.Config.Filter)
	if len(labels) == 0 {
		return false
	}

	primary := labels[0]
	switch primary.Certainty {
	case 0, 100:
		return false
	}

	df := s.CurrentFile()
	if df == nil {
		return false
	}
	df.Meta.Reviewer = s.Config.Reviewer
	primary.Certainty = 100
	s.Dirty = true
	return true
}
// Save writes the current data file back to its own path when the state is
// dirty, then clears the dirty flag. It returns nil without writing when
// there is no current file or nothing is dirty, and returns the write error
// (leaving the flag set) when writing fails.
func (s *ClassifyState) Save() error {
	df := s.CurrentFile()
	if df == nil || !s.Dirty {
		return nil
	}
	if err := df.Write(df.FilePath); err != nil {
		return err
	}
	s.Dirty = false
	return nil
}
// getFilterLabel returns the label of seg that belongs to the configured
// filter: with an empty filter the segment's first label, otherwise the
// first label whose Filter equals it. It returns nil when there is none.
func (s *ClassifyState) getFilterLabel(seg *utils.Segment) *utils.Label {
	wanted := s.Config.Filter
	if wanted == "" {
		if len(seg.Labels) == 0 {
			return nil
		}
		return seg.Labels[0]
	}

	for i := range seg.Labels {
		if seg.Labels[i].Filter == wanted {
			return seg.Labels[i]
		}
	}
	return nil
}
// getOrCreateFilterLabel returns seg's label for the configured filter as
// found by getFilterLabel. When there is none, it appends a new
// "Don't Know" label with certainty 0 for that filter, marks the state
// dirty and returns the new label.
func (s *ClassifyState) getOrCreateFilterLabel(seg *utils.Segment) *utils.Label {
	if existing := s.getFilterLabel(seg); existing != nil {
		return existing
	}

	created := &utils.Label{
		Species:   "Don't Know",
		Certainty: 0,
		Filter:    s.Config.Filter,
	}
	seg.Labels = append(seg.Labels, created)
	s.Dirty = true
	return created
}
// HasBookmark reports whether the current segment's label for the
// configured filter is bookmarked. It is false when there is no current
// segment or no such label.
func (s *ClassifyState) HasBookmark() bool {
	// The unexported helper performs exactly this check.
	return s.hasFilterBookmark()
}
// ToggleBookmark flips the Bookmark flag on the current segment's label for
// the configured filter, creating that label when it does not exist. The
// reviewer is recorded on the file and the state is marked dirty. Nothing
// happens when there is no current segment or file.
func (s *ClassifyState) ToggleBookmark() {
	seg, df := s.CurrentSegment(), s.CurrentFile()
	if seg == nil || df == nil {
		return
	}
	df.Meta.Reviewer = s.Config.Reviewer

	target := s.getOrCreateFilterLabel(seg)
	target.Bookmark = !target.Bookmark
	s.Dirty = true
}
// NextBookmark moves the cursor forward to the next bookmarked segment,
// wrapping from the last segment to the first. It returns true when it
// stops on a bookmarked segment, and false once the search has come back
// around to the starting position without finding one.
func (s *ClassifyState) NextBookmark() bool {
	originFile, originSeg := s.FileIdx, s.SegmentIdx

	for step := 0; ; step++ {
		if !s.NextSegment() {
			// End of the list: wrap around to the beginning.
			s.FileIdx, s.SegmentIdx = 0, 0
		}
		// The origin only counts as "full circle" after the first step.
		if step > 0 && s.FileIdx == originFile && s.SegmentIdx == originSeg {
			return false
		}
		if s.hasFilterBookmark() {
			return true
		}
	}
}
// PrevBookmark moves the cursor backward to the previous bookmarked
// segment, wrapping from the first segment to the last. It returns true
// when it stops on a bookmarked segment, and false once the search has come
// back around to the starting position without finding one.
//
// With no files loaded it returns false immediately; without this guard the
// wrap-around would index the segment lists at -1 and panic.
func (s *ClassifyState) PrevBookmark() bool {
	// Index of the last file that exists in both parallel slices.
	lastFile := min(len(s.DataFiles), len(s.filteredSegs)) - 1
	if lastFile < 0 {
		return false
	}

	startFile := s.FileIdx
	startSeg := s.SegmentIdx
	first := true
	for {
		if !s.PrevSegment() {
			// Start of the list: wrap around to the very last segment.
			s.FileIdx = lastFile
			segs := s.filteredSegs[s.FileIdx]
			s.SegmentIdx = max(len(segs)-1, 0)
		}
		// The start only counts as "full circle" after the first step.
		if !first && s.FileIdx == startFile && s.SegmentIdx == startSeg {
			return false
		}
		first = false
		if s.hasFilterBookmark() {
			return true
		}
	}
}
// hasFilterBookmark reports whether the current segment has a label for the
// configured filter with its Bookmark flag set.
func (s *ClassifyState) hasFilterBookmark() bool {
	seg := s.CurrentSegment()
	if seg == nil {
		return false
	}
	if label := s.getFilterLabel(seg); label != nil {
		return label.Bookmark
	}
	return false
}
// FormatLabels renders labels as a comma-separated string. Each label is
// formatted as
//
//	Species[/CallType] (Certainty%)[ [Filter]][ "Comment"]
//
// with the bracketed parts present only when the field is non-empty. When
// filter is non-empty, only labels whose Filter equals it are included.
func FormatLabels(labels []*utils.Label, filter string) string {
	var rendered []string
	for _, label := range labels {
		included := filter == "" || label.Filter == filter
		if !included {
			continue
		}

		var b strings.Builder
		b.WriteString(label.Species)
		if label.CallType != "" {
			b.WriteString("/")
			b.WriteString(label.CallType)
		}
		fmt.Fprintf(&b, " (%d%%)", label.Certainty)
		if label.Filter != "" {
			b.WriteString(" [")
			b.WriteString(label.Filter)
			b.WriteString("]")
		}
		if label.Comment != "" {
			fmt.Fprintf(&b, " \"%s\"", label.Comment)
		}
		rendered = append(rendered, b.String())
	}
	return strings.Join(rendered, ", ")
}