package tools

import (
	"encoding/csv"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"slices"
	"sort"
	"strconv"
	"strings"

	"skraak/utils"
)

// CallsClipLabelsInput configures the clip-labels exporter.
//
// Fields, as consumed by CallsClipLabels and its helpers:
//   - Folder: directory scanned for .data files (utils.FindDataFiles).
//   - MappingPath: mapping file loaded with utils.LoadMappingFile.
//   - Filter: when non-empty, only labels whose Filter field equals this
//     value are considered; an empty Filter accepts every label.
//   - OutputPath: CSV to create, or to append to when it already exists and
//     is non-empty.
//   - ClipDuration: length of each clip window; must be > 0 (presumably in
//     seconds, like the recording duration it is combined with — confirm
//     against utils.GenerateClipTimes).
//   - ClipOverlap: overlap between windows; must lie in [0, ClipDuration).
//   - MinLabelOverlap: minimum overlap between an annotation and a clip
//     window for the annotation to count; must be > 0.
//   - FinalClip: textual mode parsed by utils.ParseFinalClipMode.
type CallsClipLabelsInput struct {
	Folder          string  `json:"folder"`
	MappingPath     string  `json:"mapping"`
	Filter          string  `json:"filter,omitempty"`
	OutputPath      string  `json:"output"`
	ClipDuration    float64 `json:"clip_duration"`
	ClipOverlap     float64 `json:"clip_overlap"`
	MinLabelOverlap float64 `json:"min_label_overlap"`
	FinalClip       string  `json:"final_clip"`
}

// CallsClipLabelsOutput summarises a run.
//
// Folder, OutputPath and Filter echo the input. Classes is the class list
// taken from the mapping (sorted in place just before a successful return).
// DataFilesParsed is the number of .data files parsed, ExistingRowsFound the
// number of distinct row keys read from a pre-existing output CSV, and
// RowsWritten the number of rows written by this run. AppendedToFile is true
// when rows were appended to an existing, non-empty CSV instead of creating
// a new one. PerClassTrueCount counts, per class name, the emitted clips
// whose column for that class is True.
//
// When the run fails, the value returned alongside the error contains only
// the fields that had been filled in up to the point of failure.
type CallsClipLabelsOutput struct {
	Folder            string         `json:"folder"`
	OutputPath        string         `json:"output"`
	Filter            string         `json:"filter,omitempty"`
	Classes           []string       `json:"classes"`
	DataFilesParsed   int            `json:"data_files_parsed"`
	ClipsNegative     int            `json:"clips_negative"`      // emitted, all-False because of __NEGATIVE__
	ClipsIgnored      int            `json:"clips_ignored"`       // excluded from output because of __IGNORE__ overlap
	SegmentsIgnored   int            `json:"segments_ignored"`    // segments whose species maps to __IGNORE__
	ClipsAllFalseGap  int            `json:"clips_all_false_gap"` // emitted, all-False because no overlap
	PerClassTrueCount map[string]int `json:"per_class_true_count"`
	AppendedToFile    bool           `json:"appended_to_file"`
	ExistingRowsFound int            `json:"existing_rows_found"`
	RowsWritten       int            `json:"rows_written"`
}

// resolvedSeg is a segment that has been classified by the mapping and is
// ready for overlap-checking against clip windows.
//
// One resolvedSeg is produced per accepted label, so a segment carrying
// several labels yields several entries sharing the same start and end.
type resolvedSeg struct {
	start, end float64
	kind       utils.MappingKind
	classIdx   int // valid only when kind == utils.MappingReal
}

// clipDisposition describes the outcome for a single clip window.
type clipDisposition int

// The possible dispositions. When several conditions apply to the same
// window, classifyClip resolves them with the precedence
// dispoIgnored > dispoNegative > dispoLabelled > dispoGap.
const (
	dispoLabelled clipDisposition = iota // at least one class column is True
	dispoNegative                        // __NEGATIVE__ hit, all class columns False
	dispoGap                             // no segment overlaps, all class columns False
	dispoIgnored                         // __IGNORE__ hit, clip excluded from output
)

// clipLabelsRow is one row of the output CSV: the audio file path, the clip
// window's start and end times, and one boolean per class column (written as
// "True"/"False", in the same order as the class columns of the header).
type clipLabelsRow struct {
	file  string
	start float64
	end   float64
	flags []bool
}

// rowKey is used for duplicate detection.
//
// start and end are kept as strings so that keys read back verbatim from an
// existing CSV can be compared with keys built from new rows, whose times
// are rendered with formatTime.
type rowKey struct {
	file  string
	start string
	end   string
}

// Overview of CallsClipLabels (the exporter entry point, defined further down
// in this file): it reads .data files from a single folder and writes a CSV
// in OpenSoundScape's clip_labels format: one row per clip per file, with one
// True/False column per class in the mapping.
//
// This mirrors BoxedAnnotations.clip_labels(): every clip window is emitted,
// except windows that overlap an __IGNORE__ segment, which are left out of
// the CSV. A column is True when any annotation of that class overlaps the
// window by ≥ min_label_overlap seconds. Sentinel mappings (__NEGATIVE__,
// __IGNORE__) get no column and contribute no labels.

// parsedClipFile holds a parsed .data file for clip-labels processing:
// path is the location of the .data file on disk and df is the result of
// parsing it with utils.ParseDataFile.
type parsedClipFile struct {
	path string
	df   *utils.DataFile
}

// validateClipLabelsInput checks the clip parameters of input and returns the
// final-clip mode parsed from input.FinalClip.
//
// The final-clip mode is parsed first; after that the numeric constraints are
// checked in a fixed order and the first violation is reported:
// ClipDuration > 0, ClipOverlap in [0, ClipDuration), MinLabelOverlap > 0.
// On any error the returned mode is the zero value.
func validateClipLabelsInput(input CallsClipLabelsInput) (utils.FinalClipMode, error) {
	mode, parseErr := utils.ParseFinalClipMode(input.FinalClip)
	if parseErr != nil {
		return 0, parseErr
	}

	duration, overlap, minOverlap := input.ClipDuration, input.ClipOverlap, input.MinLabelOverlap
	switch {
	case duration <= 0:
		return 0, fmt.Errorf("--clip-duration must be > 0, got %v", duration)
	case overlap < 0 || overlap >= duration:
		return 0, fmt.Errorf("--clip-overlap must be in [0, clip-duration), got %v", overlap)
	case minOverlap <= 0:
		return 0, fmt.Errorf("--min-label-overlap must be > 0, got %v", minOverlap)
	}
	return mode, nil
}

// parseClipLabelsDataFiles locates the .data files under folder, parses each
// of them, and verifies that mapping covers every species they mention.
//
// Only labels accepted by filter (an empty filter accepts all labels) count
// towards the set of species that the mapping must cover. The function fails
// when the folder cannot be scanned, contains no .data files, a file cannot
// be parsed, a file lacks a positive Duration, or the mapping misses a
// species.
func parseClipLabelsDataFiles(folder, filter string, mapping utils.MappingFile) ([]parsedClipFile, error) {
	paths, err := utils.FindDataFiles(folder)
	switch {
	case err != nil:
		return nil, fmt.Errorf("scan folder %s: %w", folder, err)
	case len(paths) == 0:
		return nil, fmt.Errorf("no .data files found in %s", folder)
	}

	seen := make(map[string]bool)
	files := make([]parsedClipFile, 0, len(paths))
	for _, dataPath := range paths {
		df, parseErr := utils.ParseDataFile(dataPath)
		if parseErr != nil {
			return nil, fmt.Errorf("parse %s: %w", dataPath, parseErr)
		}
		// Clip windows are derived from the recording duration, so a file
		// without one cannot be processed.
		if df.Meta == nil || df.Meta.Duration <= 0 {
			return nil, fmt.Errorf("missing or non-positive Duration in %s (cannot generate clips)", dataPath)
		}
		for _, seg := range df.Segments {
			for _, lbl := range seg.Labels {
				if filter == "" || lbl.Filter == filter {
					seen[lbl.Species] = true
				}
			}
		}
		files = append(files, parsedClipFile{path: dataPath, df: df})
	}

	missing := mapping.ValidateCoversSpecies(seen)
	if len(missing) == 0 {
		return files, nil
	}
	return nil, fmt.Errorf("mapping.json is missing entries for species: %s\n(run /data-mapping to regenerate)", strings.Join(missing, ", "))
}

// dedupClipLabelsRows reports an error for the first row whose
// (file, start, end) key either repeats an earlier row of rows or is already
// present as a key of existing. Times are compared in their formatTime
// rendering. It returns nil when every row is unique.
func dedupClipLabelsRows(rows []clipLabelsRow, existing map[rowKey]bool) error {
	fresh := make(map[rowKey]struct{}, len(rows))
	for i := range rows {
		key := rowKey{
			file:  rows[i].file,
			start: formatTime(rows[i].start),
			end:   formatTime(rows[i].end),
		}
		// Presence of the key is what matters, not the stored boolean.
		_, inExisting := existing[key]
		_, inFresh := fresh[key]
		if inExisting || inFresh {
			return fmt.Errorf("duplicate clip detected: file=%s start=%s end=%s", key.file, key.start, key.end)
		}
		fresh[key] = struct{}{}
	}
	return nil
}

// CallsClipLabels exports clip labels for the .data files found in
// input.Folder to the CSV at input.OutputPath.
//
// The steps run in this order: validate the clip parameters, load the mapping
// and take its class list, parse every .data file, read the row keys of any
// existing output CSV, label the clip windows of every file, reject duplicate
// clips, and finally write (or append) the rows.
//
// On failure the returned output holds whatever had been filled in before the
// error occurred, together with a non-nil error.
func CallsClipLabels(input CallsClipLabelsInput) (CallsClipLabelsOutput, error) {
	result := CallsClipLabelsOutput{
		Folder:            input.Folder,
		OutputPath:        input.OutputPath,
		PerClassTrueCount: map[string]int{},
	}

	clipMode, err := validateClipLabelsInput(input)
	if err != nil {
		return result, err
	}

	mapping, err := utils.LoadMappingFile(input.MappingPath)
	if err != nil {
		return result, fmt.Errorf("load mapping %s: %w", input.MappingPath, err)
	}

	classNames := mapping.Classes()
	if len(classNames) == 0 {
		return result, fmt.Errorf("mapping.json has no real (non-sentinel) classes")
	}
	result.Classes, result.Filter = classNames, input.Filter

	// Column position of each class within the flag part of a row.
	columnOf := make(map[string]int, len(classNames))
	for col, name := range classNames {
		columnOf[name] = col
	}

	files, err := parseClipLabelsDataFiles(input.Folder, input.Filter, mapping)
	if err != nil {
		return result, err
	}
	result.DataFilesParsed = len(files)

	header := make([]string, 0, 3+len(classNames))
	header = append(header, "file", "start_time", "end_time")
	header = append(header, classNames...)

	knownKeys, appending, err := loadExistingRows(input.OutputPath, header)
	if err != nil {
		return result, err
	}
	result.AppendedToFile, result.ExistingRowsFound = appending, len(knownKeys)

	workDir, err := os.Getwd()
	if err != nil {
		return result, fmt.Errorf("getwd: %w", err)
	}
	absFolder, err := filepath.Abs(input.Folder)
	if err != nil {
		return result, fmt.Errorf("abs %s: %w", input.Folder, err)
	}

	allRows := make([]clipLabelsRow, 0, 1024)
	for _, pf := range files {
		fileRows, fileErr := processClipLabelsFile(pf.path, pf.df, mapping, columnOf, classNames, input, clipMode, workDir, absFolder, &result)
		if fileErr != nil {
			return result, fileErr
		}
		allRows = append(allRows, fileRows...)
	}

	// Nothing is written unless every new clip is unique, both among the new
	// rows and against the rows already in the CSV.
	if err = dedupClipLabelsRows(allRows, knownKeys); err != nil {
		return result, err
	}
	if err = writeRows(input.OutputPath, header, allRows, appending); err != nil {
		return result, err
	}
	result.RowsWritten = len(allRows)

	// NOTE(review): this sorts the slice obtained from mapping.Classes() in
	// place, after the CSV (whose column order follows the unsorted slice) has
	// been written — confirm Classes() already returns sorted names and does
	// not hand out internal state.
	sort.Strings(result.Classes)
	return result, nil
}

// processClipLabelsFile builds the clip-labels rows for one parsed .data
// file.
//
// It generates the clip windows for the recording's duration, resolves the
// file's segments against the mapping, computes the path of the matching
// audio file relative to cwd, and labels every window. Run counters in out
// are updated along the way. A file that yields no clip windows produces no
// rows and no error.
func processClipLabelsFile(
	path string,
	df *utils.DataFile,
	mapping utils.MappingFile,
	classIdx map[string]int,
	classes []string,
	input CallsClipLabelsInput,
	finalClipMode utils.FinalClipMode,
	cwd, folderAbs string,
	out *CallsClipLabelsOutput,
) ([]clipLabelsRow, error) {
	// NOTE(review): the trailing 10 is presumably a rounding precision for the
	// generated times — confirm against utils.GenerateClipTimes.
	clipWindows, genErr := utils.GenerateClipTimes(df.Meta.Duration, input.ClipDuration, input.ClipOverlap, finalClipMode, 10)
	switch {
	case genErr != nil:
		return nil, fmt.Errorf("generate clip windows for %s: %w", path, genErr)
	case len(clipWindows) == 0:
		return nil, nil
	}

	resolved := resolveSegments(df.Segments, input.Filter, input.MinLabelOverlap, mapping, classIdx, out)

	wavPath, relErr := computeWavRelPath(path, cwd, folderAbs)
	if relErr != nil {
		return nil, relErr
	}

	rows := labelClipWindows(clipWindows, resolved, wavPath, classes, input.MinLabelOverlap, out)
	return rows, nil
}

// resolveSegments turns the labels of segments into resolvedSeg entries, one
// per accepted label.
//
// A segment shorter than minLabelOverlap is dropped outright: its overlap
// with any window can never reach the threshold. Labels rejected by a
// non-empty filter, species the mapping cannot classify, and real classes
// absent from classIdx are skipped. Every label that maps to __IGNORE__
// increments out.SegmentsIgnored.
func resolveSegments(
	segments []*utils.Segment,
	filter string,
	minLabelOverlap float64,
	mapping utils.MappingFile,
	classIdx map[string]int,
	out *CallsClipLabelsOutput,
) []resolvedSeg {
	resolved := make([]resolvedSeg, 0, len(segments))
	for _, seg := range segments {
		if seg.EndTime-seg.StartTime < minLabelOverlap {
			continue
		}
		for _, lbl := range seg.Labels {
			if filter != "" && lbl.Filter != filter {
				continue
			}
			canon, kind, ok := mapping.Classify(lbl.Species)
			if !ok {
				continue
			}

			entry := resolvedSeg{start: seg.StartTime, end: seg.EndTime, kind: kind}
			switch kind {
			case utils.MappingReal:
				col, known := classIdx[canon]
				if !known {
					continue
				}
				entry.classIdx = col
			case utils.MappingIgn:
				out.SegmentsIgnored++
			case utils.MappingNeg:
				// Kept as-is; a negative segment carries no class index.
			default:
				// Any other kind contributes nothing.
				continue
			}
			resolved = append(resolved, entry)
		}
	}
	return resolved
}

// computeWavRelPath returns the path, relative to cwd, of the audio file that
// belongs to the .data file at dataPath.
//
// The audio file name is the base name of dataPath without its ".data"
// suffix, and it is taken to live directly in folderAbs (the directory part
// of dataPath is not used). When no relative path can be computed the
// absolute path is returned instead. Relative results always start with "./"
// (using the OS separator) to match the OPSO / pandas convention; this also
// applies to paths that begin with "..". The returned error is always nil.
func computeWavRelPath(dataPath, cwd, folderAbs string) (string, error) {
	dotPrefix := "." + string(filepath.Separator)

	base := filepath.Base(dataPath)
	target := filepath.Join(folderAbs, strings.TrimSuffix(base, ".data"))

	result, relErr := filepath.Rel(cwd, target)
	if relErr != nil {
		// Fall back to the absolute location rather than failing the run.
		result = target
	}

	switch {
	case result == "", filepath.IsAbs(result), strings.HasPrefix(result, dotPrefix):
		return result, nil
	}
	return dotPrefix + result, nil
}

// labelClipWindows classifies every window against segs and returns one row
// per window that is kept.
//
// Windows classified as ignored are counted in out.ClipsIgnored and produce
// no row. All other windows produce a row for file rel: negative and gap
// windows have every flag false (counted in out.ClipsNegative and
// out.ClipsAllFalseGap respectively), labelled windows have the flags of the
// classes that hit set to true, each hit also incrementing
// out.PerClassTrueCount for that class name.
func labelClipWindows(windows []utils.ClipWindow, segs []resolvedSeg, rel string, classes []string, minLabelOverlap float64, out *CallsClipLabelsOutput) []clipLabelsRow {
	var rows []clipLabelsRow
	nClasses := len(classes)

	for _, win := range windows {
		outcome, hits := classifyClip(win, segs, minLabelOverlap, nClasses)
		if outcome == dispoIgnored {
			out.ClipsIgnored++
			continue
		}

		flags := make([]bool, nClasses)
		switch outcome {
		case dispoLabelled:
			for col, hit := range hits {
				if !hit {
					continue
				}
				flags[col] = true
				out.PerClassTrueCount[classes[col]]++
			}
		case dispoGap:
			out.ClipsAllFalseGap++
		case dispoNegative:
			out.ClipsNegative++
		}

		rows = append(rows, clipLabelsRow{file: rel, start: win.Start, end: win.End, flags: flags})
	}
	return rows
}

// classifyClip decides what happens to clip window w given the resolved
// segments. Only segments overlapping the window by at least minLabelOverlap
// take part. Precedence: __IGNORE__ > __NEGATIVE__ > class labels > gap.
//
// The second result holds one hit flag per class (length nClasses); it is nil
// when the window is ignored. For a negative window the flags may still
// contain hits, which the caller is expected to disregard.
func classifyClip(w utils.ClipWindow, segs []resolvedSeg, minLabelOverlap float64, nClasses int) (clipDisposition, []bool) {
	var sawIgnore, sawNegative bool
	hits := make([]bool, nClasses)

	for i := range segs {
		seg := &segs[i]
		if overlapSeconds(seg.start, seg.end, w.Start, w.End) < minLabelOverlap {
			continue
		}
		switch seg.kind {
		case utils.MappingReal:
			hits[seg.classIdx] = true
		case utils.MappingNeg:
			sawNegative = true
		case utils.MappingIgn:
			sawIgnore = true
		}
	}

	switch {
	case sawIgnore:
		return dispoIgnored, nil
	case sawNegative:
		return dispoNegative, hits
	}

	verdict := dispoGap
	for _, hit := range hits {
		if hit {
			verdict = dispoLabelled
			break
		}
	}
	return verdict, hits
}

// loadExistingRows inspects a possibly pre-existing output CSV.
//
// A missing or zero-length file yields (nil, false, nil): there is nothing to
// deduplicate against and the caller should create the file. Otherwise the
// header must equal expectedHeader exactly, and the (file, start, end) cells
// of every data row are returned verbatim as keys together with true, meaning
// new rows should be appended. Rows with fewer than three cells, a header
// mismatch, and any I/O or CSV error are reported as errors.
func loadExistingRows(outputPath string, expectedHeader []string) (map[rowKey]bool, bool, error) {
	info, statErr := os.Stat(outputPath)
	switch {
	case statErr == nil:
		// The file exists; continue with the size check below.
	case os.IsNotExist(statErr):
		return nil, false, nil
	default:
		return nil, false, fmt.Errorf("stat %s: %w", outputPath, statErr)
	}
	if info.Size() == 0 {
		return nil, false, nil
	}

	file, err := os.Open(outputPath)
	if err != nil {
		return nil, false, fmt.Errorf("open existing %s: %w", outputPath, err)
	}
	// Read-only handle: a failing Close cannot lose data, so it is ignored.
	defer func() { _ = file.Close() }()

	reader := csv.NewReader(file)
	// Accept rows of any width; the width that matters is checked explicitly.
	reader.FieldsPerRecord = -1

	header, err := reader.Read()
	if err != nil {
		return nil, false, fmt.Errorf("read header of existing %s: %w", outputPath, err)
	}
	if !slices.Equal(header, expectedHeader) {
		return nil, false, fmt.Errorf("column-set mismatch in existing %s\n  existing: %s\n  new:      %s",
			outputPath, strings.Join(header, ","), strings.Join(expectedHeader, ","))
	}

	keys := map[rowKey]bool{}
	for {
		rec, readErr := reader.Read()
		switch {
		case readErr == io.EOF:
			return keys, true, nil
		case readErr != nil:
			return nil, false, fmt.Errorf("read row of existing %s: %w", outputPath, readErr)
		case len(rec) < 3:
			return nil, false, fmt.Errorf("malformed row in existing %s: %v", outputPath, rec)
		}
		keys[rowKey{file: rec[0], start: rec[1], end: rec[2]}] = true
	}
}

// overlapSeconds returns the length of the intersection of the intervals
// [aStart, aEnd) and [bStart, bEnd). Intervals that are disjoint or merely
// touch at an endpoint overlap by 0.
func overlapSeconds(aStart, aEnd, bStart, bEnd float64) float64 {
	switch left, right := max(aStart, bStart), min(aEnd, bEnd); {
	case right <= left:
		return 0
	default:
		return right - left
	}
}

// formatTime renders v the way pandas' to_csv renders floats by default: the
// shortest decimal representation that round-trips, with ".0" appended when
// that representation has no decimal point.
// e.g. 5 → "5.0", 5.5 → "5.5", 3.5001250000 → "3.500125".
func formatTime(v float64) string {
	text := strconv.FormatFloat(v, 'f', -1, 64)
	if strings.IndexByte(text, '.') >= 0 {
		return text
	}
	return text + ".0"
}

// writeRows writes the clip-labels rows to the CSV file at path.
//
// With appendMode false the file is created (or truncated) and header is
// written first. With appendMode true the file must already exist; rows are
// added after its current content and no header is written. Times are
// rendered with formatTime and flags as "True"/"False".
//
// It returns an error when the file cannot be opened, when a write or the
// final flush fails, or when closing the file fails. The close error is
// reported because, for a file opened for writing, it can be the only
// indication that data did not reach the disk.
func writeRows(path string, header []string, rows []clipLabelsRow, appendMode bool) (err error) {
	mode := os.O_CREATE | os.O_TRUNC | os.O_WRONLY
	if appendMode {
		mode = os.O_APPEND | os.O_WRONLY
	}
	f, err := os.OpenFile(path, mode, 0644)
	if err != nil {
		return fmt.Errorf("open %s for write: %w", path, err)
	}
	defer func() {
		// Keep the first error; surface the close error only when everything
		// else succeeded.
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("close %s: %w", path, closeErr)
		}
	}()

	w := csv.NewWriter(f)
	if !appendMode {
		if err := w.Write(header); err != nil {
			return fmt.Errorf("write header: %w", err)
		}
	}

	// The record buffer is rebuilt from scratch for every row, so its width
	// always matches that row's own flags: no stale cells from a previous row
	// and no out-of-range index if the flag counts ever differ.
	rec := make([]string, 0, len(header))
	for _, r := range rows {
		rec = append(rec[:0], r.file, formatTime(r.start), formatTime(r.end))
		for _, set := range r.flags {
			if set {
				rec = append(rec, "True")
			} else {
				rec = append(rec, "False")
			}
		}
		if err := w.Write(rec); err != nil {
			return fmt.Errorf("write row: %w", err)
		}
	}

	// csv.Writer buffers internally; Flush pushes the data to f and Error
	// reports any failure from the writes or the flush.
	w.Flush()
	return w.Error()
}