#!/bin/bash
# Test skraak calls clip-labels
# Compares output against reference CSVs in clip-labels_test_data/
#
# Two test cases:
#   1. Normal (OPSO-equivalent): output matches clip_labels_opso.csv
#   2. __IGNORE__ mapping: D03 clips overlapping the ignored segment are excluded,
#      but the file is not dropped entirely
#
# Note: removes clip_labels.csv and clip_labels_ignore.csv before each run
# because the command appends and checks for duplicates.

# Load the shared test helpers. This file uses SCRIPT_DIR, PROJECT_DIR, the
# GREEN/RED/NC colour variables, the TESTS_* counters, check_binary and
# print_summary without defining them, so they are presumably provided by
# test_lib.sh (verify against that file).
# The library is checked for readability first: if it were silently skipped,
# SCRIPT_DIR would be empty and every later step would run in the wrong place.
test_lib_path="$(dirname "$0")/test_lib.sh"
if [ ! -r "$test_lib_path" ]; then
    echo "ERROR: cannot read $test_lib_path" >&2
    exit 1
fi
source "$test_lib_path"

TEST_DIR="$SCRIPT_DIR/clip-labels_test_data"

echo "=== Testing skraak calls clip-labels ==="
echo ""

check_binary

# Every path used below is relative to the test-data directory. Abort if we
# cannot enter it, otherwise the rm -f calls and the CSV comparisons would
# operate on whatever directory the script was started from.
cd "$TEST_DIR" || {
    echo "ERROR: cannot cd to $TEST_DIR" >&2
    exit 1
}

# ── Test 1: OPSO-equivalent output ──────────────────────────────────────
# Runs clip-labels with mapping.json and checks that the rows written to
# clip_labels.csv are the same set of rows as the reference
# clip_labels_opso.csv (first line of each file skipped, order ignored).
echo "Test 1: OPSO-equivalent output"

# Remove the output of any previous run so this run starts without it.
rm -f ./clip_labels.csv

# stderr is discarded to keep the test output quiet; the exit status is kept
# so it can be reported if no output file was written.
"$PROJECT_DIR/skraak" calls clip-labels \
    --folder . --mapping ./mapping.json \
    --clip-duration 5 --clip-overlap 0 --min-label-overlap 0.25 --final-clip full \
    --output ./clip_labels.csv 2>/dev/null
skraak_status=$?

# Compare: sort both, skip header.
# Both files must exist before comparing: with both missing, the two sorted
# streams are empty and diff would report "no differences" (a false pass).
# The verdict uses diff's exit status rather than "stdout is empty", so a diff
# that fails to run also counts as a failure.
test1_ok=false
if [ ! -r clip_labels_opso.csv ]; then
    diff_output="reference clip_labels_opso.csv is missing or unreadable"
elif [ ! -r clip_labels.csv ]; then
    diff_output="clip_labels.csv was not written (skraak exit status: $skraak_status)"
elif diff_output=$(diff <(tail -n +2 clip_labels_opso.csv | sort) \
                        <(tail -n +2 clip_labels.csv | sort)); then
    test1_ok=true
fi

if [ "$test1_ok" = true ]; then
    echo -e "  ${GREEN}${NC} clip_labels.csv matches clip_labels_opso.csv (sorted, prefix-normalised)"
    ((TESTS_PASSED++)) || true
else
    echo -e "  ${RED}${NC} clip_labels.csv differs from clip_labels_opso.csv"
    # Show at most the first 20 lines of the diff (or the reason it was skipped).
    echo "$diff_output" | head -20
    ((TESTS_FAILED++)) || true
fi
((TESTS_RUN++)) || true

# ── Test 2: __IGNORE__ mapping ──────────────────────────────────────────
# Runs clip-labels with mapping_ignore.json and checks that every row that
# does not mention D03 is unchanged relative to the reference.
echo "Test 2: __IGNORE__ mapping (D03 segment skipped, file kept)"

# Remove the output of any previous run so this run starts without it.
rm -f ./clip_labels_ignore.csv

# stderr is discarded to keep the test output quiet; the exit status is kept
# so it can be reported if no output file was written.
"$PROJECT_DIR/skraak" calls clip-labels \
    --folder . --mapping ./mapping_ignore.json \
    --clip-duration 5 --clip-overlap 0 --min-label-overlap 0.25 --final-clip full \
    --output ./clip_labels_ignore.csv 2>/dev/null
skraak_status=$?

# With __IGNORE__, clips overlapping the Don't Know segment (777-860s)
# in D03 are excluded, but D03's other clips are still emitted.
# The non-D03 rows should be identical to opso.
# Both files must exist before comparing: with both missing, the two filtered
# streams are empty and diff would report "no differences" (a false pass).
test2_ok=false
if [ ! -r clip_labels_opso.csv ]; then
    diff_output="reference clip_labels_opso.csv is missing or unreadable"
elif [ ! -r clip_labels_ignore.csv ]; then
    diff_output="clip_labels_ignore.csv was not written (skraak exit status: $skraak_status)"
elif diff_output=$(diff <(grep -v "D03" clip_labels_opso.csv | sort) \
                        <(grep -v "D03" clip_labels_ignore.csv | sort)); then
    test2_ok=true
fi

if [ "$test2_ok" = true ]; then
    echo -e "  ${GREEN}${NC} non-D03 rows match between ignore and opso"
    ((TESTS_PASSED++)) || true
else
    echo -e "  ${RED}${NC} non-D03 rows differ between ignore and opso"
    # Show at most the first 20 lines of the diff (or the reason it was skipped).
    echo "$diff_output" | head -20
    ((TESTS_FAILED++)) || true
fi
((TESTS_RUN++)) || true

# The ignore run must still emit rows mentioning D03: only part of that file
# is ignored, so it must not vanish from the output altogether.
if ! grep -q "D03" clip_labels_ignore.csv; then
    echo -e "  ${RED}${NC} D03 rows missing from clip_labels_ignore.csv (file should be kept)"
    TESTS_FAILED=$((TESTS_FAILED + 1))
else
    echo -e "  ${GREEN}${NC} D03 rows present in clip_labels_ignore.csv (file not dropped)"
    TESTS_PASSED=$((TESTS_PASSED + 1))
fi
TESTS_RUN=$((TESTS_RUN + 1))

# Verify D03 clips overlapping the __IGNORE__ segment (775-860s) are excluded:
# the ignore run must contain strictly fewer D03 rows than the reference.
# grep -c prints the bare count (no wc padding). It prints nothing when the
# file cannot be read, hence the :-0 defaults.
d03_ignore=$(grep -c "D03" clip_labels_ignore.csv)
d03_opso=$(grep -c "D03" clip_labels_opso.csv)
d03_ignore=${d03_ignore:-0}
d03_opso=${d03_opso:-0}

# A missing output file would count as 0 rows and look "reduced", so the file
# must be readable for this check to pass.
if [ -r clip_labels_ignore.csv ] && [ "$d03_ignore" -lt "$d03_opso" ]; then
    echo -e "  ${GREEN}${NC} D03 clips reduced: $d03_ignore in ignore vs $d03_opso in opso (overlapping clips excluded)"
    ((TESTS_PASSED++)) || true
else
    echo -e "  ${RED}${NC} D03 clips not reduced: $d03_ignore in ignore vs $d03_opso in opso"
    [ -r clip_labels_ignore.csv ] || echo "    clip_labels_ignore.csv is missing or unreadable"
    ((TESTS_FAILED++)) || true
fi
((TESTS_RUN++)) || true

# Verify no D03 clips in the 775-860s range appear in ignore output.
# Counts lines that mention D03 and whose second comma-separated field,
# read as a number, lies in [775, 860).
# NOTE(review): field 2 is presumably the clip start time in seconds - confirm
# against the CSV layout.
# The count is left empty when the output file cannot be read, so a missing
# file is reported as a failure instead of "0 clips in range".
d03_in_range=""
if [ -r clip_labels_ignore.csv ]; then
    d03_in_range=$(awk -F, '/D03/ && $2+0 >= 775 && $2+0 < 860 { n++ } END { print n+0 }' clip_labels_ignore.csv)
fi

if [ -z "$d03_in_range" ]; then
    echo -e "  ${RED}${NC} Cannot check 775-860s range: clip_labels_ignore.csv is missing or unreadable"
    ((TESTS_FAILED++)) || true
elif [ "$d03_in_range" -eq 0 ]; then
    echo -e "  ${GREEN}${NC} No D03 clips in 775-860s range (correctly excluded)"
    ((TESTS_PASSED++)) || true
else
    echo -e "  ${RED}${NC} Found $d03_in_range D03 clips in 775-860s range (should be excluded)"
    ((TESTS_FAILED++)) || true
fi
((TESTS_RUN++)) || true

# Blank separator line, then hand over to print_summary (defined outside this
# file) whose exit status becomes the script's exit status.
printf '\n'
print_summary