#!/bin/bash
# Test cluster_import.go via the CLI
# Exercises ImportCluster, GetLocationData, EnsureClusterPath, batchProcessFiles, insertClusterFiles
# Uses fresh copy of production DB in /tmp (auto-cleaned)

# Load the shared test helpers. The helper functions, colour variables and
# TESTS_* counters used below are not defined in this script and are expected
# to come from this library.
source "$(dirname "$0")/test_lib.sh"

echo "=== Testing Cluster Import ==="
echo ""

check_binary

# Create fresh test database
DB_PATH=$(fresh_test_db)
# Refuse to continue without a scratch database: every command below is
# pointed at "$DB_PATH", and an empty value must never reach the CLI.
if [ -z "$DB_PATH" ]; then
    echo "Failed to create fresh test database" >&2
    exit 1
fi
# Single quotes defer expansion until the trap fires, so the path is handed to
# cleanup_test_db as one correctly quoted argument even if it contains quotes
# or whitespace.
trap 'cleanup_test_db "$DB_PATH"' EXIT
echo "Using fresh test database: $DB_PATH"
echo ""

# Path to the CLI binary under test.
SKRAAK="$PROJECT_DIR/skraak"

# Create test entities in DB
# Each step keeps only stdout (stderr is discarded) so jq receives clean JSON.
# The script aborts as soon as an ID is missing because every later test
# depends on all three entities; the raw result is shown to aid debugging.
echo "Setup: Creating test dataset, location, cluster"
DATASET_RESULT=$("$SKRAAK" create dataset --db "$DB_PATH" --name "Cluster Import Test" --type structured 2>/dev/null)
DATASET_ID=$(echo "$DATASET_RESULT" | jq -r '.dataset.id // empty')
if [ -z "$DATASET_ID" ]; then
    echo -e "${RED}✗ Failed to create test dataset${NC}"
    echo "$DATASET_RESULT" | head -5
    exit 1
fi
echo "  Dataset: $DATASET_ID"

LOCATION_RESULT=$("$SKRAAK" create location --db "$DB_PATH" --dataset "$DATASET_ID" --name "Test Site" --lat -41.2865 --lon 174.7762 --timezone "Pacific/Auckland" 2>/dev/null)
LOCATION_ID=$(echo "$LOCATION_RESULT" | jq -r '.location.id // empty')
if [ -z "$LOCATION_ID" ]; then
    echo -e "${RED}✗ Failed to create test location${NC}"
    echo "$LOCATION_RESULT" | head -5
    exit 1
fi
echo "  Location: $LOCATION_ID"

CLUSTER_RESULT=$("$SKRAAK" create cluster --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --name "Test Cluster" --sample-rate 16000 2>/dev/null)
CLUSTER_ID=$(echo "$CLUSTER_RESULT" | jq -r '.cluster.id // empty')
if [ -z "$CLUSTER_ID" ]; then
    echo -e "${RED}✗ Failed to create test cluster${NC}"
    echo "$CLUSTER_RESULT" | head -5
    exit 1
fi
echo "  Cluster: $CLUSTER_ID"
echo ""

# Helper: extract the JSON object from mixed stdout/stderr output.
# Arguments: $1 - captured command output (log lines may surround the JSON)
# Outputs:   the text from the first line that begins with "{" up to and
#            including the first line consisting of a lone "}" at column 0
#            (the end of a pretty-printed top-level object). If the opening
#            line itself ends with "}" it is treated as a complete
#            single-line object. Prints nothing when no line starts with "{".
# Unlike a fixed line cap, this neither truncates long objects nor forwards
# log lines that follow the object.
extract_json() {
    awk '
        !started && substr($0, 1, 1) == "{" { started = 1; first = NR }
        started { print }
        started && /^[}][ \t\r]*$/ { exit }
        started && NR == first && /[}][ \t\r]*$/ { exit }
    ' <<<"$1"
}

# Create test WAV files in a private, unpredictably named directory.
# mktemp -d creates the directory atomically, so a stale or pre-seeded
# directory with a guessable name can never be picked up by the import.
WAV_DIR=$(mktemp -d "/tmp/skraak_cluster_test_XXXXXX")
if [ -z "$WAV_DIR" ] || [ ! -d "$WAV_DIR" ]; then
    echo -e "${RED}✗ Failed to create temporary WAV directory${NC}"
    exit 1
fi
# The two recordings must not be byte-identical or the second would be treated
# as a duplicate. They differ only in the second generate_wav argument (1 vs 2)
# - presumably the duration in seconds; confirm against generate_wav.
generate_wav "$WAV_DIR/test_recording_01.wav" 1 16000
generate_wav "$WAV_DIR/test_recording_02.wav" 2 16000
echo -e "${GREEN}${NC} Created 2 test WAV files in $WAV_DIR"
echo ""

# -------------------------------------------------------
# Test 1: Happy path - import folder
# -------------------------------------------------------
echo "Test 1: Import folder with valid WAV files"
# stderr is merged into stdout so diagnostics can be shown on failure;
# extract_json then isolates the JSON summary for jq.
RESULT=$("$SKRAAK" import folder --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --cluster "$CLUSTER_ID" --folder "$WAV_DIR" --recursive=false 2>&1)

JSON=$(extract_json "$RESULT")
# imported_files has no default: a missing summary must not look like success.
IMPORTED=$(echo "$JSON" | jq -r '.summary.imported_files // empty')
FAILED=$(echo "$JSON" | jq -r '.summary.failed_files // 0')
if [ "$IMPORTED" = "2" ] && [ "$FAILED" = "0" ]; then
    echo -e "${GREEN}${NC} Imported 2 files, 0 failures"
    ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
else
    echo -e "${RED}${NC} Expected imported=2 failed=0, got imported=$IMPORTED failed=$FAILED"
    echo "$RESULT" | head -10
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
fi

# -------------------------------------------------------
# Test 2: DB state - file records exist
# -------------------------------------------------------
echo ""
echo "Test 2: Verify file records in database"
# Count the active file rows attached to the cluster populated by Test 1.
FILE_COUNT=$("$SKRAAK" sql --db "$DB_PATH" "SELECT COUNT(*) as cnt FROM file WHERE cluster_id = '$CLUSTER_ID' AND active = true" 2>/dev/null | jq -r '.rows[0].cnt')
if [ "$FILE_COUNT" = "2" ]; then
    echo -e "${GREEN}${NC} Found 2 file records for cluster"
    ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
else
    echo -e "${RED}${NC} Expected 2 file records, got $FILE_COUNT"
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
fi

# -------------------------------------------------------
# Test 3: DB state - file_dataset junction records
# -------------------------------------------------------
echo ""
echo "Test 3: Verify file_dataset junction records"
# Count the junction rows that reference the test dataset.
FD_COUNT=$("$SKRAAK" sql --db "$DB_PATH" "SELECT COUNT(*) as cnt FROM file_dataset WHERE dataset_id = '$DATASET_ID'" 2>/dev/null | jq -r '.rows[0].cnt')
if [ "$FD_COUNT" = "2" ]; then
    echo -e "${GREEN}${NC} Found 2 file_dataset records"
    ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
else
    echo -e "${RED}${NC} Expected 2 file_dataset records, got $FD_COUNT"
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
fi

# -------------------------------------------------------
# Test 4: DB state - astronomical data computed
# -------------------------------------------------------
echo ""
echo "Test 4: Verify astronomical data was computed"
ASTRO_RESULT=$("$SKRAAK" sql --db "$DB_PATH" "SELECT maybe_solar_night, maybe_civil_night, moon_phase FROM file WHERE cluster_id = '$CLUSTER_ID' AND active = true LIMIT 1" 2>/dev/null)
SOLAR_NIGHT=$(echo "$ASTRO_RESULT" | jq -r '.rows[0].maybe_solar_night')
MOON_PHASE=$(echo "$ASTRO_RESULT" | jq -r '.rows[0].moon_phase')
# Both values must be non-empty as well as non-null: when the query itself
# fails, jq receives no input and prints nothing, and an empty string would
# otherwise slip past the "null" comparison and report a false pass.
if [ -n "$SOLAR_NIGHT" ] && [ "$SOLAR_NIGHT" != "null" ] && [ -n "$MOON_PHASE" ] && [ "$MOON_PHASE" != "null" ]; then
    echo -e "${GREEN}${NC} Astronomical data present (solar_night=$SOLAR_NIGHT, moon_phase=$MOON_PHASE)"
    ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
else
    echo -e "${RED}${NC} Astronomical data missing"
    echo "$ASTRO_RESULT" | head -5
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
fi

# -------------------------------------------------------
# Test 5: DB state - duration correct
# -------------------------------------------------------
echo ""
echo "Test 5: Verify file duration is correct"
# The two fixture files were generated with different second arguments (1 and
# 2 - presumably durations in seconds; confirm against generate_wav), so an
# unordered LIMIT 1 could return either row. Ordering by duration pins the
# check to the shortest recording and makes the result deterministic.
DURATION=$("$SKRAAK" sql --db "$DB_PATH" "SELECT duration FROM file WHERE cluster_id = '$CLUSTER_ID' AND active = true ORDER BY duration ASC LIMIT 1" 2>/dev/null | jq -r '.rows[0].duration')
# Duration should be approximately 1.0 (within 0.1 tolerance)
if [ "$DURATION" != "null" ] && [ "$DURATION" != "" ]; then
    # Use awk for float comparison
    APPROX_OK=$(echo "$DURATION" | awk '{if ($1 > 0.9 && $1 < 1.1) print "yes"; else print "no"}')
    if [ "$APPROX_OK" = "yes" ]; then
        echo -e "${GREEN}${NC} Duration = $DURATION (≈ 1.0s)"
        ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
    else
        echo -e "${RED}${NC} Duration = $DURATION (expected ≈ 1.0)"
        ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
    fi
else
    echo -e "${RED}${NC} Duration not found"
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
fi

# -------------------------------------------------------
# Test 6: EnsureClusterPath - cluster path set after import
# -------------------------------------------------------
echo ""
echo "Test 6: Verify cluster path was set (EnsureClusterPath)"
# Read back the path column of the cluster that Test 1 imported into.
CLUSTER_PATH=$("$SKRAAK" sql --db "$DB_PATH" "SELECT path FROM cluster WHERE id = '$CLUSTER_ID'" 2>/dev/null | jq -r '.rows[0].path')
# Empty output (query failed) and a JSON null both count as "not set".
if [ -n "$CLUSTER_PATH" ] && [ "$CLUSTER_PATH" != "null" ]; then
    echo -e "${GREEN}${NC} Cluster path set: $CLUSTER_PATH"
    ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
else
    echo -e "${RED}${NC} Cluster path not set"
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
fi

# -------------------------------------------------------
# Test 7: Duplicate detection - re-import should skip
# -------------------------------------------------------
echo ""
echo "Test 7: Duplicate detection (re-import same folder)"
# Need a new cluster to re-import into (hash dedup is global)
CLUSTER2_RESULT=$("$SKRAAK" create cluster --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --name "Test Cluster 2" --sample-rate 16000 2>/dev/null)
CLUSTER2_ID=$(echo "$CLUSTER2_RESULT" | jq -r '.cluster.id // empty')

if [ -z "$CLUSTER2_ID" ]; then
    # Report the real cause instead of running the import with an empty
    # cluster ID and showing a misleading skipped/imported mismatch.
    echo -e "${RED}${NC} Failed to create second cluster for duplicate test"
    echo "$CLUSTER2_RESULT" | head -5
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
else
    RESULT2=$("$SKRAAK" import folder --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --cluster "$CLUSTER2_ID" --folder "$WAV_DIR" --recursive=false 2>&1)
    JSON2=$(extract_json "$RESULT2")
    SKIPPED=$(echo "$JSON2" | jq -r '.summary.skipped_files // 0')
    IMPORTED2=$(echo "$JSON2" | jq -r '.summary.imported_files // 0')
    # Both files were already imported by Test 1, so all of them must be skipped.
    if [ "$SKIPPED" = "2" ] && [ "$IMPORTED2" = "0" ]; then
        echo -e "${GREEN}${NC} Skipped 2 duplicates, imported 0 new"
        ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
    else
        echo -e "${RED}${NC} Expected skipped=2 imported=0, got skipped=$SKIPPED imported=$IMPORTED2"
        echo "$RESULT2" | head -10
        ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
    fi
fi

# -------------------------------------------------------
# Test 8: Empty folder - no WAV files
# -------------------------------------------------------
echo ""
echo "Test 8: Empty folder (no WAV files)"
# mktemp -d guarantees a brand-new, empty directory; a fixed name combined
# with mkdir -p could silently reuse an existing directory that has content.
EMPTY_DIR=$(mktemp -d "/tmp/skraak_empty_test_XXXXXX")

if [ -z "$EMPTY_DIR" ] || [ ! -d "$EMPTY_DIR" ]; then
    echo -e "${RED}${NC} Failed to create empty temporary folder"
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
else
    CLUSTER3_RESULT=$("$SKRAAK" create cluster --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --name "Test Cluster 3" --sample-rate 16000 2>/dev/null)
    CLUSTER3_ID=$(echo "$CLUSTER3_RESULT" | jq -r '.cluster.id // empty')

    RESULT3=$("$SKRAAK" import folder --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --cluster "$CLUSTER3_ID" --folder "$EMPTY_DIR" 2>&1)
    JSON3=$(extract_json "$RESULT3")
    # No default here: a missing summary must not be mistaken for total_files=0.
    TOTAL_FILES=$(echo "$JSON3" | jq -r '.summary.total_files // empty')
    if [ "$TOTAL_FILES" = "0" ]; then
        echo -e "${GREEN}${NC} Empty folder returns total_files=0"
        ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
    else
        echo -e "${RED}${NC} Expected total_files=0, got $TOTAL_FILES"
        echo "$RESULT3" | head -5
        ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
    fi
    rm -rf "$EMPTY_DIR"
fi

# -------------------------------------------------------
# Test 9: Invalid location ID
# -------------------------------------------------------
echo ""
echo "Test 9: Invalid location ID (should fail)"
CLUSTER4_RESULT=$("$SKRAAK" create cluster --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --name "Test Cluster 4" --sample-rate 16000 2>/dev/null)
CLUSTER4_ID=$(echo "$CLUSTER4_RESULT" | jq -r '.cluster.id // empty')

RESULT4=$("$SKRAAK" import folder --db "$DB_PATH" --dataset "$DATASET_ID" --location "INVALID_ID_123" --cluster "$CLUSTER4_ID" --folder "$WAV_DIR" 2>&1)
STATUS4=$?
# Text matching alone is not enough: a successful import prints a summary
# containing the key "failed_files", which matches "failed". Requiring a
# non-zero exit status as well ensures the command was actually rejected.
# NOTE(review): assumes the CLI exits non-zero on errors - confirm.
if [ "$STATUS4" -ne 0 ] && grep -qi "error\|failed\|not found" <<<"$RESULT4"; then
    echo -e "${GREEN}${NC} Correctly rejected invalid location ID"
    ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
else
    echo -e "${RED}${NC} Should have rejected invalid location ID"
    echo "  exit status: $STATUS4"
    echo "$RESULT4" | head -5
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
fi

# -------------------------------------------------------
# Test 10: Non-existent folder
# -------------------------------------------------------
echo ""
echo "Test 10: Non-existent folder (should fail)"
RESULT5=$("$SKRAAK" import folder --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --cluster "$CLUSTER_ID" --folder /nonexistent/path 2>&1)
STATUS5=$?
# Require a non-zero exit status in addition to an error message, so that
# output which merely contains the word "error" (for example inside a success
# summary) cannot produce a false pass.
# NOTE(review): assumes the CLI exits non-zero on errors - confirm.
if [ "$STATUS5" -ne 0 ] && grep -qi "error\|not accessible\|not found\|no such" <<<"$RESULT5"; then
    echo -e "${GREEN}${NC} Correctly rejected non-existent folder"
    ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
else
    echo -e "${RED}${NC} Should have rejected non-existent folder"
    echo "  exit status: $STATUS5"
    echo "$RESULT5" | head -5
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
fi

# -------------------------------------------------------
# Test 11: Recursive vs non-recursive
# -------------------------------------------------------
echo ""
echo "Test 11: Recursive scan finds WAV in subfolder"
mkdir -p "$WAV_DIR/subfolder"
generate_wav "$WAV_DIR/subfolder/nested_file.wav" 1 16000

CLUSTER5_RESULT=$("$SKRAAK" create cluster --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --name "Test Cluster 5" --sample-rate 16000 2>/dev/null)
CLUSTER5_ID=$(echo "$CLUSTER5_RESULT" | jq -r '.cluster.id // empty')

# nested_file.wav is generated with the same arguments as test_recording_01.wav,
# so it is expected to be skipped as a duplicate rather than imported.
# Only total_files is asserted: it should show 3 (2 top-level + 1 nested).
RESULT6=$("$SKRAAK" import folder --db "$DB_PATH" --dataset "$DATASET_ID" --location "$LOCATION_ID" --cluster "$CLUSTER5_ID" --folder "$WAV_DIR" --recursive=true 2>&1)
JSON6=$(extract_json "$RESULT6")
TOTAL=$(echo "$JSON6" | jq -r '.summary.total_files // empty')
if [ "$TOTAL" = "3" ]; then
    echo -e "${GREEN}${NC} Recursive scan found 3 WAV files"
    ((TESTS_RUN++)) || true; ((TESTS_PASSED++)) || true
else
    echo -e "${RED}${NC} Expected total_files=3, got $TOTAL"
    echo "$RESULT6" | head -5
    ((TESTS_RUN++)) || true; ((TESTS_FAILED++)) || true
fi

# Teardown: remove the generated WAV fixtures (including the nested subfolder)
rm -rf "$WAV_DIR"
printf '\n'

# Report the accumulated results; this is the last command of the script,
# so its exit status becomes the script's exit status.
print_summary