#!/bin/bash
# Verify database state - check table counts and referential integrity
# Usage: ./test_db_state.sh [db_path]
# Default: uses test.duckdb

source "$(dirname "$0")/test_lib.sh"

DB_PATH="${1:-$DEFAULT_TEST_DB}"

if [ ! -f "$DB_PATH" ]; then
    echo -e "${RED}Error: Database not found at $DB_PATH${NC}"
    exit 1
fi

echo "=== Database State Verification ==="
echo "Database: $DB_PATH"
echo ""

check_binary

sql() {
    "$PROJECT_DIR/skraak" sql --db "$DB_PATH" "$1" 2>/dev/null
}

cnt() {
    sql "$1" | jq -r '.rows[0].cnt // "error"'
}

# Assert a query returns 0 rows (integrity violation check)
check_zero() {
    local name="$1"
    local query="$2"
    local count
    count=$(cnt "$query")
    ((TESTS_RUN++)) || true
    if [ "$count" = "0" ]; then
        echo -e "  ${GREEN}${NC} $name"
        ((TESTS_PASSED++)) || true
    else
        echo -e "  ${RED}${NC} $name: $count violation(s)"
        ((TESTS_FAILED++)) || true
    fi
}

# ── Counts ────────────────────────────────────────────────────────────────────
echo "Table Counts:"
echo "  Datasets:        $(cnt 'SELECT COUNT(*) AS cnt FROM dataset WHERE active = true')"
echo "  Locations:       $(cnt 'SELECT COUNT(*) AS cnt FROM location WHERE active = true')"
echo "  Clusters:        $(cnt 'SELECT COUNT(*) AS cnt FROM cluster WHERE active = true')"
echo "  Files:           $(cnt 'SELECT COUNT(*) AS cnt FROM file WHERE active = true')"
echo "  File-Dataset:    $(cnt 'SELECT COUNT(*) AS cnt FROM file_dataset')"
echo "  Segments:        $(cnt 'SELECT COUNT(*) AS cnt FROM segment WHERE active = true')"
echo "  Labels:          $(cnt 'SELECT COUNT(*) AS cnt FROM label WHERE active = true')"
echo "  Label subtypes:  $(cnt 'SELECT COUNT(*) AS cnt FROM label_subtype WHERE active = true')"
echo "  Moth metadata:   $(cnt 'SELECT COUNT(*) AS cnt FROM moth_metadata WHERE active = true')"
echo "  File metadata:   $(cnt 'SELECT COUNT(*) AS cnt FROM file_metadata WHERE active = true')"
echo "  Label metadata:  $(cnt 'SELECT COUNT(*) AS cnt FROM label_metadata WHERE active = true')"
echo ""

# ── Location hierarchy ────────────────────────────────────────────────────────
echo "Location hierarchy:"
check_zero "location.dataset_id → dataset" \
    "SELECT COUNT(*) AS cnt FROM location l LEFT JOIN dataset d ON l.dataset_id = d.id WHERE d.id IS NULL"
check_zero "cluster.location_id → location" \
    "SELECT COUNT(*) AS cnt FROM cluster c LEFT JOIN location l ON c.location_id = l.id WHERE l.id IS NULL"
check_zero "cluster.dataset_id → dataset" \
    "SELECT COUNT(*) AS cnt FROM cluster c LEFT JOIN dataset d ON c.dataset_id = d.id WHERE d.id IS NULL"
check_zero "cluster.cyclic_recording_pattern_id → cyclic_recording_pattern" \
    "SELECT COUNT(*) AS cnt FROM cluster c LEFT JOIN cyclic_recording_pattern p ON c.cyclic_recording_pattern_id = p.id WHERE c.cyclic_recording_pattern_id IS NOT NULL AND p.id IS NULL"
echo ""

# ── File linkage ──────────────────────────────────────────────────────────────
echo "File linkage:"
check_zero "file.location_id → location" \
    "SELECT COUNT(*) AS cnt FROM file f LEFT JOIN location l ON f.location_id = l.id WHERE f.location_id IS NOT NULL AND l.id IS NULL"
check_zero "file.cluster_id → cluster" \
    "SELECT COUNT(*) AS cnt FROM file f LEFT JOIN cluster c ON f.cluster_id = c.id WHERE f.cluster_id IS NOT NULL AND c.id IS NULL"
check_zero "file_dataset.file_id → file" \
    "SELECT COUNT(*) AS cnt FROM file_dataset fd LEFT JOIN file f ON fd.file_id = f.id WHERE f.id IS NULL"
check_zero "file_dataset.dataset_id → dataset" \
    "SELECT COUNT(*) AS cnt FROM file_dataset fd LEFT JOIN dataset d ON fd.dataset_id = d.id WHERE d.id IS NULL"
check_zero "active files have file_dataset entry" \
    "SELECT COUNT(*) AS cnt FROM file f LEFT JOIN file_dataset fd ON f.id = fd.file_id WHERE f.active = true AND fd.file_id IS NULL"
check_zero "file_dataset count >= active file count" \
    "SELECT CASE WHEN (SELECT COUNT(*) FROM file_dataset) >= (SELECT COUNT(*) FROM file WHERE active = true) THEN 0 ELSE 1 END AS cnt"
check_zero "file_metadata.file_id → file" \
    "SELECT COUNT(*) AS cnt FROM file_metadata fm LEFT JOIN file f ON fm.file_id = f.id WHERE f.id IS NULL"
echo ""

# ── Segment integrity ─────────────────────────────────────────────────────────
echo "Segment integrity:"
check_zero "segment.file_id → file" \
    "SELECT COUNT(*) AS cnt FROM segment s LEFT JOIN file f ON s.file_id = f.id WHERE f.id IS NULL"
check_zero "segment.dataset_id → dataset" \
    "SELECT COUNT(*) AS cnt FROM segment s LEFT JOIN dataset d ON s.dataset_id = d.id WHERE d.id IS NULL"
check_zero "active segments on inactive files" \
    "SELECT COUNT(*) AS cnt FROM segment s JOIN file f ON s.file_id = f.id WHERE s.active = true AND f.active = false"
echo ""

# ── Label integrity ───────────────────────────────────────────────────────────
echo "Label integrity:"
check_zero "label.segment_id → segment" \
    "SELECT COUNT(*) AS cnt FROM label l LEFT JOIN segment s ON l.segment_id = s.id WHERE s.id IS NULL"
check_zero "label.species_id → species" \
    "SELECT COUNT(*) AS cnt FROM label l LEFT JOIN species sp ON l.species_id = sp.id WHERE sp.id IS NULL"
check_zero "label.filter_id → filter" \
    "SELECT COUNT(*) AS cnt FROM label l LEFT JOIN filter f ON l.filter_id = f.id WHERE f.id IS NULL"
check_zero "active labels on inactive segments" \
    "SELECT COUNT(*) AS cnt FROM label l JOIN segment s ON l.segment_id = s.id WHERE l.active = true AND s.active = false"
check_zero "label_metadata.label_id → label" \
    "SELECT COUNT(*) AS cnt FROM label_metadata lm LEFT JOIN label l ON lm.label_id = l.id WHERE l.id IS NULL"
echo ""

# ── Label subtype integrity ───────────────────────────────────────────────────
echo "Label subtype integrity:"
check_zero "label_subtype.label_id → label" \
    "SELECT COUNT(*) AS cnt FROM label_subtype ls LEFT JOIN label l ON ls.label_id = l.id WHERE l.id IS NULL"
check_zero "label_subtype.calltype_id → call_type" \
    "SELECT COUNT(*) AS cnt FROM label_subtype ls LEFT JOIN call_type ct ON ls.calltype_id = ct.id WHERE ct.id IS NULL"
check_zero "label_subtype.filter_id → filter" \
    "SELECT COUNT(*) AS cnt FROM label_subtype ls LEFT JOIN filter f ON ls.filter_id = f.id WHERE ls.filter_id IS NOT NULL AND f.id IS NULL"
echo ""

# ── Reference table integrity ─────────────────────────────────────────────────
echo "Reference table integrity:"
check_zero "call_type.species_id → species" \
    "SELECT COUNT(*) AS cnt FROM call_type ct LEFT JOIN species sp ON ct.species_id = sp.id WHERE sp.id IS NULL"
echo ""

# ── Summary ───────────────────────────────────────────────────────────────────
echo "Summary: $TESTS_PASSED/$TESTS_RUN checks passed"

if [ "$TESTS_FAILED" -gt 0 ]; then
    exit 1
fi