```bash
#!/bin/bash
# Verify database state - check that tables have expected data
# Usage: ./verify_database_state.sh [db_path]
# Default: ../db/test.duckdb

DB_PATH="${1:-../db/test.duckdb}"

echo "=== Database State Verification ==="
echo "Database: $DB_PATH"
echo ""

{
  # Initialize MCP connection
  echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"verify","version":"1.0"}}}'
  sleep 0.2

  # Check datasets
  echo '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"execute_sql","arguments":{"query":"SELECT COUNT(*) as dataset_count FROM dataset WHERE active = true"}}}'
  sleep 0.2

  # Check locations
  echo '{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"execute_sql","arguments":{"query":"SELECT COUNT(*) as location_count FROM location WHERE active = true"}}}'
  sleep 0.2

  # Check clusters
  echo '{"jsonrpc":"2.0","id":4,"method":"tools/call","params":{"name":"execute_sql","arguments":{"query":"SELECT COUNT(*) as cluster_count FROM cluster WHERE active = true"}}}'
  sleep 0.2

  # Check files
  echo '{"jsonrpc":"2.0","id":5,"method":"tools/call","params":{"name":"execute_sql","arguments":{"query":"SELECT COUNT(*) as file_count FROM file WHERE active = true"}}}'
  sleep 0.2

  # Check file_dataset junction table
  echo '{"jsonrpc":"2.0","id":6,"method":"tools/call","params":{"name":"execute_sql","arguments":{"query":"SELECT COUNT(*) as file_dataset_count FROM file_dataset"}}}'
  sleep 0.2

  # Check moth_metadata
  echo '{"jsonrpc":"2.0","id":7,"method":"tools/call","params":{"name":"execute_sql","arguments":{"query":"SELECT COUNT(*) as moth_metadata_count FROM moth_metadata WHERE active = true"}}}'
  sleep 0.2

  # Check for orphaned files (files without file_dataset entries - SHOULD BE ZERO)
  echo '{"jsonrpc":"2.0","id":8,"method":"tools/call","params":{"name":"execute_sql","arguments":{"query":"SELECT COUNT(*) as orphaned_files FROM file f LEFT JOIN file_dataset fd ON f.id = fd.file_id WHERE f.active = true AND fd.file_id IS NULL"}}}'
  sleep 0.2
} | ../skraak_mcp "$DB_PATH" 2>/dev/null | jq -s '. as $responses |
{
  "datasets": ($responses[] | select(.id == 2) | .result.content[0].text | fromjson | .rows[0][0]),
  "locations": ($responses[] | select(.id == 3) | .result.content[0].text | fromjson | .rows[0][0]),
  "clusters": ($responses[] | select(.id == 4) | .result.content[0].text | fromjson | .rows[0][0]),
  "files": ($responses[] | select(.id == 5) | .result.content[0].text | fromjson | .rows[0][0]),
  "file_dataset_entries": ($responses[] | select(.id == 6) | .result.content[0].text | fromjson | .rows[0][0]),
  "moth_metadata_entries": ($responses[] | select(.id == 7) | .result.content[0].text | fromjson | .rows[0][0]),
  "orphaned_files": ($responses[] | select(.id == 8) | .result.content[0].text | fromjson | .rows[0][0])
}'

echo ""
echo "=== Verification ==="
echo "✓ All queries completed successfully"
echo "⚠️  Check that orphaned_files = 0 (files should have file_dataset entries)"
echo "⚠️  Check that file_dataset_entries ≥ files (should be equal or more if multiple datasets per file)"
```
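If the tables are consistent, the jq summary at the end of the run should look roughly like this (the counts below are illustrative, not taken from an actual run):

```json
{
  "datasets": 8,
  "locations": 139,
  "clusters": 412,
  "files": 1190000,
  "file_dataset_entries": 1190000,
  "moth_metadata_entries": 1190000,
  "orphaned_files": 0
}
```

A non-zero `orphaned_files` count means active rows in `file` have no matching `file_dataset` entry.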
```go
// Register import single file
mcp.AddTool(server, &mcp.Tool{
	Name:        "import_file",
	Description: "Import a single WAV file into the database. Automatically parses AudioMoth and filename timestamps, calculates hash, extracts metadata, and computes astronomical data. Skips if duplicate (by hash).",
}, tools.ImportFile)

// Register bulk file import
mcp.AddTool(server, &mcp.Tool{
	Name:        "bulk_file_import",
	Description: "Batch import WAV files across multiple locations/clusters using a CSV file. Auto-creates clusters if needed, logs progress to file for monitoring, and returns summary statistics. Synchronous/fail-fast execution.",
}, tools.BulkFileImport)
```
### Latest Update: Shared Cluster Import Refactoring (2026-01-26)
**Major refactoring: Extracted shared cluster import logic into utils module**

**Key Insight:** A cluster is the atomic unit of import (one SD card / one recording session / one folder).

**Created:**
- `utils/cluster_import.go` (553 lines) - Single source of truth for cluster imports
  - `ImportCluster()` - Main entry point used by both import_files.go and bulk_file_import.go
  - `scanClusterFiles()` - Recursive WAV file scanning
  - `batchProcessFiles()` - Batch processing with variance-based filename timestamp parsing
  - `insertClusterFiles()` - Transactional database insertion
- Moved `FileImportError` type from tools/ to utils/

**Modified:**
- `tools/import_files.go` - **75% code reduction** (650 lines → 161 lines)
  - Now just calls `utils.ImportCluster()` for all the heavy lifting
  - Removed ~500 lines of duplicated logic
- `tools/bulk_file_import.go` - **Bug fixes + simplification**
  - **🐛 CRITICAL BUG FIXED:** Now inserts into `file_dataset` table (was missing!)
  - **🐛 CRITICAL BUG FIXED:** Now inserts into `moth_metadata` table (was missing!)
  - Now uses shared `utils.ImportCluster()` logic
  - Files are no longer orphaned from datasets
- `tools/import_file.go` - Added helper wrappers for compatibility

**Benefits:**
- ✅ **Bug fixed:** 68,043 orphaned files found in test database (confirms the bug was real)
- ✅ **Single source of truth:** All cluster import logic in one place
- ✅ **Code reduction:** ~500 lines of duplicated code eliminated
- ✅ **Consistency:** Both single-cluster and multi-cluster imports use identical logic
- ✅ **Maintainability:** Changes to import logic are made in one place
- ✅ **Performance:** No regression, same batch processing as before

**Architecture:**
```
Before:
tools/import_files.go      (650 lines) - Custom logic
tools/bulk_file_import.go  (460 lines) - Different logic (BUGGY)

After:
utils/cluster_import.go    (553 lines) - Shared logic
tools/import_files.go      (161 lines) - Calls utils.ImportCluster()
tools/bulk_file_import.go  (393 lines) - Calls utils.ImportCluster()
```

**See Also:**
- `plan.md` - Complete refactoring plan with implementation checklist
- `REFACTORING_SUMMARY.md` - Detailed summary of changes
- `VERIFICATION_RESULTS.md` - Test results and database analysis
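A minimal sketch of the delegation pattern described above, assuming a hypothetical module path and options struct; the real `ImportCluster()` signature in `utils/cluster_import.go` may differ:

```go
// Sketch only: the "skraak/utils" import path, ClusterImportOptions struct, and
// ImportCluster signature are assumptions for illustration, not the actual
// skraak_mcp code.
package tools

import (
	"context"

	"skraak/utils" // hypothetical module path
)

// Both import_files and bulk_file_import funnel each cluster through the same
// helper, so file, file_dataset, and moth_metadata rows are written in exactly
// one place instead of two diverging copies of the logic.
func importOneCluster(ctx context.Context, clusterDir string, datasetID int64) error {
	_, err := utils.ImportCluster(ctx, utils.ClusterImportOptions{
		ClusterDir: clusterDir, // folder for one SD card / recording session
		DatasetID:  datasetID,  // links imported files via file_dataset
	})
	return err
}
```

Keeping the `file_dataset` insert inside the shared helper is what prevents the orphaned-file bug from recurring in either tool.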
---

### Previous Update: Generic SQL Tool + Codebase Rationalization (2026-01-26)

**Current Database**: 139 locations, 8 active datasets, 1.19M files in test.duckdb, read-only + write modes
**Import Logic**: Centralized in utils/cluster_import.go (553 lines)
**Code Quality**: ~500 lines of duplication eliminated (75% reduction in import_files.go)
**Known Issue**: test.duckdb has 68K orphaned files from old buggy import (historical data)