L4STQEXDGCPZXDHTEUBCOQKBMTFDRVXRLNFQHPDHOVXDCJO33LQQC package toolsimport ("context""database/sql""fmt""os""path/filepath""strings""time"gonanoid "github.com/matoous/go-nanoid/v2""github.com/modelcontextprotocol/go-sdk/mcp""skraak_mcp/db""skraak_mcp/utils")// ImportFileInput defines the input parameters for the import_file tooltype ImportFileInput struct {FilePath string `json:"file_path" jsonschema:"required,Absolute path to WAV file"`DatasetID string `json:"dataset_id" jsonschema:"required,Dataset ID (12 characters)"`LocationID string `json:"location_id" jsonschema:"required,Location ID (12 characters)"`ClusterID string `json:"cluster_id" jsonschema:"required,Cluster ID (12 characters)"`}// ImportFileOutput defines the output structure for the import_file tooltype ImportFileOutput struct {FileID string `json:"file_id" jsonschema:"Generated 21-character nanoid"`FileName string `json:"file_name" jsonschema:"Base filename"`Hash string `json:"hash" jsonschema:"XXH64 hash (16-character hex)"`Duration float64 `json:"duration_seconds" jsonschema:"File duration in seconds"`SampleRate int `json:"sample_rate" jsonschema:"Sample rate in Hz"`TimestampLocal time.Time `json:"timestamp_local" jsonschema:"Local timestamp"`IsAudioMoth bool `json:"is_audiomoth" jsonschema:"AudioMoth detection"`IsDuplicate bool `json:"is_duplicate" jsonschema:"Skipped as duplicate"`ProcessingTime string `json:"processing_time" jsonschema:"Duration string"`Error *string `json:"error,omitempty" jsonschema:"Error if failed"`}// ImportFile implements the import_file MCP tool// Imports a single WAV audio file into the database with full metadata extractionfunc ImportFile(ctx context.Context,req *mcp.CallToolRequest,input ImportFileInput,) (*mcp.CallToolResult, ImportFileOutput, error) {startTime := time.Now()var output ImportFileOutput// Phase 1: Validate file path_, err := validateFilePath(input.FilePath)if err != nil {return nil, output, fmt.Errorf("file validation failed: %w", err)}output.FileName = filepath.Base(input.FilePath)// Phase 2: Validate database hierarchyif err := validateImportInput(ImportAudioFilesInput{DatasetID: input.DatasetID,LocationID: input.LocationID,ClusterID: input.ClusterID,FolderPath: filepath.Dir(input.FilePath), // For validation only}, dbPath); err != nil {return nil, output, fmt.Errorf("hierarchy validation failed: %w", err)}// Phase 3: Get location data for astronomical calculationslocationData, err := getLocationData(dbPath, input.LocationID)if err != nil {return nil, output, fmt.Errorf("failed to get location data: %w", err)}// Phase 4: Process file metadatafileData, err := processFile(input.FilePath, locationData)if err != nil {errMsg := err.Error()output.Error = &errMsgoutput.ProcessingTime = time.Since(startTime).String()return nil, output, fmt.Errorf("file processing failed: %w", err)}// Populate output with extracted metadataoutput.FileName = fileData.FileNameoutput.Hash = fileData.Hashoutput.Duration = fileData.Durationoutput.SampleRate = fileData.SampleRateoutput.TimestampLocal = fileData.TimestampLocaloutput.IsAudioMoth = fileData.IsAudioMoth// Phase 5: Ensure cluster path is setif err := ensureClusterPath(dbPath, input.ClusterID, filepath.Dir(input.FilePath)); err != nil {return nil, output, fmt.Errorf("failed to set cluster path: %w", err)}// Phase 6: Insert into databasefileID, isDuplicate, err := insertFileIntoDB(dbPath,fileData,input.DatasetID,input.ClusterID,input.LocationID,)if err != nil {errMsg := err.Error()output.Error = &errMsgoutput.ProcessingTime = time.Since(startTime).String()return nil, output, fmt.Errorf("database insertion failed: %w", err)}output.FileID = fileIDoutput.IsDuplicate = isDuplicateoutput.ProcessingTime = time.Since(startTime).String()return &mcp.CallToolResult{}, output, nil}// validateFilePath validates the file exists, is a regular file, is a WAV file, and is not emptyfunc validateFilePath(filePath string) (os.FileInfo, error) {// Check file existsinfo, err := os.Stat(filePath)if err != nil {if os.IsNotExist(err) {return nil, fmt.Errorf("file does not exist: %s", filePath)}return nil, fmt.Errorf("cannot access file: %w", err)}// Check it's a regular fileif !info.Mode().IsRegular() {return nil, fmt.Errorf("path is not a regular file: %s", filePath)}// Check extension is .wav (case-insensitive)ext := strings.ToLower(filepath.Ext(filePath))if ext != ".wav" {return nil, fmt.Errorf("file must be a WAV file (got extension: %s)", ext)}// Check file is not emptyif info.Size() == 0 {return nil, fmt.Errorf("file is empty: %s", filePath)}return info, nil}// processFile extracts all metadata from a single filefunc processFile(filePath string, location *locationData) (*fileData, error) {// Step 1: Parse WAV headermetadata, err := utils.ParseWAVHeader(filePath)if err != nil {return nil, fmt.Errorf("WAV header parsing failed: %w", err)}// Step 2: Calculate hashhash, err := utils.ComputeXXH64(filePath)if err != nil {return nil, fmt.Errorf("hash calculation failed: %w", err)}// Step 3: Extract timestampvar timestampLocal time.Timevar isAudioMoth boolvar mothData *utils.AudioMothData// Try AudioMoth comment firstif utils.IsAudioMoth(metadata.Comment, metadata.Artist) {isAudioMoth = truemothData, err = utils.ParseAudioMothComment(metadata.Comment)if err == nil {timestampLocal = mothData.Timestamp} else {// AudioMoth detected but parsing failed - try filename// (Continue to filename parsing below)}}// If no AudioMoth timestamp, try filename timestampif timestampLocal.IsZero() {if utils.HasTimestampFilename(filePath) {// Parse filename timestamp (single-file array)filenameTimestamps, err := utils.ParseFilenameTimestamps([]string{filepath.Base(filePath)})if err != nil {return nil, fmt.Errorf("filename timestamp parsing failed: %w", err)}// Apply timezone offsetadjustedTimestamps, err := utils.ApplyTimezoneOffset(filenameTimestamps, location.TimezoneID)if err != nil {return nil, fmt.Errorf("timezone offset application failed: %w", err)}if len(adjustedTimestamps) > 0 {timestampLocal = adjustedTimestamps[0]}}}// If still no timestamp, return errorif timestampLocal.IsZero() {return nil, fmt.Errorf("cannot import file without timestamp (neither AudioMoth nor filename pattern)")}// Step 4: Calculate astronomical dataastroData := utils.CalculateAstronomicalData(timestampLocal.UTC(),metadata.Duration,location.Latitude,location.Longitude,)return &fileData{FileName: filepath.Base(filePath),Hash: hash,Duration: metadata.Duration,SampleRate: metadata.SampleRate,TimestampLocal: timestampLocal,IsAudioMoth: isAudioMoth,MothData: mothData,AstroData: astroData,}, nil}// insertFileIntoDB inserts a single file into the database// Returns (fileID, isDuplicate, error)func insertFileIntoDB(dbPath string,fileData *fileData,datasetID, clusterID, locationID string,) (string, bool, error) {// Open writable databasedatabase, err := db.OpenWriteableDB(dbPath)if err != nil {return "", false, fmt.Errorf("failed to open database: %w", err)}defer database.Close()// Begin transactionctx := context.Background()tx, err := database.BeginTx(ctx, nil)if err != nil {return "", false, fmt.Errorf("failed to begin transaction: %w", err)}defer tx.Rollback() // Rollback if not committed// Check for duplicate hashvar existingID stringvar existingName stringerr = tx.QueryRowContext(ctx,"SELECT id, file_name FROM file WHERE xxh64_hash = ? AND active = true",fileData.Hash,).Scan(&existingID, &existingName)if err == nil {// Duplicate found - return existing file inforeturn existingID, true, nil} else if err != sql.ErrNoRows {// Query error (not just "no rows")return "", false, fmt.Errorf("duplicate check failed: %w", err)}// No duplicate - proceed with insert// Generate file IDfileID, err := gonanoid.Generate("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 21)if err != nil {return "", false, fmt.Errorf("ID generation failed: %w", err)}// Insert file record_, err = tx.ExecContext(ctx, `INSERT INTO file (id, file_name, xxh64_hash, location_id, timestamp_local,cluster_id, duration, sample_rate, maybe_solar_night, maybe_civil_night,moon_phase, created_at, last_modified, active) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, now(), now(), true)`,fileID, fileData.FileName, fileData.Hash, locationID,fileData.TimestampLocal, clusterID, fileData.Duration, fileData.SampleRate,fileData.AstroData.SolarNight, fileData.AstroData.CivilNight, fileData.AstroData.MoonPhase,)if err != nil {return "", false, fmt.Errorf("file insert failed: %w", err)}// Insert file_dataset junction_, err = tx.ExecContext(ctx, `INSERT INTO file_dataset (file_id, dataset_id, created_at, last_modified)VALUES (?, ?, now(), now())`, fileID, datasetID)if err != nil {return "", false, fmt.Errorf("file_dataset insert failed: %w", err)}// If AudioMoth, insert moth_metadataif fileData.IsAudioMoth && fileData.MothData != nil {_, err = tx.ExecContext(ctx, `INSERT INTO moth_metadata (file_id, timestamp, recorder_id, gain, battery_v, temp_c,created_at, last_modified, active) VALUES (?, ?, ?, ?, ?, ?, now(), now(), true)`,fileID,fileData.MothData.Timestamp,&fileData.MothData.RecorderID,&fileData.MothData.Gain,&fileData.MothData.BatteryV,&fileData.MothData.TempC,)if err != nil {return "", false, fmt.Errorf("moth_metadata insert failed: %w", err)}}// Commit transactionif err = tx.Commit(); err != nil {return "", false, fmt.Errorf("transaction commit failed: %w", err)}return fileID, false, nil}
{"jsonrpc":"2.0","id":1,"result":{"capabilities":{"logging":{},"prompts":{"listChanged":true},"resources":{"listChanged":true},"tools":{"listChanged":true}},"protocolVersion":"2024-11-05","serverInfo":{"name":"skraak_mcp","version":"v1.0.0"}}}{"jsonrpc":"2.0","method":"notifications/tools/list_changed","params":{}}{"jsonrpc":"2.0","method":"notifications/prompts/list_changed","params":{}}{"jsonrpc":"2.0","method":"notifications/resources/list_changed","params":{}}{"jsonrpc":"2.0","id":2,"result":{"content":[{"type":"text","text":"file validation failed: file does not exist: /nonexistent/path/to/file.wav"}],"isError":true}}{"jsonrpc":"2.0","id":3,"result":{"content":[{"type":"text","text":"file validation failed: file must be a WAV file (got extension: )"}],"isError":true}}{"jsonrpc":"2.0","id":4,"result":{"content":[{"type":"text","text":"file validation failed: file does not exist: /tmp/test.wav"}],"isError":true}}{"jsonrpc":"2.0","id":5,"result":{"content":[{"type":"text","text":"file validation failed: file does not exist: /tmp/test.wav"}],"isError":true}}{"jsonrpc":"2.0","id":8,"result":{"content":[{"type":"text","text":"{\"columns\":[{\"database_type\":\"VARCHAR\",\"name\":\"file_name\"},{\"database_type\":\"VARCHAR\",\"name\":\"xxh64_hash\"},{\"database_type\":\"DECIMAL(7,3)\",\"name\":\"duration\"},{\"database_type\":\"INTEGER\",\"name\":\"sample_rate\"},{\"database_type\":\"BOOLEAN\",\"name\":\"maybe_solar_night\"}],\"limited\":false,\"query_executed\":\"SELECT file_name, xxh64_hash, duration, sample_rate, maybe_solar_night FROM file WHERE cluster_id = ? AND active = true ORDER BY created_at DESC LIMIT 3\",\"row_count\":3,\"rows\":[{\"duration\":\"60\",\"file_name\":\"20231204_123000.WAV\",\"maybe_solar_night\":false,\"sample_rate\":\"250000\",\"xxh64_hash\":\"f51d08eb40779d25\"},{\"duration\":\"60\",\"file_name\":\"20231031_090000.WAV\",\"maybe_solar_night\":false,\"sample_rate\":\"250000\",\"xxh64_hash\":\"58e100c14b67c0f3\"},{\"duration\":\"59\",\"file_name\":\"20231102_110001.WAV\",\"maybe_solar_night\":false,\"sample_rate\":\"250000\",\"xxh64_hash\":\"d05da810db87d31b\"}]}"}],"structuredContent":{"columns":[{"database_type":"VARCHAR","name":"file_name"},{"database_type":"VARCHAR","name":"xxh64_hash"},{"database_type":"DECIMAL(7,3)","name":"duration"},{"database_type":"INTEGER","name":"sample_rate"},{"database_type":"BOOLEAN","name":"maybe_solar_night"}],"limited":false,"query_executed":"SELECT file_name, xxh64_hash, duration, sample_rate, maybe_solar_night FROM file WHERE cluster_id = ? AND active = true ORDER BY created_at DESC LIMIT 3","row_count":3,"rows":[{"duration":"60","file_name":"20231204_123000.WAV","maybe_solar_night":false,"sample_rate":"250000","xxh64_hash":"f51d08eb40779d25"},{"duration":"60","file_name":"20231031_090000.WAV","maybe_solar_night":false,"sample_rate":"250000","xxh64_hash":"58e100c14b67c0f3"},{"duration":"59","file_name":"20231102_110001.WAV","maybe_solar_night":false,"sample_rate":"250000","xxh64_hash":"d05da810db87d31b"}]}}}
#!/bin/bash# Test suite for import_file tool# Usage: ./test_import_file.sh [db_path]# Always use test.duckdb for testing!DB_PATH="${1:-../db/skraak.duckdb}"# Get test IDs from databaseDATASET_ID=$(echo "SELECT id FROM dataset WHERE active = true LIMIT 1" | duckdb "$DB_PATH" -json | jq -r '.[0].id')LOCATION_ID=$(echo "SELECT id FROM location WHERE dataset_id = '$DATASET_ID' AND active = true LIMIT 1" | duckdb "$DB_PATH" -json | jq -r '.[0].id')CLUSTER_ID=$(echo "SELECT id FROM cluster WHERE location_id = '$LOCATION_ID' AND active = true LIMIT 1" | duckdb "$DB_PATH" -json | jq -r '.[0].id')# Get a real WAV file pathCLUSTER_PATH=$(echo "SELECT path FROM cluster WHERE id = '$CLUSTER_ID'" | duckdb "$DB_PATH" -json | jq -r '.[0].path')TEST_FILE=$(echo "SELECT file_name FROM file WHERE cluster_id = '$CLUSTER_ID' AND active = true LIMIT 1" | duckdb "$DB_PATH" -json | jq -r '.[0].file_name')# Construct full path (may or may not exist)if [ -n "$CLUSTER_PATH" ] && [ "$CLUSTER_PATH" != "null" ] && [ -n "$TEST_FILE" ] && [ "$TEST_FILE" != "null" ]; thenFULL_PATH="$CLUSTER_PATH/$TEST_FILE"elseFULL_PATH="/nonexistent/test.wav"fi{# Initialize MCP connectionecho '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"test","version":"1.0"}}}'sleep 0.2# Test 1: Import non-existent file (should error)echo "{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/call\",\"params\":{\"name\":\"import_file\",\"arguments\":{\"file_path\":\"/nonexistent/path/to/file.wav\",\"dataset_id\":\"$DATASET_ID\",\"location_id\":\"$LOCATION_ID\",\"cluster_id\":\"$CLUSTER_ID\"}}}"sleep 0.2# Test 2: Import non-WAV file (should error)echo "{\"jsonrpc\":\"2.0\",\"id\":3,\"method\":\"tools/call\",\"params\":{\"name\":\"import_file\",\"arguments\":{\"file_path\":\"/etc/passwd\",\"dataset_id\":\"$DATASET_ID\",\"location_id\":\"$LOCATION_ID\",\"cluster_id\":\"$CLUSTER_ID\"}}}"sleep 0.2# Test 3: Import with invalid dataset_id (should error)echo "{\"jsonrpc\":\"2.0\",\"id\":4,\"method\":\"tools/call\",\"params\":{\"name\":\"import_file\",\"arguments\":{\"file_path\":\"/tmp/test.wav\",\"dataset_id\":\"invalid_id123\",\"location_id\":\"$LOCATION_ID\",\"cluster_id\":\"$CLUSTER_ID\"}}}"sleep 0.2# Test 4: Import with invalid cluster_id (should error)echo "{\"jsonrpc\":\"2.0\",\"id\":5,\"method\":\"tools/call\",\"params\":{\"name\":\"import_file\",\"arguments\":{\"file_path\":\"/tmp/test.wav\",\"dataset_id\":\"$DATASET_ID\",\"location_id\":\"$LOCATION_ID\",\"cluster_id\":\"invalid_id123\"}}}"sleep 0.2# Test 5: Import real file (if it exists)if [ -f "$FULL_PATH" ]; then# Escape path for JSONESCAPED_PATH=$(echo "$FULL_PATH" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g')echo "{\"jsonrpc\":\"2.0\",\"id\":6,\"method\":\"tools/call\",\"params\":{\"name\":\"import_file\",\"arguments\":{\"file_path\":\"$ESCAPED_PATH\",\"dataset_id\":\"$DATASET_ID\",\"location_id\":\"$LOCATION_ID\",\"cluster_id\":\"$CLUSTER_ID\"}}}"sleep 0.2# Test 6: Import same file again (should be duplicate)echo "{\"jsonrpc\":\"2.0\",\"id\":7,\"method\":\"tools/call\",\"params\":{\"name\":\"import_file\",\"arguments\":{\"file_path\":\"$ESCAPED_PATH\",\"dataset_id\":\"$DATASET_ID\",\"location_id\":\"$LOCATION_ID\",\"cluster_id\":\"$CLUSTER_ID\"}}}"sleep 0.2fi# Test 7: Query files to verifyecho "{\"jsonrpc\":\"2.0\",\"id\":8,\"method\":\"tools/call\",\"params\":{\"name\":\"execute_sql\",\"arguments\":{\"query\":\"SELECT file_name, xxh64_hash, duration, sample_rate, maybe_solar_night FROM file WHERE cluster_id = ? AND active = true ORDER BY created_at DESC LIMIT 3\",\"parameters\":[\"$CLUSTER_ID\"],\"limit\":3}}}"sleep 0.2} | ../skraak_mcp "$DB_PATH" 2>/dev/null
// Register import single filemcp.AddTool(server, &mcp.Tool{Name: "import_file",Description: "Import a single WAV file into the database. Automatically parses AudioMoth and filename timestamps, calculates hash, extracts metadata, and computes astronomical data. Skips if duplicate (by hash).",}, tools.ImportFile)
4. **test_import_selections.sh [db_path]** - Tests import_ml_selections tool setup5. **get_time.sh** - Quick test of get_current_time tool (no database needed)
4. **test_import_file.sh [db_path]** - Tests import_file tool (single file import)5. **test_import_selections.sh [db_path]** - Tests import_ml_selections tool setup6. **get_time.sh** - Quick test of get_current_time tool (no database needed)
- `import_file` - Import a single WAV file into the database- **Input**: Absolute path to WAV file, dataset/location/cluster IDs- **Processing**: Same as batch import (AudioMoth/filename timestamps, hash, metadata, astronomical data)- **Output**: Detailed file metadata including file_id, hash, duration, sample_rate, timestamps- **Duplicate detection**: Returns `is_duplicate=true` if file hash already exists- **Use case**: Import individual files without scanning folders- **Example**:```json{"name": "import_file","arguments": {"file_path": "/path/to/recording.wav","dataset_id": "abc123xyz789","location_id": "def456uvw012","cluster_id": "ghi789rst345"}}```- **Output**:```json{"file_id": "nB3xK8pLm9qR5sT7uV2wX","file_name": "recording.wav","hash": "a1b2c3d4e5f6g7h8","duration_seconds": 60.0,"sample_rate": 250000,"timestamp_local": "2024-01-15T20:30:00+13:00","is_audiomoth": true,"is_duplicate": false,"processing_time": "250ms"}```
### Latest Update: Single File Import Tool (2026-02-02)**New Feature: Import individual WAV files with `import_file` tool****Added:**- `tools/import_file.go` - Single file import implementation (~300 lines)- `shell_scripts/test_import_file.sh` - Integration test script**Features:**- **Single file import**: Import one WAV file at a time with detailed feedback- **Same processing pipeline**: Reuses all utilities from batch import (AudioMoth parsing, timestamp extraction, hash computation, astronomical calculations)- **Shared helper functions**: Reuses `validateImportInput()`, `getLocationData()`, `ensureClusterPath()` from import_files.go- **Detailed output**: Returns file_id, hash, duration, sample_rate, timestamps, processing time- **Duplicate detection**: Checks hash before insertion, returns `is_duplicate=true` if exists- **Fail-fast errors**: Single file import is atomic - succeeds completely or fails with clear error message**Input:**```json{"file_path": "/absolute/path/to/file.wav","dataset_id": "12-char-id","location_id": "12-char-id","cluster_id": "12-char-id"}```**Output:**```json{"file_id": "21-char-nanoid","file_name": "filename.wav","hash": "16-char-xxh64-hex","duration_seconds": 60.0,"sample_rate": 250000,"timestamp_local": "2024-01-15T20:30:00+13:00","is_audiomoth": true,"is_duplicate": false,"processing_time": "250ms"}```**Use Cases:**- Import files one at a time with detailed feedback per file- Programmatic import where you already know the file path- Import files from different locations without folder scanning- Get immediate feedback on duplicate detection- Alternative to batch import for small numbers of files**Tool Count Update**: Now **12 total tools** (read: 2, write: 8, import: 3)
**Last Updated**: 2026-01-29 12:15 NZDT**Status**: ML selection import tool implemented and tested**Current Tools**: 11 (read: 2, write: 8, import: 2)
**Last Updated**: 2026-02-02 09:40 NZDT**Status**: Single file import tool implemented and tested**Current Tools**: 12 (read: 2, write: 8, import: 3)