package tools

import (
	"context"
	"database/sql"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/modelcontextprotocol/go-sdk/mcp"

	"skraak_mcp/db"
	"skraak_mcp/utils"
)

// ImportMLSelectionsInput defines the input parameters for the import_ml_selections tool
type ImportMLSelectionsInput struct {
	FolderPath string `json:"folder_path" jsonschema:"required,Absolute path to Clips_* folder"`
	DatasetID  string `json:"dataset_id" jsonschema:"required,Dataset ID (12-character nanoid)"`
	ClusterID  string `json:"cluster_id" jsonschema:"required,Cluster ID (12-character nanoid)"`
}

// ImportMLSelectionsOutput defines the output structure for the import_ml_selections tool
type ImportMLSelectionsOutput struct {
	Summary      ImportSelectionSummary `json:"summary"`
	SelectionIDs []string               `json:"selection_ids"`
	Errors       []SelectionImportError `json:"errors,omitempty"`
}

// ImportSelectionSummary provides summary statistics for the import operation
type ImportSelectionSummary struct {
	FilterName         string `json:"filter_name"`
	TotalSelections    int    `json:"total_selections"`
	ImportedSelections int    `json:"imported_selections"`
	FailedSelections   int    `json:"failed_selections"`
	SpeciesCount       int    `json:"species_count"`
	CallTypeCount      int    `json:"call_type_count"`
	ProcessingTimeMs   int64  `json:"processing_time_ms"`
}

// SelectionImportError records errors encountered during selection processing
type SelectionImportError struct {
	FileName string `json:"file_name"`
	Species  string `json:"species,omitempty"`
	CallType string `json:"call_type,omitempty"`
	Error    string `json:"error"`
	Stage    string `json:"stage"` // "scan", "parse", "validate", "insert"
}

// scannedSelection holds data for a single selection to be imported
type scannedSelection struct {
	BaseFilename  string
	StartTime     float64
	EndTime       float64
	SpeciesLabel  string
	CallTypeLabel string // empty if no call type subfolder
	WAVPath       string
	PNGPath       string
}

// ImportMLSelections implements the import_ml_selections MCP tool
func ImportMLSelections(
	ctx context.Context,
	req *mcp.CallToolRequest,
	input ImportMLSelectionsInput,
) (*mcp.CallToolResult, ImportMLSelectionsOutput, error) {
	startTime := time.Now()
	var output ImportMLSelectionsOutput

	// Phase A: Input Validation & Folder Parsing
	folderName := filepath.Base(input.FolderPath)
	filterName, _, err := utils.ParseMLFolderName(folderName)
	if err != nil {
		return nil, output, fmt.Errorf("invalid folder name: %w", err)
	}

	// Verify folder exists
	if _, err := os.Stat(input.FolderPath); os.IsNotExist(err) {
		return nil, output, fmt.Errorf("folder does not exist: %s", input.FolderPath)
	}

	// Validate dataset and cluster
	if err := validateMLImportEntities(input.DatasetID, input.ClusterID); err != nil {
		return nil, output, fmt.Errorf("validation failed: %w", err)
	}

	// Phase B: Recursive Folder Scanning
	selections, scanErrors := scanMLFolderStructure(input.FolderPath)
	if len(selections) == 0 && len(scanErrors) > 0 {
		output.Errors = scanErrors
		output.Summary.FailedSelections = len(scanErrors)
		return &mcp.CallToolResult{}, output, nil
	}

	// Phase C: Comprehensive Pre-Import Validation
	validationErrors := validateAllSelectionEntities(
		filterName,
		input.DatasetID,
		input.ClusterID,
		selections,
	)

	if len(validationErrors) > 0 {
		output.Errors = append(scanErrors, validationErrors...)
		output.Summary.FilterName = filterName
		output.Summary.TotalSelections = len(selections)
		output.Summary.FailedSelections = len(output.Errors)
		output.Summary.ProcessingTimeMs = time.Since(startTime).Milliseconds()
		return &mcp.CallToolResult{}, output, nil
	}

	// Phase D: Transactional Database Import
	selectionIDs, insertErrors := insertSelectionsIntoDB(
		filterName,
		input.DatasetID,
		input.ClusterID,
		selections,
	)

	// Calculate summary statistics
	speciesSet := make(map[string]bool)
	callTypeSet := make(map[string]bool)
	for _, sel := range selections {
		speciesSet[sel.SpeciesLabel] = true
		if sel.CallTypeLabel != "" {
			callTypeSet[sel.CallTypeLabel] = true
		}
	}

	allErrors := append(scanErrors, validationErrors...)
	allErrors = append(allErrors, insertErrors...)

	output = ImportMLSelectionsOutput{
		Summary: ImportSelectionSummary{
			FilterName:         filterName,
			TotalSelections:    len(selections),
			ImportedSelections: len(selectionIDs),
			FailedSelections:   len(allErrors),
			SpeciesCount:       len(speciesSet),
			CallTypeCount:      len(callTypeSet),
			ProcessingTimeMs:   time.Since(startTime).Milliseconds(),
		},
		SelectionIDs: selectionIDs,
		Errors:       allErrors,
	}

	return &mcp.CallToolResult{}, output, nil
}

// validateMLImportEntities validates that dataset and cluster exist and are linked
func validateMLImportEntities(datasetID, clusterID string) error {
	database, err := db.OpenWriteableDB(dbPath)
	if err != nil {
		return fmt.Errorf("failed to connect to database: %w", err)
	}
	defer database.Close()

	// Verify dataset exists
	var datasetActive bool
	err = database.QueryRow(`
		SELECT active FROM dataset WHERE id = ?
	`, datasetID).Scan(&datasetActive)
	if err == sql.ErrNoRows {
		return fmt.Errorf("dataset not found: %s", datasetID)
	}
	if err != nil {
		return fmt.Errorf("failed to query dataset: %w", err)
	}
	if !datasetActive {
		return fmt.Errorf("dataset is not active: %s", datasetID)
	}

	// Verify cluster exists and belongs to dataset's location
	var clusterActive bool
	var locationID string
	err = database.QueryRow(`
		SELECT c.active, c.location_id
		FROM cluster c
		JOIN location l ON c.location_id = l.id
		WHERE c.id = ? AND l.dataset_id = ?
	`, clusterID, datasetID).Scan(&clusterActive, &locationID)
	if err == sql.ErrNoRows {
		return fmt.Errorf("cluster not found or not linked to dataset: cluster=%s, dataset=%s", clusterID, datasetID)
	}
	if err != nil {
		return fmt.Errorf("failed to query cluster: %w", err)
	}
	if !clusterActive {
		return fmt.Errorf("cluster is not active: %s", clusterID)
	}

	return nil
}

// scanMLFolderStructure recursively scans the folder structure and builds scannedSelection list
func scanMLFolderStructure(rootPath string) ([]scannedSelection, []SelectionImportError) {
	var selections []scannedSelection
	var errors []SelectionImportError

	// Read immediate children (species folders)
	entries, err := os.ReadDir(rootPath)
	if err != nil {
		errors = append(errors, SelectionImportError{
			Error: fmt.Sprintf("Failed to read root folder: %v", err),
			Stage: "scan",
		})
		return selections, errors
	}

	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}

		speciesLabel := entry.Name()
		speciesPath := filepath.Join(rootPath, speciesLabel)

		// Read species folder contents
		speciesEntries, err := os.ReadDir(speciesPath)
		if err != nil {
			errors = append(errors, SelectionImportError{
				Species: speciesLabel,
				Error:   fmt.Sprintf("Failed to read species folder: %v", err),
				Stage:   "scan",
			})
			continue
		}

		// Separate files and subdirectories
		var wavFiles, pngFiles []string
		var subDirs []string

		for _, se := range speciesEntries {
			if se.IsDir() {
				subDirs = append(subDirs, se.Name())
			} else {
				ext := strings.ToLower(filepath.Ext(se.Name()))
				if ext == ".wav" {
					wavFiles = append(wavFiles, se.Name())
				} else if ext == ".png" {
					pngFiles = append(pngFiles, se.Name())
				}
			}
		}

		// Process direct WAV/PNG pairs in species folder (no call type)
		paired, mismatched := utils.ValidateWAVPNGPairs(wavFiles, pngFiles)
		for _, baseName := range paired {
			baseFilename, startTime, endTime, err := utils.ParseSelectionFilename(baseName + ".wav")
			if err != nil {
				errors = append(errors, SelectionImportError{
					FileName: baseName + ".wav",
					Species:  speciesLabel,
					Error:    err.Error(),
					Stage:    "parse",
				})
				continue
			}

			selections = append(selections, scannedSelection{
				BaseFilename:  baseFilename,
				StartTime:     startTime,
				EndTime:       endTime,
				SpeciesLabel:  speciesLabel,
				CallTypeLabel: "", // No call type
				WAVPath:       filepath.Join(speciesPath, baseName+".wav"),
				PNGPath:       filepath.Join(speciesPath, baseName+".png"),
			})
		}

		for _, mm := range mismatched {
			errors = append(errors, SelectionImportError{
				FileName: mm,
				Species:  speciesLabel,
				Error:    "Missing corresponding PNG file",
				Stage:    "scan",
			})
		}

		// Process call type subfolders
		for _, callTypeLabel := range subDirs {
			callTypePath := filepath.Join(speciesPath, callTypeLabel)

			callTypeEntries, err := os.ReadDir(callTypePath)
			if err != nil {
				errors = append(errors, SelectionImportError{
					Species:  speciesLabel,
					CallType: callTypeLabel,
					Error:    fmt.Sprintf("Failed to read call type folder: %v", err),
					Stage:    "scan",
				})
				continue
			}

			var ctWavFiles, ctPngFiles []string
			for _, cte := range callTypeEntries {
				if cte.IsDir() {
					continue // Skip nested directories
				}
				ext := strings.ToLower(filepath.Ext(cte.Name()))
				if ext == ".wav" {
					ctWavFiles = append(ctWavFiles, cte.Name())
				} else if ext == ".png" {
					ctPngFiles = append(ctPngFiles, cte.Name())
				}
			}

			ctPaired, ctMismatched := utils.ValidateWAVPNGPairs(ctWavFiles, ctPngFiles)
			for _, baseName := range ctPaired {
				baseFilename, startTime, endTime, err := utils.ParseSelectionFilename(baseName + ".wav")
				if err != nil {
					errors = append(errors, SelectionImportError{
						FileName: baseName + ".wav",
						Species:  speciesLabel,
						CallType: callTypeLabel,
						Error:    err.Error(),
						Stage:    "parse",
					})
					continue
				}

				selections = append(selections, scannedSelection{
					BaseFilename:  baseFilename,
					StartTime:     startTime,
					EndTime:       endTime,
					SpeciesLabel:  speciesLabel,
					CallTypeLabel: callTypeLabel,
					WAVPath:       filepath.Join(callTypePath, baseName+".wav"),
					PNGPath:       filepath.Join(callTypePath, baseName+".png"),
				})
			}

			for _, mm := range ctMismatched {
				errors = append(errors, SelectionImportError{
					FileName: mm,
					Species:  speciesLabel,
					CallType: callTypeLabel,
					Error:    "Missing corresponding PNG file",
					Stage:    "scan",
				})
			}
		}
	}

	return selections, errors
}

// validateAllSelectionEntities performs comprehensive pre-import validation
func validateAllSelectionEntities(
	filterName, datasetID, clusterID string,
	selections []scannedSelection,
) []SelectionImportError {
	var errors []SelectionImportError

	database, err := db.OpenWriteableDB(dbPath)
	if err != nil {
		errors = append(errors, SelectionImportError{
			Error: fmt.Sprintf("Failed to connect to database: %v", err),
			Stage: "validate",
		})
		return errors
	}
	defer database.Close()

	// 1. Validate filter exists
	var filterID string
	err = database.QueryRow(`
		SELECT id FROM filter WHERE name = ? AND active = true
	`, filterName).Scan(&filterID)
	if err == sql.ErrNoRows {
		errors = append(errors, SelectionImportError{
			Error: fmt.Sprintf("Filter '%s' not found in database", filterName),
			Stage: "validate",
		})
		return errors // Cannot proceed without filter
	}
	if err != nil {
		errors = append(errors, SelectionImportError{
			Error: fmt.Sprintf("Failed to query filter: %v", err),
			Stage: "validate",
		})
		return errors
	}

	// 2. Collect unique species and call types
	speciesSet := make(map[string]bool)
	callTypesBySpecies := make(map[string]map[string]bool)

	for _, sel := range selections {
		speciesSet[sel.SpeciesLabel] = true
		if sel.CallTypeLabel != "" {
			if callTypesBySpecies[sel.SpeciesLabel] == nil {
				callTypesBySpecies[sel.SpeciesLabel] = make(map[string]bool)
			}
			callTypesBySpecies[sel.SpeciesLabel][sel.CallTypeLabel] = true
		}
	}

	// 3. Batch validate species
	speciesLabels := make([]string, 0, len(speciesSet))
	for label := range speciesSet {
		speciesLabels = append(speciesLabels, label)
	}
	sort.Strings(speciesLabels)

	speciesIDMap := make(map[string]string) // label -> id
	if len(speciesLabels) > 0 {
		query := `
			SELECT s.id, s.label
			FROM species s
			JOIN species_dataset sd ON s.id = sd.species_id
			WHERE s.label IN (` + placeholders(len(speciesLabels)) + `)
			  AND sd.dataset_id = ?
			  AND s.active = true
		`
		args := make([]interface{}, len(speciesLabels)+1)
		for i, label := range speciesLabels {
			args[i] = label
		}
		args[len(speciesLabels)] = datasetID

		rows, err := database.Query(query, args...)
		if err != nil {
			errors = append(errors, SelectionImportError{
				Error: fmt.Sprintf("Failed to query species: %v", err),
				Stage: "validate",
			})
			return errors
		}
		defer rows.Close()

		for rows.Next() {
			var id, label string
			if err := rows.Scan(&id, &label); err != nil {
				continue
			}
			speciesIDMap[label] = id
		}

		// Check for missing species
		var missingSpecies []string
		for _, label := range speciesLabels {
			if _, exists := speciesIDMap[label]; !exists {
				missingSpecies = append(missingSpecies, label)
			}
		}
		if len(missingSpecies) > 0 {
			errors = append(errors, SelectionImportError{
				Error: fmt.Sprintf("Species not found or not linked to dataset: [%s]", strings.Join(missingSpecies, ", ")),
				Stage: "validate",
			})
		}
	}

	// 4. Batch validate call types
	callTypeIDMap := make(map[string]map[string]string) // species_label -> call_type_label -> id
	if len(callTypesBySpecies) > 0 {
		// Flatten call types for query
		var allCallTypeLabels []string
		for _, ctSet := range callTypesBySpecies {
			for ct := range ctSet {
				allCallTypeLabels = append(allCallTypeLabels, ct)
			}
		}
		// Deduplicate
		callTypeLabelSet := make(map[string]bool)
		for _, ct := range allCallTypeLabels {
			callTypeLabelSet[ct] = true
		}
		allCallTypeLabels = make([]string, 0, len(callTypeLabelSet))
		for ct := range callTypeLabelSet {
			allCallTypeLabels = append(allCallTypeLabels, ct)
		}
		sort.Strings(allCallTypeLabels)

		if len(allCallTypeLabels) > 0 && len(speciesLabels) > 0 {
			query := `
				SELECT ct.id, ct.label, s.label as species_label
				FROM call_type ct
				JOIN species s ON ct.species_id = s.id
				WHERE s.label IN (` + placeholders(len(speciesLabels)) + `)
				  AND ct.label IN (` + placeholders(len(allCallTypeLabels)) + `)
				  AND ct.active = true
			`
			args := make([]interface{}, len(speciesLabels)+len(allCallTypeLabels))
			for i, label := range speciesLabels {
				args[i] = label
			}
			for i, label := range allCallTypeLabels {
				args[len(speciesLabels)+i] = label
			}

			rows, err := database.Query(query, args...)
			if err != nil {
				errors = append(errors, SelectionImportError{
					Error: fmt.Sprintf("Failed to query call types: %v", err),
					Stage: "validate",
				})
				return errors
			}
			defer rows.Close()

			for rows.Next() {
				var id, label, speciesLabel string
				if err := rows.Scan(&id, &label, &speciesLabel); err != nil {
					continue
				}
				if callTypeIDMap[speciesLabel] == nil {
					callTypeIDMap[speciesLabel] = make(map[string]string)
				}
				callTypeIDMap[speciesLabel][label] = id
			}

			// Check for missing call types
			var missingCallTypes []string
			for speciesLabel, ctSet := range callTypesBySpecies {
				for ctLabel := range ctSet {
					if callTypeIDMap[speciesLabel] == nil || callTypeIDMap[speciesLabel][ctLabel] == "" {
						missingCallTypes = append(missingCallTypes, fmt.Sprintf("%s/%s", speciesLabel, ctLabel))
					}
				}
			}
			if len(missingCallTypes) > 0 {
				errors = append(errors, SelectionImportError{
					Error: fmt.Sprintf("Call types not found: [%s]", strings.Join(missingCallTypes, ", ")),
					Stage: "validate",
				})
			}
		}
	}

	// 5. Validate files with fuzzy matching (two-pass)
	baseFilenames := make(map[string]bool)
	for _, sel := range selections {
		baseFilenames[sel.BaseFilename] = true
	}

	// Convert to sorted slice
	baseFilenameList := make([]string, 0, len(baseFilenames))
	for bf := range baseFilenames {
		baseFilenameList = append(baseFilenameList, bf)
	}
	sort.Strings(baseFilenameList)

	// Pass 1: Exact match (try adding .wav extension)
	fileIDMap := make(map[string]struct {
		ID       string
		Duration float64
	})

	exactMatchNames := make([]string, len(baseFilenameList))
	for i, bf := range baseFilenameList {
		exactMatchNames[i] = bf + ".wav"
	}

	if len(exactMatchNames) > 0 {
		query := `
			SELECT id, file_name, duration
			FROM file
			WHERE file_name IN (` + placeholders(len(exactMatchNames)) + `)
			  AND cluster_id = ?
			  AND active = true
		`
		args := make([]interface{}, len(exactMatchNames)+1)
		for i, name := range exactMatchNames {
			args[i] = name
		}
		args[len(exactMatchNames)] = clusterID

		rows, err := database.Query(query, args...)
		if err != nil {
			errors = append(errors, SelectionImportError{
				Error: fmt.Sprintf("Failed to query files (exact match): %v", err),
				Stage: "validate",
			})
			return errors
		}
		defer rows.Close()

		for rows.Next() {
			var id, fileName string
			var duration float64
			if err := rows.Scan(&id, &fileName, &duration); err != nil {
				continue
			}
			// Remove .wav extension to match base filename
			baseKey := strings.TrimSuffix(fileName, ".wav")
			fileIDMap[baseKey] = struct {
				ID       string
				Duration float64
			}{ID: id, Duration: duration}
		}
	}

	// Pass 2: Pattern match for files not found
	var unmatchedFiles []string
	for _, bf := range baseFilenameList {
		if _, found := fileIDMap[bf]; !found {
			unmatchedFiles = append(unmatchedFiles, bf)
		}
	}

	if len(unmatchedFiles) > 0 {
		// Get all files in cluster for pattern matching
		rows, err := database.Query(`
			SELECT id, file_name, duration
			FROM file
			WHERE cluster_id = ? AND active = true
		`, clusterID)
		if err != nil {
			errors = append(errors, SelectionImportError{
				Error: fmt.Sprintf("Failed to query files (pattern match): %v", err),
				Stage: "validate",
			})
			return errors
		}
		defer rows.Close()

		// Build map of all files
		var allClusterFiles []struct {
			ID       string
			FileName string
			Duration float64
		}
		for rows.Next() {
			var id, fileName string
			var duration float64
			if err := rows.Scan(&id, &fileName, &duration); err != nil {
				continue
			}
			allClusterFiles = append(allClusterFiles, struct {
				ID       string
				FileName string
				Duration float64
			}{ID: id, FileName: fileName, Duration: duration})
		}

		// Try pattern matching for each unmatched file
		for _, bf := range unmatchedFiles {
			pattern, found := utils.ExtractDateTimePattern(bf)
			if !found {
				errors = append(errors, SelectionImportError{
					FileName: bf,
					Error:    "File not found in cluster and no date_time pattern detected",
					Stage:    "validate",
				})
				continue
			}

			// Find files containing this pattern
			var matches []struct {
				ID       string
				FileName string
				Duration float64
			}
			for _, f := range allClusterFiles {
				if strings.Contains(f.FileName, pattern) {
					matches = append(matches, f)
				}
			}

			if len(matches) == 0 {
				errors = append(errors, SelectionImportError{
					FileName: bf,
					Error:    fmt.Sprintf("File not found for base name (tried exact match and pattern '%s')", pattern),
					Stage:    "validate",
				})
			} else if len(matches) > 1 {
				matchNames := make([]string, len(matches))
				for i, m := range matches {
					matchNames[i] = m.FileName
				}
				errors = append(errors, SelectionImportError{
					FileName: bf,
					Error:    fmt.Sprintf("Ambiguous file match: multiple files found [%s]", strings.Join(matchNames, ", ")),
					Stage:    "validate",
				})
			} else {
				// Single match - use it
				baseKey := bf
				fileIDMap[baseKey] = struct {
					ID       string
					Duration float64
				}{ID: matches[0].ID, Duration: matches[0].Duration}
			}
		}
	}

	// 6. Validate selection bounds
	for _, sel := range selections {
		fileInfo, found := fileIDMap[sel.BaseFilename]
		if !found {
			// Already reported in file validation
			continue
		}

		if sel.StartTime >= sel.EndTime {
			errors = append(errors, SelectionImportError{
				FileName: sel.BaseFilename,
				Species:  sel.SpeciesLabel,
				CallType: sel.CallTypeLabel,
				Error:    fmt.Sprintf("Start time (%.2fs) must be less than end time (%.2fs)", sel.StartTime, sel.EndTime),
				Stage:    "validate",
			})
		}

		if sel.EndTime > fileInfo.Duration {
			errors = append(errors, SelectionImportError{
				FileName: sel.BaseFilename,
				Species:  sel.SpeciesLabel,
				CallType: sel.CallTypeLabel,
				Error:    fmt.Sprintf("Selection end time (%.2fs) exceeds file duration (%.2fs)", sel.EndTime, fileInfo.Duration),
				Stage:    "validate",
			})
		}
	}

	return errors
}

// insertSelectionsIntoDB inserts all selections in a single transaction
func insertSelectionsIntoDB(
	filterName, datasetID, clusterID string,
	selections []scannedSelection,
) ([]string, []SelectionImportError) {
	var selectionIDs []string
	var errors []SelectionImportError

	database, err := db.OpenWriteableDB(dbPath)
	if err != nil {
		errors = append(errors, SelectionImportError{
			Error: fmt.Sprintf("Failed to connect to database: %v", err),
			Stage: "insert",
		})
		return selectionIDs, errors
	}
	defer database.Close()

	// Begin transaction
	tx, err := database.Begin()
	if err != nil {
		errors = append(errors, SelectionImportError{
			Error: fmt.Sprintf("Failed to begin transaction: %v", err),
			Stage: "insert",
		})
		return selectionIDs, errors
	}
	defer tx.Rollback()

	// Get filter ID
	var filterID string
	err = tx.QueryRow(`SELECT id FROM filter WHERE name = ? AND active = true`, filterName).Scan(&filterID)
	if err != nil {
		errors = append(errors, SelectionImportError{
			Error: fmt.Sprintf("Failed to get filter ID: %v", err),
			Stage: "insert",
		})
		return selectionIDs, errors
	}

	// Get species ID map
	speciesSet := make(map[string]bool)
	for _, sel := range selections {
		speciesSet[sel.SpeciesLabel] = true
	}
	speciesLabels := make([]string, 0, len(speciesSet))
	for label := range speciesSet {
		speciesLabels = append(speciesLabels, label)
	}

	speciesIDMap := make(map[string]string)
	if len(speciesLabels) > 0 {
		query := `SELECT s.id, s.label FROM species s
		          JOIN species_dataset sd ON s.id = sd.species_id
		          WHERE s.label IN (` + placeholders(len(speciesLabels)) + `)
		            AND sd.dataset_id = ? AND s.active = true`
		args := make([]interface{}, len(speciesLabels)+1)
		for i, l := range speciesLabels {
			args[i] = l
		}
		args[len(speciesLabels)] = datasetID

		rows, err := tx.Query(query, args...)
		if err != nil {
			errors = append(errors, SelectionImportError{
				Error: fmt.Sprintf("Failed to query species IDs: %v", err),
				Stage: "insert",
			})
			return selectionIDs, errors
		}
		defer rows.Close()

		for rows.Next() {
			var id, label string
			if err := rows.Scan(&id, &label); err == nil {
				speciesIDMap[label] = id
			}
		}
	}

	// Get call type ID map
	callTypeIDMap := make(map[string]map[string]string) // species_label -> call_type_label -> id
	callTypesBySpecies := make(map[string]map[string]bool)
	for _, sel := range selections {
		if sel.CallTypeLabel != "" {
			if callTypesBySpecies[sel.SpeciesLabel] == nil {
				callTypesBySpecies[sel.SpeciesLabel] = make(map[string]bool)
			}
			callTypesBySpecies[sel.SpeciesLabel][sel.CallTypeLabel] = true
		}
	}

	if len(callTypesBySpecies) > 0 {
		var allCallTypes []string
		for _, ctSet := range callTypesBySpecies {
			for ct := range ctSet {
				allCallTypes = append(allCallTypes, ct)
			}
		}
		// Deduplicate
		ctSet := make(map[string]bool)
		for _, ct := range allCallTypes {
			ctSet[ct] = true
		}
		allCallTypes = make([]string, 0, len(ctSet))
		for ct := range ctSet {
			allCallTypes = append(allCallTypes, ct)
		}

		if len(allCallTypes) > 0 && len(speciesLabels) > 0 {
			query := `SELECT ct.id, ct.label, s.label as species_label
			          FROM call_type ct
			          JOIN species s ON ct.species_id = s.id
			          WHERE s.label IN (` + placeholders(len(speciesLabels)) + `)
			            AND ct.label IN (` + placeholders(len(allCallTypes)) + `)
			            AND ct.active = true`
			args := make([]interface{}, len(speciesLabels)+len(allCallTypes))
			for i, l := range speciesLabels {
				args[i] = l
			}
			for i, l := range allCallTypes {
				args[len(speciesLabels)+i] = l
			}

			rows, err := tx.Query(query, args...)
			if err != nil {
				errors = append(errors, SelectionImportError{
					Error: fmt.Sprintf("Failed to query call type IDs: %v", err),
					Stage: "insert",
				})
				return selectionIDs, errors
			}
			defer rows.Close()

			for rows.Next() {
				var id, label, speciesLabel string
				if err := rows.Scan(&id, &label, &speciesLabel); err == nil {
					if callTypeIDMap[speciesLabel] == nil {
						callTypeIDMap[speciesLabel] = make(map[string]string)
					}
					callTypeIDMap[speciesLabel][label] = id
				}
			}
		}
	}

	// Get file ID map (with fuzzy matching)
	fileIDMap := make(map[string]string) // base filename -> file ID

	// Pass 1: Exact match
	baseFilenames := make(map[string]bool)
	for _, sel := range selections {
		baseFilenames[sel.BaseFilename] = true
	}
	baseList := make([]string, 0, len(baseFilenames))
	for bf := range baseFilenames {
		baseList = append(baseList, bf)
	}

	exactNames := make([]string, len(baseList))
	for i, bf := range baseList {
		exactNames[i] = bf + ".wav"
	}

	if len(exactNames) > 0 {
		query := `SELECT id, file_name FROM file
		          WHERE file_name IN (` + placeholders(len(exactNames)) + `)
		            AND cluster_id = ? AND active = true`
		args := make([]interface{}, len(exactNames)+1)
		for i, n := range exactNames {
			args[i] = n
		}
		args[len(exactNames)] = clusterID

		rows, err := tx.Query(query, args...)
		if err != nil {
			errors = append(errors, SelectionImportError{
				Error: fmt.Sprintf("Failed to query file IDs (exact): %v", err),
				Stage: "insert",
			})
			return selectionIDs, errors
		}
		defer rows.Close()

		for rows.Next() {
			var id, fileName string
			if err := rows.Scan(&id, &fileName); err == nil {
				baseKey := strings.TrimSuffix(fileName, ".wav")
				fileIDMap[baseKey] = id
			}
		}
	}

	// Pass 2: Pattern match for unmatched
	var unmatchedBases []string
	for _, bf := range baseList {
		if _, found := fileIDMap[bf]; !found {
			unmatchedBases = append(unmatchedBases, bf)
		}
	}

	if len(unmatchedBases) > 0 {
		rows, err := tx.Query(`SELECT id, file_name FROM file WHERE cluster_id = ? AND active = true`, clusterID)
		if err != nil {
			errors = append(errors, SelectionImportError{
				Error: fmt.Sprintf("Failed to query all files for pattern match: %v", err),
				Stage: "insert",
			})
			return selectionIDs, errors
		}
		defer rows.Close()

		var allFiles []struct {
			ID       string
			FileName string
		}
		for rows.Next() {
			var id, fileName string
			if err := rows.Scan(&id, &fileName); err == nil {
				allFiles = append(allFiles, struct {
					ID       string
					FileName string
				}{ID: id, FileName: fileName})
			}
		}

		for _, bf := range unmatchedBases {
			pattern, found := utils.ExtractDateTimePattern(bf)
			if !found {
				continue
			}

			var matches []struct {
				ID       string
				FileName string
			}
			for _, f := range allFiles {
				if strings.Contains(f.FileName, pattern) {
					matches = append(matches, f)
				}
			}

			if len(matches) == 1 {
				fileIDMap[bf] = matches[0].ID
			}
		}
	}

	// Insert selections
	for _, sel := range selections {
		fileID, fileFound := fileIDMap[sel.BaseFilename]
		if !fileFound {
			errors = append(errors, SelectionImportError{
				FileName: sel.BaseFilename,
				Species:  sel.SpeciesLabel,
				CallType: sel.CallTypeLabel,
				Error:    "File ID not found (should have been caught in validation)",
				Stage:    "insert",
			})
			continue
		}

		speciesID, speciesFound := speciesIDMap[sel.SpeciesLabel]
		if !speciesFound {
			errors = append(errors, SelectionImportError{
				FileName: sel.BaseFilename,
				Species:  sel.SpeciesLabel,
				Error:    "Species ID not found (should have been caught in validation)",
				Stage:    "insert",
			})
			continue
		}

		// Insert selection
		selectionID, err := utils.GenerateShortID()
		if err != nil {
			errors = append(errors, SelectionImportError{
				FileName: sel.BaseFilename,
				Species:  sel.SpeciesLabel,
				CallType: sel.CallTypeLabel,
				Error:    fmt.Sprintf("Failed to generate selection ID: %v", err),
				Stage:    "insert",
			})
			continue
		}
		_, err = tx.Exec(`
			INSERT INTO selection (
				id, file_id, dataset_id, start_time, end_time,
				created_at, last_modified, active
			) VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, true)
		`, selectionID, fileID, datasetID, sel.StartTime, sel.EndTime)
		if err != nil {
			errors = append(errors, SelectionImportError{
				FileName: sel.BaseFilename,
				Species:  sel.SpeciesLabel,
				CallType: sel.CallTypeLabel,
				Error:    fmt.Sprintf("Failed to insert selection: %v", err),
				Stage:    "insert",
			})
			continue
		}

		// Insert label
		labelID, err := utils.GenerateShortID()
		if err != nil {
			errors = append(errors, SelectionImportError{
				FileName: sel.BaseFilename,
				Species:  sel.SpeciesLabel,
				CallType: sel.CallTypeLabel,
				Error:    fmt.Sprintf("Failed to generate label ID: %v", err),
				Stage:    "insert",
			})
			continue
		}
		_, err = tx.Exec(`
			INSERT INTO label (
				id, selection_id, species_id, filter_id, certainty,
				created_at, last_modified, active
			) VALUES (?, ?, ?, ?, NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, true)
		`, labelID, selectionID, speciesID, filterID)
		if err != nil {
			errors = append(errors, SelectionImportError{
				FileName: sel.BaseFilename,
				Species:  sel.SpeciesLabel,
				CallType: sel.CallTypeLabel,
				Error:    fmt.Sprintf("Failed to insert label: %v", err),
				Stage:    "insert",
			})
			continue
		}

		// Insert label_subtype if call type exists
		if sel.CallTypeLabel != "" {
			callTypeID := ""
			if callTypeIDMap[sel.SpeciesLabel] != nil {
				callTypeID = callTypeIDMap[sel.SpeciesLabel][sel.CallTypeLabel]
			}

			if callTypeID == "" {
				errors = append(errors, SelectionImportError{
					FileName: sel.BaseFilename,
					Species:  sel.SpeciesLabel,
					CallType: sel.CallTypeLabel,
					Error:    "Call type ID not found (should have been caught in validation)",
					Stage:    "insert",
				})
				continue
			}

			subtypeID, err := utils.GenerateShortID()
			if err != nil {
				errors = append(errors, SelectionImportError{
					FileName: sel.BaseFilename,
					Species:  sel.SpeciesLabel,
					CallType: sel.CallTypeLabel,
					Error:    fmt.Sprintf("Failed to generate label_subtype ID: %v", err),
					Stage:    "insert",
				})
				continue
			}
			_, err = tx.Exec(`
				INSERT INTO label_subtype (
					id, label_id, calltype_id, filter_id, certainty,
					created_at, last_modified, active
				) VALUES (?, ?, ?, ?, NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, true)
			`, subtypeID, labelID, callTypeID, filterID)
			if err != nil {
				errors = append(errors, SelectionImportError{
					FileName: sel.BaseFilename,
					Species:  sel.SpeciesLabel,
					CallType: sel.CallTypeLabel,
					Error:    fmt.Sprintf("Failed to insert label_subtype: %v", err),
					Stage:    "insert",
				})
				continue
			}
		}

		selectionIDs = append(selectionIDs, selectionID)
	}

	// Commit transaction
	if err := tx.Commit(); err != nil {
		errors = append(errors, SelectionImportError{
			Error: fmt.Sprintf("Failed to commit transaction: %v", err),
			Stage: "insert",
		})
		return []string{}, errors
	}

	return selectionIDs, errors
}

// placeholders generates SQL placeholder string for IN clauses
func placeholders(n int) string {
	if n == 0 {
		return ""
	}
	placeholders := make([]string, n)
	for i := range placeholders {
		placeholders[i] = "?"
	}
	return strings.Join(placeholders, ", ")
}