package tools
import (
"context"
"database/sql"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/modelcontextprotocol/go-sdk/mcp"
"skraak_mcp/db"
"skraak_mcp/utils"
)
type ImportMLSelectionsInput struct {
FolderPath string `json:"folder_path" jsonschema:"required,Absolute path to Clips_* folder"`
DatasetID string `json:"dataset_id" jsonschema:"required,Dataset ID (12-character nanoid)"`
ClusterID string `json:"cluster_id" jsonschema:"required,Cluster ID (12-character nanoid)"`
}
type ImportMLSelectionsOutput struct {
Summary ImportSelectionSummary `json:"summary"`
SelectionIDs []string `json:"selection_ids"`
Errors []SelectionImportError `json:"errors,omitempty"`
}
type ImportSelectionSummary struct {
FilterName string `json:"filter_name"`
TotalSelections int `json:"total_selections"`
ImportedSelections int `json:"imported_selections"`
FailedSelections int `json:"failed_selections"`
SpeciesCount int `json:"species_count"`
CallTypeCount int `json:"call_type_count"`
ProcessingTimeMs int64 `json:"processing_time_ms"`
}
type SelectionImportError struct {
FileName string `json:"file_name"`
Species string `json:"species,omitempty"`
CallType string `json:"call_type,omitempty"`
Error string `json:"error"`
Stage string `json:"stage"` }
type scannedSelection struct {
BaseFilename string
StartTime float64
EndTime float64
SpeciesLabel string
CallTypeLabel string WAVPath string
PNGPath string
}
func ImportMLSelections(
ctx context.Context,
req *mcp.CallToolRequest,
input ImportMLSelectionsInput,
) (*mcp.CallToolResult, ImportMLSelectionsOutput, error) {
startTime := time.Now()
var output ImportMLSelectionsOutput
folderName := filepath.Base(input.FolderPath)
filterName, _, err := utils.ParseMLFolderName(folderName)
if err != nil {
return nil, output, fmt.Errorf("invalid folder name: %w", err)
}
if _, err := os.Stat(input.FolderPath); os.IsNotExist(err) {
return nil, output, fmt.Errorf("folder does not exist: %s", input.FolderPath)
}
if err := validateMLImportEntities(input.DatasetID, input.ClusterID); err != nil {
return nil, output, fmt.Errorf("validation failed: %w", err)
}
selections, scanErrors := scanMLFolderStructure(input.FolderPath)
if len(selections) == 0 && len(scanErrors) > 0 {
output.Errors = scanErrors
output.Summary.FailedSelections = len(scanErrors)
return &mcp.CallToolResult{}, output, nil
}
validationErrors := validateAllSelectionEntities(
filterName,
input.DatasetID,
input.ClusterID,
selections,
)
if len(validationErrors) > 0 {
output.Errors = append(scanErrors, validationErrors...)
output.Summary.FilterName = filterName
output.Summary.TotalSelections = len(selections)
output.Summary.FailedSelections = len(output.Errors)
output.Summary.ProcessingTimeMs = time.Since(startTime).Milliseconds()
return &mcp.CallToolResult{}, output, nil
}
selectionIDs, insertErrors := insertSelectionsIntoDB(
filterName,
input.DatasetID,
input.ClusterID,
selections,
)
speciesSet := make(map[string]bool)
callTypeSet := make(map[string]bool)
for _, sel := range selections {
speciesSet[sel.SpeciesLabel] = true
if sel.CallTypeLabel != "" {
callTypeSet[sel.CallTypeLabel] = true
}
}
allErrors := append(scanErrors, validationErrors...)
allErrors = append(allErrors, insertErrors...)
output = ImportMLSelectionsOutput{
Summary: ImportSelectionSummary{
FilterName: filterName,
TotalSelections: len(selections),
ImportedSelections: len(selectionIDs),
FailedSelections: len(allErrors),
SpeciesCount: len(speciesSet),
CallTypeCount: len(callTypeSet),
ProcessingTimeMs: time.Since(startTime).Milliseconds(),
},
SelectionIDs: selectionIDs,
Errors: allErrors,
}
return &mcp.CallToolResult{}, output, nil
}
func validateMLImportEntities(datasetID, clusterID string) error {
database, err := db.OpenWriteableDB(dbPath)
if err != nil {
return fmt.Errorf("failed to connect to database: %w", err)
}
defer database.Close()
var datasetActive bool
err = database.QueryRow(`
SELECT active FROM dataset WHERE id = ?
`, datasetID).Scan(&datasetActive)
if err == sql.ErrNoRows {
return fmt.Errorf("dataset not found: %s", datasetID)
}
if err != nil {
return fmt.Errorf("failed to query dataset: %w", err)
}
if !datasetActive {
return fmt.Errorf("dataset is not active: %s", datasetID)
}
var clusterActive bool
var locationID string
err = database.QueryRow(`
SELECT c.active, c.location_id
FROM cluster c
JOIN location l ON c.location_id = l.id
WHERE c.id = ? AND l.dataset_id = ?
`, clusterID, datasetID).Scan(&clusterActive, &locationID)
if err == sql.ErrNoRows {
return fmt.Errorf("cluster not found or not linked to dataset: cluster=%s, dataset=%s", clusterID, datasetID)
}
if err != nil {
return fmt.Errorf("failed to query cluster: %w", err)
}
if !clusterActive {
return fmt.Errorf("cluster is not active: %s", clusterID)
}
return nil
}
func scanMLFolderStructure(rootPath string) ([]scannedSelection, []SelectionImportError) {
var selections []scannedSelection
var errors []SelectionImportError
entries, err := os.ReadDir(rootPath)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to read root folder: %v", err),
Stage: "scan",
})
return selections, errors
}
for _, entry := range entries {
if !entry.IsDir() {
continue
}
speciesLabel := entry.Name()
speciesPath := filepath.Join(rootPath, speciesLabel)
speciesEntries, err := os.ReadDir(speciesPath)
if err != nil {
errors = append(errors, SelectionImportError{
Species: speciesLabel,
Error: fmt.Sprintf("Failed to read species folder: %v", err),
Stage: "scan",
})
continue
}
var wavFiles, pngFiles []string
var subDirs []string
for _, se := range speciesEntries {
if se.IsDir() {
subDirs = append(subDirs, se.Name())
} else {
ext := strings.ToLower(filepath.Ext(se.Name()))
if ext == ".wav" {
wavFiles = append(wavFiles, se.Name())
} else if ext == ".png" {
pngFiles = append(pngFiles, se.Name())
}
}
}
paired, mismatched := utils.ValidateWAVPNGPairs(wavFiles, pngFiles)
for _, baseName := range paired {
baseFilename, startTime, endTime, err := utils.ParseSelectionFilename(baseName + ".wav")
if err != nil {
errors = append(errors, SelectionImportError{
FileName: baseName + ".wav",
Species: speciesLabel,
Error: err.Error(),
Stage: "parse",
})
continue
}
selections = append(selections, scannedSelection{
BaseFilename: baseFilename,
StartTime: startTime,
EndTime: endTime,
SpeciesLabel: speciesLabel,
CallTypeLabel: "", WAVPath: filepath.Join(speciesPath, baseName+".wav"),
PNGPath: filepath.Join(speciesPath, baseName+".png"),
})
}
for _, mm := range mismatched {
errors = append(errors, SelectionImportError{
FileName: mm,
Species: speciesLabel,
Error: "Missing corresponding PNG file",
Stage: "scan",
})
}
for _, callTypeLabel := range subDirs {
callTypePath := filepath.Join(speciesPath, callTypeLabel)
callTypeEntries, err := os.ReadDir(callTypePath)
if err != nil {
errors = append(errors, SelectionImportError{
Species: speciesLabel,
CallType: callTypeLabel,
Error: fmt.Sprintf("Failed to read call type folder: %v", err),
Stage: "scan",
})
continue
}
var ctWavFiles, ctPngFiles []string
for _, cte := range callTypeEntries {
if cte.IsDir() {
continue }
ext := strings.ToLower(filepath.Ext(cte.Name()))
if ext == ".wav" {
ctWavFiles = append(ctWavFiles, cte.Name())
} else if ext == ".png" {
ctPngFiles = append(ctPngFiles, cte.Name())
}
}
ctPaired, ctMismatched := utils.ValidateWAVPNGPairs(ctWavFiles, ctPngFiles)
for _, baseName := range ctPaired {
baseFilename, startTime, endTime, err := utils.ParseSelectionFilename(baseName + ".wav")
if err != nil {
errors = append(errors, SelectionImportError{
FileName: baseName + ".wav",
Species: speciesLabel,
CallType: callTypeLabel,
Error: err.Error(),
Stage: "parse",
})
continue
}
selections = append(selections, scannedSelection{
BaseFilename: baseFilename,
StartTime: startTime,
EndTime: endTime,
SpeciesLabel: speciesLabel,
CallTypeLabel: callTypeLabel,
WAVPath: filepath.Join(callTypePath, baseName+".wav"),
PNGPath: filepath.Join(callTypePath, baseName+".png"),
})
}
for _, mm := range ctMismatched {
errors = append(errors, SelectionImportError{
FileName: mm,
Species: speciesLabel,
CallType: callTypeLabel,
Error: "Missing corresponding PNG file",
Stage: "scan",
})
}
}
}
return selections, errors
}
func validateAllSelectionEntities(
filterName, datasetID, clusterID string,
selections []scannedSelection,
) []SelectionImportError {
var errors []SelectionImportError
database, err := db.OpenWriteableDB(dbPath)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to connect to database: %v", err),
Stage: "validate",
})
return errors
}
defer database.Close()
var filterID string
err = database.QueryRow(`
SELECT id FROM filter WHERE name = ? AND active = true
`, filterName).Scan(&filterID)
if err == sql.ErrNoRows {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Filter '%s' not found in database", filterName),
Stage: "validate",
})
return errors }
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to query filter: %v", err),
Stage: "validate",
})
return errors
}
speciesSet := make(map[string]bool)
callTypesBySpecies := make(map[string]map[string]bool)
for _, sel := range selections {
speciesSet[sel.SpeciesLabel] = true
if sel.CallTypeLabel != "" {
if callTypesBySpecies[sel.SpeciesLabel] == nil {
callTypesBySpecies[sel.SpeciesLabel] = make(map[string]bool)
}
callTypesBySpecies[sel.SpeciesLabel][sel.CallTypeLabel] = true
}
}
speciesLabels := make([]string, 0, len(speciesSet))
for label := range speciesSet {
speciesLabels = append(speciesLabels, label)
}
sort.Strings(speciesLabels)
speciesIDMap := make(map[string]string) if len(speciesLabels) > 0 {
query := `
SELECT s.id, s.label
FROM species s
JOIN species_dataset sd ON s.id = sd.species_id
WHERE s.label IN (` + placeholders(len(speciesLabels)) + `)
AND sd.dataset_id = ?
AND s.active = true
`
args := make([]interface{}, len(speciesLabels)+1)
for i, label := range speciesLabels {
args[i] = label
}
args[len(speciesLabels)] = datasetID
rows, err := database.Query(query, args...)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to query species: %v", err),
Stage: "validate",
})
return errors
}
defer rows.Close()
for rows.Next() {
var id, label string
if err := rows.Scan(&id, &label); err != nil {
continue
}
speciesIDMap[label] = id
}
var missingSpecies []string
for _, label := range speciesLabels {
if _, exists := speciesIDMap[label]; !exists {
missingSpecies = append(missingSpecies, label)
}
}
if len(missingSpecies) > 0 {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Species not found or not linked to dataset: [%s]", strings.Join(missingSpecies, ", ")),
Stage: "validate",
})
}
}
callTypeIDMap := make(map[string]map[string]string) if len(callTypesBySpecies) > 0 {
var allCallTypeLabels []string
for _, ctSet := range callTypesBySpecies {
for ct := range ctSet {
allCallTypeLabels = append(allCallTypeLabels, ct)
}
}
callTypeLabelSet := make(map[string]bool)
for _, ct := range allCallTypeLabels {
callTypeLabelSet[ct] = true
}
allCallTypeLabels = make([]string, 0, len(callTypeLabelSet))
for ct := range callTypeLabelSet {
allCallTypeLabels = append(allCallTypeLabels, ct)
}
sort.Strings(allCallTypeLabels)
if len(allCallTypeLabels) > 0 && len(speciesLabels) > 0 {
query := `
SELECT ct.id, ct.label, s.label as species_label
FROM call_type ct
JOIN species s ON ct.species_id = s.id
WHERE s.label IN (` + placeholders(len(speciesLabels)) + `)
AND ct.label IN (` + placeholders(len(allCallTypeLabels)) + `)
AND ct.active = true
`
args := make([]interface{}, len(speciesLabels)+len(allCallTypeLabels))
for i, label := range speciesLabels {
args[i] = label
}
for i, label := range allCallTypeLabels {
args[len(speciesLabels)+i] = label
}
rows, err := database.Query(query, args...)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to query call types: %v", err),
Stage: "validate",
})
return errors
}
defer rows.Close()
for rows.Next() {
var id, label, speciesLabel string
if err := rows.Scan(&id, &label, &speciesLabel); err != nil {
continue
}
if callTypeIDMap[speciesLabel] == nil {
callTypeIDMap[speciesLabel] = make(map[string]string)
}
callTypeIDMap[speciesLabel][label] = id
}
var missingCallTypes []string
for speciesLabel, ctSet := range callTypesBySpecies {
for ctLabel := range ctSet {
if callTypeIDMap[speciesLabel] == nil || callTypeIDMap[speciesLabel][ctLabel] == "" {
missingCallTypes = append(missingCallTypes, fmt.Sprintf("%s/%s", speciesLabel, ctLabel))
}
}
}
if len(missingCallTypes) > 0 {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Call types not found: [%s]", strings.Join(missingCallTypes, ", ")),
Stage: "validate",
})
}
}
}
baseFilenames := make(map[string]bool)
for _, sel := range selections {
baseFilenames[sel.BaseFilename] = true
}
baseFilenameList := make([]string, 0, len(baseFilenames))
for bf := range baseFilenames {
baseFilenameList = append(baseFilenameList, bf)
}
sort.Strings(baseFilenameList)
fileIDMap := make(map[string]struct {
ID string
Duration float64
})
exactMatchNames := make([]string, len(baseFilenameList))
for i, bf := range baseFilenameList {
exactMatchNames[i] = bf + ".wav"
}
if len(exactMatchNames) > 0 {
query := `
SELECT id, file_name, duration
FROM file
WHERE file_name IN (` + placeholders(len(exactMatchNames)) + `)
AND cluster_id = ?
AND active = true
`
args := make([]interface{}, len(exactMatchNames)+1)
for i, name := range exactMatchNames {
args[i] = name
}
args[len(exactMatchNames)] = clusterID
rows, err := database.Query(query, args...)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to query files (exact match): %v", err),
Stage: "validate",
})
return errors
}
defer rows.Close()
for rows.Next() {
var id, fileName string
var duration float64
if err := rows.Scan(&id, &fileName, &duration); err != nil {
continue
}
baseKey := strings.TrimSuffix(fileName, ".wav")
fileIDMap[baseKey] = struct {
ID string
Duration float64
}{ID: id, Duration: duration}
}
}
var unmatchedFiles []string
for _, bf := range baseFilenameList {
if _, found := fileIDMap[bf]; !found {
unmatchedFiles = append(unmatchedFiles, bf)
}
}
if len(unmatchedFiles) > 0 {
rows, err := database.Query(`
SELECT id, file_name, duration
FROM file
WHERE cluster_id = ? AND active = true
`, clusterID)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to query files (pattern match): %v", err),
Stage: "validate",
})
return errors
}
defer rows.Close()
var allClusterFiles []struct {
ID string
FileName string
Duration float64
}
for rows.Next() {
var id, fileName string
var duration float64
if err := rows.Scan(&id, &fileName, &duration); err != nil {
continue
}
allClusterFiles = append(allClusterFiles, struct {
ID string
FileName string
Duration float64
}{ID: id, FileName: fileName, Duration: duration})
}
for _, bf := range unmatchedFiles {
pattern, found := utils.ExtractDateTimePattern(bf)
if !found {
errors = append(errors, SelectionImportError{
FileName: bf,
Error: "File not found in cluster and no date_time pattern detected",
Stage: "validate",
})
continue
}
var matches []struct {
ID string
FileName string
Duration float64
}
for _, f := range allClusterFiles {
if strings.Contains(f.FileName, pattern) {
matches = append(matches, f)
}
}
if len(matches) == 0 {
errors = append(errors, SelectionImportError{
FileName: bf,
Error: fmt.Sprintf("File not found for base name (tried exact match and pattern '%s')", pattern),
Stage: "validate",
})
} else if len(matches) > 1 {
matchNames := make([]string, len(matches))
for i, m := range matches {
matchNames[i] = m.FileName
}
errors = append(errors, SelectionImportError{
FileName: bf,
Error: fmt.Sprintf("Ambiguous file match: multiple files found [%s]", strings.Join(matchNames, ", ")),
Stage: "validate",
})
} else {
baseKey := bf
fileIDMap[baseKey] = struct {
ID string
Duration float64
}{ID: matches[0].ID, Duration: matches[0].Duration}
}
}
}
for _, sel := range selections {
fileInfo, found := fileIDMap[sel.BaseFilename]
if !found {
continue
}
if sel.StartTime >= sel.EndTime {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
CallType: sel.CallTypeLabel,
Error: fmt.Sprintf("Start time (%.2fs) must be less than end time (%.2fs)", sel.StartTime, sel.EndTime),
Stage: "validate",
})
}
if sel.EndTime > fileInfo.Duration {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
CallType: sel.CallTypeLabel,
Error: fmt.Sprintf("Selection end time (%.2fs) exceeds file duration (%.2fs)", sel.EndTime, fileInfo.Duration),
Stage: "validate",
})
}
}
return errors
}
func insertSelectionsIntoDB(
filterName, datasetID, clusterID string,
selections []scannedSelection,
) ([]string, []SelectionImportError) {
var selectionIDs []string
var errors []SelectionImportError
database, err := db.OpenWriteableDB(dbPath)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to connect to database: %v", err),
Stage: "insert",
})
return selectionIDs, errors
}
defer database.Close()
tx, err := database.Begin()
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to begin transaction: %v", err),
Stage: "insert",
})
return selectionIDs, errors
}
defer tx.Rollback()
var filterID string
err = tx.QueryRow(`SELECT id FROM filter WHERE name = ? AND active = true`, filterName).Scan(&filterID)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to get filter ID: %v", err),
Stage: "insert",
})
return selectionIDs, errors
}
speciesSet := make(map[string]bool)
for _, sel := range selections {
speciesSet[sel.SpeciesLabel] = true
}
speciesLabels := make([]string, 0, len(speciesSet))
for label := range speciesSet {
speciesLabels = append(speciesLabels, label)
}
speciesIDMap := make(map[string]string)
if len(speciesLabels) > 0 {
query := `SELECT s.id, s.label FROM species s
JOIN species_dataset sd ON s.id = sd.species_id
WHERE s.label IN (` + placeholders(len(speciesLabels)) + `)
AND sd.dataset_id = ? AND s.active = true`
args := make([]interface{}, len(speciesLabels)+1)
for i, l := range speciesLabels {
args[i] = l
}
args[len(speciesLabels)] = datasetID
rows, err := tx.Query(query, args...)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to query species IDs: %v", err),
Stage: "insert",
})
return selectionIDs, errors
}
defer rows.Close()
for rows.Next() {
var id, label string
if err := rows.Scan(&id, &label); err == nil {
speciesIDMap[label] = id
}
}
}
callTypeIDMap := make(map[string]map[string]string) callTypesBySpecies := make(map[string]map[string]bool)
for _, sel := range selections {
if sel.CallTypeLabel != "" {
if callTypesBySpecies[sel.SpeciesLabel] == nil {
callTypesBySpecies[sel.SpeciesLabel] = make(map[string]bool)
}
callTypesBySpecies[sel.SpeciesLabel][sel.CallTypeLabel] = true
}
}
if len(callTypesBySpecies) > 0 {
var allCallTypes []string
for _, ctSet := range callTypesBySpecies {
for ct := range ctSet {
allCallTypes = append(allCallTypes, ct)
}
}
ctSet := make(map[string]bool)
for _, ct := range allCallTypes {
ctSet[ct] = true
}
allCallTypes = make([]string, 0, len(ctSet))
for ct := range ctSet {
allCallTypes = append(allCallTypes, ct)
}
if len(allCallTypes) > 0 && len(speciesLabels) > 0 {
query := `SELECT ct.id, ct.label, s.label as species_label
FROM call_type ct
JOIN species s ON ct.species_id = s.id
WHERE s.label IN (` + placeholders(len(speciesLabels)) + `)
AND ct.label IN (` + placeholders(len(allCallTypes)) + `)
AND ct.active = true`
args := make([]interface{}, len(speciesLabels)+len(allCallTypes))
for i, l := range speciesLabels {
args[i] = l
}
for i, l := range allCallTypes {
args[len(speciesLabels)+i] = l
}
rows, err := tx.Query(query, args...)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to query call type IDs: %v", err),
Stage: "insert",
})
return selectionIDs, errors
}
defer rows.Close()
for rows.Next() {
var id, label, speciesLabel string
if err := rows.Scan(&id, &label, &speciesLabel); err == nil {
if callTypeIDMap[speciesLabel] == nil {
callTypeIDMap[speciesLabel] = make(map[string]string)
}
callTypeIDMap[speciesLabel][label] = id
}
}
}
}
fileIDMap := make(map[string]string)
baseFilenames := make(map[string]bool)
for _, sel := range selections {
baseFilenames[sel.BaseFilename] = true
}
baseList := make([]string, 0, len(baseFilenames))
for bf := range baseFilenames {
baseList = append(baseList, bf)
}
exactNames := make([]string, len(baseList))
for i, bf := range baseList {
exactNames[i] = bf + ".wav"
}
if len(exactNames) > 0 {
query := `SELECT id, file_name FROM file
WHERE file_name IN (` + placeholders(len(exactNames)) + `)
AND cluster_id = ? AND active = true`
args := make([]interface{}, len(exactNames)+1)
for i, n := range exactNames {
args[i] = n
}
args[len(exactNames)] = clusterID
rows, err := tx.Query(query, args...)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to query file IDs (exact): %v", err),
Stage: "insert",
})
return selectionIDs, errors
}
defer rows.Close()
for rows.Next() {
var id, fileName string
if err := rows.Scan(&id, &fileName); err == nil {
baseKey := strings.TrimSuffix(fileName, ".wav")
fileIDMap[baseKey] = id
}
}
}
var unmatchedBases []string
for _, bf := range baseList {
if _, found := fileIDMap[bf]; !found {
unmatchedBases = append(unmatchedBases, bf)
}
}
if len(unmatchedBases) > 0 {
rows, err := tx.Query(`SELECT id, file_name FROM file WHERE cluster_id = ? AND active = true`, clusterID)
if err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to query all files for pattern match: %v", err),
Stage: "insert",
})
return selectionIDs, errors
}
defer rows.Close()
var allFiles []struct {
ID string
FileName string
}
for rows.Next() {
var id, fileName string
if err := rows.Scan(&id, &fileName); err == nil {
allFiles = append(allFiles, struct {
ID string
FileName string
}{ID: id, FileName: fileName})
}
}
for _, bf := range unmatchedBases {
pattern, found := utils.ExtractDateTimePattern(bf)
if !found {
continue
}
var matches []struct {
ID string
FileName string
}
for _, f := range allFiles {
if strings.Contains(f.FileName, pattern) {
matches = append(matches, f)
}
}
if len(matches) == 1 {
fileIDMap[bf] = matches[0].ID
}
}
}
for _, sel := range selections {
fileID, fileFound := fileIDMap[sel.BaseFilename]
if !fileFound {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
CallType: sel.CallTypeLabel,
Error: "File ID not found (should have been caught in validation)",
Stage: "insert",
})
continue
}
speciesID, speciesFound := speciesIDMap[sel.SpeciesLabel]
if !speciesFound {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
Error: "Species ID not found (should have been caught in validation)",
Stage: "insert",
})
continue
}
selectionID, err := utils.GenerateShortID()
if err != nil {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
CallType: sel.CallTypeLabel,
Error: fmt.Sprintf("Failed to generate selection ID: %v", err),
Stage: "insert",
})
continue
}
_, err = tx.Exec(`
INSERT INTO selection (
id, file_id, dataset_id, start_time, end_time,
created_at, last_modified, active
) VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, true)
`, selectionID, fileID, datasetID, sel.StartTime, sel.EndTime)
if err != nil {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
CallType: sel.CallTypeLabel,
Error: fmt.Sprintf("Failed to insert selection: %v", err),
Stage: "insert",
})
continue
}
labelID, err := utils.GenerateShortID()
if err != nil {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
CallType: sel.CallTypeLabel,
Error: fmt.Sprintf("Failed to generate label ID: %v", err),
Stage: "insert",
})
continue
}
_, err = tx.Exec(`
INSERT INTO label (
id, selection_id, species_id, filter_id, certainty,
created_at, last_modified, active
) VALUES (?, ?, ?, ?, NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, true)
`, labelID, selectionID, speciesID, filterID)
if err != nil {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
CallType: sel.CallTypeLabel,
Error: fmt.Sprintf("Failed to insert label: %v", err),
Stage: "insert",
})
continue
}
if sel.CallTypeLabel != "" {
callTypeID := ""
if callTypeIDMap[sel.SpeciesLabel] != nil {
callTypeID = callTypeIDMap[sel.SpeciesLabel][sel.CallTypeLabel]
}
if callTypeID == "" {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
CallType: sel.CallTypeLabel,
Error: "Call type ID not found (should have been caught in validation)",
Stage: "insert",
})
continue
}
subtypeID, err := utils.GenerateShortID()
if err != nil {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
CallType: sel.CallTypeLabel,
Error: fmt.Sprintf("Failed to generate label_subtype ID: %v", err),
Stage: "insert",
})
continue
}
_, err = tx.Exec(`
INSERT INTO label_subtype (
id, label_id, calltype_id, filter_id, certainty,
created_at, last_modified, active
) VALUES (?, ?, ?, ?, NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, true)
`, subtypeID, labelID, callTypeID, filterID)
if err != nil {
errors = append(errors, SelectionImportError{
FileName: sel.BaseFilename,
Species: sel.SpeciesLabel,
CallType: sel.CallTypeLabel,
Error: fmt.Sprintf("Failed to insert label_subtype: %v", err),
Stage: "insert",
})
continue
}
}
selectionIDs = append(selectionIDs, selectionID)
}
if err := tx.Commit(); err != nil {
errors = append(errors, SelectionImportError{
Error: fmt.Sprintf("Failed to commit transaction: %v", err),
Stage: "insert",
})
return []string{}, errors
}
return selectionIDs, errors
}
func placeholders(n int) string {
if n == 0 {
return ""
}
placeholders := make([]string, n)
for i := range placeholders {
placeholders[i] = "?"
}
return strings.Join(placeholders, ", ")
}