package tools
import (
"context"
"database/sql"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"time"
"skraak/db"
"skraak/utils"
)
// ImportUnstructuredInput carries the caller-supplied parameters for an
// unstructured import.
type ImportUnstructuredInput struct {
	// DatasetID names the dataset that imported files are linked to.
	DatasetID string `json:"dataset_id"`

	// FolderPath is the directory that is scanned for WAV files.
	FolderPath string `json:"folder_path"`

	// Recursive selects whether subdirectories are scanned as well.
	// A nil pointer is treated as true.
	Recursive *bool `json:"recursive,omitempty"`
}
type ImportUnstructuredOutput struct {
TotalFiles int `json:"total_files"`
ImportedFiles int `json:"imported_files"`
SkippedFiles int `json:"skipped_files"` FailedFiles int `json:"failed_files"`
TotalDuration float64 `json:"total_duration_seconds"`
ProcessingTime string `json:"processing_time"`
Errors []utils.FileImportError `json:"errors,omitempty"`
}
// ImportUnstructured scans input.FolderPath for WAV files and imports each of
// them into the dataset named by input.DatasetID.
//
// The input is validated first; a validation failure is returned wrapped as
// "validation failed". Scan errors and per-file processing errors do not abort
// the run: they are appended to the Errors slice of the returned output and
// the remaining files are still processed. All files are processed inside one
// db.WithWriteTx call, and an error from that call is returned as-is.
//
// Cancellation of ctx is checked before each file; once ctx is done the
// callback returns an error wrapping ctx.Err() and no further files are
// touched.
//
// NOTE(review): when an error is returned the output still carries the
// counters accumulated so far; presumably the transaction is rolled back in
// that case, so those counters should not be trusted — confirm against
// db.WithWriteTx.
func ImportUnstructured(
	ctx context.Context,
	input ImportUnstructuredInput,
) (ImportUnstructuredOutput, error) {
	startTime := time.Now()
	var output ImportUnstructuredOutput

	// Recursive scanning is the default when the caller leaves the flag unset.
	recursive := true
	if input.Recursive != nil {
		recursive = *input.Recursive
	}

	if err := validateUnstructuredInput(input); err != nil {
		return output, fmt.Errorf("validation failed: %w", err)
	}

	files, scanErrors := scanWavFiles(input.FolderPath, recursive)
	output.Errors = append(output.Errors, scanErrors...)
	output.TotalFiles = len(files)

	// Nothing to import: skip opening a write transaction altogether.
	if len(files) == 0 {
		output.ProcessingTime = time.Since(startTime).String()
		return output, nil
	}

	err := db.WithWriteTx(ctx, dbPath, "import_unstructured", func(database *sql.DB, tx *db.LoggedTx) error {
		for _, filePath := range files {
			// Each file is parsed, hashed and inserted, which can take a
			// while for a large folder; stop as soon as the caller gives up.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return fmt.Errorf("import cancelled: %w", ctxErr)
			}

			fileResult, procErr := processUnstructuredFile(tx, filePath, input.DatasetID)
			if procErr != nil {
				// A single bad file is recorded and the batch carries on.
				output.FailedFiles++
				output.Errors = append(output.Errors, utils.FileImportError{
					FileName: filepath.Base(filePath),
					Error:    procErr.Error(),
					Stage:    utils.StageProcess,
				})
				continue
			}

			if fileResult.Skipped {
				output.SkippedFiles++
				continue
			}
			output.ImportedFiles++
			output.TotalDuration += fileResult.Duration
		}
		return nil
	})
	if err != nil {
		return output, err
	}

	output.ProcessingTime = time.Since(startTime).String()
	return output, nil
}
// unstructuredFileResult reports what happened to a single file during an
// unstructured import.
type unstructuredFileResult struct {
	// Skipped is true when the file was not inserted.
	Skipped bool

	// Duration is the duration taken from the file's parsed WAV header.
	Duration float64
}
// processUnstructuredFile imports one WAV file through tx.
//
// It parses the WAV header, computes the file's XXH64 hash and asks
// utils.CheckDuplicateHash whether that hash is already known. A duplicate
// yields a result with Skipped set and nothing is written. Otherwise a new ID
// is generated, a row is inserted into file (using the header's FileModTime as
// timestamp_local) and a row linking it to datasetID is inserted into
// file_dataset.
//
// On any failure it returns a nil result and an error naming the failing step.
func processUnstructuredFile(tx *db.LoggedTx, filePath, datasetID string) (*unstructuredFileResult, error) {
	// The statement text is kept verbatim; only its position in the function
	// has changed.
	const insertFileSQL = `
INSERT INTO file (
id, file_name, xxh64_hash, location_id, cluster_id,
timestamp_local, duration, sample_rate,
maybe_solar_night, maybe_civil_night, moon_phase,
active
) VALUES (?, ?, ?, NULL, NULL, ?, ?, ?, NULL, NULL, NULL, TRUE)
`
	const linkDatasetSQL = "INSERT INTO file_dataset (file_id, dataset_id) VALUES (?, ?)"

	header, headerErr := utils.ParseWAVHeader(filePath)
	if headerErr != nil {
		return nil, fmt.Errorf("WAV header parsing failed: %w", headerErr)
	}

	digest, hashErr := utils.ComputeXXH64(filePath)
	if hashErr != nil {
		return nil, fmt.Errorf("hash calculation failed: %w", hashErr)
	}

	// The first return value of the duplicate check is not needed here.
	_, alreadyKnown, dupErr := utils.CheckDuplicateHash(tx, digest)
	if dupErr != nil {
		return nil, fmt.Errorf("duplicate check failed: %w", dupErr)
	}
	if alreadyKnown {
		return &unstructuredFileResult{Skipped: true, Duration: header.Duration}, nil
	}

	newID, idErr := utils.GenerateLongID()
	if idErr != nil {
		return nil, fmt.Errorf("ID generation failed: %w", idErr)
	}

	if _, execErr := tx.Exec(
		insertFileSQL,
		newID,
		filepath.Base(filePath),
		digest,
		header.FileModTime,
		header.Duration,
		header.SampleRate,
	); execErr != nil {
		return nil, fmt.Errorf("file insert failed: %w", execErr)
	}

	if _, execErr := tx.Exec(linkDatasetSQL, newID, datasetID); execErr != nil {
		return nil, fmt.Errorf("file_dataset insert failed: %w", execErr)
	}

	return &unstructuredFileResult{Duration: header.Duration}, nil
}
// validateUnstructuredInput checks an import request before any scanning or
// writing happens. It returns the first failure among:
//   - utils.ValidateShortID rejecting DatasetID,
//   - FolderPath not being stat-able or not being a directory,
//   - db.DatasetExistsAndActive or db.ValidateDatasetTypeUnstructured
//     returning an error for DatasetID.
//
// A nil return means every check passed.
func validateUnstructuredInput(input ImportUnstructuredInput) error {
	if idErr := utils.ValidateShortID(input.DatasetID, "dataset_id"); idErr != nil {
		return idErr
	}

	stat, statErr := os.Stat(input.FolderPath)
	switch {
	case statErr != nil:
		return fmt.Errorf("folder not accessible: %w", statErr)
	case !stat.IsDir():
		return fmt.Errorf("path is not a directory: %s", input.FolderPath)
	}

	// Both dataset checks run against the same read-only handle.
	checkDataset := func(conn *sql.DB) error {
		_, existsErr := db.DatasetExistsAndActive(conn, input.DatasetID)
		if existsErr != nil {
			return existsErr
		}
		typeErr := db.ValidateDatasetTypeUnstructured(conn, input.DatasetID)
		if typeErr != nil {
			return typeErr
		}
		return nil
	}
	return db.WithReadDB(dbPath, checkDataset)
}
// scanWavFiles returns the paths of files under folderPath whose names end in
// ".wav", compared case-insensitively, together with any errors met while
// scanning.
//
// With recursive set, the whole tree is walked with filepath.WalkDir; an entry
// that cannot be read is recorded as a scan error and the walk continues.
// Without it, only the direct entries of folderPath are listed via os.ReadDir;
// if that listing fails, the error is recorded and no files are returned.
//
// Scan errors are reported with utils.StageScan and are never fatal to the
// caller: the function has no error return of its own.
func scanWavFiles(folderPath string, recursive bool) ([]string, []utils.FileImportError) {
	var (
		files    []string
		scanErrs []utils.FileImportError
	)

	// recordErr appends one scan-stage error for the given path.
	recordErr := func(path string, err error) {
		scanErrs = append(scanErrs, utils.FileImportError{
			FileName: path,
			Error:    err.Error(),
			Stage:    utils.StageScan,
		})
	}
	// isWav reports whether a file name carries a .wav suffix in any case.
	isWav := func(name string) bool {
		return strings.HasSuffix(strings.ToLower(name), ".wav")
	}

	if !recursive {
		entries, err := os.ReadDir(folderPath)
		if err != nil {
			recordErr(folderPath, err)
			return nil, scanErrs
		}
		for _, entry := range entries {
			if !entry.IsDir() && isWav(entry.Name()) {
				files = append(files, filepath.Join(folderPath, entry.Name()))
			}
		}
		return files, scanErrs
	}

	walkErr := filepath.WalkDir(folderPath, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// Returning nil keeps the walk going past unreadable entries.
			recordErr(path, err)
			return nil
		}
		if !d.IsDir() && isWav(d.Name()) {
			files = append(files, path)
		}
		return nil
	})
	if walkErr != nil {
		recordErr(folderPath, walkErr)
	}
	return files, scanErrs
}