package cmd
import (
"context"
"encoding/json"
"flag"
"fmt"
"os"
"skraak/db"
"skraak/tools"
)
// RunImport dispatches the "import" command to one of its subcommands
// (bulk, file, folder, segments, unstructured). It prints usage and exits
// with status 1 when no subcommand or an unknown subcommand is given.
func RunImport(args []string) {
	if len(args) == 0 {
		printImportUsage()
		os.Exit(1)
	}
	// Subcommand dispatch table; each handler receives the remaining args.
	handlers := map[string]func([]string){
		"bulk":         runImportBulk,
		"file":         runImportFile,
		"folder":       runImportFolder,
		"segments":     runImportSegments,
		"unstructured": runImportUnstructured,
	}
	handler, ok := handlers[args[0]]
	if !ok {
		fmt.Fprintf(os.Stderr, "Unknown import subcommand: %s\n\n", args[0])
		printImportUsage()
		os.Exit(1)
	}
	handler(args[1:])
}
// printImportUsage writes the top-level usage text for "skraak import"
// to stderr, listing all subcommands and example invocations.
func printImportUsage() {
	fmt.Fprint(os.Stderr, `Usage: skraak import <subcommand> [options]

Subcommands:
 bulk Bulk import WAV files from CSV (structured datasets)
 file Import a single WAV file (structured datasets)
 folder Import all WAV files from a folder (structured datasets)
 segments Import segments from AviaNZ .data files (structured datasets)
 unstructured Import WAV files into unstructured dataset (no location/cluster)

Examples:
 skraak import bulk --db ./db/skraak.duckdb --dataset abc123 --csv import.csv --log progress.log
 skraak import file --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --file /path/to/file.wav
 skraak import folder --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --folder /path/to/folder
 skraak import segments --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --folder /path/to/folder --mapping mapping.json
 skraak import unstructured --db ./db/skraak.duckdb --dataset abc123 --folder /path/to/folder
`)
}
// runImportBulk implements "skraak import bulk": a CSV-driven bulk import
// of WAV files across multiple locations/clusters into one dataset.
// Progress is written to a log file so long runs can be tailed.
func runImportBulk(args []string) {
	flags := flag.NewFlagSet("import bulk", flag.ExitOnError)
	dbFlag := flags.String("db", "", "Path to DuckDB database (required)")
	dsFlag := flags.String("dataset", "", "Dataset ID (required)")
	csvFlag := flags.String("csv", "", "Path to CSV file (required)")
	logFlag := flags.String("log", "", "Path to progress log file (required)")
	flags.Usage = func() {
		fmt.Fprint(os.Stderr, "Usage: skraak import bulk [options]\n\nBulk import WAV files across multiple locations/clusters using a CSV file.\n\nOptions:\n")
		flags.PrintDefaults()
		fmt.Fprint(os.Stderr, "\nCSV format: location_name,location_id,directory_path,date_range,sample_rate,file_count\n\nMonitor progress: tail -f <log-file>\n")
	}
	// ExitOnError makes Parse exit on failure; the branch is kept for parity.
	if flags.Parse(args) != nil {
		os.Exit(1)
	}
	// All four flags are mandatory; collect every missing one before failing.
	required := []struct {
		name  string
		value *string
	}{
		{"--db", dbFlag},
		{"--dataset", dsFlag},
		{"--csv", csvFlag},
		{"--log", logFlag},
	}
	var missing []string
	for _, r := range required {
		if *r.value == "" {
			missing = append(missing, r.name)
		}
	}
	if len(missing) != 0 {
		fmt.Fprintf(os.Stderr, "Error: missing required flags: %v\n\n", missing)
		flags.Usage()
		os.Exit(1)
	}
	tools.SetDBPath(*dbFlag)
	// Event log lives next to the database file.
	db.SetEventLogConfig(db.EventLogConfig{
		Enabled: true,
		Path:    *dbFlag + ".events.jsonl",
	})
	defer db.CloseEventLog()
	fmt.Fprintf(os.Stderr, "Starting bulk import...\n Database: %s\n Dataset: %s\n CSV: %s\n Log: %s\n\nMonitor progress: tail -f %s\n\n",
		*dbFlag, *dsFlag, *csvFlag, *logFlag, *logFlag)
	result, err := tools.BulkFileImport(context.Background(), tools.BulkFileImportInput{
		DatasetID:   *dsFlag,
		CSVPath:     *csvFlag,
		LogFilePath: *logFlag,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		// Emit partial results if any work completed before the failure.
		if result.TotalLocations > 0 || result.FilesImported > 0 {
			printJSON(result)
		}
		os.Exit(1)
	}
	printJSON(result)
}
// runImportFile implements "skraak import file": imports one WAV file into
// a structured dataset under a specific location and cluster.
func runImportFile(args []string) {
	flags := flag.NewFlagSet("import file", flag.ExitOnError)
	dbFlag := flags.String("db", "", "Path to DuckDB database (required)")
	dsFlag := flags.String("dataset", "", "Dataset ID (required)")
	locFlag := flags.String("location", "", "Location ID (required)")
	clFlag := flags.String("cluster", "", "Cluster ID (required)")
	fileFlag := flags.String("file", "", "Path to WAV file (required)")
	flags.Usage = func() {
		fmt.Fprint(os.Stderr, "Usage: skraak import file [options]\n\nImport a single WAV file into the database.\n\nOptions:\n")
		flags.PrintDefaults()
		fmt.Fprint(os.Stderr, "\nExamples:\n skraak import file --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --file /path/to/file.wav\n")
	}
	// ExitOnError makes Parse exit on failure; the branch is kept for parity.
	if flags.Parse(args) != nil {
		os.Exit(1)
	}
	// Every flag is mandatory; report all missing ones at once.
	required := []struct {
		name  string
		value *string
	}{
		{"--db", dbFlag},
		{"--dataset", dsFlag},
		{"--location", locFlag},
		{"--cluster", clFlag},
		{"--file", fileFlag},
	}
	var missing []string
	for _, r := range required {
		if *r.value == "" {
			missing = append(missing, r.name)
		}
	}
	if len(missing) != 0 {
		fmt.Fprintf(os.Stderr, "Error: missing required flags: %v\n\n", missing)
		flags.Usage()
		os.Exit(1)
	}
	tools.SetDBPath(*dbFlag)
	// Event log lives next to the database file.
	db.SetEventLogConfig(db.EventLogConfig{
		Enabled: true,
		Path:    *dbFlag + ".events.jsonl",
	})
	defer db.CloseEventLog()
	fmt.Fprintf(os.Stderr, "Importing file: %s\n", *fileFlag)
	result, err := tools.ImportFile(context.Background(), tools.ImportFileInput{
		FilePath:   *fileFlag,
		DatasetID:  *dsFlag,
		LocationID: *locFlag,
		ClusterID:  *clFlag,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}
	printJSON(result)
}
// runImportFolder implements "skraak import folder": imports every WAV file
// found in a folder (recursively by default) into a structured dataset under
// a specific location and cluster.
func runImportFolder(args []string) {
	flags := flag.NewFlagSet("import folder", flag.ExitOnError)
	dbFlag := flags.String("db", "", "Path to DuckDB database (required)")
	dsFlag := flags.String("dataset", "", "Dataset ID (required)")
	locFlag := flags.String("location", "", "Location ID (required)")
	clFlag := flags.String("cluster", "", "Cluster ID (required)")
	folderFlag := flags.String("folder", "", "Path to folder containing WAV files (required)")
	recursiveFlag := flags.Bool("recursive", true, "Scan subfolders recursively (default: true)")
	flags.Usage = func() {
		fmt.Fprint(os.Stderr, "Usage: skraak import folder [options]\n\nImport all WAV files from a folder into the database.\n\nOptions:\n")
		flags.PrintDefaults()
		fmt.Fprint(os.Stderr, "\nExamples:\n skraak import folder --db ./db/skraak.duckdb --dataset abc123 --location loc456 --cluster clust789 --folder /path/to/folder\n")
	}
	// ExitOnError makes Parse exit on failure; the branch is kept for parity.
	if flags.Parse(args) != nil {
		os.Exit(1)
	}
	// Every string flag is mandatory; report all missing ones at once.
	required := []struct {
		name  string
		value *string
	}{
		{"--db", dbFlag},
		{"--dataset", dsFlag},
		{"--location", locFlag},
		{"--cluster", clFlag},
		{"--folder", folderFlag},
	}
	var missing []string
	for _, r := range required {
		if *r.value == "" {
			missing = append(missing, r.name)
		}
	}
	if len(missing) != 0 {
		fmt.Fprintf(os.Stderr, "Error: missing required flags: %v\n\n", missing)
		flags.Usage()
		os.Exit(1)
	}
	tools.SetDBPath(*dbFlag)
	// Event log lives next to the database file.
	db.SetEventLogConfig(db.EventLogConfig{
		Enabled: true,
		Path:    *dbFlag + ".events.jsonl",
	})
	defer db.CloseEventLog()
	fmt.Fprintf(os.Stderr, "Importing from folder: %s\n", *folderFlag)
	if *recursiveFlag {
		fmt.Fprint(os.Stderr, "Scanning recursively...\n")
	}
	result, err := tools.ImportAudioFiles(context.Background(), tools.ImportAudioFilesInput{
		FolderPath: *folderFlag,
		DatasetID:  *dsFlag,
		LocationID: *locFlag,
		ClusterID:  *clFlag,
		Recursive:  recursiveFlag,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		// Emit partial results if some files were imported before the failure.
		if len(result.FileIDs) > 0 {
			printJSON(result)
		}
		os.Exit(1)
	}
	printJSON(result)
}
// runImportSegments implements "skraak import segments": imports segment
// annotations from AviaNZ .data files, translating species/calltype names
// through a user-supplied JSON mapping. Progress is streamed to stderr and
// a summary is printed on success.
func runImportSegments(args []string) {
	flags := flag.NewFlagSet("import segments", flag.ExitOnError)
	dbFlag := flags.String("db", "", "Path to DuckDB database (required)")
	dsFlag := flags.String("dataset", "", "Dataset ID (required)")
	locFlag := flags.String("location", "", "Location ID (required)")
	clFlag := flags.String("cluster", "", "Cluster ID (required)")
	folderFlag := flags.String("folder", "", "Path to folder containing .data files (required)")
	mappingFlag := flags.String("mapping", "", "Path to mapping JSON file (required)")
	flags.Usage = func() {
		fmt.Fprint(os.Stderr, "Usage: skraak import segments [options]\n\nImport segments from AviaNZ .data files into the database.\nApplies species/calltype mapping from JSON file.\n\nOptions:\n")
		flags.PrintDefaults()
		fmt.Fprint(os.Stderr, "\nMapping file format:\n {\n \"GSK\": {\"species\": \"Roroa\", \"calltypes\": {\"Male\": \"Male - Solo\"}},\n \"Don't Know\": {\"species\": \"Don't Know\"}\n }\n")
		fmt.Fprint(os.Stderr, "\nInvariants:\n - All file hashes must already exist in database for the cluster\n - All files must have no existing labels (fresh imports only)\n - All filters, species, and calltypes must exist in database\n - Bookmark flags are ignored (not stored in database)\n")
		fmt.Fprint(os.Stderr, "\nExamples:\n skraak import segments --db ./db/skraak.duckdb --dataset dset_id123 --location loc_id456 --cluster clust_id789 --folder /path/to/data --mapping mapping.json\n")
	}
	// ExitOnError makes Parse exit on failure; the branch is kept for parity.
	if flags.Parse(args) != nil {
		os.Exit(1)
	}
	// Every flag is mandatory; report all missing ones at once.
	required := []struct {
		name  string
		value *string
	}{
		{"--db", dbFlag},
		{"--dataset", dsFlag},
		{"--location", locFlag},
		{"--cluster", clFlag},
		{"--folder", folderFlag},
		{"--mapping", mappingFlag},
	}
	var missing []string
	for _, r := range required {
		if *r.value == "" {
			missing = append(missing, r.name)
		}
	}
	if len(missing) != 0 {
		fmt.Fprintf(os.Stderr, "Error: missing required flags: %v\n\n", missing)
		flags.Usage()
		os.Exit(1)
	}
	tools.SetDBPath(*dbFlag)
	// Event log lives next to the database file.
	db.SetEventLogConfig(db.EventLogConfig{
		Enabled: true,
		Path:    *dbFlag + ".events.jsonl",
	})
	defer db.CloseEventLog()
	input := tools.ImportSegmentsInput{
		Folder:     *folderFlag,
		Mapping:    *mappingFlag,
		DatasetID:  *dsFlag,
		LocationID: *locFlag,
		ClusterID:  *clFlag,
		// Overwrite a single stderr line with running progress; terminate it
		// with a newline once the final file has been processed.
		ProgressHandler: func(done, total int, message string) {
			if total <= 0 {
				return
			}
			pct := float64(done) / float64(total) * 100
			fmt.Fprintf(os.Stderr, "\rProcessing .data files: %d/%d (%.0f%%) - %s", done, total, pct, message)
			if done == total {
				fmt.Fprint(os.Stderr, "\n")
			}
		},
	}
	fmt.Fprintf(os.Stderr, "Importing segments from: %s\nUsing mapping: %s\n", *folderFlag, *mappingFlag)
	result, err := tools.ImportSegments(context.Background(), input)
	if err != nil {
		fmt.Fprintf(os.Stderr, "\nError: %v\n", err)
		// Emit partial results if any segments or errors were recorded.
		if len(result.Segments) > 0 || len(result.Errors) > 0 {
			printJSON(result)
		}
		os.Exit(1)
	}
	fmt.Fprintf(os.Stderr, "\nImport complete:\n Data files processed: %d\n Segments imported: %d\n Labels imported: %d\n Subtypes imported: %d\n",
		result.Summary.DataFilesProcessed,
		result.Summary.ImportedSegments,
		result.Summary.ImportedLabels,
		result.Summary.ImportedSubtypes)
	printJSON(result)
}
// runImportUnstructured implements "skraak import unstructured": imports WAV
// files into an unstructured dataset, which has no location/cluster hierarchy.
func runImportUnstructured(args []string) {
	flags := flag.NewFlagSet("import unstructured", flag.ExitOnError)
	dbFlag := flags.String("db", "", "Path to DuckDB database (required)")
	dsFlag := flags.String("dataset", "", "Dataset ID (required - must be 'unstructured' type)")
	folderFlag := flags.String("folder", "", "Path to folder containing WAV files (required)")
	recursiveFlag := flags.Bool("recursive", true, "Scan subfolders recursively (default: true)")
	flags.Usage = func() {
		fmt.Fprint(os.Stderr, "Usage: skraak import unstructured [options]\n\nImport WAV files into an unstructured dataset.\nFiles are stored with minimal metadata (hash, duration, sample_rate, file modification time).\nNo location/cluster hierarchy required.\n\nOptions:\n")
		flags.PrintDefaults()
		fmt.Fprint(os.Stderr, "\nExamples:\n skraak import unstructured --db ./db/skraak.duckdb --dataset abc123 --folder /path/to/folder\n skraak import unstructured --db ./db/skraak.duckdb --dataset abc123 --folder /path/to/folder --recursive=false\n")
	}
	// ExitOnError makes Parse exit on failure; the branch is kept for parity.
	if flags.Parse(args) != nil {
		os.Exit(1)
	}
	// Every string flag is mandatory; report all missing ones at once.
	required := []struct {
		name  string
		value *string
	}{
		{"--db", dbFlag},
		{"--dataset", dsFlag},
		{"--folder", folderFlag},
	}
	var missing []string
	for _, r := range required {
		if *r.value == "" {
			missing = append(missing, r.name)
		}
	}
	if len(missing) != 0 {
		fmt.Fprintf(os.Stderr, "Error: missing required flags: %v\n\n", missing)
		flags.Usage()
		os.Exit(1)
	}
	tools.SetDBPath(*dbFlag)
	// Event log lives next to the database file.
	db.SetEventLogConfig(db.EventLogConfig{
		Enabled: true,
		Path:    *dbFlag + ".events.jsonl",
	})
	defer db.CloseEventLog()
	fmt.Fprintf(os.Stderr, "Importing into unstructured dataset: %s\nScanning folder: %s\n", *dsFlag, *folderFlag)
	if *recursiveFlag {
		fmt.Fprint(os.Stderr, "Scanning recursively...\n")
	}
	result, err := tools.ImportUnstructured(context.Background(), tools.ImportUnstructuredInput{
		DatasetID:  *dsFlag,
		FolderPath: *folderFlag,
		Recursive:  recursiveFlag,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}
	printJSON(result)
}
// printJSON writes v to stdout as indented JSON.
// The original implementation silently discarded the Encode error; an
// encoding failure (e.g. an unmarshalable value, or a write error on a
// closed/redirected stdout) is now reported to stderr so callers and
// scripts aren't left with silently-truncated output.
func printJSON(v any) {
	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	if err := enc.Encode(v); err != nil {
		fmt.Fprintf(os.Stderr, "Error encoding JSON: %v\n", err)
	}
}