E27ZWCDPESXDEHYZONCAKYL2U4K4ZLVXWX4453ICWSH4TGMQI4KQC // Parse format chunk - need at least 16 bytes of dataif chunkSize >= 16 && offset+16 <= len(data) {metadata.Channels = int(binary.LittleEndian.Uint16(data[offset+2 : offset+4]))metadata.SampleRate = int(binary.LittleEndian.Uint32(data[offset+4 : offset+8]))metadata.BitsPerSample = int(binary.LittleEndian.Uint16(data[offset+14 : offset+16]))}
parseFmtChunkData(data[offset:], chunkSize, metadata)
// Calculate duration from data chunk size// We only need the chunkSize from the header, not the actual audio dataif metadata.SampleRate > 0 && metadata.Channels > 0 && metadata.BitsPerSample > 0 {bytesPerSample := metadata.BitsPerSample / 8bytesPerSecond := metadata.SampleRate * metadata.Channels * bytesPerSampleif bytesPerSecond > 0 {metadata.Duration = float64(chunkSize) / float64(bytesPerSecond)}}// Data chunk content is the audio data - we don't need to read it
calcDataChunkDuration(chunkSize, metadata)
// Parse LIST chunk for INFO metadataif chunkSize >= 4 && offset+chunkSize <= len(data) {listType := string(data[offset : offset+4])if listType == "INFO" {parseINFOChunk(data[offset+4:offset+chunkSize], metadata)}}
parseLISTChunkData(data[offset:], chunkSize, metadata)
return metadata, nil
// parseFmtChunkData extracts format info from a fmt chunk.func parseFmtChunkData(data []byte, chunkSize int, m *WAVMetadata) {if chunkSize >= 16 && len(data) >= 16 {m.Channels = int(binary.LittleEndian.Uint16(data[2:4]))m.SampleRate = int(binary.LittleEndian.Uint32(data[4:8]))m.BitsPerSample = int(binary.LittleEndian.Uint16(data[14:16]))}}// calcDataChunkDuration computes duration from the data chunk size.func calcDataChunkDuration(chunkSize int, m *WAVMetadata) {if m.SampleRate > 0 && m.Channels > 0 && m.BitsPerSample > 0 {bytesPerSample := m.BitsPerSample / 8bytesPerSecond := m.SampleRate * m.Channels * bytesPerSampleif bytesPerSecond > 0 {m.Duration = float64(chunkSize) / float64(bytesPerSecond)}}
// Mirrors numpy.arange(0, fullDuration, increment): a half-open interval — generation stops once start >= fullDuration.
switch finalClip {case FinalClipNone:return dedupClips(clipWindowsNone(starts, ends, fullDuration)), nilcase FinalClipRemainder:return dedupClips(clipWindowsRemainder(starts, ends, fullDuration)), nilcase FinalClipFull:return dedupClips(clipWindowsFull(starts, ends, fullDuration)), nilcase FinalClipExtend:return dedupClips(clipWindowsExtend(starts, ends)), nildefault:return nil, fmt.Errorf("invalid FinalClipMode %d", finalClip)}}// buildClipStartsEnds generates the start and end arrays for clips.func buildClipStartsEnds(fullDuration, clipDuration, clipOverlap float64, roundingPrecision int) ([]float64, []float64) {increment := clipDuration - clipOverlap
switch finalClip {case FinalClipNone:// Drop any window whose end exceeds fullDuration.kept := make([]ClipWindow, 0, len(starts))for i := range starts {if ends[i] <= fullDuration {kept = append(kept, ClipWindow{Start: starts[i], End: ends[i]})}
// clipWindowsNone drops any window whose end exceeds fullDuration.func clipWindowsNone(starts, ends []float64, fullDuration float64) []ClipWindow {out := make([]ClipWindow, 0, len(starts))for i := range starts {if ends[i] <= fullDuration {out = append(out, ClipWindow{Start: starts[i], End: ends[i]})
case FinalClipRemainder:// Trim ends > fullDuration down to fullDuration.out := make([]ClipWindow, 0, len(starts))for i := range starts {e := ends[i]if e > fullDuration {e = fullDuration}out = append(out, ClipWindow{Start: starts[i], End: e})
// clipWindowsRemainder trims ends beyond fullDuration down to fullDuration.func clipWindowsRemainder(starts, ends []float64, fullDuration float64) []ClipWindow {out := make([]ClipWindow, 0, len(starts))for i := range starts {e := ends[i]if e > fullDuration {e = fullDuration
case FinalClipFull:// Shift any window whose end exceeds fullDuration back so its end == fullDuration.// Keep clip length == clipDuration. Clamp start to >= 0 (audio shorter than clip_duration).out := make([]ClipWindow, 0, len(starts))for i := range starts {s := starts[i]e := ends[i]if e > fullDuration {delta := e - fullDurations -= deltae = fullDurationif s < 0 {s = 0}
// clipWindowsFull shifts windows whose end exceeds fullDuration back so end == fullDuration.func clipWindowsFull(starts, ends []float64, fullDuration float64) []ClipWindow {out := make([]ClipWindow, 0, len(starts))for i := range starts {s, e := starts[i], ends[i]if e > fullDuration {s -= e - fullDuratione = fullDurationif s < 0 {s = 0
case FinalClipExtend:// Keep ends as-is, even past fullDuration.out := make([]ClipWindow, 0, len(starts))for i := range starts {out = append(out, ClipWindow{Start: starts[i], End: ends[i]})}return dedupClips(out), nildefault:return nil, fmt.Errorf("invalid FinalClipMode %d", finalClip)
// clipWindowsExtend keeps ends as-is, even past fullDuration.func clipWindowsExtend(starts, ends []float64) []ClipWindow {out := make([]ClipWindow, 0, len(starts))for i := range starts {out = append(out, ClipWindow{Start: starts[i], End: ends[i]})
datasetType := db.DatasetTypeStructured // Defaultif input.Type != nil {typeStr := strings.ToLower(strings.TrimSpace(*input.Type))switch typeStr {case "structured":datasetType = db.DatasetTypeStructuredcase "unstructured":datasetType = db.DatasetTypeUnstructuredcase "test":datasetType = db.DatasetTypeTestcase "train":datasetType = db.DatasetTypeTraindefault:return output, fmt.Errorf("invalid type '%s': must be 'structured', 'unstructured', 'test', or 'train'", *input.Type)}
datasetType, err := parseDatasetType(input.Type)if err != nil {return output, err
// Dataset with this name already exists - return existing (consistent duplicate handling)var dataset db.Dataseterr = tx.QueryRowContext(ctx,"SELECT id, name, description, created_at, last_modified, active, type FROM dataset WHERE id = ?",existingID,).Scan(&dataset.ID, &dataset.Name, &dataset.Description, &dataset.CreatedAt, &dataset.LastModified, &dataset.Active, &dataset.Type)if err != nil {return output, fmt.Errorf("failed to fetch existing dataset: %w", err)}
return handleExistingDataset(ctx, tx, existingID)}return insertNewDataset(ctx, tx, *input.Name, input.Description, datasetType)}
if err = tx.Commit(); err != nil {return output, fmt.Errorf("failed to commit transaction: %w", err)
// parseDatasetType validates and returns the dataset type from input.func parseDatasetType(t *string) (db.DatasetType, error) {datasetType := db.DatasetTypeStructured // Defaultif t != nil {typeStr := strings.ToLower(strings.TrimSpace(*t))switch typeStr {case "structured":datasetType = db.DatasetTypeStructuredcase "unstructured":datasetType = db.DatasetTypeUnstructuredcase "test":datasetType = db.DatasetTypeTestcase "train":datasetType = db.DatasetTypeTraindefault:return "", fmt.Errorf("invalid type '%s': must be 'structured', 'unstructured', 'test', or 'train'", *t)
output.Dataset = datasetoutput.Message = fmt.Sprintf("Dataset with name '%s' already exists (ID: %s) - returning existing dataset", dataset.Name, dataset.ID)return output, nil
// handleExistingDataset returns an existing dataset found by ID within a transaction.func handleExistingDataset(ctx context.Context, tx *db.LoggedTx, existingID string) (DatasetOutput, error) {var dataset db.Dataseterr := tx.QueryRowContext(ctx,"SELECT id, name, description, created_at, last_modified, active, type FROM dataset WHERE id = ?",existingID,).Scan(&dataset.ID, &dataset.Name, &dataset.Description, &dataset.CreatedAt, &dataset.LastModified, &dataset.Active, &dataset.Type)if err != nil {return DatasetOutput{}, fmt.Errorf("failed to fetch existing dataset: %w", err)}if err = tx.Commit(); err != nil {return DatasetOutput{}, fmt.Errorf("failed to commit transaction: %w", err)
// Generate ID
return DatasetOutput{Dataset: dataset,Message: fmt.Sprintf("Dataset with name '%s' already exists (ID: %s) - returning existing dataset", dataset.Name, dataset.ID),}, nil}// insertNewDataset inserts a new dataset row and returns it within a transaction.func insertNewDataset(ctx context.Context, tx *db.LoggedTx, name string, description *string, datasetType db.DatasetType) (DatasetOutput, error) {
// Verify dataset exists and is activedatasetName, err := verifyDatasetForCluster(ctx, tx, *input.DatasetID)if err != nil {return output, err}
// Verify location exists, is active, and belongs to the specified datasetlocationName, err := verifyLocationForCluster(ctx, tx, *input.LocationID, *input.DatasetID, datasetName)
// Verify parent references exist and are activedatasetName, locationName, err := verifyClusterParentRefs(ctx, tx, input)
}// Verify cyclic recording pattern if providedif input.CyclicRecordingPatternID != nil && strings.TrimSpace(*input.CyclicRecordingPatternID) != "" {if err := verifyPatternExists(ctx, tx, *input.CyclicRecordingPatternID); err != nil {return output, err}
// Generate ID and insert
return insertNewCluster(ctx, tx, input, datasetName, locationName)}// verifyClusterParentRefs validates that the dataset, location, and optional pattern exist and are active.func verifyClusterParentRefs(ctx context.Context, tx *db.LoggedTx, input ClusterInput) (string, string, error) {datasetName, err := verifyDatasetForCluster(ctx, tx, *input.DatasetID)if err != nil {return "", "", err}locationName, err := verifyLocationForCluster(ctx, tx, *input.LocationID, *input.DatasetID, datasetName)if err != nil {return "", "", err}if input.CyclicRecordingPatternID != nil && strings.TrimSpace(*input.CyclicRecordingPatternID) != "" {if err := verifyPatternExists(ctx, tx, *input.CyclicRecordingPatternID); err != nil {return "", "", err}}return datasetName, locationName, nil}// insertNewCluster inserts a new cluster row and returns it within a transaction.func insertNewCluster(ctx context.Context, tx *db.LoggedTx, input ClusterInput, datasetName, locationName string) (ClusterOutput, error) {
return output, nil
return ClusterOutput{Cluster: cluster,Message: fmt.Sprintf("Successfully created cluster '%s' with ID %s in location '%s' at dataset '%s' (sample rate: %d Hz)",cluster.Name, cluster.ID, locationName, datasetName, cluster.SampleRate),}, nil
// Use DuckDB's duckdb_constraints() function for accurate FK infoquery := `
dependsOnMe, tables, err := buildFKDependencyGraph(db)if err != nil {return nil, err}if err := collectAllTables(db, tables); err != nil {return nil, err}return topologicalSort(tables, dependsOnMe), nil}// buildFKDependencyGraph queries FK constraints and builds a reverse dependency graph.// Returns dependsOnMe (referenced table -> list of tables that depend on it) and// the set of tables seen.func buildFKDependencyGraph(db *sql.DB) (map[string][]string, map[string]bool, error) {dependsOnMe := make(map[string][]string)tables := make(map[string]bool)rows, err := db.Query(`
// Get all tables from the databasetableRows, err := db.Query(`
// collectAllTables adds all base tables from the database schema to the tables set.func collectAllTables(db *sql.DB, tables map[string]bool) error {rows, err := db.Query(`
// Count how many FKs each table has (tables it depends on)fkCount := make(map[string]int)
// topologicalSort orders tables so dependencies come first (Kahn's algorithm).// Tables in cycles are appended at the end.func topologicalSort(tables map[string]bool, dependsOnMe map[string][]string) []string {fkCount := make(map[string]int, len(tables))