package utils
import (
"encoding/json"
"fmt"
"os"
"sort"
"strings"
)
// SpeciesMapping is a single entry of a mapping file: the species label a
// data species is mapped to, plus optional per-calltype renames.
type SpeciesMapping struct {
	// Species is the target species label. LoadMappingFile rejects entries
	// where it is empty. It may also hold one of the sentinel values
	// MappingNegative or MappingIgnore instead of a real label.
	Species string `json:"species"`
	// Calltypes maps a data calltype label to the DB calltype label. Data
	// calltypes without an entry are passed through unchanged by
	// GetDBCalltype. Omitted from JSON when empty.
	Calltypes map[string]string `json:"calltypes,omitempty"`
}
// MappingFile is the decoded mapping JSON document: a map from the species
// label as it appears in the data to its SpeciesMapping entry.
type MappingFile map[string]SpeciesMapping
// LoadMappingFile reads the JSON mapping file at path and decodes it into a
// MappingFile.
//
// It returns an error when the file cannot be read, when it is not valid JSON
// for a MappingFile, when it decodes to zero entries, or when any entry has an
// empty species field. If several entries have an empty species, the one with
// the lexicographically smallest key is reported, so the error message does
// not depend on map iteration order.
func LoadMappingFile(path string) (MappingFile, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("failed to read mapping file: %w", err)
	}

	var mapping MappingFile
	if err := json.Unmarshal(data, &mapping); err != nil {
		return nil, fmt.Errorf("failed to parse mapping JSON: %w", err)
	}
	if len(mapping) == 0 {
		return nil, fmt.Errorf("mapping file is empty")
	}

	// Collect every offending key first: ranging over a map visits keys in
	// an unspecified order, so returning on the first hit would make the
	// reported entry vary from run to run.
	var invalid []string
	for dataSpecies, sm := range mapping {
		if sm.Species == "" {
			invalid = append(invalid, dataSpecies)
		}
	}
	if len(invalid) > 0 {
		sort.Strings(invalid)
		return nil, fmt.Errorf("mapping entry '%s' has empty species field", invalid[0])
	}
	return mapping, nil
}
// MappingValidationResult collects everything ValidateMappingAgainstDB found
// to be inconsistent between the data, the mapping file and the database.
// It implements the error interface through its Error method.
type MappingValidationResult struct {
	// MissingSpecies lists species present in the data that have no entry
	// in the mapping file.
	MissingSpecies []string
	// MissingDBSpecies lists mapped species labels that the species query
	// did not return.
	MissingDBSpecies []string
	// MissingCalltypes maps "dbSpecies/dataCalltype" to
	// "dbSpecies/dbCalltype" for every calltype the calltype query did not
	// return.
	MissingCalltypes map[string]string
}
// HasErrors reports whether the result contains at least one finding in any
// of its three categories.
func (r MappingValidationResult) HasErrors() bool {
	findings := len(r.MissingSpecies) + len(r.MissingDBSpecies) + len(r.MissingCalltypes)
	return findings != 0
}
// Error renders the findings as a single line. Each non-empty category
// contributes one section and sections are joined with "; ". Calltype pairs
// are formatted as "key->value" and sorted so the output is stable. A result
// without findings renders as the empty string.
func (r MappingValidationResult) Error() string {
	var sections []string
	// addSection appends one formatted section unless its list is empty.
	addSection := func(format string, items []string) {
		if len(items) == 0 {
			return
		}
		sections = append(sections, fmt.Sprintf(format, strings.Join(items, ", ")))
	}

	addSection("species in .data but not in mapping: [%s]", r.MissingSpecies)
	addSection("mapped species not found in DB: [%s]", r.MissingDBSpecies)

	var pairs []string
	for dataLabel, dbLabel := range r.MissingCalltypes {
		pairs = append(pairs, fmt.Sprintf("%s->%s", dataLabel, dbLabel))
	}
	// Sort the rendered pairs (not the keys) to keep the exact ordering.
	sort.Strings(pairs)
	addSection("calltypes not found in DB: [%s]", pairs)

	return strings.Join(sections, "; ")
}
// ValidateMappingAgainstDB checks a mapping file against both the labels found
// in the data and the labels known to the database.
//
// dataSpeciesSet holds the species labels seen in the data; dataCalltypes
// holds, per data species, the calltype labels seen in the data. The returned
// result lists data species without a mapping entry (sorted), mapped species
// the species query did not return, and calltypes the calltype query did not
// return.
//
// A non-nil error means one of the database checks failed; the result then
// only contains what had been gathered before the failure. Validation
// findings themselves are reported through the result, not the error.
func ValidateMappingAgainstDB(
	queryer DB,
	mapping MappingFile,
	dataSpeciesSet map[string]bool,
	dataCalltypes map[string]map[string]bool,
) (MappingValidationResult, error) {
	report := MappingValidationResult{
		// Sorted, non-nil list of data species that lack a mapping entry.
		MissingSpecies:   mapping.ValidateCoversSpecies(dataSpeciesSet),
		MissingDBSpecies: []string{},
		MissingCalltypes: map[string]string{},
	}

	speciesLabels, calltypeLabels := collectMappedLabels(mapping, dataCalltypes)

	// Species are checked first; calltypes are only checked when that
	// check itself did not fail.
	err := validateMappedSpecies(queryer, speciesLabels, &report)
	if err == nil {
		err = validateMappedCalltypes(queryer, calltypeLabels, &report)
	}
	return report, err
}
// collectMappedLabels gathers every DB-side label the mapping can produce, so
// the caller can check them against the database.
//
// It returns the set of species labels named by the mapping and, per species
// label, a map from DB calltype label to the data calltype label that
// produced it. Calltypes come from two sources: the explicit Calltypes table
// of each mapping entry, and the calltypes seen in the data (dataCalltypes,
// keyed by data species), which pass through unchanged when the entry has no
// rename for them.
//
// Entries whose species is MappingNegative or MappingIgnore are skipped in
// both passes: the sentinels are not species labels, so they must be neither
// looked up nor reported as missing. Data species without a mapping entry are
// skipped as well. When two data calltypes resolve to the same DB calltype,
// only one of them is kept as the originating label.
func collectMappedLabels(mapping MappingFile, dataCalltypes map[string]map[string]bool) (map[string]bool, map[string]map[string]string) {
	mappedSpeciesSet := make(map[string]bool)
	mappedCalltypes := make(map[string]map[string]string)

	isSentinel := func(species string) bool {
		return species == MappingNegative || species == MappingIgnore
	}
	// record notes that dbCT of dbSpecies originates from dataCT, creating
	// the per-species map on first use.
	record := func(dbSpecies, dbCT, dataCT string) {
		if mappedCalltypes[dbSpecies] == nil {
			mappedCalltypes[dbSpecies] = make(map[string]string)
		}
		mappedCalltypes[dbSpecies][dbCT] = dataCT
	}

	// Pass 1: everything the mapping file declares explicitly.
	for _, sm := range mapping {
		if isSentinel(sm.Species) {
			continue
		}
		mappedSpeciesSet[sm.Species] = true
		for dataCT, dbCT := range sm.Calltypes {
			record(sm.Species, dbCT, dataCT)
		}
	}

	// Pass 2: calltypes that occur in the data, resolved through the
	// mapping (or passed through unchanged when no rename exists).
	for dataSpecies, ctSet := range dataCalltypes {
		sm, exists := mapping[dataSpecies]
		if !exists || isSentinel(sm.Species) {
			continue
		}
		for dataCT := range ctSet {
			dbCT := dataCT
			// Indexing a nil Calltypes map is safe and yields ok == false.
			if mapped, ok := sm.Calltypes[dataCT]; ok {
				dbCT = mapped
			}
			record(sm.Species, dbCT, dataCT)
		}
	}
	return mappedSpeciesSet, mappedCalltypes
}
// validateMappedSpecies checks that every label in mappedSpeciesSet is
// returned by the species query (label match with active = true) and appends
// the labels that are not, in sorted order, to result.MissingDBSpecies.
//
// It returns an error when the query fails, when a row cannot be scanned, or
// when row iteration ends with an error. In those cases result is left
// untouched, so a read failure is never mistaken for a missing species. An
// empty set is valid and issues no query.
func validateMappedSpecies(queryer DB, mappedSpeciesSet map[string]bool, result *MappingValidationResult) error {
	if len(mappedSpeciesSet) == 0 {
		return nil
	}

	speciesLabels := make([]string, 0, len(mappedSpeciesSet))
	for s := range mappedSpeciesSet {
		speciesLabels = append(speciesLabels, s)
	}
	// Sorted so both the bound arguments and the reported labels are stable.
	sort.Strings(speciesLabels)

	args := make([]any, len(speciesLabels))
	for i, s := range speciesLabels {
		args[i] = s
	}

	query := `SELECT label FROM species WHERE label IN (` + Placeholders(len(speciesLabels)) + `) AND active = true`
	rows, err := queryer.Query(query, args...)
	if err != nil {
		return fmt.Errorf("failed to query species: %w", err)
	}
	defer rows.Close()

	foundSpecies := make(map[string]bool, len(speciesLabels))
	for rows.Next() {
		var label string
		if err := rows.Scan(&label); err != nil {
			return fmt.Errorf("failed to scan species label: %w", err)
		}
		foundSpecies[label] = true
	}
	// Next returning false can mean "no more rows" or "iteration failed";
	// only Err distinguishes the two.
	if err := rows.Err(); err != nil {
		return fmt.Errorf("failed to read species rows: %w", err)
	}

	for _, s := range speciesLabels {
		if !foundSpecies[s] {
			result.MissingDBSpecies = append(result.MissingDBSpecies, s)
		}
	}
	return nil
}
// validateMappedCalltypes checks, species by species, that every DB calltype
// label in mappedCalltypes is returned by the calltype query for that species.
// mappedCalltypes maps a species label to a map from DB calltype label to the
// data calltype label it came from.
//
// Each calltype that is not returned is recorded in result.MissingCalltypes
// as "dbSpecies/dataCalltype" -> "dbSpecies/dbCalltype". Species are processed
// in sorted order so that the sequence of queries, and therefore which error
// is hit first, is stable. The first query, scan or iteration error aborts
// the check and is returned.
func validateMappedCalltypes(queryer DB, mappedCalltypes map[string]map[string]string, result *MappingValidationResult) error {
	speciesLabels := make([]string, 0, len(mappedCalltypes))
	for dbSpecies, ctMap := range mappedCalltypes {
		if len(ctMap) > 0 {
			speciesLabels = append(speciesLabels, dbSpecies)
		}
	}
	sort.Strings(speciesLabels)

	for _, dbSpecies := range speciesLabels {
		ctMap := mappedCalltypes[dbSpecies]

		ctLabels := make([]string, 0, len(ctMap))
		for dbCT := range ctMap {
			ctLabels = append(ctLabels, dbCT)
		}
		sort.Strings(ctLabels)

		foundCT, err := queryActiveCalltypes(queryer, dbSpecies, ctLabels)
		if err != nil {
			return err
		}

		for dbCT, dataCT := range ctMap {
			if foundCT[dbCT] {
				continue
			}
			// Writing to a nil map panics; allocate lazily for callers
			// that pass a zero-value result.
			if result.MissingCalltypes == nil {
				result.MissingCalltypes = make(map[string]string)
			}
			key := fmt.Sprintf("%s/%s", dbSpecies, dataCT)
			value := fmt.Sprintf("%s/%s", dbSpecies, dbCT)
			result.MissingCalltypes[key] = value
		}
	}
	return nil
}

// queryActiveCalltypes returns the subset of ctLabels that the calltype query
// returns for dbSpecies. It is a separate function so the deferred Close runs
// once per species instead of piling up until the whole validation returns.
func queryActiveCalltypes(queryer DB, dbSpecies string, ctLabels []string) (map[string]bool, error) {
	query := `
		SELECT ct.label
		FROM call_type ct
		JOIN species s ON ct.species_id = s.id
		WHERE s.label = ? AND ct.label IN (` + Placeholders(len(ctLabels)) + `) AND ct.active = true`

	args := make([]any, 0, 1+len(ctLabels))
	args = append(args, dbSpecies)
	for _, ct := range ctLabels {
		args = append(args, ct)
	}

	rows, err := queryer.Query(query, args...)
	if err != nil {
		return nil, fmt.Errorf("failed to query calltypes for species %s: %w", dbSpecies, err)
	}
	defer rows.Close()

	found := make(map[string]bool, len(ctLabels))
	for rows.Next() {
		var label string
		if err := rows.Scan(&label); err != nil {
			return nil, fmt.Errorf("failed to scan calltype for species %s: %w", dbSpecies, err)
		}
		found[label] = true
	}
	// Distinguish "no more rows" from "iteration failed".
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("failed to read calltypes for species %s: %w", dbSpecies, err)
	}
	return found, nil
}
// GetDBSpecies looks up the mapping entry for dataSpecies and returns its
// Species value as stored, which may be a sentinel such as MappingNegative.
// The boolean is false, and the string empty, when there is no entry.
func (m MappingFile) GetDBSpecies(dataSpecies string) (string, bool) {
	if entry, ok := m[dataSpecies]; ok {
		return entry.Species, true
	}
	return "", false
}
// GetDBCalltype translates dataCalltype for dataSpecies through the mapping.
// When the species has no entry, or the entry has no rename for this
// calltype, dataCalltype is returned unchanged.
func (m MappingFile) GetDBCalltype(dataSpecies, dataCalltype string) string {
	// A missing entry yields the zero SpeciesMapping, whose nil Calltypes
	// map can be indexed safely and reports "not found".
	renamed, ok := m[dataSpecies].Calltypes[dataCalltype]
	if !ok {
		return dataCalltype
	}
	return renamed
}
// Sentinel values that may appear in SpeciesMapping.Species instead of a real
// species label. Entries carrying them are excluded from Classes and from the
// mapped species set checked against the database.
const (
	// MappingNegative is reported by Classify as MappingNeg.
	// NOTE(review): presumably marks data species used as negative
	// examples — confirm with the mapping file documentation.
	MappingNegative = "__NEGATIVE__"
	// MappingIgnore is reported by Classify as MappingIgn.
	// NOTE(review): presumably marks data species to be dropped — confirm.
	MappingIgnore = "__IGNORE__"
)
// MappingKind is the category Classify assigns to a data species.
type MappingKind int

// Possible MappingKind values.
const (
	// MappingReal means the entry names a regular species label. It is
	// also the kind returned by Classify when no entry exists (ok == false).
	MappingReal MappingKind = iota
	// MappingNeg means the entry's species is MappingNegative.
	MappingNeg
	// MappingIgn means the entry's species is MappingIgnore.
	MappingIgn
)
// Classify resolves dataSpecies through the mapping.
//
// ok is false when the mapping has no entry for dataSpecies; canonical is
// then empty and kind is MappingReal. For an entry whose species is
// MappingNegative or MappingIgnore, kind is MappingNeg or MappingIgn and
// canonical is empty. For any other entry, canonical is the entry's species
// label and kind is MappingReal.
func (m MappingFile) Classify(dataSpecies string) (canonical string, kind MappingKind, ok bool) {
	entry, found := m[dataSpecies]
	if !found {
		return "", MappingReal, false
	}
	if entry.Species == MappingNegative {
		return "", MappingNeg, true
	}
	if entry.Species == MappingIgnore {
		return "", MappingIgn, true
	}
	return entry.Species, MappingReal, true
}
// ValidateCoversSpecies returns, in sorted order, every key of speciesSet
// that has no entry in the mapping. Keys are considered regardless of their
// boolean value. The result is never nil; it is empty when the mapping covers
// the whole set.
func (m MappingFile) ValidateCoversSpecies(speciesSet map[string]bool) []string {
	uncovered := []string{}
	for label := range speciesSet {
		if _, covered := m[label]; covered {
			continue
		}
		uncovered = append(uncovered, label)
	}
	sort.Strings(uncovered)
	return uncovered
}
// Classes returns the distinct species labels the mapping points to, sorted.
// Entries whose species is empty, MappingNegative or MappingIgnore are left
// out. The result is never nil.
func (m MappingFile) Classes() []string {
	distinct := make(map[string]bool)
	for _, entry := range m {
		label := entry.Species
		if label == "" || label == MappingNegative || label == MappingIgnore {
			continue
		}
		distinct[label] = true
	}

	classes := make([]string, 0, len(distinct))
	for label := range distinct {
		classes = append(classes, label)
	}
	sort.Strings(classes)
	return classes
}