-- NOTE: DBML does not like functions and materialised views
-- from this: $npm install -g @dbml/cli
-- sql2dbml schema.sql --postgres -o schema.dbml
-- from this: $npm install -g @softwaretechnik/dbml-renderer
-- dbml-renderer -i schema.dbml -o schema.svg

CREATE TYPE dataset_type AS ENUM ('structured', 'unstructured', 'test', 'train');

CREATE TABLE dataset (
    id VARCHAR(12) PRIMARY KEY, 
    name VARCHAR(255) UNIQUE NOT NULL,
    description VARCHAR(255),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    type dataset_type NOT NULL DEFAULT 'structured'
);

CREATE TABLE location (
    id VARCHAR(12) PRIMARY KEY, 
    dataset_id VARCHAR(12) NOT NULL, 
    name VARCHAR(140) NOT NULL,
    latitude DECIMAL(10, 7) NOT NULL CHECK (latitude BETWEEN -90.0 AND 90.0), 
    longitude DECIMAL(10, 7) NOT NULL CHECK (longitude BETWEEN -180.0 AND 180.0), 
    description VARCHAR(255), 
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    timezone_id VARCHAR(40) NOT NULL, 
    FOREIGN KEY (dataset_id) REFERENCES dataset(id),
    UNIQUE (dataset_id, name)
);

CREATE TABLE cyclic_recording_pattern (
    id VARCHAR(12) PRIMARY KEY, 
    record_s INTEGER NOT NULL,
    sleep_s INTEGER NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    UNIQUE (record_s, sleep_s)
);

CREATE TABLE cluster (
    id VARCHAR(12) PRIMARY KEY, 
    dataset_id VARCHAR(12) NOT NULL, 
    location_id VARCHAR(12) NOT NULL, 
    name VARCHAR(140) NOT NULL,
    description VARCHAR(255), 
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    cyclic_recording_pattern_id VARCHAR(12),
    sample_rate INTEGER NOT NULL,
    path VARCHAR(255) NULL, 
    FOREIGN KEY (dataset_id) REFERENCES dataset(id),
    FOREIGN KEY (location_id) REFERENCES location(id),
    FOREIGN KEY (cyclic_recording_pattern_id) REFERENCES cyclic_recording_pattern(id),
    UNIQUE (location_id, name)
);

CREATE TYPE gain_level AS ENUM ('low', 'low-medium', 'medium', 'medium-high', 'high');

CREATE TABLE file (
    id VARCHAR(21) PRIMARY KEY, 
    file_name VARCHAR(255) NOT NULL,
    xxh64_hash VARCHAR(16) UNIQUE NOT NULL, 
    location_id VARCHAR(12), 
    timestamp_local TIMESTAMP WITH TIME ZONE NOT NULL, 
    cluster_id VARCHAR(12), 
    duration DECIMAL(7, 3) NOT NULL CHECK (duration > 0), 
    sample_rate INTEGER NOT NULL,
    description VARCHAR(255), 
    maybe_solar_night BOOLEAN, 
    maybe_civil_night BOOLEAN, 
    moon_phase DECIMAL(3,2) CHECK (moon_phase BETWEEN 0.00 AND 1.00), 
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (location_id) REFERENCES location(id),
    FOREIGN KEY (cluster_id) REFERENCES cluster(id)
);

CREATE TABLE moth_metadata (
    file_id VARCHAR(21) PRIMARY KEY,
    timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
    recorder_id VARCHAR(16), 
    gain gain_level NULL, 
    battery_v DECIMAL(2, 1) CHECK (battery_v >= 0), 
    temp_c DECIMAL(3, 1), 
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (file_id) REFERENCES file(id)
);

CREATE TABLE file_metadata (
    file_id VARCHAR(21) PRIMARY KEY,
    json JSON, 
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (file_id) REFERENCES file(id)
);


CREATE TABLE file_dataset (
    file_id VARCHAR(21) NOT NULL,
    dataset_id VARCHAR(12) NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (file_id, dataset_id),
    FOREIGN KEY (file_id) REFERENCES file(id),
    FOREIGN KEY (dataset_id) REFERENCES dataset(id)
);


CREATE TABLE segment(
    id VARCHAR(21) PRIMARY KEY, 
    file_id VARCHAR(21) NOT NULL, 
    dataset_id VARCHAR(12) NOT NULL, 
    start_time DECIMAL(7,3) NOT NULL, 
    end_time DECIMAL(7,3) NOT NULL, 
    freq_low DECIMAL(9,3) CHECK (freq_low < 300000), 
    freq_high DECIMAL(9,3) CHECK (freq_high < 300000), 
    description VARCHAR(255), 
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (file_id) REFERENCES file(id),
    FOREIGN KEY (dataset_id) REFERENCES dataset(id),
    FOREIGN KEY (file_id, dataset_id) REFERENCES file_dataset(file_id, dataset_id) 
);

CREATE TABLE ebird_taxonomy (
    id VARCHAR(12) PRIMARY KEY,
    taxonomy_version VARCHAR(4) NOT NULL,
    taxon_order INTEGER NOT NULL,
    category VARCHAR(15) NOT NULL,
    species_code VARCHAR(15) NOT NULL,
    taxon_concept_id VARCHAR(15),
    primary_com_name VARCHAR(100) NOT NULL,
    sci_name VARCHAR(100) NOT NULL,
    bird_order VARCHAR(30),
    family VARCHAR(100),
    species_group VARCHAR(100),
    report_as VARCHAR(15),
    valid_from DATE NOT NULL, -- Need to drop
    valid_to DATE, -- Need to drop
    active BOOLEAN DEFAULT TRUE,
    UNIQUE (species_code, taxonomy_version)
);

CREATE TABLE species (
    id VARCHAR(12) PRIMARY KEY, 
    label VARCHAR(100) UNIQUE NOT NULL,
    ebird_code VARCHAR(12), 
    taxonomy_version VARCHAR(4),
    description VARCHAR(255),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (ebird_code, taxonomy_version) REFERENCES ebird_taxonomy(species_code, taxonomy_version)
);

CREATE TABLE call_type (
    id VARCHAR(12) PRIMARY KEY, 
    species_id VARCHAR(12) NOT NULL, 
    label VARCHAR(100) NOT NULL, 
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (species_id) REFERENCES species(id)
);

CREATE TABLE filter (
    id VARCHAR(12) PRIMARY KEY, 
    name VARCHAR(140) NOT NULL,
    description VARCHAR(255),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN NOT NULL DEFAULT true
);

CREATE TABLE label (
    id VARCHAR(21) PRIMARY KEY, 
    segment_id VARCHAR(21) NOT NULL, 
    species_id VARCHAR(12) NOT NULL, 
    filter_id VARCHAR(12) NOT NULL,  
    certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN NOT NULL DEFAULT true,
    FOREIGN KEY (segment_id) REFERENCES segment(id),
    FOREIGN KEY (species_id) REFERENCES species(id),
    FOREIGN KEY (filter_id) REFERENCES filter(id)
);

CREATE TABLE label_metadata (
    label_id VARCHAR(21) PRIMARY KEY,
    json JSON,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN DEFAULT TRUE,
    FOREIGN KEY (label_id) REFERENCES label(id)
);

CREATE TABLE label_subtype (
    id VARCHAR(21) PRIMARY KEY, 
    label_id VARCHAR(21) NOT NULL, 
    calltype_id VARCHAR(12) NOT NULL, 
    filter_id VARCHAR(12),
    certainty DECIMAL(5,2) CHECK (certainty <= 100 AND certainty >= 0),
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_modified TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    active BOOLEAN NOT NULL DEFAULT true,
    FOREIGN KEY (label_id) REFERENCES label(id),
    FOREIGN KEY (calltype_id) REFERENCES call_type(id),
    FOREIGN KEY (filter_id) REFERENCES filter(id)
    );

-- FK indexes on file table (1.26M rows)
CREATE INDEX idx_file_location ON file(location_id);
CREATE INDEX idx_file_cluster ON file(cluster_id);
-- Performance index on file for time-based queries
CREATE INDEX idx_file_timestamp_local ON file(timestamp_local);
-- FK indexes on segment table (201K rows)
CREATE INDEX idx_segment_file ON segment(file_id);
CREATE INDEX idx_segment_dataset ON segment(dataset_id);
-- FK indexes on label table (200K rows)
CREATE INDEX idx_label_segment_id ON label(segment_id);
CREATE INDEX idx_label_species_id ON label(species_id);
-- FK indexes on label_subtype table (114K rows)
CREATE INDEX idx_label_subtype_label_id ON label_subtype(label_id);
CREATE INDEX idx_label_subtype_calltype_id ON label_subtype(calltype_id);
CREATE INDEX idx_label_subtype_filter_id ON label_subtype(filter_id);
-- FK lookup for ebird taxonomy (used by species table FK)
CREATE INDEX idx_ebird_taxonomy_species_code ON ebird_taxonomy(species_code, taxonomy_version);
-- Junction table reverse lookups
CREATE INDEX idx_file_dataset_dataset ON file_dataset(dataset_id);