FCL6FKHMM6LX7HNI7F3CH4GRUIXCC2APQIZUKTJSLSIS6SRP2SPQC
# Train.jl
export train #beware Flux.train! is not Skraak.train
import Base: length, getindex
import MLBase
using CUDA, Dates, Images, Flux, Glob, JLD2, Noise
using Random: shuffle!, seed!
using Metalhead: ResNet
#=
function train(
model_name::String,
train_epochs::Int64,
images::Vector{String},
pretrain::Model=true,
train_test_split::Float64 = 0.8,
batch_size::Int64 = 64,
)
Note:
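Don't forget the temp env, and start julia with threads: julia -t 4
Assumes 224x224 pixel RGB images as png's
Saves a model jld2 after every epoch under /media/david/SSD2, and the labels text file in the current directory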
Use like:
using Skraak, Glob
images = Glob.glob("kiwi_set*/*/[N,K]/*.png") #11699814-element Vector{String}
model = "/media/david/SSD2/PrimaryDataset/model_K1-9_original_set_CPU_epoch-7-0.9924-2024-03-05.jld2"
train("K1-10_total_set_no_augumentation", 2, images, model, 0.97, 64)
images = Glob.glob("*/[D,F,M,N]/*.png") #from SSD2/Clips
model = "/media/david/SSD2/PrimaryDataset/model_K1-5_CPU_epoch-6-0.9795-2023-12-16.jld2"
train("DFMN1-5", 20, images, model)
=#
# Global label => class index table. It is filled in place by
# `register_label_to_index!` and read by the `getindex` methods of the image containers.
const LABELTOINDEX = Dict{String,Int32}()
# Accepted types for the `pretrain` argument of `train`: a `Bool` or a `String`
# (the two `load_model` methods dispatch on these).
const Model = Union{Bool,String}
"""
    train(model_name, train_epochs, images, pretrain = true, train_test_split = 0.8, batch_size = 64)

Train a classifier on the png files listed in `images`.

The label of an image is the name of its parent folder. The label => index mapping is
registered globally and logged before the data loaders, model and optimiser are built
and handed to `training_loop!`.

# Arguments
- `model_name`: name used in the file names written during training.
- `train_epochs`: number of epochs to run.
- `images`: paths of the png files.
- `pretrain`: a `Bool` is forwarded to `load_model(::Bool, classes)`; a `String` is
  forwarded to `load_model(::String, classes)` as the path of a saved model state.
- `train_test_split`: fraction of the whole batches that goes to the training set.
- `batch_size`: images per batch; images beyond the last whole batch are not used.
"""
function train(
    model_name::String,
    train_epochs::Int64,
    images::Vector{String}, #glob_pattern::String = "*/*.png"
    pretrain::Model = true,
    train_test_split::Float64 = 0.8,
    batch_size::Int64 = 64,
)
    epochs = 1:train_epochs
    @assert !isempty(images) "No png images found"
    @info "$(length(images)) images in dataset"
    label_to_index = labels_to_dict(images)
    register_label_to_index!(label_to_index)
    @info "Text labels translate to: " label_to_index
    classes = length(label_to_index)
    @assert classes >= 2 "At least 2 label classes are required, for example: kiwi, not_kiwi"
    @info "$classes classes in dataset"
    @info "Device: $device"
    # Largest image count that is a whole number of batches.
    ceiling = seil(length(images), batch_size)
    train_test_index = train_test_idx(ceiling, batch_size, train_test_split)
    # The loader is not called `train`, so it does not shadow this function's own name.
    train_loader, train_sample, test =
        process_data(images, train_test_index, ceiling, batch_size)
    @info "Made data loaders"
    model = load_model(pretrain, classes)
    @info "Loaded model"
    opt = Flux.setup(Flux.Optimisers.Adam(1e-5), model)
    @info "Setup optimiser"
    # Log the epoch count, not the range (which would print as `1:N`).
    @info "Training for $train_epochs epochs: " now()
    training_loop!(
        model,
        opt,
        train_loader,
        train_sample,
        test,
        epochs,
        model_name,
        classes,
        label_to_index,
    )
    @info "Finished $(last(epochs)) epochs: " now()
end
# Wraps the vector of items used for training. Indexing an
# `ImageContainer{Vector{String}}` loads the image with augmentation applied.
struct ImageContainer{V<:Vector}
    img::V
end
# Wraps the vector of items used for evaluation. Indexing a
# `ValidationImageContainer{Vector{String}}` loads the image without augmentation.
struct ValidationImageContainer{V<:Vector}
    img::V
end
# Either of the two training-time containers; used to type `make_dataloader`'s argument.
const Container = Union{ImageContainer,ValidationImageContainer}
# Round `n` down to the nearest multiple of `batch_size`
# (the number of items that fit into whole batches).
function seil(n::Int, batch_size::Int)
    whole_batches = div(n, batch_size)
    return whole_batches * batch_size
end
# Index of the last training item: the number of whole batches in `ceiling` is scaled
# by `train_test_split`, rounded to a whole batch count, and converted back to items.
function train_test_idx(ceiling::Int, batch_size::Int, train_test_split::Float64)::Int
    n_batches = div(ceiling, batch_size)
    train_batches = round(n_batches * train_test_split)
    return convert(Int, train_batches * batch_size)
end
# Build the label => index mapping from a list of file paths. The label of a path is
# its second-to-last "/"-separated component (the parent folder); the unique labels are
# sorted and numbered from 1.
function labels_to_dict(list::Vector{String})::Dict{String,Int32}
    parent_dirs = map(path -> split(path, "/")[end-1], list)
    ordered = sort(unique(parent_dirs))
    # The declared return type converts keys and values to String / Int32.
    return Dict(label => index for (index, label) in enumerate(ordered))
end
"""
    register_label_to_index!(label_to_index::Dict{String,Int32})

Replace the content of the global `LABELTOINDEX` with the entries of `label_to_index`
and return `LABELTOINDEX`.

Thanks algunion
https://discourse.julialang.org/t/dataloader-scope-troubles/105207/4
"""
function register_label_to_index!(label_to_index::Dict{String,Int32})
    cleared = empty!(LABELTOINDEX)
    return merge!(cleared, label_to_index)
end
# Function used to move models to the compute device: `gpu` when `CUDA.functional()`
# is true, otherwise `cpu`. Evaluated once, when this line runs. The same assignment is
# repeated in the prediction code further down.
device = CUDA.functional() ? gpu : cpu
# Shuffle the file names with a fixed seed and build three data loaders:
#   train        - items 1:train_test_index, augmented (`ImageContainer`)
#   train_sample - a slice from the start of the training items, not augmented, used to
#                  report training accuracy
#   test         - items train_test_index+1:ceiling, not augmented
# Note that `seed!(1234)` reseeds the global RNG.
function process_data(array_of_file_names, train_test_index, ceiling, batch_size)
    seed!(1234)
    # Shuffle a copy: shuffling the caller's vector in place would make a second call
    # with the same vector produce a different train/test split.
    images = shuffle!(copy(array_of_file_names))
    # The sample has the size of the test set, but never reaches past the training items.
    sample_size = min(ceiling - train_test_index, train_test_index)
    train = make_dataloader(ImageContainer(images[1:train_test_index]), batch_size)
    train_sample =
        make_dataloader(ValidationImageContainer(images[1:sample_size]), batch_size)
    test = make_dataloader(
        ValidationImageContainer(images[train_test_index+1:ceiling]),
        batch_size,
    )
    return train, train_sample, test
end
# Number of items held by a container (extends `Base.length`).
function length(data::ImageContainer)
    return length(data.img)
end
function length(data::ValidationImageContainer)
    return length(data.img)
end
# Load training item `index`: read the image, add Gaussian noise (second argument
# `rand() * 0.2`), grey out random bands with `apply_mask!(x, 3, 3, 12)`, convert to a
# Float32 channel array and permute its dimensions with (3, 2, 1).
# Returns `(img, y)` where `y` is the class index looked up in `LABELTOINDEX` from the
# parent folder name of the file.
# NOTE(review): no resize or RGB conversion happens here, so the png is presumably
# already 224x224 RGB - confirm against the dataset.
function getindex(data::ImageContainer{Vector{String}}, index::Int)
    path = data.img[index]
    raw = Images.load(path)
    noisy = Noise.add_gauss(raw, (rand() * 0.2))
    masked = apply_mask!(noisy, 3, 3, 12)
    planes = collect(channelview(float32.(masked)))
    img = permutedims(planes, (3, 2, 1))
    label = split(path, "/")[end-1]
    y = LABELTOINDEX[label]
    return img, y
end
# Load evaluation item `index` without augmentation: read the image, convert it to a
# Float32 channel array and permute its dimensions with (3, 2, 1).
# Returns `(img, y)` where `y` is the class index looked up in `LABELTOINDEX` from the
# parent folder name of the file.
# NOTE(review): no resize or RGB conversion happens here, so the png is presumably
# already 224x224 RGB - confirm against the dataset.
function getindex(data::ValidationImageContainer{Vector{String}}, index::Int)
    path = data.img[index]
    raw = Images.load(path)
    planes = collect(channelview(float32.(raw)))
    img = permutedims(planes, (3, 2, 1))
    label = split(path, "/")[end-1]
    y = LABELTOINDEX[label]
    return img, y
end
# assumes 224px square images
# Overwrite random bands of rows, then random bands of columns, of `img` with a uniform
# grey (0.7, 0.7, 0.7). The bands come from `get_random_ranges`. Mutates and returns `img`.
function apply_mask!(
    img::Array{RGB{N0f8},2},
    max_number::Int = 3,
    min_size::Int = 3,
    max_size::Int = 22,
)
    grey = RGB{N0f8}(0.7, 0.7, 0.7)
    # horizontal
    row_bands = get_random_ranges(max_number, min_size, max_size)
    foreach(band -> (img[band, :] .= grey), row_bands)
    # vertical
    col_bands = get_random_ranges(max_number, min_size, max_size)
    foreach(band -> (img[:, band] .= grey), col_bands)
    return img
end
# Draw between 0 and `max_number` random index ranges that fit inside `1:image_size`
# (224 by default, matching the 224px square images used elsewhere in this file).
# For each range a start in `1:image_size` and an extent in `min_size:max_size` are
# drawn; the range is `start:start+extent` (so it covers `extent + 1` indices) and is
# drawn again when it would run past `image_size`.
# Returns a `Vector{UnitRange{Int}}`.
# Throws `ArgumentError` when at least one range is requested but `min_size` is too large
# for any range to fit (the rejection loop would otherwise never terminate).
function get_random_ranges(
    max_number::Int,
    min_size::Int,
    max_size::Int,
    image_size::Int = 224,
)
    number = rand(0:max_number)
    ranges = UnitRange{Int}[]
    number == 0 && return ranges
    min_size < image_size || throw(
        ArgumentError("min_size ($min_size) must be smaller than image_size ($image_size)"),
    )
    while length(ranges) < number
        start = rand(1:image_size)
        extent = rand(min_size:max_size)
        start + extent > image_size && continue
        push!(ranges, start:start+extent)
    end
    return ranges
end
# Wrap a container in a collating, parallel `Flux.DataLoader` with the given batch size.
# When `device` is `gpu` the loader is wrapped in a `CuIterator`.
function make_dataloader(container::Container, batch_size::Int)
    loader =
        Flux.DataLoader(container; batchsize = batch_size, collate = true, parallel = true)
    if device == gpu
        return CuIterator(loader)
    end
    return loader
end
# see load_model() from predict, and below
# Build a ResNet18 whose first layer group is kept as the backbone and whose head is
# replaced by pooling + flatten + `Dense(512 => classes)`. `pretrain` is forwarded to
# the ResNet constructor. The model is moved to `device`.
function load_model(pretrain::Bool, classes::Int64)
    # `ResNet` is the name this file imports (`using Metalhead: ResNet`); that form of
    # `using` does not bind the module name `Metalhead`.
    backbone = ResNet(18; pretrain = pretrain).layers[1]
    head = Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => classes))
    model = Flux.Chain(backbone, head) |> device
    return model
end
#If model classes == desired classes I don't empty the last layer
#That means that I can just train from where I left off for new data, DFMN model
#Could be a gotcha if I want to train a different 4 class model, no need for a switch just yet
# Rebuild the network saved at `model_path` (JLD2 key "model_state") and load its state.
# When the saved class count equals `classes` the loaded model is returned as is;
# otherwise its backbone is kept and a fresh `Dense(512 => classes)` head is attached.
# The model is moved to `device`.
function load_model(model_path::String, classes::Int64)
    model_state = JLD2.load(model_path, "model_state")
    # Presumably the bias vector of the final Dense layer, whose length is the class
    # count - TODO confirm against the layout written by `Flux.state`.
    model_classes = length(model_state[1][2][1][3][2])
    # `ResNet` is the name this file imports (`using Metalhead: ResNet`); that form of
    # `using` does not bind the module name `Metalhead`.
    backbone = ResNet(18; pretrain = false).layers[1]
    saved_head =
        Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => model_classes))
    m = Flux.Chain(backbone, saved_head)
    Flux.loadmodel!(m, model_state)
    if classes == model_classes
        model = m |> device
    else
        new_head = Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => classes))
        model = Flux.Chain(m.layers[1], new_head) |> device
    end
    return model
end
# Run model `m` over every `(x, y)` batch of loader `d` and return
# `(accuracy, confusion_matrix)`: the fraction of correct predictions rounded to
# 4 digits, and `MLBase.confusmat(c, actual, predicted)` for `c` classes.
function evaluate(m, d, c)
    n_correct = 0
    n_seen = 0
    predicted = Int64[]
    truth = Int64[]
    for (x, y) in d
        batch_pred = Flux.onecold(m(x))
        n_correct += sum(batch_pred .== y)
        n_seen += length(y)
        append!(predicted, batch_pred)
        append!(truth, y)
    end
    accuracy = round(n_correct / n_seen, digits = 4)
    confusion_matrix = MLBase.confusmat(c, truth, predicted)
    return accuracy, confusion_matrix #, roc, f1
end
# Run one pass of `Flux.train!` over the `train` loader, using logit cross-entropy
# against one-hot targets over `1:classes`.
function train_epoch!(model; opt, train, classes)
    loss(m, x, y) = Flux.Losses.logitcrossentropy(m(x), Flux.onehotbatch(y, 1:classes))
    return Flux.train!(loss, model, train, opt)
end
# Write every `key => value` pair of `dict`, one per line, to
# "labels_<model_name>-<today>.txt" in the current directory, then log a confirmation.
function dict_to_text_file(dict, model_name)
    open("labels_$(model_name)-$(today()).txt", "w") do file
        for (key, value) in dict
            print(file, "$(key) => $(value)\n")
        end
    end
    @info "Saved labels to file for future reference"
end
# Evaluate once on `test` as a warm up, then for every epoch: train, evaluate on
# `train_sample` and `test`, log accuracies and confusion matrices, and save the model
# state (moved to cpu) as a jld2 file. The label mapping is written to a text file at
# the start of epoch 1.
# `save_dir` is the directory the jld2 files are written to; its default keeps the
# location that used to be hard-coded.
function training_loop!(
    model,
    opt,
    train,
    train_sample,
    test,
    epochs::UnitRange{Int64},
    model_name::String,
    classes,
    label_to_index;
    save_dir::String = "/media/david/SSD2",
)
    warmup_accuracy, vcm = @time evaluate(model, test, classes)
    @info "warm up accuracy" accuracy = warmup_accuracy
    @info "warm up confusion matrix" vcm
    for epoch in epochs
        println("")
        @info "Starting Epoch: $epoch"
        epoch == 1 && dict_to_text_file(label_to_index, model_name)
        @time train_epoch!(model; opt, train, classes)
        train_accuracy, train_confusion_matrix =
            @time evaluate(model, train_sample, classes)
        @info "Epoch: $epoch"
        @info "train" accuracy = train_accuracy
        @info "train" train_confusion_matrix
        test_accuracy, test_confusion_matrix = @time evaluate(model, test, classes)
        @info "test" accuracy = test_accuracy
        @info "test" test_confusion_matrix
        # A model is saved after every epoch (there is no best-so-far check).
        save_path = joinpath(
            save_dir,
            "model_$(model_name)_CPU_epoch-$epoch-$test_accuracy-$(today()).jld2",
        )
        jldsave(save_path; model_state = Flux.state(cpu(model)))
        @info "Saved a best_model"
    end
end
module SkraakML
# Print a fixed greeting to standard output.
function greet()
    return print("Hello World!")
end
end # module SkraakML
# Predict.jl
export predict
export get_images_from_audio
using WAV,
DSP, Images, ThreadsX, Dates, DataFrames, CSV, Flux, CUDA, Metalhead, JLD2, FLAC, Glob
import Base: length, getindex
##Dependency, duplicated from Utility
# Resample `signal` along its first dimension by the ratio 16000 / freq and return the
# resampled signal together with the new frequency, 16000.
function _resample_to_16000hz(signal, freq)
    resampled = DSP.resample(signal, 16000.0f0 / freq; dims = 1)
    return resampled, 16000
end
##Dependency, duplicated from Clips
# Turn one audio window into a 224x224 Float32 colour-mapped spectrogram image.
# Steps: power spectrogram (window 400, overlap 2) -> zeros replaced by the smallest
# positive power so the dB conversion stays finite -> dB -> shifted by |minimum| and
# scaled by the maximum -> flipped along the first dimension -> colour map "L4" ->
# resized to 224x224 -> Float32.
# NOTE(review): `PerceptualColourMaps` / `cmap` are not in the `using` list visible
# above this function - presumably brought in elsewhere in the package; confirm.
function _get_image_from_sample(sample, f) #sample::Vector{Float64}
    spec = DSP.spectrogram(sample, 400, 2; fs = convert(Int, f))
    power = spec.power
    if minimum(power) == 0.0
        # The smallest positive value is the second-smallest distinct value the previous
        # sort-based lookup picked. A window that is entirely zero (digital silence) has
        # no such value and used to raise a BoundsError; fall back to machine epsilon.
        positive = Iterators.filter(>(0), power)
        floor_value = isempty(positive) ? eps(eltype(power)) : minimum(positive)
        replace!(power, 0.0 => floor_value)
    end
    db = DSP.pow2db.(power)
    shifted = db .+ abs(minimum(db))
    peak = maximum(shifted)
    # A completely flat spectrogram has peak 0; skip the division to avoid 0/0 = NaN.
    scaled = peak == 0 ? shifted : shifted ./ peak
    flipped = reverse(scaled, dims = 1)
    coloured = PerceptualColourMaps.applycolourmap(flipped, cmap("L4"))
    resized = imresize(coloured, 224, 224)
    image = Float32.(resized)
    return image
end
"""
    predict(glob_pattern::String, model::String)
    predict(folders::Vector{String}, model::String)

Run a saved model over every folder matched by `glob_pattern` (or listed in `folders`)
and save the predictions as a csv file inside each folder, similar to opensoundscape.

Args:

•  glob pattern (folder/) or a vector of folders
•  model path

Returns: Nothing - This function saves csv files.

I use this function to find kiwi from new data gathered on a trip. And to predict D/F/M/N for images clipped from primary detections.
It works on both audio (wav or flac) and png images.

Note:
From Pomona-3/Pomona-3/
julia -t 4
Don't forget temp environment:  ] activate --temp

Use like:
using Skraak
glob_pattern = "*/*/"
model = "/media/david/SSD2/PrimaryDataset/model_K1-9_original_set_CPU_epoch-7-0.9924-2024-03-05.jld2"
glob_pattern = "Clips_2024-10-21/"
model = "/media/david/SSD1/Clips/model_DFMN1-5_CPU_epoch-18-0.9132-2024-01-29.jld2"
predict(glob_pattern, model)
"""
function predict(glob_pattern::String, model::String)
    # Expand the pattern and hand over to the folder-vector method, which loads the
    # model, logs the folders and predicts on each of them.
    folders = Glob.glob(glob_pattern)
    return predict(folders, model)
end
# Load the model at path `model`, move it to `device`, and predict on each folder in turn.
function predict(folders::Vector{String}, model::String)
    net = load_model_pred(model) |> device
    @info "Folders: $folders"
    foreach(folders) do folder
        @info "Working on: $folder"
        predict_folder(folder, net)
    end
end
#~~~~~ The guts ~~~~~#
# see load_model() from train, different input types
# Rebuild the network saved at `model_path` (JLD2 key "model_state"): a ResNet18
# backbone plus a pooling + flatten + Dense head sized to the saved class count, then
# load the saved state into it. The model is returned on the CPU.
function load_model_pred(model_path::String)
    model_state = JLD2.load(model_path, "model_state")
    # Presumably the bias vector of the final Dense layer - TODO confirm the layout.
    model_classes = length(model_state[1][2][1][3][2])
    @info "Model classes: $model_classes"
    backbone = Metalhead.ResNet(18, pretrain = false).layers[1]
    head = Flux.Chain(AdaptiveMeanPool((1, 1)), Flux.flatten, Dense(512 => model_classes))
    model = Flux.Chain(backbone, head)
    Flux.loadmodel!(model, model_state)
    return model
end
#=
function load_bson(model_path::String)
BSON.@load model_path model
end
=#
# Predict on one folder. png files take priority: when any are present only they are
# predicted on. Otherwise all wav and flac files are predicted on, and when none of the
# three kinds is present a message is logged.
function predict_folder(folder::String, model)
    wav = Glob.glob("$folder/*.[W,w][A,a][V,v]")
    flac = Glob.glob("$folder/*.flac")
    audio_files = vcat(wav, flac) #if wav and flac both present will predict on all
    png_files = Glob.glob("$folder/*.png")
    if !isempty(png_files)
        return predict_image_folder(png_files, model, folder)
    elseif length(audio_files) > 0
        return predict_audio_folder(audio_files, model, folder)
    else
        @info "No png, flac, wav, WAV files present in $folder"
        return nothing
    end
end
# Function used to move the model to the compute device: `gpu` when `CUDA.functional()`
# is true, otherwise `cpu`. Same assignment as in the training code above.
device = CUDA.functional() ? gpu : cpu
# Predict from png images
# Wraps the vector of items to predict on. Indexing a
# `PredictImageContainer{Vector{String}}` returns the loaded image and its path.
struct PredictImageContainer{V<:Vector}
    img::V
end
# Number of items held by the container (extends `Base.length`).
function length(data::PredictImageContainer)
    return length(data.img)
end
# Load item `idx` for prediction: read the image, resize it to 224x224, convert it to
# RGB, turn it into a Float32 channel array and permute its dimensions with (3, 2, 1).
# Returns `(img, path)`.
function getindex(data::PredictImageContainer{Vector{String}}, idx::Int)
    path = data.img[idx]
    raw = Images.load(path)
    resized = Images.imresize(raw, 224, 224)
    rgb = Images.RGB.(resized)
    planes = collect(channelview(float32.(rgb)))
    img = permutedims(planes, (3, 2, 1))
    return img, path
end
# Predict a label for every png in `png_files` and write a csv with the columns `file`
# (the last "/"-separated component of each path) and `label` to
# "<folder>/preds-<today>.csv".
function predict_image_folder(png_files::Vector{String}, model, folder::String)
    n_files = length(png_files)
    @assert (n_files > 0) "No png files present in $folder"
    @info "$(n_files) png_files in $folder"
    save_path = "$folder/preds-$(today()).csv"
    loader = png_loader(png_files)
    preds, files = @time predict_pngs(model, loader)
    file_names = last.(split.(files, "/"))
    df = DataFrames.DataFrame(file = file_names, label = preds)
    return CSV.write(save_path, df)
end
# Wrap the png paths in a collating, parallel `Flux.DataLoader` with batches of 64.
# When `device` is `gpu` the loader is wrapped in a `CuIterator`.
function png_loader(png_files::Vector{String})
    container = PredictImageContainer(png_files)
    loader = Flux.DataLoader(container; batchsize = 64, collate = true, parallel = true)
    if device == gpu
        return CuIterator(loader)
    end
    return loader
end
# Run model `m` over every `(x, paths)` batch of loader `d`.
# Returns `(pred, path)`: the predicted class index of every image and the matching
# file paths, in loader order.
function predict_pngs(m, d)
    @info "Predicting..."
    # Typed accumulators instead of untyped `[]`, so the results are not `Vector{Any}`.
    pred = Int[]
    path = String[]
    for (x, pth) in d
        p = Flux.onecold(m(x))
        append!(pred, p)
        append!(path, pth)
    end
    return pred, path
end
# Predict from audio files
# Predict on every audio file of a folder. A csv holding only the header
# (file, start_time, end_time, label) is written to "<folder>/preds-<today>.csv" first;
# the predictions of each file are then appended to it.
function predict_audio_folder(audio_files::Vector{String}, model, folder::String)
    n_files = length(audio_files)
    @assert (n_files > 0) "No wav or flac audio files present in $folder"
    @info "$(n_files) audio_files in $folder"
    header = DataFrames.DataFrame(
        file = String[],
        start_time = Float64[],
        end_time = Float64[],
        label = Int[],
    )
    save_path = "$folder/preds-$(today()).csv"
    CSV.write(save_path, header)
    for file in audio_files
        file_preds = predict_audio_file(file, model)
        CSV.write(save_path, file_preds, append = true)
    end
end
# Predict a label for every window of one audio file.
# Returns a sorted DataFrame with the columns file, start_time, end_time and label,
# where the times are the window bounds supplied by `audio_loader`.
function predict_audio_file(file::String, model)
    #check form of opensoundscape preds.csv and needed by my make_clips
    @info "File: $file"
    data = @time audio_loader(file)
    # Typed accumulators instead of untyped `[]`; `times` avoids shadowing `Base.time`.
    pred = Int[]
    times = Tuple{Float64,Float64}[]
    @time for (x, t) in data
        p = Flux.onecold(model(x))
        append!(pred, p)
        append!(times, t)
    end
    f = (repeat(["$file"], length(times)))
    df = DataFrames.DataFrame(
        :file => f,
        :start_time => first.(times),
        :end_time => last.(times),
        :label => pred,
    )
    sort!(df)
    return df
end
# Build a single-batch data loader over the spectrogram images of one audio file,
# paired with the (start_time, end_time) of every window in seconds from the start of
# the file.
# Windows are `increment` seconds long. The time stamps assume consecutive windows
# overlap by `increment / divisor` seconds (no overlap when `divisor <= 1`), so the hop
# between window starts is `increment - increment / divisor`. For the default
# `divisor = 2` this is the same 2.5 s step as before; for other values the step now
# follows the hop and not the overlap, and `divisor = 0` no longer gives an infinite step.
# NOTE(review): the stamps only match the audio when `get_images_from_audio` splits the
# signal with that same overlap - confirm.
function audio_loader(file::String, increment::Int = 5, divisor::Int = 2)
    raw_images, n_samples = get_images_from_audio(file, increment, divisor)
    images = reshape_images(raw_images, n_samples)
    hop_seconds = divisor > 1 ? increment - increment / divisor : float(increment)
    # Start time and end time for each audio clip, in seconds relative to the start of the file.
    start_time = (0:(n_samples-1)) .* hop_seconds
    end_time = start_time .+ increment
    time = collect(zip(start_time, end_time))
    loader = Flux.DataLoader((images, time), batchsize = n_samples, shuffle = false)
    device == gpu ? loader = CuIterator(loader) : nothing #check this works with gpu
    return loader
end
# Stack `n_samples` images into one (224, 224, 3, n_samples) array, one image per
# slice of the last dimension.
# Each image is reshaped to (224, 224, 3) and copied into its own slice. Concatenating
# 3-D images with `hcat` and reshaping afterwards would join them along the second
# dimension, so the reshape would mix data from different images whenever there is more
# than one.
# Throws `DimensionMismatch` when `n_samples` differs from the number of images or an
# image does not hold 224*224*3 values.
function reshape_images(raw_images, n_samples)
    length(raw_images) == n_samples || throw(
        DimensionMismatch("expected $n_samples images, got $(length(raw_images))"),
    )
    T = isempty(raw_images) ? Float32 : mapreduce(eltype, promote_type, raw_images)
    images = Array{T}(undef, 224, 224, 3, n_samples)
    for (k, img) in enumerate(raw_images)
        images[:, :, :, k] = reshape(img, 224, 224, 3)
    end
    return images
end
#= not needed
function get_image_for_inference(sample, f)
image =
#! format: off
_get_image_from_sample(sample, f) |>
# x -> collect(channelview(float32.(x))) |>
x -> permutedims(x, (3, 2, 1))
#! format: on
return image
end
=#
# need to change divisor to a overlap fraction, check interaction with audio_loader()
# Load an audio file, resample it to 16000 Hz when its rate is higher, split the first
# channel into windows of `increment` seconds and turn every window into a spectrogram
# image. Returns `(raw_images, n_samples)`.
# Consecutive windows overlap by `increment / divisor` seconds (2.5 s for the defaults);
# `divisor <= 1` means no overlap. The overlap used to be forced to 0, which ignored
# `divisor` and did not match the time stamps computed in `audio_loader`.
function get_images_from_audio(file::String, increment::Int = 5, divisor::Int = 2) #5s sample, 2.5s hop
    signal, freq = load_audio_file(file)
    if freq > 16000
        signal, freq = _resample_to_16000hz(signal, freq)
    end
    f = convert(Int, freq)
    inc = increment * f
    # Third argument of `DSP.arraysplit` is the number of overlapping samples.
    noverlap = divisor > 1 ? inc ÷ divisor : 0
    split_signal = DSP.arraysplit(signal[:, 1], inc, noverlap)
    raw_images = ThreadsX.map(x -> _get_image_from_sample(x, f), split_signal)
    n_samples = length(raw_images)
    return raw_images, n_samples
end
# Read a wav or flac file and return `(signal, freq)`.
# The type is taken from the text after the last "." of the file name; "WAV" and "wav"
# are read with `wavread`, "flac" with `load`. Any other extension, or a file whose
# first channel is empty, fails an assertion.
function load_audio_file(file::String)
    ext = split(file, ".")[end]
    @assert ext in ["WAV", "wav", "flac"] "Unsupported audio file type, requires wav or flac."
    signal, freq = ext == "flac" ? load(file) : WAV.WAV.wavread(file)
    first_channel = signal[:, 1]
    @assert !isempty(first_channel) "$file seems to be empty, could it be corrupted?\nYou could delete it, or replace it with a known\ngood version from SD card or backup."
    return signal, freq
end
############### PYTHON Opensoundscape ################
#=
# Python 3.8.12, opensoundscape 0.7.1
# Dont forget conda activate opensoundscape
# Dont forget to modify file names and glob pattern
# Run script in Pomona-2, hard code trip date in the glob
# python /media/david/USB/Skraak/src/predict.py
from opensoundscape.torch.models.cnn import load_model
import opensoundscape
import torch
from pathlib import Path
import numpy as np
import pandas as pd
from glob import glob
import os
from datetime import datetime
model = load_model('/home/david/best.model0')
# folders = Glob.glob('./*/2023-?????/')
# folders = Glob.glob('./*/*/2024-05-0?')
folders = Glob.glob('./*/2024-10-18/')
for folder in folders:
os.chdir(folder)
print(folder, ' start: ', datetime.now())
# Beware, secretary island files are .wav
field_recordings = Glob.glob('./*.[W,w][A,a][V,v]')
scores, preds, unsafe = model.predict(
field_recordings,
binary_preds = 'single_target',
overlap_fraction = 0.5,
batch_size = 128,
num_workers = 12)
scores.to_csv("scores-2024-10-21.csv")
preds.to_csv("preds-2024-10-21.csv")
os.chdir('../..') # Be careful this matches the glob on line 284
print(folder, ' done: ', datetime.now())
print()
print()
=#
#=Kahurangi
folders = Glob.glob('./*/')
for folder in folders:
os.chdir(folder)
print(folder, ' start: ', datetime.now())
# Beware, secretary island files are .wav
field_recordings = Glob.glob('./*.[W,w][A,a][V,v]')
scores, preds, unsafe = model.predict(
field_recordings,
binary_preds = 'single_target',
overlap_fraction = 0.5,
batch_size = 128,
num_workers = 12)
scores.to_csv("scores-2024-10-21.csv")
preds.to_csv("preds-2024-10-21.csv")
os.chdir('./..') # Be careful this matches the glob on line 284
print(folder, ' done: ', datetime.now())
print()
print()
=#
# Skraak
Identify bird calls using AI, and monitor call frequency.
__Skraak is intended to be simple to use for simple people like me.__
This package serves [skraak.kiwi](https://skraak.kiwi).
Most of the skraak.kiwi data has been recorded using Open Acoustics AudioMoth's or μMoth's at 16000 Hz. DOC recorders at 8000hz work fine.
It is a good idea to use an Nvidia GPU. Everything should work fine on CPU, just slow.
AMD and Apple Silicon GPU's are not supported but should be easy for you to get working with julia AMD or Metal packages.
If you are doing serious work, start the julia repl with: julia -t n where n is up to 1/2 the number of cores you have. I do 4, this is enough to keep up with a gamer style GPU.
__You can use Skraak too.__
```
[Install Julia](https://julialang.org/downloads/platform/), Julia-1.10 or newer
[git clone the Skraak project](https://github.com/quietlight/Skraak), if you dont have git or the git cli, you can download a zip file by clicking the <code> button.
cd to your Skraak folder
start the julia repl with $julia
(You will want to install Revise and OhMyREPL, just do 'using Revise, OhMyREPL' in the Julia repl, add 'using Revise, OhMyREPL' to ~/.julia/config/startup.jl)
type: ] (to enter Pkg mode)
type: activate .
type: instantiate
backspace to exit Pkg mode
exit repl with ctrl-D.
```
Later:
```
start the julia repl with $julia
type: ] (to enter Pkg mode)
type: dev path/to/Skraak (to make it a local package)
backspace to exit Pkg mode
type: using Skraak, Glob (glob is only here to help you refine your glob patterns)
WORK...
When finished working you can if you like do 'free Skraak' in Pkg mode (accessed with ']')
```
1. Take some WAV's organised into a file structure LOCATION/TRIP_DATE/WAV_FILES
2. and labels saved in a csv in the form:
* file(String),start_time,end_time,label(Integer) (where start_time and end_time are in seconds from the start of the wav file)
* at least 2 label classes are required, for example Kiwi, Not
3. Generate a primary dataset of spectrogram images with the following file structure:
* DATASET/AUDIO_FILE*/LABEL*/PNG's (png files must be 224X224 px square RGB).
* This structure is required, when training, __the parent folder of a file is the label__.
* This function creates a folder for each file, creates subfolders for each label, then saves png files in the appropriate label sub folder.
* Space is needed. It uses the whole audio file. (I aim for 96% Not, 4% Kiwi)
* and saves a flac copy for reference
> I use labels, [K, N] in words [Kiwi, Not]. Anything will work, the unique text labels are sorted alphabetically and mapped to integer labels in the training process.
> More than 2 label classes is fine, but keep it simple until you have a lot of data.
> It is better __not__ to have everything in big folders, 100_000 files in a folder on a Fat32 removable drive will rapidly grind to a stand still.
> You could have many thousands of K and N folders, for example, the model does not care.
> Native file systems on mac/linux will work ok. I use ext4 (linux) file systems on external SSD's for both linux and mac.
```
```
4. Train a Resnet18 model, either pretrained on Imagenet, or preferably the pretrained Skraak Kiwi model, which is currently trained on 7_700_000 images.
Skraak trains on 5 second clips, converted to 224x224 pixel RGB spectrogram images.
```
using Skraak, Glob
glob_pattern_1 = "Clips*/[D,F,M,N]/*.png" #for example. Note: requires png's as input.
glob_pattern_2 = "Dataset*/[K, N]/*.png"
# train takes a vector of png file paths, so expand the glob pattern first.
# Train a model named Test1 for 2 epochs on png files found by glob_pattern_1,
# start with a pretrained model.
train("Test1", 2, Glob.glob(glob_pattern_1), true)
# Train a model named Test2 for 2 epochs on png files found by glob_pattern_2,
# train using model found at "path/to/model.jld2"
train("Test2", 2, Glob.glob(glob_pattern_2), "path/to/model.jld2")
# Note: Your unique text labels are sorted alphabetically, and converted to
# integers, [1,2,3...] to be consumed by the flux model
# A text file will be saved beside the model.jld2, with the label to
# integer mapping.
```
5. Run inference on raw data using a trained model
Skraak will try to find png images first, in the folders covered by the glob pattern. If there are no png's found it will predict on wav or flac files, using 5 second audio clips, converted to 224x224 pixel RGB spectrogram images, with a 2.5 second hop.
> You are responsible for providing an appropriate model.
> I use a binary Kiwi/Not model for finding calls in audio data, and a Duet/Female/Male/Not model on png clips made from calls detected by the binary model.
> Find some models to start with in the Models folder
```
using Skraak
glob_pattern = "*/*/" #Note: requires folders as input. Folders contain flac, wav or png files.
# Predict label classes of png, wav or flac files found in folders specified by
# glob_pattern using model.jld2. A preds-<date>.csv file is saved in each folder
predict(glob_pattern, "path/to/model.jld2")
```
6. Generate audio clips and spectrogram images of all calls found.
```
# Make clips from a preds.csv file of the form:
# file(String),start_time,end_time,label(Int)
# 1 is the label, it can be any int present in the label field of preds.csv
# It saves clips in a folder 'Clips_2023-11-09'
make_clips("preds.csv", 1)
```
7. Sort calls into subclasses (say: Duet, Female, Male, Nothing) manually, or using a model combined with human supervision. TODO
8. Store data from calls and file metadata in a DuckDB database for statistical analysis using SQL, DataFrames, Plots.
```
I will not document this until the DuckDB storage api has stabilised.
For now always store a csv backup using "EXPORT DATABASE 'Backup_2023-10-10';" in the duckdb cli.
I highly recommend storing data in a duckdb database.
Querying a duckdb database with SQL is faster than even julia DataFrames, both leave Pandas in the dust.
```
9. Repeat, iterating on your models as you accumulate more data. It's hard until it gets easy.
Managing datasets is like gardening, it takes some weeding and a _lot_ of compost (aka data) to get a good model growing.
Julia is great for machine learning because it is relatively simple to get a GPU working. It does have disadvantages at GPT-4 scale, but for this kind of work it is excellent. Julia shines with any scientific computing task.
name = "SkraakML"
uuid = "960381bc-3737-4297-a0a0-71f7f33f3c12"
authors = ["David Cary <cdecary@gmail.com>"]
version = "0.1.0"
[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
FLAC = "abae9e3b-a9a0-4778-b5c6-ca109b507d99"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
ImageTransformations = "02fcd773-0e25-5acc-982a-7f6622650795"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
MLBase = "f0e99cf1-93fa-52ec-9ecc-5026115318e0"
Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
Noise = "81d43f40-5267-43b7-ae1c-8b967f377efa"
PerceptualColourMaps = "54e51dfa-9dd7-5231-aa84-a4037b83483a"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ThreadsX = "ac1d9e8a-700a-412c-b207-f0111f4b6c0d"
WAV = "8149f6b0-98f6-5db9-b78f-408fbbb8ef88"
cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
MIT License
Copyright (c) 2023 David Cary <cdecary@gmail.com> and contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.