# 2UBDFCJH2BG6U6SY2YDJ7QK4JOLUJAHOYZS3YRQ7E7U4UGP4YR5QC
# Labels.jl

export aggregate_labels, audiodata_db

using CSV, DataFrames, Glob, Random, DBInterface, DuckDB
using DataFramesMeta: @transform!, @subset!, @byrow, @passmissing

#=
actual.csv must be list of qualified png file names:
D/C05-2023-04-15-20230219_223000-380-470.png

using Glob, DataFrames, CSV
a=glob("[M,F,D,N]/*.png")
df = DataFrame(file=a)
CSV.write("actual_mfdn.csv", df)

make a folder D,F,M,N
mkpath.(["D", "F", "M", "N"])

move wavs to match pngs
df=DataFrame(CSV.File("actual_mfdn.csv"))
for row in eachrow(df)
    src=split(row.file, "/")[2]
    dst=row.file
    mv(src, dst)
    mv(chop(src, tail=3)*"wav", chop(dst, tail=3)*"wav")
end
=#

"""
    actual_from_folders(labels::Vector{String})::DataFrame

Collect every `<label>/*.png` path (relative to the current directory) for each entry
of `labels`, write the list to `actual.csv` as a single `file` column, and return it
as a `DataFrame`.

Run from the parent folder of the label folders, e.g. `labels = ["D", "F", "M", "N"]`.
"""
function actual_from_folders(labels::Vector{String})::DataFrame
    paths = String[]
    for label in labels
        append!(paths, glob("$label/*.png"))
    end
    df = DataFrame(file = paths)
    CSV.write("actual.csv", df)
    return df
end

#=
df=aggregate_labels()
audiodata_db(df, "pomona_labels_20230418") NOT_WORKING maybe titles

to use cli, need to remove header row
duckdb /media/david/SSD1/AudioData.duckdb
COPY pomona_labels_20230418 FROM 'DB_Labels/pomona_labels_2023-11-02.csv';
COPY pomona_files FROM 'DB_Files/pomona_files_20231102.csv';

Then backup with:
EXPORT DATABASE 'AudioDataBackup_2023-11-14';
.quit
Then quit and backup using cp on the db file, dated copy
Then rsync ssd to usb
rsync -avzr --delete /media/david/SSD1/ /media/david/USB/

note: run on mac
cd skraak.kiwi
julia-1.9
using Franklin
serve()
=#

# New one, without noise and distance, does not do :box anymore therefore requires
# new db schema
"""
    aggregate_labels(actual="actual.csv", outfile="labels.csv", hdr=false)

Turn the hand-classification csv `actual` into a label table, write it to `outfile`
and return it as a `DataFrame`.

`actual` must have a `file` column whose entries look like

    [D, F, M, N]/C05-2023-04-15-20230219_223000-380-470.png

The folder part is the label; the file name is split on `-` to obtain the fields.
Output columns, in order: `location`, `start_time`, `end_time`, `male`, `female`,
`duet`, `not_kiwi`, `other_label`, `file` (the 5th `-` field with `.WAV` appended).

# Arguments
- `actual`: path of the input csv.
- `outfile`: path of the csv to write.
- `hdr`: whether `outfile` gets a header row (the duckdb cli `COPY` workflow in the
  notes above wants none).

Assumes it is run from a `Clips_xxxx-xx-xx` folder. The result is intended for
insertion into AudioData.duckdb via the duckdb cli or `audiodata_db`.
"""
function aggregate_labels(
    actual::String = "actual.csv",
    outfile::String = "labels.csv",
    hdr::Bool = false, #header for outfile
)::DataFrame
    df = DataFrame(CSV.File(actual))

    # location, f, start_time, end_time -- all parsed from the png file name
    @transform!(df, @byrow :location = split(split(:file, "/")[2], "-")[1])
    @transform!(df, @byrow :f = split(split(:file, "/")[2], "-")[5] * ".WAV")
    @transform!(df, @byrow :start_time = split(split(:file, "/")[2], "-")[end-1])
    @transform!(
        df,
        # tail = 4 strips the ".png" extension from the last field
        @byrow :end_time = chop(split(split(:file, "/")[2], "-")[end], tail = 4)
    )
    #@transform!( df, @byrow :box = "[$(split(split(:file, "/")[2], "-")[end-1]), $(chop(split(split(:file, "/")[2], "-")[end], tail=4))]")

    # male, female, duet, not -- derived from the label folder
    @transform!(df, @byrow @passmissing :male = split(:file, "/")[1] == "M")
    @transform!(df, @byrow @passmissing :female = split(:file, "/")[1] == "F")
    @transform!(df, @byrow @passmissing :duet = split(:file, "/")[1] == "D")
    @transform!(
        df,
        @byrow @passmissing :not_kiwi = split(:file, "/")[1] in ["KA", "KE", "N", "Q"]
    )

    # other_label: keep the folder name for the non-kiwi species folders only
    @transform!(
        df,
        @byrow @passmissing :other_label =
            split(:file, "/")[1] in ["KA", "KE", "Q"] ? split(:file, "/")[1] : missing
    )

    # remove unwanted cols, rename f to file
    select!(df, Not([:file]))
    rename!(df, :f => :file)
    CSV.write(outfile, df; header = hdr)
    return df
end

"""
    audiodata_db(df::DataFrame, table::String)

Insert every row of `df` into `table` of AudioData.duckdb.

The database is opened at `/media/david/SSD1/AudioData.duckdb` on Linux and at
`/Volumes/SSD1/AudioData.duckdb` otherwise. `df` is registered with the connection
under a random 6-character name and copied with `INSERT INTO table SELECT *`.
The connection is closed even if registration or the insert throws.

`table` is interpolated into the SQL text unescaped, so pass trusted names only.

    audiodata_db(df, "pomona_labels_20230418")
"""
function audiodata_db(df::DataFrame, table::String)
    db_path = if Sys.islinux()
        "/media/david/SSD1/AudioData.duckdb"
    else
        "/Volumes/SSD1/AudioData.duckdb"
    end
    con = DBInterface.connect(DuckDB.DB, db_path)
    try
        temp_name = randstring(6)
        DuckDB.register_data_frame(con, df, temp_name)
        DBInterface.execute(
            con,
            """
            INSERT
            INTO $table
            SELECT *
            FROM '$temp_name'
            """,
        )
    finally
        # always release the database file, also when the insert fails
        DBInterface.close!(con)
    end
    return nothing
end
# FileMetaData.jl

export file_metadata_to_df

using DataFrames, Dates, Glob, Random, SHA, TimeZones, WAV, XMLDict
#DelimitedFiles, DuckDB, JSON3

#=
used like:
using Glob, Skraak, CSV
folders=glob("*/2023-11-02/")
for folder in folders
    cd(folder)
    try
        df = Skraak.file_metadata_to_df()
        CSV.write("/media/david/Pomona-3/Pomona-3/pomona_files_20231102.csv", df; append=true)
    catch
        @warn "error with $folder"
    end
    cd("/media/david/Pomona-3/Pomona-3/")
end

Then using duckdb cli from SSD:
duckdb AudioData.duckdb
show tables;
SELECT * FROM pomona_files;
COPY pomona_files FROM '/media/david/Pomona-3/Pomona-3/pomona_files_20231019.csv';
SELECT * FROM pomona_files;

Then backup with:
EXPORT DATABASE 'AudioDataBackup_2023-07-29';
.quit
Then quit and backup using cp on the db file
Then rsync ssd to usb
rsync -avzr --delete /media/david/SSD1/ /media/david/USB/
=#

# Split the ICMT comment stored in a WAV file's metadata chunks into its
# space-separated fields. Comments with 22 or more fields are cut to the first 19
# (presumably a longer comment variant with a trailing note -- TODO confirm).
function _icmt_fields(binary_metadata)
    fields = split(wav_info_read(binary_metadata)[:ICMT], " ")
    return length(fields) < 22 ? fields : fields[1:19]
end

# Build the ZonedDateTime encoded in an ICMT comment: field 3 is HH:MM:SS, field 4
# is dd/mm/yyyy and field 5 carries the UTC offset (first 4 and last 1 characters
# are stripped, presumably "(UTC" and ")"). An empty offset is read as "+00".
function _icmt_zdt(fields)
    date = split(fields[4], "/")
    time = split(fields[3], ":")
    tz = chop(fields[5], head = 4, tail = 1)
    offset = isempty(tz) ? "+00" : tz
    stamp = string(
        date[3], "-", date[2], "-", date[1],
        "T", time[1], ":", time[2], ":", time[3], ".000", offset,
    )
    return ZonedDateTime(stamp)
end

"""
    file_metadata_to_df()

Build one metadata row per `*.WAV` file in the current directory and return them as a
`DataFrame`. Returns the empty, correctly typed frame when the folder has no WAV files.

`disk`, `location` and `trip_date` are the last three components of `pwd()`.
Latitude and longitude come from the waypoint of the gpx file in the folder (exactly
one is expected). The recording period start and finish are the timestamps of the
first and last WAV (sorted by name), converted to Pacific/Auckland time. Per file the
function records duration, sample rate, UTC and local timestamps, and the moth id,
gain, battery and temperature fields of the WAV's ICMT comment, the SHA-256 of the file
contents, and whether the midpoint of an assumed 15 minute recording falls at night.

A file that fails to parse is skipped with a warning.

Needs raw audiomoth wav files, a gpx, and `/media/david/SSD1/dawn_dusk.csv`.
"""
function file_metadata_to_df()
    # Columns: disk, location, trip_date, file, latitude, longitude,
    # start_recording_period_localt, finish_recording_period_localt, duration,
    # sample_rate, utc, ldt, moth_id, gain, battery, temperature, sha2_256, night
    df = DataFrame(
        disk = String[],
        location = String[],
        trip_date = String[],
        file = String[],
        latitude = Float64[],
        longitude = Float64[],
        start_recording_period_localt = String[],
        finish_recording_period_localt = String[],
        duration = Float64[],
        sample_rate = Int[],
        utc = String[],
        ldt = String[],
        moth_id = String[],
        gain = String[],
        battery = Float64[],
        temperature = Float64[],
        sha2_256 = String[],
        night = Bool[],
    )

    #Get WAV list for folder
    wav_list = glob("*.WAV") |> sort

    #Return empty df if nothing in the folder
    isempty(wav_list) && return df

    #Get path info from file system
    raw_path_vec = split(pwd(), "/")[end-2:end]
    disk = raw_path_vec[1]
    location = raw_path_vec[2]
    trip_date = raw_path_vec[3]

    #Get location, assumes 1 gpx is in the folder
    waypoint = glob("*.gpx")
    length(waypoint) != 1 &&
        @error "expected exactly 1 gpx file in $trip_date $location, found $(length(waypoint))"
    loc = read(waypoint[1], String) |> xml_dict
    latitude = parse(Float64, (loc["gpx"]["wpt"][:lat]))
    longitude = parse(Float64, (loc["gpx"]["wpt"][:lon]))

    timestamp_format = "yyyy-mm-dd HH:MM:SSzzzz"
    local_tz = tz"Pacific/Auckland"

    #Start of recording period
    _, _, _, binary_metadata_start = wavread(wav_list[1])
    zdt1 = _icmt_zdt(_icmt_fields(binary_metadata_start))
    start_recording_period_localt =
        Dates.format(astimezone(zdt1, local_tz), timestamp_format)

    #End of recording period (offset now taken from the LAST file's own comment)
    _, _, _, binary_metadata_end = wavread(wav_list[end])
    zdt2 = _icmt_zdt(_icmt_fields(binary_metadata_end))
    finish_recording_period_localt =
        Dates.format(astimezone(zdt2, local_tz), timestamp_format)

    dict = Skraak.construct_dawn_dusk_dict("/media/david/SSD1/dawn_dusk.csv")

    #So I know what it is doing
    println(raw_path_vec)

    #Loop over file list
    for file in wav_list
        try
            audio_data, sample_rate, _, binary_metadata = wavread(file)
            comment_vector = _icmt_fields(binary_metadata)
            # samples per channel / rate; equals length/rate for mono recordings
            duration = Float64(size(audio_data, 1) / sample_rate)

            preformatting_zdt = _icmt_zdt(comment_vector)
            preformatting_utc = astimezone(preformatting_zdt, tz"UTC")
            utc = Dates.format(preformatting_utc, timestamp_format)
            preformatting_ldt = astimezone(preformatting_zdt, local_tz)
            ldt = Dates.format(preformatting_ldt, timestamp_format)

            moth_id = comment_vector[8]
            gain = comment_vector[10]
            #index back from end because if V > 4.9 the wording changes
            battery = parse(Float64, chop(comment_vector[end-4], tail = 1))
            temperature = parse(Float64, chop(comment_vector[end], tail = 2))

            # hash the file CONTENTS; sha256(file) would only hash the name string
            sha2_256 = bytes2hex(open(sha256, file))

            #assumes 15 minute file and calculates on half way time
            nt = Skraak.night(DateTime(preformatting_ldt + Minute(7) + Second(30)), dict)

            #Populate row to push into df
            row = [
                disk,
                location,
                trip_date,
                file,
                latitude,
                longitude,
                start_recording_period_localt,
                finish_recording_period_localt,
                duration,
                Int(sample_rate),
                utc,
                ldt,
                moth_id,
                gain,
                battery,
                temperature,
                sha2_256,
                nt,
            ]
            push!(df, row)
            print(".")
        catch err
            # best effort: report the file and carry on with the rest of the folder
            @warn "error with $location/$trip_date $file" exception = err
        end
    end
    return df
end
# Clips.jl

export make_clips, move_clips_to_folders

using CSV, DataFrames, Dates, DSP, Glob, JSON, Random, TimeZones, WAV, PNGFiles, Images
using DataFramesMeta: @transform!, @subset!, @byrow, @passmissing

# Assumes run on linux
# Assumes function run from Pomona-1 or Pomona-2
#dawn_dusk_dict::Dict{Dates.Date,Tuple{Dates.DateTime,Dates.DateTime}} = construct_dawn_dusk_dict("/media/david/SSD1/dawn_dusk.csv",),
"""
    make_clips(preds_path::String, label::Int = 1, night::Bool = true, dawn_dusk_dict = dddict)

Take a preds.csv file and generate clip file names, wav's and spectrogram png's to be
reviewed.

`preds_path` must look like `location/trip_date/preds....csv`; the first two path
components are used to find the source WAV files and to name the clips. Rows whose
`label` column equals `label` are kept, restricted to night (`night = true`) or day
(`night = false`) recordings via `night()`, clustered per file, and each cluster is
written as `Clips_<today>/<location>-<trip_date>-<file>-<start>-<end>.wav` and `.png`
relative to the current directory.

`dawn_dusk_dict` defaults to `dddict`, which is not defined in this file; it must exist
in the enclosing module (see `construct_dawn_dusk_dict`) or be passed explicitly.

It should be run from Pomona-1/, Pomona-2/ or Pomona-3/, assumes it is, it uses the path.
Use a try/catch around it because the 2 assert functions throw to short circuit the
function:

    using Glob, Skraak
    predictions = glob("*/2023-09-11*/preds*")
    predictions = glob("path/to/preds*")
    for file in predictions #[1:6][7:12][13:18][19:24]
        try
            make_clips(file)
        catch x
            println(x)
        end
    end

If needed to change headers in preds csv: shift, control, f in subl

    file,start_time,end_time,label
    /media/david/Pomona-2,<project filters>, preds-2023-02-27.csv
    file,start_time,end_time,absent,present

    using Glob, CSV, DataFrames, DataFramesMeta, Dates, DSP, Plots, Random, WAV
"""
function make_clips(
    preds_path::String,
    label::Int = 1,
    night::Bool = true,
    dawn_dusk_dict = dddict,
)
    # Assumes function run from Pomona-1 or Pomona-2
    location, trip_date, _ = split(preds_path, "/")

    # Load, validate and filter the predictions, then group by file.
    # Every step below mutates and returns the same data frame.
    df = DataFrame(CSV.File(preds_path))
    assert_not_empty(df, preds_path)
    rename_column!(df, "1.0", "label") #can remove now, needs to be label
    assert_detections_present(df, label, location, trip_date)
    filter_positives!(df, label)
    insert_datetime_column!(df)
    night_or_day!(df, dawn_dusk_dict, night) #true=night, false=day
    gdf = group_by_file!(df)

    clip_dir = "Clips_$(today())"

    # Make clip and spectrogram
    for (_, group) in pairs(gdf)
        file_name = path_to_file_string(group.file[1])
        start_times = group[!, :start_time] |> sort
        detections = cluster_detections(start_times)
        isempty(detections) && continue

        signal, freq = wavread("$location/$trip_date/$file_name.WAV")
        length_signal = length(signal)

        for detection in detections
            st, en = calculate_clip_start_end(detection, freq, length_signal)
            name =
                "$location-$trip_date-$file_name-$(Int(floor(st/freq)))-$(Int(ceil(en/freq)))"
            mkpath(clip_dir)
            outfile = "$clip_dir/$name"
            sample = signal[Int(st):Int(en)]
            wavwrite(sample, "$outfile.wav", Fs = Int(freq))
            #plot = plot_spectrogram(sample, freq)
            #savefig(plot, "$outfile.png")
            image = get_image_from_sample(sample, freq)
            PNGFiles.save("$outfile.png", image)
        end
        print(".")
    end
    println("\ndone $location/$trip_date \n")
end

#######################################################################

# Return `df` unchanged, or log and throw when it has no rows or no columns so that
# make_clips is short circuited.
function assert_not_empty(df::DataFrame, preds_path::String)::DataFrame
    if isempty(df)
        @error "Empty dataframe at $preds_path"
        throw(ArgumentError("Empty dataframe at $preds_path"))
    end
    return df
end

# Rename column `old_name` to `new_name` when it exists; otherwise leave `df` alone.
function rename_column!(df::DataFrame, old_name::String, new_name::String)::DataFrame
    old_name in names(df) && rename!(df, old_name => new_name)
    return df
end

# assumes kiwi, binary classifier from opensoundscape
# needed to remove ::String annotation for location, trip_date to make it work
# Return `df` unchanged, or log and throw when no row carries `label`.
function assert_detections_present(
    df::DataFrame,
    label::Int,
    location,
    trip_date,
)::DataFrame
    if !(label in levels(df.label))
        @error "No detections for label = $label at $location/$trip_date"
        throw(ArgumentError("No detections for label = $label at $location/$trip_date"))
    end
    return df
end

# assumes kiwi
# Keep only the rows whose `label` column equals `label`.
function filter_positives!(df::DataFrame, label)::DataFrame
    #filter!(row -> row.kiwi > 0, df)
    filter!(:label => ==(label), df)
    return df
end

# Strip the directory and everything from the first "." on, e.g.
# "./20230219_223000.WAV" -> "20230219_223000".
# be careful: path::String won't work, CSV hands over InlineStrings.String31
function path_to_file_string(path)
    file_name = last(split(path, "/"))
    return first(split(file_name, "."))
end

# Parse the recording start time from a file name. Names longer than 13 characters
# are read as yyyymmdd_HHMMSS; shorter ones as ddmmyy_HHMMSS with "20" inserted as
# the century. (Does not mutate anything despite the `!`; name kept for callers.)
function filename_to_datetime!(file)::DateTime
    file_string = path_to_file_string(file)
    if length(file_string) > 13
        return DateTime(file_string, dateformat"yyyymmdd_HHMMSS")
    end
    return DateTime(
        file_string[1:4] * "20" * file_string[5:end],
        dateformat"ddmmyyyy_HHMMSS",
    )
end

# Add a :DateTime column parsed from the :file column.
function insert_datetime_column!(df::DataFrame)::DataFrame
    @transform!(df, @byrow :DateTime = filename_to_datetime!(String(:file)))
    return df
end

# calls night(), needs dawn_dusk_dict in local time format
# Keep only night rows (night_time = true) or only day rows (night_time = false).
function night_or_day!(
    df::DataFrame,
    dawn_dusk_dict::Dict{Dates.Date,Tuple{Dates.DateTime,Dates.DateTime}},
    night_time::Bool = true,
)::DataFrame
    if night_time
        @subset!(df, @byrow night(:DateTime, dawn_dusk_dict))
    else
        @subset!(df, @byrow !night(:DateTime, dawn_dusk_dict))
    end
    return df
end

# Group the predictions by their :file column.
function group_by_file!(df::DataFrame)
    return groupby(df, :file)
end

# Group sorted start times into runs in which consecutive times are at most 15.0
# apart, and return only the runs with more than one member. Empty input gives an
# empty result.
function cluster_detections(start_times::AbstractVector{<:Real})::Vector{Vector{Float64}}
    clusters = Vector{Float64}[]
    isempty(start_times) && return clusters

    current = Float64[first(start_times)]
    for t in Iterators.drop(start_times, 1)
        if t - last(current) <= 15.0
            push!(current, t)
        else
            push!(clusters, current)
            current = Float64[t]
        end
    end
    push!(clusters, current)

    return filter(cluster -> length(cluster) > 1, clusters)
end

# assumes it is operating on 5 second clips
# Convert a cluster of start times into first/last sample indices: start at the
# first detection (sample 1 when that is time 0) and end 5.0 after the last
# detection, clamped to the signal length.
function calculate_clip_start_end(
    detection::Vector{Float64},
    freq::Float32,
    length_signal::Int64,
)::Tuple{Float64,Float64}
    st = first(detection) > 0 ? first(detection) * freq : 1
    en = min((last(detection) + 5.0) * freq, length_signal)
    return st, en
end

# Render `sample` as a 224x224 greyscale spectrogram image (window 400, overlap 2).
# `f` is the sample rate and must be integer valued (it is converted to Int).
function get_image_from_sample(sample, f) #sample::Vector{Float64}
    S = DSP.spectrogram(sample, 400, 2; fs = convert(Int, f))
    power = S.power

    # pow2db(0) is -Inf, so lift exact zeros to the smallest non-zero power.
    # NOTE(review): an all-zero sample has no second value and throws here.
    if minimum(power) == 0.0
        sorted_values = power |> vec |> unique |> sort
        replace!(power, 0.0 => sorted_values[2])
    end

    db = DSP.pow2db.(power)
    # NOTE(review): shifts by abs(min); only maps to [0, 1] when the minimum is <= 0 dB.
    shifted = db .+ abs(minimum(db))
    scaled = shifted ./ maximum(shifted)
    flipped = reverse(scaled, dims = 1)
    image = imresize(RGB.(flipped), 224, 224)
    return image
end

"""
    construct_dawn_dusk_dict(file::String)::Dict{Date,Tuple{DateTime,DateTime}}

Read the dawn/dusk csv at `file` (columns `Date`, `Dawn`, `Dusk`) and return a dict
mapping each date to its `(dawn, dusk)` pair, to be consumed by `night()`.

~/dawn_dusk.csv
At present it goes from the start of 2019 to the end of 2024.
The csv contains local time sunrise and sunset.
I use this to decide if a file with a local time encoded name was recorded at night.

    dict = construct_dawn_dusk_dict("/Volumes/SSD1/dawn_dusk.csv")
    dict = Utility.construct_dawn_dusk_dict("/media/david/SSD1/dawn_dusk.csv")

    using CSV, DataFrames
"""
function construct_dawn_dusk_dict(file::String)::Dict{Date,Tuple{DateTime,DateTime}}
    sun = DataFrame(CSV.File(file))
    return Dict(zip(sun.Date, zip(sun.Dawn, sun.Dusk)))
end

"""
    night(call_time::DateTime, dict::Dict{Date, Tuple{DateTime, DateTime}})::Bool

Return `true` if `call_time` is at night, i.e. at or before that date's dawn or at or
after that date's dusk (between civil twilights, dusk to dawn).

Consumes the dict from `construct_dawn_dusk_dict`; throws a `KeyError` when the date
of `call_time` is not in it.

    time=DateTime("2021-11-02T21:14:35",dateformat"yyyy-mm-ddTHH:MM:SS")
    Utility.night(time, dict)
"""
function night(call_time::DateTime, dict::Dict{Date,Tuple{DateTime,DateTime}})::Bool
    dawn, dusk = dict[Date(call_time)]
    return call_time <= dawn || call_time >= dusk
end

#######################################################################
#INBETWEEN STEP: use secondary model to sort clips, move clips into D, F, M, N, and hand classify, generate actual.csv.

"""
    move_clips_to_folders(df::DataFrame)

Move each clip listed in `df` into a folder named after its label.

`df` has 2 columns: `file`, `label`. `file` must be the list of png images in the
current directory, and a wav must exist for every png. For each row the png and its
wav are moved to `<label>/`; if a `video` folder exists, the matching mp4 is moved to
`video/<label>/` as well. A failed move is logged and the remaining rows are still
processed.
"""
function move_clips_to_folders(df::DataFrame)
    pngs = glob("*.png")
    wavs = glob("*.[W,w][A,a][V,v]")

    @assert last(split(first(df.file), ".")) == "png" "df.file must be a list of png's"
    @assert issetequal(df.file, pngs) "All png files in dataframe must be present in folder"
    @assert issetequal(chop.(df.file, head = 0, tail = 4), chop.(wavs, head = 0, tail = 4)) "There must be a wav for every png in the dataframe"

    for row in eachrow(df)
        src = row.file
        dst = "$(row.label)/$(row.file)"
        mkpath("$(row.label)/")
        try
            mv(src, dst)
            mv(chop(src, tail = 3) * "wav", chop(dst, tail = 3) * "wav")
            # "video" is a folder name, not a variable
            if isdir("video")
                mkpath("video/$(row.label)/")
                mv(
                    "video/" * chop(src, tail = 3) * "mp4",
                    "video/" * chop(dst, tail = 3) * "mp4",
                )
            end
        catch e
            @info e
        end
    end
end