#get json into db
CREATE TABLE submissions_2023_03_24 AS
SELECT unnest(submissions, recursive:= true) AS ducks 
FROM './formspree_mpzvpdvp_2024-03-24T02-13-06_export.json';

ALTER TABLE submissions_2023_03_24 DROP open;
ALTER TABLE submissions_2023_03_24 DROP delivered;
ALTER TABLE submissions_2023_03_24 DROP processed;
ALTER TABLE submissions_2023_03_24 DROP dispatched;

#get a csv
COPY submissions_2023_03_24 TO 'submissions_2023_03_24.csv' (HEADER, DELIMITER ',');

#get dataframe
] activate --temp
using CSV, DataFrames, DataFramesMeta, PromptingTools, VegaLite
df = DataFrame(CSV.File("submissions_2023_03_24_working.csv"))

#gpt to summarise message column
tpl = """
You are an expert user of backpacks.

This text is a survey response to the question 'What is most important in a backpack?'

Your task is to summarise the survey response in as few words as possible. Use very short sentences, one sentence per idea. Do not include any boilerplate or extra words.

### Survey Response

{{details}}
"""
details= df.message[1]
msg = aigenerate(tpl; details)

x=map(details -> aigenerate(tpl; details), df.message) 

#make new column most_important using x

#easy plots
p1 = df |> @vlplot(:bar, height=700, width=700, config={axis={
      labelFontSize=22, titleFontSize=26},scale={fontSize=22}}, x={:how_often, axis={title="How often do you use a pack?"}, sort="-y"}, y={"count()", axis={title="Count"}}) |> x -> save("how_often2.png", x)

p2 = df |> @vlplot(:bar, height=700, width=700, config={axis={
      labelFontSize=22, titleFontSize=26},scale={fontSize=22}}, x={:where, axis={title="Where do you use your pack most?"}, sort="-y"}, y={"count()", axis={title="Count"}}) |> x -> save("where2.png", x)

p3 = df |> @vlplot(:bar, height=700, width=700, config={axis={
      labelFontSize=22, titleFontSize=26},scale={fontSize=22}}, x={:what, axis={title="What kind of load do you carry?"}, sort="-y"}, y={"count()", axis={title="Count"}}) |> x -> save("what2.png", x)

#most_used plots
x=String[]
for item in skipmissing(df.most_used)
	y=split(item, ",")
	for i in y
		z=strip(i)
		push!(x, z)
	end
end

function is_an_int(a)::Bool
   return tryparse(Int, a) !== nothing
end

make=String[]
size=String[]
for i in x
	q=split(i, " ")
	push!(make, q[1])
	if is_an_int(last(q))
		push!(size, last(q))
	end
end

p4 = @vlplot(:bar, height=700, width=700, config={axis={
      labelFontSize=22, titleFontSize=26},scale={fontSize=22}}, x={make, axis={title="What is your all time most used pack?"}, sort="-y"}, y={"count()", axis={title="Count"}}) |> x -> save("most_used_make2.png", x)

p5 = @vlplot(:bar, height=700, width=700, config={axis={
      labelFontSize=22, titleFontSize=26},scale={fontSize=22}} , x={size, axis={title="Size of most used pack?"}, sort="-y", bin={maxbins=6}}, y={"count()", axis={title="Count"}}) |> x -> save("most_used_size2.png", x)

#most_important plot
most_important=String[]
for item in skipmissing(df.most_important)
	y=split(item, ",")
	for i in y
		z=strip(i) |> lowercase
		push!(most_important, z)
	end
end

l=levels(most_important) |> sort! #check categories and simplify

p6 = @vlplot(:bar, height=700, width=700, config={axis={
      labelFontSize=22, titleFontSize=26},scale={fontSize=22}}, x={most_important, axis={title="What is most important in a pack?"}, sort="-y"}, y={"count()", axis={title="Count"}}) |> x -> save("most_important.png", x)