#get json into db
CREATE TABLE submissions_2023_03_24 AS
SELECT unnest(submissions, recursive:= true) AS ducks
FROM './formspree_mpzvpdvp_2024-03-24T02-13-06_export.json';
ALTER TABLE submissions_2023_03_24 DROP open;
ALTER TABLE submissions_2023_03_24 DROP delivered;
ALTER TABLE submissions_2023_03_24 DROP processed;
ALTER TABLE submissions_2023_03_24 DROP dispatched;
#get a csv
COPY submissions_2023_03_24 TO 'submissions_2023_03_24.csv' (HEADER, DELIMITER ',');
#get dataframe
] activate --temp
using CSV, DataFrames, DataFramesMeta, PromptingTools, VegaLite
df = DataFrame(CSV.File("submissions_2023_03_24_working.csv"))
#gpt to summarise message column
tpl = """
You are an expert user of backpacks.
This text is a survey response to the question 'What is most important in a backpack?'
Your task is to summarise the survey response in as few words as possible. Use very short sentences, one sentence per idea. Do not include any boilerplate or extra words.
### Survey Response
{{details}}
"""
details= df.message[1]
msg = aigenerate(tpl; details)
x=map(details -> aigenerate(tpl; details), df.message)
#make new column most_important using x
#easy plots
p1 = df |> @vlplot(:bar, height=700, width=700, config={axis={
labelFontSize=22, titleFontSize=26},scale={fontSize=22}}, x={:how_often, axis={title="How often do you use a pack?"}, sort="-y"}, y={"count()", axis={title="Count"}}) |> x -> save("how_often2.png", x)
p2 = df |> @vlplot(:bar, height=700, width=700, config={axis={
labelFontSize=22, titleFontSize=26},scale={fontSize=22}}, x={:where, axis={title="Where do you use your pack most?"}, sort="-y"}, y={"count()", axis={title="Count"}}) |> x -> save("where2.png", x)
p3 = df |> @vlplot(:bar, height=700, width=700, config={axis={
labelFontSize=22, titleFontSize=26},scale={fontSize=22}}, x={:what, axis={title="What kind of load do you carry?"}, sort="-y"}, y={"count()", axis={title="Count"}}) |> x -> save("what2.png", x)
#most_used plots
x=String[]
for item in skipmissing(df.most_used)
y=split(item, ",")
for i in y
z=strip(i)
push!(x, z)
end
end
function is_an_int(a)::Bool
return tryparse(Int, a) !== nothing
end
make=String[]
size=String[]
for i in x
q=split(i, " ")
push!(make, q[1])
if is_an_int(last(q))
push!(size, last(q))
end
end
p4 = @vlplot(:bar, height=700, width=700, config={axis={
labelFontSize=22, titleFontSize=26},scale={fontSize=22}}, x={make, axis={title="What is your all time most used pack?"}, sort="-y"}, y={"count()", axis={title="Count"}}) |> x -> save("most_used_make2.png", x)
p5 = @vlplot(:bar, height=700, width=700, config={axis={
labelFontSize=22, titleFontSize=26},scale={fontSize=22}} , x={size, axis={title="Size of most used pack?"}, sort="-y", bin={maxbins=6}}, y={"count()", axis={title="Count"}}) |> x -> save("most_used_size2.png", x)
#most_important plot
most_important=String[]
for item in skipmissing(df.most_important)
y=split(item, ",")
for i in y
z=strip(i) |> lowercase
push!(most_important, z)
end
end
l=levels(most_important) |> sort! #check categories and simplify
p6 = @vlplot(:bar, height=700, width=700, config={axis={
labelFontSize=22, titleFontSize=26},scale={fontSize=22}}, x={most_important, axis={title="What is most important in a pack?"}, sort="-y"}, y={"count()", axis={title="Count"}}) |> x -> save("most_important.png", x)