defmodule SomethingErlang.AwfulApi.Thread do
  @moduledoc """
  Builds a plain-map representation of one page of a forum thread.

  The page HTML is obtained through
  `SomethingErlang.AwfulApi.Client.thread_doc/3` and parsed with Floki.
  While extracting post bodies, `<img>` and `<a>` elements are rewritten:
  non-smilie images become lazy-loaded responsive images, and links to
  `.mp4`, `.gifv` and YouTube URLs are replaced by embedded players.
  """

  require Logger

  alias SomethingErlang.AwfulApi.Client

  # Suffix stripped from the document <title> to obtain the thread title.
  @title_suffix " - The Something Awful Forums"

  # Month abbreviations in calendar order; list index + 1 is the month number.
  @months ~w(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)

  @doc """
  Fetches and parses page `page` of thread `id` on behalf of `user`.

  All three arguments are passed unchanged to `Client.thread_doc/3`.

  Returns a map with the keys:

    * `:id` - the `id` argument, unchanged
    * `:title` - the document title without the forum name suffix
    * `:page` - the `page` argument, unchanged
    * `:page_count` - the integer parsed from the last option of the top
      page selector, or `1` when that selector yields no text
    * `:posts` - one map per `table.post` inside `#thread` (posts marked
      `ignored` are dropped), each with `:userinfo`, `:postdate` and
      `:postbody`

  Raises if the document cannot be parsed or a post date does not have the
  expected shape.
  """
  def compile(id, page, user) do
    html =
      id
      |> Client.thread_doc(page, user)
      |> Floki.parse_document!()

    thread =
      html
      |> Floki.find("#thread")
      |> Floki.filter_out("table.post.ignored")

    title =
      html
      |> Floki.find("title")
      |> Floki.text()
      |> String.replace(@title_suffix, "")

    page_count =
      case html |> Floki.find("#content .pages.top option:last-of-type") |> Floki.text() do
        # No page selector text found: treat the thread as single-page.
        "" -> 1
        count -> String.to_integer(count)
      end

    posts =
      for post <- Floki.find(thread, "table.post") do
        %{userinfo: userinfo(post), postdate: postdate(post), postbody: postbody(post)}
      end

    %{id: id, title: title, page: page, page_count: page_count, posts: posts}
  end

  # Extracts the poster's name, registration date text and custom title HTML
  # from the post's `dl.userinfo` element.
  defp userinfo(post) do
    user = Floki.find(post, "dl.userinfo")

    %{
      name: user |> Floki.find("dt") |> Floki.text(),
      regdate: user |> Floki.find("dd.registered") |> Floki.text(),
      title: user_title(user)
    }
  end

  # Inner HTML of the first `dd.title` element, or "" when there is none.
  # The missing case is handled explicitly instead of piping `nil` into Floki.
  defp user_title(user) do
    case Floki.find(user, "dd.title") do
      [first | _] -> first |> Floki.children() |> Floki.raw_html()
      [] -> ""
    end
  end

  # Parses the text of `td.postdate` into a NaiveDateTime (seconds set to 0).
  #
  # The text is split on whitespace, commas and colons; the first token is
  # discarded and the remainder must be exactly
  # [month abbreviation, day, year, hours, minutes], otherwise the match raises.
  defp postdate(post) do
    [month_text, day, year, hours, minutes] =
      post
      |> Floki.find("td.postdate")
      |> Floki.text()
      |> String.split(~r{[\s,:]}, trim: true)
      |> Enum.drop(1)

    # An abbreviation not present in @months makes find_index return nil,
    # so the addition raises.
    month = Enum.find_index(@months, &(&1 == month_text)) + 1

    NaiveDateTime.new!(
      String.to_integer(year),
      month,
      String.to_integer(day),
      String.to_integer(hours),
      String.to_integer(minutes),
      0
    )
  end

  # Returns the inner HTML of the post's first `td.postbody`, with HTML
  # comments removed and childless <img> / all <a> elements rewritten by
  # transform/3.
  defp postbody(post) do
    post
    |> Floki.find("td.postbody")
    |> List.first()
    |> Floki.filter_out(:comment)
    |> Floki.traverse_and_update(fn
      {"img", attrs, []} -> transform(:img, attrs)
      {"a", attrs, children} -> transform(:a, attrs, children)
      other -> other
    end)
    |> Floki.children()
    |> Floki.raw_html()
  end

  defp transform(elem, attr, children \\ [])

  # Images whose class is exactly "sa-smilie" are returned untouched; every
  # other image gets class "img-responsive" and loading="lazy".
  defp transform(:img, attrs, _children) do
    {"class", class} = List.keyfind(attrs, "class", 0, {"class", ""})

    if class == "sa-smilie" do
      {"img", attrs, []}
    else
      # keystore (not keyreplace) so the class is also added when the image
      # had no class attribute at all.
      responsive_attrs = List.keystore(attrs, "class", 0, {"class", "img-responsive"})
      {"img", [{"loading", "lazy"} | responsive_attrs], []}
    end
  end

  # Links are reduced to their href; recognised media URLs are replaced by an
  # embedded player.
  defp transform(:a, attrs, children) do
    {"href", href} = List.keyfind(attrs, "href", 0, {"href", ""})

    cond do
      # skip internal links
      String.starts_with?(href, "/") ->
        {"a", [{"href", href}], children}

      # mp4
      String.ends_with?(href, ".mp4") ->
        transform_link(:mp4, href)

      # gifv
      String.ends_with?(href, ".gifv") ->
        transform_link(:gifv, href)

      # youtube
      String.starts_with?(href, "https://www.youtube.com/watch") ->
        transform_link(:ytlong, href)

      String.starts_with?(href, "https://youtu.be/") ->
        transform_link(:ytshort, href)

      true ->
        Logger.debug("no transform for #{href}")
        {"a", [{"href", href}], children}
    end
  end

  # <video> player with a single mp4 source.
  defp transform_link(:mp4, href) do
    video_embed([{"source", [{"src", href}, {"type", "video/mp4"}], []}])
  end

  # <video> player offering the .webm and .mp4 variants of a .gifv URL.
  # Only the trailing extension is swapped, never an earlier ".gifv" in the URL.
  defp transform_link(:gifv, href) do
    video_embed([
      {"source",
       [{"src", String.replace_suffix(href, ".gifv", ".webm")}, {"type", "video/webm"}], []},
      {"source",
       [{"src", String.replace_suffix(href, ".gifv", ".mp4")}, {"type", "video/mp4"}], []}
    ])
  end

  # NOTE(review): any extra query parameters (e.g. "&t=42s") are carried over
  # verbatim after the video id; confirm the resulting embed URL is acceptable.
  defp transform_link(:ytlong, href) do
    href
    |> String.replace("/watch?v=", "/embed/")
    |> youtube_iframe()
  end

  defp transform_link(:ytshort, href) do
    href
    |> String.replace("youtu.be/", "www.youtube.com/embed/")
    |> youtube_iframe()
  end

  # Wraps the given <source> nodes in a <video controls> inside a
  # "responsive-embed" container.
  defp video_embed(sources) do
    {"div", [{"class", "responsive-embed"}],
     [
       {"video", [{"class", "img-responsive"}, {"controls", ""}], sources}
     ]}
  end

  # Lazy-loaded YouTube player iframe inside a "responsive-embed" container.
  defp youtube_iframe(src) do
    {"div", [{"class", "responsive-embed"}],
     [
       {"iframe",
        [
          {"class", "youtube-player"},
          {"loading", "lazy"},
          {"allow", "fullscreen"},
          {"src", src}
        ], []}
     ]}
  end
end