use crate::DynResult;
use clap::ArgEnum;
use std::borrow::Borrow;
// Gender classification used both as the filter handed to `all_names` and as
// the label attached to every scraped name.
// NOTE(review): plain `//` comments are used on purpose. clap's derive may
// surface `///` docs as CLI help text, so confirm before converting them.
#[derive(ArgEnum, Debug, PartialEq, Eq, Copy, Clone)]
pub enum Gender {
// Label given to result links whose `class` attribute is exactly "M".
Masculine,
// Label given to result links whose `class` attribute is exactly "F".
Feminine,
// Label given to every other result link (any other class, or no class).
Neutral,
}
impl Gender {
    /// Decides whether a name labelled `other` passes the filter `self`.
    ///
    /// * A `Masculine` or `Feminine` filter rejects only the opposite gender,
    ///   so neutral names pass.
    /// * A `Neutral` filter accepts nothing but neutral names.
    pub fn allow(&self, other: &Self) -> bool {
        // Enumerate the rejected (filter, candidate) pairs; everything else passes.
        !matches!(
            (self, other),
            (Self::Masculine, Self::Feminine)
                | (Self::Feminine, Self::Masculine)
                | (Self::Neutral, Self::Masculine)
                | (Self::Neutral, Self::Feminine)
        )
    }
}
/// Streams every name on babynames.com that passes `filter`, one page at a time.
///
/// A background task is spawned with `tokio::spawn` (so this must be called
/// from within a Tokio runtime). It walks the site with [`Fetch`] and sends the
/// filtered names of each page through a channel of capacity 1, which means the
/// task never runs more than one page ahead of the consumer.
///
/// The stream ends (the receiver yields `None`) when the last page has been
/// read or when the task stops because the receiver was dropped.
///
/// Failed fetches are retried indefinitely after a fixed back-off, but only
/// while someone is still listening on the receiver.
pub fn all_names(filter: Gender) -> tokio::sync::mpsc::Receiver<Vec<String>> {
    /// Pause before retrying a page whose fetch failed.
    const RETRY_DELAY: std::time::Duration = std::time::Duration::from_secs(120);
    /// Minimum spacing between two successful page fetches.
    const PAGE_DELAY: std::time::Duration = std::time::Duration::from_secs(5);
    /// Browser-like user agent sent with every request.
    const USER_AGENT: &str =
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:95.0) Gecko/20100101 Firefox/95.0";

    let (tx, rx) = tokio::sync::mpsc::channel(1);
    tokio::spawn(async move {
        let client = reqwest::Client::builder()
            .cookie_store(true)
            .user_agent(USER_AGENT)
            .build()
            .expect("failed to build the HTTP client");
        let mut fetch = Fetch::start(&client);
        loop {
            match fetch.next().await {
                Err(_) => {
                    // Without this check a dropped receiver plus a persistently
                    // failing site would keep this task retrying forever.
                    if tx.is_closed() {
                        break;
                    }
                    tokio::time::sleep(RETRY_DELAY).await;
                }
                Ok(None) => break,
                Ok(Some(page)) => {
                    // Created before the send so that time spent waiting for
                    // the consumer counts towards the inter-page delay.
                    let wait = tokio::time::sleep(PAGE_DELAY);
                    let names: Vec<String> = page
                        .into_iter()
                        .filter(|(_, gender)| filter.allow(gender))
                        .map(|(name, _)| name)
                        .collect();
                    // A send error means the receiver is gone: stop scraping.
                    if tx.send(names).await.is_err() {
                        break;
                    }
                    wait.await;
                }
            }
        }
    });
    rx
}
/// Cursor over the paginated name listings of babynames.com, walked one
/// letter at a time.
struct Fetch<'a> {
/// HTTP client used to issue every request.
client: &'a reqwest::Client,
/// Letter whose listing is currently being read; starts at 'A', and a value
/// past 'Z' makes `next` report the end of the listing.
current_letter: char,
/// URL that the next call to `next` will request.
url: String,
/// Form fields sent as a POST body by the next request; `None` means the
/// next request is a plain GET.
form: Option<Vec<(String, String)>>,
}
impl<'a> Fetch<'a> {
    /// Creates a cursor positioned on the first page of the letter `A`.
    fn start(client: &'a reqwest::Client) -> Self {
        Self {
            client,
            current_letter: 'A',
            url: Self::letter_url('A'),
            form: None,
        }
    }

    /// Builds the URL of the first results page for `letter`.
    fn letter_url(letter: char) -> String {
        format!("https://babynames.com/names/{}", letter)
    }

    /// Fetches the next page of names and advances the cursor.
    ///
    /// Returns `Ok(Some(names))` with every `(name, gender)` pair found on the
    /// page, or `Ok(None)` once the letter `Z` has been exhausted.
    ///
    /// # Errors
    ///
    /// Fails when the request cannot be sent, the response status is not a
    /// success, the body cannot be read, or the parsing task cannot be joined.
    /// The cursor is left untouched in every error case, so calling `next`
    /// again retries the same page.
    async fn next(&mut self) -> DynResult<Option<Vec<(String, Gender)>>> {
        if self.current_letter > 'Z' {
            return Ok(None);
        }

        // The first page of a letter is a GET; later pages are reached by
        // re-submitting the pagination form of the previous page.
        let request = match &self.form {
            None => self.client.get(&self.url),
            Some(fields) => self.client.post(&self.url).form(fields),
        };
        let response = request.send().await?;
        StatusError::ensure_success(response.status())?;
        let text = response.text().await?;

        // HTML parsing is CPU-bound, so keep it off the async worker threads.
        let ParsedPage {
            names,
            next_url,
            form,
        } = tokio::task::spawn_blocking(move || parse_page(text.borrow())).await?;

        if form.is_none() {
            // No further page for this letter: move on to the next one. The
            // guard above keeps the letter within 'A'..='Z', so the successor
            // is always a valid scalar value; the fallback merely avoids a
            // panic path and still compares greater than 'Z'.
            self.current_letter =
                char::from_u32(self.current_letter as u32 + 1).unwrap_or(char::MAX);
        }
        self.form = form;
        self.url = match next_url {
            Some(url) => url,
            None => Self::letter_url(self.current_letter),
        };
        Ok(Some(names))
    }
}

/// Everything `Fetch::next` needs from one downloaded results page.
struct ParsedPage {
    /// Names listed on the page together with their gender label.
    names: Vec<(String, Gender)>,
    /// Absolute URL the pagination form submits to, when the form declares one.
    next_url: Option<String>,
    /// Fields to submit to reach the following page; `None` on the last page.
    form: Option<Vec<(String, String)>>,
}

/// Extracts the names and the "next page" form from the HTML of a results page.
fn parse_page(html: &str) -> ParsedPage {
    let doc = scraper::Html::parse_document(html);

    let name_selector = scraper::Selector::parse("ul.searchresults a").unwrap();
    let names = doc
        .select(&name_selector)
        .map(|item| {
            // The gender is encoded as the link's whole `class` attribute.
            let gender = match item.value().attr("class") {
                Some("M") => Gender::Masculine,
                Some("F") => Gender::Feminine,
                _ => Gender::Neutral,
            };
            let name: String = item.text().map(str::trim).collect();
            (name, gender)
        })
        .collect();

    let next_button = scraper::Selector::parse("input.next-btn").unwrap();
    let has_next = doc.select(&next_button).next().is_some();

    let stepper_selector = scraper::Selector::parse("div.next-previous form").unwrap();
    let stepper = match doc.select(&stepper_selector).next() {
        // Only follow the pagination form when it offers a "next" button.
        Some(stepper) if has_next => stepper,
        _ => {
            return ParsedPage {
                names,
                next_url: None,
                form: None,
            }
        }
    };

    let next_url = stepper.value().attr("action").map(|action| {
        if action.starts_with("http") {
            String::from(action)
        } else {
            format!("https://babynames.com{}", action)
        }
    });

    let input_selector = scraper::Selector::parse("input").unwrap();
    let form = stepper
        .select(&input_selector)
        .filter_map(|input| {
            let input = input.value();
            // Of the submit buttons, only the "next" one may be sent along.
            let is_other_submit =
                input.attr("type") == Some("submit") && input.attr("class") != Some("next-btn");
            if is_other_submit {
                return None;
            }
            // Inputs lacking a name or a value cannot be submitted.
            Some((
                String::from(input.attr("name")?),
                String::from(input.attr("value")?),
            ))
        })
        .collect();

    ParsedPage {
        names,
        next_url,
        form: Some(form),
    }
}
/// Error carrying the status code of an HTTP response that was not a success.
pub struct StatusError(reqwest::StatusCode);

impl StatusError {
    /// Returns `Ok(())` when `status.is_success()` holds, otherwise wraps the
    /// status in a `StatusError`.
    fn ensure_success(status: reqwest::StatusCode) -> Result<(), Self> {
        if !status.is_success() {
            return Err(Self(status));
        }
        Ok(())
    }
}

/// Debug output is delegated unchanged to the wrapped status code.
impl std::fmt::Debug for StatusError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(status) = self;
        std::fmt::Debug::fmt(status, f)
    }
}

/// Display output is delegated unchanged to the wrapped status code.
impl std::fmt::Display for StatusError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(status) = self;
        std::fmt::Display::fmt(status, f)
    }
}

impl std::error::Error for StatusError {}