use std::borrow::Borrow;
/// Gender attached to a name; also used as the filter when listing names.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum Gender {
    /// Name tagged as masculine.
    Masculine,
    /// Name tagged as feminine.
    Feminine,
    /// Name carrying neither the masculine nor the feminine tag.
    Neutral,
}

impl Gender {
    /// Reports whether a name tagged `other` passes a filter of `self`.
    ///
    /// The relation is not symmetric:
    ///
    /// * a `Masculine` filter accepts `Masculine` and `Neutral` names,
    /// * a `Feminine` filter accepts `Feminine` and `Neutral` names,
    /// * a `Neutral` filter accepts only `Neutral` names.
    pub fn allow(&self, other: &Self) -> bool {
        // Neutral names pass every filter; any other name must match the filter exactly.
        *other == Self::Neutral || self == other
    }
}
pub fn all_names(filter: Gender) -> tokio::sync::mpsc::Receiver<String> {
let (tx, rx) = tokio::sync::mpsc::channel(256);
let client = reqwest::Client::new();
for initial in 'A'..='Z' {
let tx = tx.clone();
let client = client.clone();
tokio::spawn(async move {
let mut page = 0;
loop {
let results =
fetch_name_page(&client, initial, page).await.unwrap();
for (name, gender) in &results.names {
if filter.allow(gender) {
tx.send(name.clone()).await.unwrap();
}
}
if results.has_next {
page += 1;
} else {
break;
}
}
});
}
rx
}
/// One scraped page of name-search results, as produced by `fetch_name_page`.
struct ResultPage {
/// Every name found on the page, paired with the gender derived from the
/// `class` attribute of its link (`M`, `F`, anything else is neutral).
names: Vec<(String, Gender)>,
/// `true` when the page's first `input.next-btn` element has `type="hidden"`;
/// `all_names` keeps requesting pages while this is set.
has_next: bool,
}
/// Requests one page of names starting with `letter` and scrapes the result.
///
/// `page` is zero-based. The search form is POSTed with paging fields that
/// depend on `page`:
///
/// | `page` | `offset` fields        | `Next` field |
/// |--------|------------------------|--------------|
/// | 0      | `66`, `66`             | `Previous`   |
/// | 1      | `0`                    | `Next`       |
/// | n >= 2 | `(n-1)*66`, `(n-1)*66` | `Next`       |
///
/// NOTE(review): this presumably replays the site's own pager form (the offset
/// of the page being left plus the button pressed) — confirm against the site.
///
/// The HTML is parsed on a blocking thread via `spawn_blocking`.
///
/// # Errors
///
/// Fails when the request cannot be sent, the response status is not a success
/// (`StatusError`), the body cannot be read, or the parsing task cannot be
/// joined.
async fn fetch_name_page(
    client: &reqwest::Client,
    letter: char,
    page: usize,
) -> Result<ResultPage, Box<dyn std::error::Error>> {
    let starts = letter.to_string();
    let page_header = format!("+Baby+Names+starting+with+{}", letter);

    // Offset value submitted for this page (see the table in the doc comment).
    let offset = match page {
        0 => 66,
        1 => 0,
        later => (later - 1) * 66,
    }
    .to_string();

    let mut form: Vec<(&str, &str)> = vec![
        ("advanced", "1"),
        ("starts", starts.borrow()),
        ("end", ""),
        ("meaning", ""),
        ("origin", ""),
        ("nat", ""),
        ("startswith", ""),
        ("endswith", ""),
        ("gender", ""),
        ("cat", ""),
        ("syl", ""),
        ("page_header", page_header.borrow()),
        ("offset", offset.borrow()),
    ];
    // Every page except page 1 submits the offset field twice.
    if page != 1 {
        form.push(("offset", offset.borrow()));
    }
    // Only page 0 is reached through the "Previous" button.
    form.push(("Next", if page == 0 { "Previous" } else { "Next" }));

    let response = client
        .post("https://babynames.com/names/search.php")
        .form(form.as_slice())
        .send()
        .await?;
    StatusError::ensure_success(response.status())?;
    let body = response.text().await?;

    // All `scraper` work stays inside the blocking closure.
    let parsed = tokio::task::spawn_blocking(move || {
        let doc = scraper::Html::parse_document(&body);

        // Another page exists when the first `input.next-btn` is a hidden input.
        let next_selector = scraper::Selector::parse("input.next-btn").unwrap();
        let has_next = doc.select(&next_selector).next().map_or(false, |button| {
            button.value().attr("type").map(str::trim) == Some("hidden")
        });

        let link_selector = scraper::Selector::parse("ul.searchresults a").unwrap();
        let mut names = Vec::new();
        for link in doc.select(&link_selector) {
            // The link's class attribute carries the gender tag.
            let gender = match link.value().attr("class") {
                Some("F") => Gender::Feminine,
                Some("M") => Gender::Masculine,
                _ => Gender::Neutral,
            };
            // Concatenate the link's text nodes, each trimmed individually.
            let name = link.text().map(str::trim).collect::<String>();
            names.push((name, gender));
        }

        ResultPage { names, has_next }
    })
    .await?;

    Ok(parsed)
}
/// Error wrapping an HTTP status code that was not a success.
pub struct StatusError(reqwest::StatusCode);

impl StatusError {
    /// Passes `status` through as `Ok(())` when `status.is_success()` holds,
    /// otherwise wraps it in a `StatusError`.
    fn ensure_success(status: reqwest::StatusCode) -> Result<(), Self> {
        if !status.is_success() {
            return Err(Self(status));
        }
        Ok(())
    }
}

// No methods overridden: the trait's defaults are used, on top of the
// `Debug` and `Display` impls for this type.
impl std::error::Error for StatusError {}
impl std::fmt::Debug for StatusError {
fn fmt(
&self,
formatter: &mut std::fmt::Formatter<'_>,
) -> Result<(), std::fmt::Error> {
<reqwest::StatusCode as std::fmt::Debug>::fmt(&self.0, formatter)
}
}
impl std::fmt::Display for StatusError {
fn fmt(
&self,
formatter: &mut std::fmt::Formatter<'_>,
) -> Result<(), std::fmt::Error> {
<reqwest::StatusCode as std::fmt::Display>::fmt(&self.0, formatter)
}
}