LPVC545KD6R5KNOTDPFDOD5FVA3KFBZARKVOLLXJXW3YK3RMNZKQC
YCWYAX6K2DJKT7FO4IAYL6HJOIJLYFKAPGLFJ5XMYSYAS42LP3FQC
C376NCOVRBWVVPEIM5GQ6Z3LI4PCXPIQQTLI3GBQWAABXCKDOWQAC
HMOBTVJ4FEPZWMUV2JDPZYH3EFCW6ED5M6KKNKQJQZVKTDAKTVFQC
PQ4BG3ZJU5SY6XQDJ3SURLAOWGXGXLONIRZNUJXZVMHLYXWOORXQC
RNW6D77774CYWWM7JIFXI5TGKBOU6ADJIEZB5N2FXGDCTLUXEQZQC
KUANIPWFDXDFKJ2LH4FQ6APYOPLNYJ5LVGLSBSC75WUUZPFHILCAC
for initial in 'A'..='Z' {
let mut page = 0;
loop {
let results =
fetch_name_page(&client, initial, page).await.unwrap();
for (name, gender) in &results.names {
if filter.allow(gender) {
tx.send(name.clone()).await.unwrap();
}
// User-Agent header sent with every request: a desktop Firefox 95 on macOS.
// NOTE(review): presumably the site rejects or alters responses for non-browser
// agents — confirm before changing this string.
const BROWSER_USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:95.0) Gecko/20100101 Firefox/95.0";

// One HTTP client for the whole run. The cookie store is enabled so cookies
// set by a response are sent back on later requests made through this client.
// Failing to build the client is treated as fatal (panics).
let client = reqwest::Client::builder()
    .cookie_store(true)
    .user_agent(BROWSER_USER_AGENT)
    .build()
    .unwrap();

// Paging state machine that borrows the client for each request it issues.
let mut fetch = Fetch::start(&client);
loop {
dbg!(fetch.current_letter, &fetch.form);
let (delay, page) = match fetch.next().await {
Err(err) => {
dbg!(err);
(120, Vec::with_capacity(0))
async fn fetch_name_page(
client: &reqwest::Client,
letter: char,
page: usize,
) -> Result<ResultPage, Box<dyn std::error::Error>> {
let page_header = format!("+Baby+Names+starting+with+{}", letter);
let starts = String::from(letter);
let mut form = vec![
("advanced", "1"),
("starts", starts.borrow()),
("end", ""),
("meaning", ""),
("origin", ""),
("nat", ""),
("startswith", ""),
("endswith", ""),
("gender", ""),
("cat", ""),
("syl", ""),
("page_header", page_header.borrow()),
];
let offset;
match page {
0 => {
form.extend(
[("offset", "66"), ("offset", "66"), ("Next", "Previous")]
.iter(),
);
impl<'a> Fetch<'a> {
fn start(client: &'a reqwest::Client) -> Self {
Self {
client,
current_letter: 'A',
url: String::from("https://babynames.com/names/A"),
form: None,
_ => {
offset = format!("{}", (page - 1) * 66);
form.extend(
[
("offset", offset.borrow()),
("offset", offset.borrow()),
("Next", "Next"),
]
.iter(),
);
let response = match &self.form {
None => self.client.get(&self.url),
Some(fields) => self.client.post(&self.url).form(fields),
StatusError::ensure_success(response.status())?;
let text = response.text().await?;
Ok(tokio::task::spawn_blocking(move || {
let doc = scraper::Html::parse_document(text.borrow());
let next_button = scraper::Selector::parse("input.next-btn").unwrap();
let mut next_button = doc.select(&next_button);
let has_next = match next_button.next() {
None => false,
Some(button) => match button.value().attr("type").map(str::trim) {
Some("hidden") => true,
_ => false,
},
};
let name_selector =
scraper::Selector::parse("ul.searchresults a").unwrap();
let names = doc
.select(&name_selector)
.map(|item| {
let gender = match item.value().attr("class") {
Some("M") => Gender::Masculine,
Some("F") => Gender::Feminine,
_ => Gender::Neutral,
StatusError::ensure_success(response.status())?;
let text = response.text().await?;
// Parse the response body inside `spawn_blocking` so that HTML parsing does
// not run on the async task itself. The closure yields three things:
//   * `names`    - every (name, gender) pair listed on this page,
//   * `next_url` - absolute URL the pagination form submits to, if any,
//   * `form`     - (name, value) pairs of the pagination form's inputs, or
//                  `None` when there is no further page to request.
let (names, next_url, form) = tokio::task::spawn_blocking(move || {
    let doc = scraper::Html::parse_document(text.borrow());

    // The mere presence of an `input.next-btn` element is what signals that
    // another page of results follows.
    let next_button = scraper::Selector::parse("input.next-btn").unwrap();
    let has_next = doc.select(&next_button).next().is_some();

    // Each result link carries its gender in the `class` attribute; any value
    // other than exactly "M" or "F" (including no attribute) maps to neutral.
    let name_selector = scraper::Selector::parse("ul.searchresults a").unwrap();
    let names = doc
        .select(&name_selector)
        .map(|item| {
            let gender = match item.value().attr("class") {
                Some("M") => Gender::Masculine,
                Some("F") => Gender::Feminine,
                _ => Gender::Neutral,
            };
            // Trim every text node of the link and concatenate them.
            let name: String = item.text().map(str::trim).collect();
            (name, gender)
        })
        .collect();

    // Pagination is driven by the form inside `div.next-previous`. Without
    // both that form and a next button there is nothing further to request,
    // so bail out before doing the work of collecting the form fields.
    let stepper_selector =
        scraper::Selector::parse("div.next-previous form").unwrap();
    let stepper = match doc.select(&stepper_selector).next() {
        Some(stepper) if has_next => stepper,
        _ => return (names, None, None),
    };

    // Collect the fields to post back. Submit buttons are dropped unless their
    // class is exactly "next-btn"; inputs lacking a `name` or a `value`
    // attribute are dropped as well.
    let input_selector = scraper::Selector::parse("input").unwrap();
    let form = stepper
        .select(&input_selector)
        .filter_map(|input| {
            let input = input.value();
            if input.attr("type") == Some("submit")
                && input.attr("class") != Some("next-btn")
            {
                return None;
            }
            Some((
                input.attr("name").map(String::from)?,
                input.attr("value").map(String::from)?,
            ))
        })
        .collect();

    // Resolve the form's `action` against the site root unless it already
    // starts with "http".
    // NOTE(review): a relative action without a leading '/' would be glued
    // directly onto the host name — confirm the site never emits one.
    let next_url = stepper.value().attr("action").map(|url| {
        if url.starts_with("http") {
            String::from(url)
        } else {
            format!("https://babynames.com{}", url)
        }
    });

    (names, next_url, Some(form))
})
.await?;
match form {
None => {
self.current_letter = unsafe {
char::from_u32_unchecked(self.current_letter as u32 + 1)
let name: String = item.text().map(|s| s.trim()).collect();
(name, gender)
})
.collect();
ResultPage { names, has_next }
})
.await?)
self.form = None;
}
Some(form) => {
self.form = Some(form);
}
}
self.url = match next_url {
None => {
format!("https://babynames.com/names/{}", self.current_letter)
}
Some(url) => url,
};
Ok(Some(names))
}
name = "cookie"
version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5f1c7727e460397e56abc4bddc1d49e07a1ad78fc98eb2e1c8f032a58a2f80d"
dependencies = [
"percent-encoding",
"time",
"version_check",
]
[[package]]
name = "cookie_store"
version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3f7034c0932dc36f5bd8ec37368d971346809435824f277cb3b8299fc56167c"
dependencies = [
"cookie",
"idna",
"log",
"publicsuffix",
"serde",
"serde_json",
"time",
"url",
]
[[package]]
name = "psl-types"
version = "2.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8eda7c62d9ecaafdf8b62374c006de0adf61666ae96a96ba74a37134aa4e470"
[[package]]
name = "publicsuffix"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "292972edad6bbecc137ab84c5e36421a4a6c979ea31d3cc73540dd04315b33e1"
dependencies = [
"byteorder",
"hashbrown",
"idna",
"psl-types",
]
[[package]]
name = "standback"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e113fb6f3de07a243d434a56ec6f186dfd51cb08448239fe7bcae73f87ff28ff"
dependencies = [
"version_check",
]
[[package]]
name = "stdweb"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d022496b16281348b52d0e30ae99e01a73d737b2f45d38fed4edf79f9325a1d5"
dependencies = [
"discard",
"rustc_version 0.2.3",
"stdweb-derive",
"stdweb-internal-macros",
"stdweb-internal-runtime",
"wasm-bindgen",
]
[[package]]
name = "stdweb-derive"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c87a60a40fccc84bef0652345bbbbbe20a605bf5d0ce81719fc476f5c03b50ef"
dependencies = [
"proc-macro2",
"quote",
"serde",
"serde_derive",
"syn",
]
[[package]]
name = "stdweb-internal-macros"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58fa5ff6ad0d98d1ffa8cb115892b6e69d67799f6763e162a1c9db421dc22e11"
dependencies = [
"base-x",
"proc-macro2",
"quote",
"serde",
"serde_derive",
"serde_json",
"sha1",
"syn",
]
[[package]]
name = "stdweb-internal-runtime"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0"
[[package]]
[[package]]
name = "time"
version = "0.2.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4752a97f8eebd6854ff91f1c1824cd6160626ac4bd44287f7f4ea2035a02a242"
dependencies = [
"const_fn",
"libc",
"standback",
"stdweb",
"time-macros",
"version_check",
"winapi",
]
[[package]]
name = "time-macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "957e9c6e26f12cb6d0dd7fc776bb67a706312e7299aed74c8dd5b17ebb27e2f1"
dependencies = [
"proc-macro-hack",
"time-macros-impl",
]