FAXOU7MRT62Y2SBC5PWCPLXF6KIRZOEHAIGBQU6NNL36D6MOOKEAC let doc = Html::parse_document(data);for a_elem in doc.select(&afinder) {if let Some(link) = a_elem.value().attr("href") {match Url::parse(link) {Ok(potentially_new_url) => {if !old_links.contains_key(&potentially_new_url)&& !pending.contains(&potentially_new_url)&& potentially_new_url.domain() == Some("mediabiasfactcheck.com"){println!("chosing to visit {}", potentially_new_url);pending.insert(potentially_new_url);} else {tracing::trace!("skipping {}", potentially_new_url);}}_ => {}}}}Ok(())}
// Seed the set of URLs still to visit with the site root only.
let mut pending: HashSet<Url> = {
    let root: Url = "https://mediabiasfactcheck.com/".try_into()?;
    iter::once(root).collect()
};
// Build the initial set of URLs to visit. When `confirmed` is empty the crawl
// starts from the site root; otherwise every entry already in `confirmed` is
// re-scanned for links that still need to be fetched.
let mut pending: HashSet<Url> = if confirmed.is_empty() {
    info!("starting from root, with no cache");
    iter::once("https://mediabiasfactcheck.com/".try_into()?).collect()
} else {
    let mut new = HashSet::new();
    for page in confirmed.values() {
        // Propagate failures to the caller instead of panicking on a
        // non-UTF-8 body or silently discarding the scan result.
        let body = std::str::from_utf8(&page.data)?;
        find_new_links(body, &confirmed, &mut new)?;
    }
    info!("scraping through {:?}", new);
    new
};
// Parse the fetched bytes as an HTML document and queue every link that is on
// the target site and has not been seen before.
let doc = Html::parse_document(std::str::from_utf8(&data)?);
for anchor in doc.select(&afinder) {
    // Elements matched by the selector but lacking an `href` are skipped.
    let href = match anchor.value().attr("href") {
        Some(href) => href,
        None => continue,
    };
    // Hrefs that `Url::parse` rejects are ignored.
    let candidate = match Url::parse(href) {
        Ok(url) => url,
        Err(_) => continue,
    };

    let already_known = confirmed.contains_key(&candidate) || pending.contains(&candidate);
    let on_site = candidate.domain() == Some("mediabiasfactcheck.com");

    if already_known || !on_site {
        tracing::trace!("skipping {}", candidate);
    } else {
        tracing::trace!("chosing to visit {}", candidate);
        pending.insert(candidate);
    }
}
let mut confirmed = bincode::deserialize_from(std::io::BufReader::new(std::fs::File::open("phase1.blob").unwrap(),))
let mut confirmed = {let f = std::fs::File::open("phase1.blob").map_err(|e| anyhow!(e));f.and_then(|f| {bincode::deserialize_from(std::io::BufReader::new(f)).map_err(|e| anyhow!(e))})}