B3H475WM3JE532SL7IGJIQBRXWHNDTHP2LH5IL67N46Z6QM75SFAC // map from original scraped url to retrieved page.async fn all_urls_on_site_everywhere(cl: &Client) -> anyhow::Result<HashMap<Url, PageCandidate>> {
/// add elements to confirmedasync fn all_urls_on_site_everywhere(mut stop: broadcast::Receiver<()>,cl: &Client,confirmed: &mut HashMap<Url, PageCandidate>,) -> anyhow::Result<()> {
required_sleep();let pg = cl.get(next_url).send().await?;confirmed.push(PageCandidate {final_url: pg.url.clone(),data: pg.bytes().await?,});let doc = Html::parse_document(data);
required_sleep().await;let pg = cl.get(next_url.clone()).send().await?;let final_url = pg.url().clone();let data = pg.bytes().await?;let doc = Html::parse_document(std::str::from_utf8(&data)?);
println!("Hello, world!");
let app = clap_app!(myapp =>(version: "0.1")(author: "ember")(about: "create a plot of media bias according to mediabiasfactcheck.com")(@subcommand first_scrape =>(about: "crawl all the website's urls ")(@arg verbose: -v --verbose "Verbose logging")));let args = app.get_matches();let (stop_tx, stop) = broadcast::channel(1);let interrupter = tokio::task::spawn(async move {tokio::signal::ctrl_c().await.expect("couldn't wait for signal?");stop_tx.send(()).expect("failed to broadcast stop signal");});let mut cl = reqwest::Client::new();tracing_subscriber::fmt().init();if let Some(phase1) = args.subcommand_matches("first-scrape") {if phase1.is_present("verbose") {// TODO: figure out how to decrease log level unless this is passed?}// load prev state from bloblet mut confirmed = bincode::deserialize_from(std::io::BufReader::new(std::fs::File::open("phase1.blob").unwrap(),)).unwrap_or(HashMap::new());all_urls_on_site_everywhere(stop, &mut cl, &mut confirmed).await?;bincode::serialize_into(std::io::BufWriter::new(std::fs::File::create("phase1.blob").expect("sad")),&confirmed,)?;}std::mem::forget(interrupter);Ok(())
name = "clap"version = "3.0.0-beta.2"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "4bd1061998a501ee7d4b6d449020df3266ca3124b941ec56cf2005c3779ca142"dependencies = ["atty","bitflags","clap_derive","indexmap","lazy_static","os_str_bytes","strsim","termcolor","textwrap","unicode-width","vec_map",][[package]]name = "clap_derive"version = "3.0.0-beta.2"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "370f715b81112975b1b69db93e0b56ea4cd4e5002ac43b2da8474106a54096a1"dependencies = ["heck","proc-macro-error","proc-macro2","quote","syn",][[package]]
name = "proc-macro-error"version = "1.0.4"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"dependencies = ["proc-macro-error-attr","proc-macro2","quote","syn","version_check",][[package]]name = "proc-macro-error-attr"version = "1.0.4"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"dependencies = ["proc-macro2","quote","version_check",][[package]]
name = "regex"version = "1.4.3"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a"dependencies = ["regex-syntax",][[package]]name = "regex-automata"version = "0.1.9"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"dependencies = ["byteorder","regex-syntax",][[package]]name = "regex-syntax"version = "0.6.22"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581"[[package]]
name = "termcolor"version = "1.1.2"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"dependencies = ["winapi-util",][[package]]name = "textwrap"version = "0.12.1"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "203008d98caf094106cfaba70acfed15e18ed3ddb7d94e49baec153a2b462789"dependencies = ["unicode-width",][[package]]
name = "tracing-log"version = "0.1.1"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "5e0f8c7178e13481ff6765bd169b33e8d554c5d2bbede5e32c356194be02b9b9"dependencies = ["lazy_static","log","tracing-core",][[package]]name = "tracing-serde"version = "0.1.2"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "fb65ea441fbb84f9f6748fd496cf7f63ec9af5bca94dd86456978d055e8eb28b"dependencies = ["serde","tracing-core",][[package]]name = "tracing-subscriber"version = "0.2.15"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "a1fa8f0c8f4c594e4fc9debc1990deab13238077271ba84dd853d54902ee3401"dependencies = ["ansi_term","chrono","lazy_static","matchers","regex","serde","serde_json","sharded-slab","smallvec","thread_local","tracing","tracing-core","tracing-log","tracing-serde",][[package]]
name = "vec_map"version = "0.8.2"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"[[package]]name = "version_check"version = "0.9.2"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"[[package]]