6RFPVLVLGAC62JWVZQF3CNTP74YRNDGDWXFCHKYFIYUNCCF7CGFQC
fn main() {
struct PageCandidate {
final_url: Url,
data: ByteBuf,
}
// map from original scraped url to retrieved page.
async fn all_urls_on_site_everywhere(cl: &Client) -> anyhow::Result<HashMap<Url, PageCandidate>> {
let mut pending: HashSet<Url> =
iter::once("https://mediabiasfactcheck.com/".try_into()?).collect();
let mut confirmed: HashMap<Url, PageCandidate> = HashMap::new();
let required_sleep = || tokio::time::sleep(std::time::Duration::from_secs_f32(0.25));
let afinder = Selector::parse("a").map_err(|e| anyhow!("this is no good :( {:?}", e))?;
while let Some(next_url) = pending.iter().next() {
pending.remove(next_url);
required_sleep();
let pg = cl.get(next_url).send().await?;
confirmed.push(PageCandidate {
final_url: pg.url.clone(),
data: pg.bytes().await?,
});
let doc = Html::parse_document(data);
for a_elem in doc.select(&afinder) {
if let Some(link) = a_elem.value().attr("href") {
match Url::parse(link) {
Ok(potentially_new_url) => {
if !confirmed.contains_key(potentially_new_url)
&& !pending.contains(potentially_new_url)
{
pending.push(potentially_new_url);
}
}
_ => {}
}
}
}
}
Ok(confirmed)
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
"time",
"winapi 0.3.9",
]
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
dependencies = [
"bitflags",
]
[[package]]
name = "cookie"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "888604f00b3db336d2af898ec3c1d5d0ddf5e6d462220f2ededc33a87ac4bbd5"
dependencies = [
"time",
"url 1.7.2",
]
[[package]]
name = "cookie_store"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46750b3f362965f197996c4448e4a0935e791bf7d6631bfce9ee0af3d24c919c"
dependencies = [
"cookie",
"failure",
"idna 0.1.5",
"log",
"publicsuffix",
name = "crossbeam-deque"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
"maybe-uninit",
]
[[package]]
name = "crossbeam-epoch"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace"
dependencies = [
"autocfg 1.0.1",
"cfg-if 0.1.10",
"crossbeam-utils",
"lazy_static",
"maybe-uninit",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-queue"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "774ba60a54c213d409d5353bda12d49cd68d14e45036a285234c8d6f91f92570"
dependencies = [
"cfg-if 0.1.10",
"crossbeam-utils",
"maybe-uninit",
]
[[package]]
name = "crossbeam-utils"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8"
dependencies = [
"autocfg 1.0.1",
"cfg-if 0.1.10",
"lazy_static",
]
[[package]]
]
[[package]]
name = "error-chain"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc"
dependencies = [
"version_check",
]
[[package]]
name = "failure"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86"
dependencies = [
"backtrace",
"failure_derive",
]
[[package]]
name = "failure_derive"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "flate2"
version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7411863d55df97a419aa64cb4d2f167103ea9d767e2c54a1868b7ac3f6b47129"
dependencies = [
"cfg-if 1.0.0",
"crc32fast",
"libc",
"miniz_oxide",
"tokio-io",
]
[[package]]
name = "idna"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"
dependencies = [
"matches",
"unicode-bidi",
"unicode-normalization",
"tokio",
"tokio-native-tls",
name = "mime_guess"
version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2684d4c2e97d99848d30b324b00c8fcc7e5c897b7cbb5819b09e7c90e8baf212"
dependencies = [
"mime",
"unicase",
]
[[package]]
name = "miniz_oxide"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f2d26ec3309788e423cfbf68ad1800f061638098d76a83681af979dc4eda19d"
dependencies = [
"adler",
"autocfg 1.0.1",
]
[[package]]
"rand 0.6.5",
"rand",
]
[[package]]
name = "phf_macros"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
name = "pin-project"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95b70b68509f17aa2857863b6fa00bf21fc93674c7a8893de2f469f6aa7ca2f2"
dependencies = [
"pin-project-internal 1.0.4",
]
[[package]]
name = "pin-project-internal"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65ad2ae56b6abe3a1ee25f15ee605bacadb9a764edaba9c2bf4103800d4a1895"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pin-project-internal"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caa25a6393f22ce819b0f50e0be89287292fda8d425be38ee0ca14c4931d9e71"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pin-project-lite"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba36e0a6cc5a4c645073f4984f1ed55d09f5857d4de7c14550baa81a39ef5a17"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
]
[[package]]
name = "procedural-masquerade"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f1383dff4092fe903ac180e391a8d4121cc48f08ccf850614b0290c6673b69d"
[[package]]
name = "publicsuffix"
version = "1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bbaa49075179162b49acac1c6aa45fb4dafb5f13cf6794276d77bc7fd95757b"
dependencies = [
"error-chain",
"idna 0.2.0",
"lazy_static",
"regex",
"url 2.2.0",
]
[[package]]
name = "rand"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
dependencies = [
"autocfg 0.1.7",
"libc",
"rand_chacha 0.1.1",
"rand_core 0.4.2",
"rand_hc 0.1.0",
"rand_isaac",
"rand_jitter",
"rand_os",
"rand_pcg",
"rand_xorshift",
"winapi 0.3.9",
"rand_chacha 0.2.2",
"rand_core 0.5.1",
"rand_hc 0.2.0",
]
[[package]]
name = "rand_chacha"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
dependencies = [
"autocfg 0.1.7",
"rand_core 0.3.1",
"rand_chacha",
"rand_core",
"rand_hc",
"rand_pcg",
"rand_core 0.5.1",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
dependencies = [
"rand_core 0.4.2",
"rand_core",
dependencies = [
"rand_core 0.5.1",
]
[[package]]
name = "rand_isaac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "rand_jitter"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b"
name = "rand_os"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
dependencies = [
"cloudabi",
"fuchsia-cprng",
"libc",
"rand_core 0.4.2",
"rdrand",
"winapi 0.3.9",
]
[[package]]
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
dependencies = [
"autocfg 0.1.7",
"rand_core 0.4.2",
]
[[package]]
name = "rand_xorshift"
version = "0.1.1"
version = "0.2.1"
name = "regex"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a"
dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581"
[[package]]
name = "robotparser"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0ff3fc3fbb7e0d7c2de0861baa7a92cc0a2348b1de88384009a1c0afe54f339"
dependencies = [
"reqwest",
"url 1.7.2",
]
[[package]]
name = "rustc-demangle"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e3bad0ee36814ca07d7968269dd4b7ec89ec2da10c4bb613928d3077083c232"
[[package]]
name = "rustc_version"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
dependencies = [
"semver",
]
[[package]]
checksum = "09bc590ec4ba8ba87652da2068d150dcada2cfa2e07faae270a5e0409aa51351"
dependencies = [
"crossbeam-utils",
"futures",
"lazy_static",
"log",
"mio",
"num_cpus",
"parking_lot",
"slab",
"tokio-executor",
"tokio-io",
"tokio-sync",
]
checksum = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860"
"bytes",
"futures",
"iovec",
"mio",
"tokio-io",
"tokio-reactor",
]
[[package]]
name = "tokio-threadpool"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df720b6581784c118f0eb4310796b12b1d242a7eb95f716a8367855325c25f89"
dependencies = [
"crossbeam-deque",
"crossbeam-queue",
"crossbeam-utils",
"futures",
[[package]]
name = "try_from"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "283d3b89e1368717881a9d51dad843cc435380d8109c9e47d38780a324698d8b"
dependencies = [
"cfg-if 0.1.10",
]
[[package]]
name = "unicase"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
dependencies = [
"version_check",
]
name = "winapi"
version = "0.2.8"
name = "wasm-bindgen"
version = "0.2.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cd364751395ca0f68cafb17666eee36b63077fb5ecd972bbcd74c90c4bf736e"
dependencies = [
"cfg-if 1.0.0",
"serde",
"serde_json",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.69"
checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
checksum = "1114f89ab1f4106e5b55e688b828c0ab0ea593a1ea7c094b141b14cbaaec2d62"
dependencies = [
"bumpalo",
"lazy_static",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fe9756085a84584ee9457a002b7cdfe0bfff169f45d2591d8be1345a6780e35"
dependencies = [
"cfg-if 1.0.0",
"js-sys",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6ac8995ead1f084a8dea1e65f194d0973800c7f571f6edd70adf06ecf77084"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5a48c72f299d80557c7c62e37e7225369ecc0c963964059509fbafe917c7549"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
name = "wasm-bindgen-shared"
version = "0.2.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e7811dd7f9398f14cc76efd356f98f03aa30419dea46aa810d71e819fc97158"
[[package]]
name = "web-sys"
version = "0.3.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "222b1ef9334f92a21d3fb53dc3fd80f30836959a90f9274a626d7e06315ba3c3"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]