Initial terrible commit
commit 382185f623
.gitignore (vendored, normal file, 1 line)
@@ -0,0 +1 @@
/target
Cargo.lock (generated, normal file, 1834 lines)
File diff suppressed because it is too large
Cargo.toml (normal file, 13 lines)
@@ -0,0 +1,13 @@
[package]
name = "ebay_compute_scraper"
version = "0.1.0"
edition = "2021"

[dependencies]
scraper = "0.19.0"
log = "0.4.21"
anyhow = { version = "1.0.82", features = ["backtrace", "std"] }
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
regex = "1.10.4"
reqwest = { version = "0.12.3", features = ["blocking"] }
EbayScrape_ryzen_1713039640.html (normal file, 155 lines)
File diff suppressed because one or more lines are too long
scrape.sh (executable file, 3 lines)
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
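# Download the eBay "ryzen" search results page and save it to a timestamped HTML file for offline parsing.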

wget 'https://www.ebay.com/sch/179/i.html?_from=R40&_nkw=ryzen&_sop=15&_blrs=recall_filtering' -O EbayScrape_ryzen_$(date +%s).html
src/ebay_fetcher.rs (normal file, 48 lines)
@@ -0,0 +1,48 @@
use std::collections::HashMap;
use anyhow::{bail, Result};
use tracing::{info, error, instrument};
use std::sync::mpsc::{Sender, Receiver};
use std::sync::mpsc;
use std::thread;
use std::thread::Thread;

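// Scraping context: a map of search name -> eBay search URL, plus join handles for fetch threads that are still running.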
pub struct Context {
    pub urls: HashMap<String, String>,
    pub threads: Vec<std::thread::JoinHandle<Result<String, anyhow::Error>>>
}

impl Default for Context {
    fn default() -> Self {
        Self {
            urls: HashMap::from([
                (
                    "n100".to_string(),
                    "https://www.ebay.com/sch/i.html?_from=R40&_nkw=n100&_sacat=171957&_sop=15&_blrs=recall_filtering&_ipg=240".to_string()
                ),
                (
                    "n305".to_string(),
                    "https://www.ebay.com/sch/i.html?_from=R40&_nkw=n305&_sacat=171957&_sop=15&_blrs=recall_filtering&_ipg=240".to_string()
                )
            ]),
            threads: Vec::new()
        }
    }
}

impl Context {
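    // Spawn a background thread for this fetch and stash its JoinHandle on the context.
    // The reqwest call is still commented out, so the closure body below is placeholder logic.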
    pub fn ebay_fetch_html(&mut self, filename: String, url: String, outdir_dir: String) -> Result<()> {
        let thr = thread::spawn(move || {
            // let body = reqwest::blocking::get(url)?.text()?;

            if url.is_empty() {
                Ok("hello".to_string())
            } else {
                bail!("hmmm")
            }
        });
        self.threads.push(thr);

        Ok(())
    }
}
src/main.rs (normal file, 223 lines)
@@ -0,0 +1,223 @@
mod ebay_fetcher;

use std::{fs::File, io::Read, path::PathBuf};
use scraper::{ElementRef, Html, Selector, selector::ToCss};
use anyhow::{bail, Result};
use tracing::{info, error, instrument};
use tracing_subscriber::FmtSubscriber;
use regex::Regex;
use crate::ebay_fetcher::Context;

#[allow(dead_code)]
#[derive(Debug, Default, Clone)]
struct ParsedFile {
    filename: String,
    listings: Vec<EbayResult>
}

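// One parsed listing from an eBay search results page.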
#[allow(dead_code)]
#[derive(Debug, Clone)]
struct EbayResult {
    pub price: f64,
    pub shipping: f64,
    pub title: String,
    pub has_bids: bool,
    pub allows_best_offer: bool,
    pub item_num: u64,
}

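// Read a saved search-results page from disk, select the .srp-results container,
// and parse every .s-item listing, logging and skipping the ones that fail.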
#[instrument]
fn parse_ebay_results(filepath: PathBuf) -> Result<Vec<EbayResult>> {
    let mut f = File::open(filepath).unwrap();
    let mut contents = String::new();
    f.read_to_string(&mut contents).unwrap();

    let document = Html::parse_document(contents.as_str());

    let selector = Selector::parse(".srp-results").unwrap();
    let srp_results = document.select(&selector).next().unwrap();

    let (elems, errs): (Vec<_>, Vec<_>) = get_items(&srp_results)?
        .iter()
        .map(parse_item)
        .partition(Result::is_ok);
    for err in errs {
        error!("Hit error: {:?}", err);
    }

    Ok(elems.into_iter().map(Result::unwrap).collect())
}

#[instrument(skip_all)]
fn get_items<'a>(page: &'a ElementRef<'a>) -> Result<Vec<ElementRef<'a>>> {
    let selector = Selector::parse(".s-item").unwrap();
    let found = page.select(&selector);

    Ok(found.collect())
}

#[instrument(skip_all)]
fn parse_item_title(binding: &ElementRef) -> Result<String> {
    let selector = Selector::parse(".s-item__title").unwrap();

    let found = binding.select(&selector);
    let mut iter = found.into_iter();
    if iter.clone().count() != 1 {
        bail!("Expecting only a single title per result! Found:{:?}", iter.count());
    }

    let b1 = iter.next().unwrap().text().collect::<Vec<_>>();
    let b2 = b1.iter().filter(|&e| *e != "New Listing").collect::<Vec<_>>();
    if b2.len() != 1 {
        bail!("Only expecting one title section per result!");
    }

    return Ok(b2.first().unwrap().to_string())
}

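// Extract the listing price from the single .s-item__price node; ranged ("X to Y") listings are rejected.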
#[instrument(skip_all)]
fn parse_item_price(binding: &ElementRef) -> Result<f64> {
    let selector = Selector::parse(".s-item__price").unwrap();

    let found = binding.select(&selector);
    let mut iter = found.into_iter();
    if iter.clone().count() != 1 {
        bail!("Expecting only a single price per result! Found:{}", iter.clone().count());
    }

    let b1 = iter.next().unwrap().text().collect::<Vec<_>>();
    let b2 = b1.iter().filter(|&e| *e != "New Listing").collect::<Vec<_>>();
    let mut price_str = match b2.len() {
        1 => { *b1.first().unwrap() }
        3 => {
            if *b2[1] == " to " {
                bail!("Ignoring ranged listings, range:{:?}", b2);
            }
            bail!("Found three elements in pricing but unexpected values:{:?}", b2);
        }
        _ => { bail!("Found unexpected pricing: {:?}", b2); }
    };

    price_str = price_str.trim_start_matches("$");
    Ok(price_str.parse().unwrap())
}

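// Shipping cost: 0.00 for free/freeXDays shipping, otherwise the first decimal number in the .s-item__shipping text.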
#[instrument(skip_all)]
fn parse_item_shipping(binding: &ElementRef) -> Result<f64> {
    let free_x_days_shipping = {
        let selector = Selector::parse(".s-item__freeXDays").unwrap();

        match binding.select(&selector).count() {
            0 => { false }
            1 => { true }
            unknown => {
                bail!("Expecting only a single item__freeXDays per result! Found:{}", unknown);
            }
        }
    };
    if free_x_days_shipping {
        return Ok(0.00);
    }

    let selector = Selector::parse(".s-item__shipping").unwrap();

    let found = binding.select(&selector);
    let mut iter = found.into_iter();
    if iter.clone().count() != 1 {
        bail!("Expecting only a single shipping price per result! Found:{}", iter.clone().count());
    }

    let b1 = iter.next().unwrap().text().collect::<Vec<_>>();
    if b1.len() != 1 {
        bail!("Expected only a single shipping price per result! Found:{:?}", b1);
    }
    let price_str = *b1.first().unwrap();
    if price_str == "Free shipping" {
        return Ok(0.00);
    }

    let price_otherstr = Regex::new(r"\d+\.\d+")
        .unwrap()
        .find(price_str)
        .unwrap().as_str();
    Ok(price_otherstr.parse().unwrap())
}

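// Pull the numeric eBay item number out of the listing's /itm/ URL.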
#[instrument(skip_all)]
fn parse_item_ebay_itm(binding: &ElementRef) -> Result<u64> {
    let selector = Selector::parse(".s-item__link").unwrap();

    let found = binding.select(&selector);
    let mut iter = found.into_iter();
    if iter.clone().count() != 1 {
        bail!("Expecting only a single item link per result! Found:{}", iter.clone().count());
    }

    let b1 = iter.next().unwrap().attr("href").into_iter().collect::<Vec<_>>();
    if b1.len() != 1 {
        bail!("Expected only a single item link per result! Found:{:?}", b1);
    }
    let mut url = *b1.first().unwrap();
    url = url.trim_start_matches("https://www.ebay.com/itm/");

    let item_num_str = Regex::new(r"\d+")
        .unwrap()
        .find(url)
        .unwrap().as_str();
    Ok(item_num_str.parse().unwrap())
}

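// A listing is treated as an auction if it carries exactly one .s-item__bids element; anything else is logged and counted as no bids.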
#[instrument(skip_all)]
fn item_has_bids(binding: &ElementRef) -> bool {
    let selector_bids = Selector::parse(".s-item__bids").unwrap();
    // let selector_bidcount = Selector::parse("s-item__bidCount").unwrap();

    match binding.select(&selector_bids).count() {
        0 => { false }
        1 => { true }
        uhm => { error!("Found {} an unexpected {} times", selector_bids.to_css_string(), uhm); false }
    }
}

fn item_has_bestoffer(binding: &ElementRef) -> bool {
    let selector_bids = Selector::parse(".s-item__purchaseOptions").unwrap();
    let elems: Vec<&str> = binding.select(&selector_bids)
        .map(|e| e.text().collect::<Vec<_>>())
        .collect::<Vec<_>>()
        .into_iter().flatten()
        .collect();

    elems.contains(&"or Best Offer")
}

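// Assemble a full EbayResult for one .s-item; any sub-parser failure bubbles up and the listing is skipped by the caller.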
#[instrument(skip_all)]
fn parse_item(elem: &ElementRef) -> Result<EbayResult> {
    Ok(EbayResult {
        price: parse_item_price(elem)?,
        title: parse_item_title(elem)?,
        has_bids: item_has_bids(elem),
        shipping: parse_item_shipping(elem)?,
        item_num: parse_item_ebay_itm(elem)?,
        allows_best_offer: item_has_bestoffer(elem)
    })
}

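// Wire up tracing, kick off a (placeholder) fetch thread, then parse the checked-in Ryzen snapshot and log each listing.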
fn main() -> Result<()> {
    tracing::subscriber::set_global_default(
        FmtSubscriber::builder()
            .with_max_level(tracing::Level::INFO)
            .finish()
    )
    .expect("setting default subscriber failed");

    let parsed_files = vec![ParsedFile::default(); 0];

    let mut ebay_ctx = ebay_fetcher::Context::default();
    ebay_ctx.ebay_fetch_html("filename".to_string(), "someurl".to_string(), "somedir".to_string())?;

    for e in parse_ebay_results(PathBuf::from("EbayScrape_ryzen_1713039640.html"))? {
        info!("{:?}", e);
    }

    Ok(())
}