Initial terrible commit
This commit is contained in:
commit
382185f623
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
/target
|
1834
Cargo.lock
generated
Normal file
1834
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
13
Cargo.toml
Normal file
13
Cargo.toml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
[package]
|
||||||
|
name = "ebay_compute_scraper"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
scraper = "0.19.0"
|
||||||
|
log = "0.4.21"
|
||||||
|
anyhow = { version = "1.0.82", features = ["backtrace", "std"] }
|
||||||
|
tracing = "0.1.40"
|
||||||
|
tracing-subscriber = "0.3.18"
|
||||||
|
regex = "1.10.4"
|
||||||
|
reqwest = { version = "0.12.3", features = ["blocking"] }
|
155
EbayScrape_ryzen_1713039640.html
Normal file
155
EbayScrape_ryzen_1713039640.html
Normal file
File diff suppressed because one or more lines are too long
3
scrape.sh
Executable file
3
scrape.sh
Executable file
@ -0,0 +1,3 @@
|
|||||||
|
#!/usr/bin/env bash
# BUG FIX: "#!/bin/env bash" is broken on most systems — env(1) is installed
# at /usr/bin/env, so the script would fail to start. Use the portable form.
#
# Fetch the newest "ryzen" search results from eBay (sorted newest-first)
# into a timestamped HTML file for offline parsing.

wget 'https://www.ebay.com/sch/179/i.html?_from=R40&_nkw=ryzen&_sop=15&_blrs=recall_filtering' -O EbayScrape_ryzen_$(date +%s).html
|
48
src/ebay_fetcher.rs
Normal file
48
src/ebay_fetcher.rs
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use anyhow::{bail, Result};
|
||||||
|
use tracing::{info, error, instrument};
|
||||||
|
use std::sync::mpsc::{Sender, Receiver};
|
||||||
|
use std::sync::mpsc;
|
||||||
|
use std::thread;
|
||||||
|
use std::thread::Thread;
|
||||||
|
|
||||||
|
pub struct Context {
|
||||||
|
pub urls: HashMap<String, String>,
|
||||||
|
pub threads: Vec<std::thread::JoinHandle<Result<String, anyhow::Error>>>
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Context {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
urls: HashMap::from([
|
||||||
|
(
|
||||||
|
"n100".to_string(),
|
||||||
|
"https://www.ebay.com/sch/i.html?_from=R40&_nkw=n100&_sacat=171957&_sop=15&_blrs=recall_filtering&_ipg=240".to_string()
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"n305".to_string(),
|
||||||
|
"https://www.ebay.com/sch/i.html?_from=R40&_nkw=n305&_sacat=171957&_sop=15&_blrs=recall_filtering&_ipg=240".to_string()
|
||||||
|
)
|
||||||
|
]),
|
||||||
|
threads: Vec::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Context {
|
||||||
|
pub fn ebay_fetch_html(&mut self, filename: String, url: String, outdir_dir: String) -> Result<()> {
|
||||||
|
let thr = thread::spawn(move || {
|
||||||
|
// let body = reqwest::blocking::get(url)?.text()?;
|
||||||
|
|
||||||
|
if url.is_empty() {
|
||||||
|
Ok("hello".to_string())
|
||||||
|
} else {
|
||||||
|
bail!("hmmm")
|
||||||
|
}
|
||||||
|
});
|
||||||
|
self.threads.push(thr);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
223
src/main.rs
Normal file
223
src/main.rs
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
mod ebay_fetcher;
|
||||||
|
|
||||||
|
use std::{fs::File, io::Read, path::PathBuf};
|
||||||
|
use scraper::{ElementRef, Html, Selector, selector::ToCss};
|
||||||
|
use anyhow::{bail, Result};
|
||||||
|
use tracing::{info, error, instrument};
|
||||||
|
use tracing_subscriber::FmtSubscriber;
|
||||||
|
use regex::Regex;
|
||||||
|
use crate::ebay_fetcher::Context;
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[derive(Debug, Default, Clone)]
|
||||||
|
struct ParsedFile {
|
||||||
|
filename: String,
|
||||||
|
listings: Vec<EbayResult>
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A single eBay listing extracted from a search-results page.
#[allow(dead_code)]
#[derive(Debug, Clone)]
struct EbayResult {
    // Item price in dollars (single-price listings only; ranges are rejected).
    pub price: f64,
    // Shipping cost in dollars; 0.0 for free shipping.
    pub shipping: f64,
    // Listing title with the "New Listing" badge text stripped.
    pub title: String,
    // True when the listing shows an auction bid count.
    pub has_bids: bool,
    // True when the listing advertises "or Best Offer".
    pub allows_best_offer: bool,
    // Numeric eBay item id parsed from the listing URL.
    pub item_num: u64,
}
|
||||||
|
|
||||||
|
#[instrument]
|
||||||
|
fn parse_ebay_results(filepath: PathBuf) -> Result<Vec<EbayResult>> {
|
||||||
|
let mut f = File::open(filepath).unwrap();
|
||||||
|
let mut contents = String::new();
|
||||||
|
f.read_to_string(&mut contents).unwrap();
|
||||||
|
|
||||||
|
let document = Html::parse_document(contents.as_str());
|
||||||
|
|
||||||
|
let selector = Selector::parse(".srp-results").unwrap();
|
||||||
|
let srp_results = document.select(&selector).next().unwrap();
|
||||||
|
|
||||||
|
let (elems, errs): (Vec<_>, Vec<_>) = get_items(&srp_results)?
|
||||||
|
.iter()
|
||||||
|
.map(parse_item)
|
||||||
|
.partition(Result::is_ok);
|
||||||
|
for err in errs {
|
||||||
|
error!("Hit error: {:?}", err);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(elems.into_iter().map(Result::unwrap).collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip_all)]
|
||||||
|
fn get_items<'a>(page: &'a ElementRef<'a>) -> Result<Vec<ElementRef<'a>>> {
|
||||||
|
let selector = Selector::parse(".s-item").unwrap();
|
||||||
|
let found = page.select(&selector);
|
||||||
|
|
||||||
|
Ok(found.collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip_all)]
|
||||||
|
fn parse_item_title(binding: &ElementRef) -> Result<String> {
|
||||||
|
let selector = Selector::parse(".s-item__title").unwrap();
|
||||||
|
|
||||||
|
let found = binding.select(&selector);
|
||||||
|
let mut iter = found.into_iter();
|
||||||
|
if iter.clone().count() != 1 {
|
||||||
|
bail!("Expecting only a single title per result! Found:{:?}", iter.count());
|
||||||
|
}
|
||||||
|
|
||||||
|
let b1 = iter.next().unwrap().text().collect::<Vec<_>>();
|
||||||
|
let b2 = b1.iter().filter(|&e| *e != "New Listing").collect::<Vec<_>>();
|
||||||
|
if b2.len() != 1 {
|
||||||
|
bail!("Only expecting one title section per result!");
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(b2.first().unwrap().to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip_all)]
|
||||||
|
fn parse_item_price(binding: &ElementRef) -> Result<f64> {
|
||||||
|
let selector = Selector::parse(".s-item__price").unwrap();
|
||||||
|
|
||||||
|
let found = binding.select(&selector);
|
||||||
|
let mut iter = found.into_iter();
|
||||||
|
if iter.clone().count() != 1 {
|
||||||
|
bail!("Expecting only a single price per result! Found:{}", iter.clone().count());
|
||||||
|
}
|
||||||
|
|
||||||
|
let b1 = iter.next().unwrap().text().collect::<Vec<_>>();
|
||||||
|
let b2 = b1.iter().filter(|&e| *e != "New Listing").collect::<Vec<_>>();
|
||||||
|
let mut price_str = match b2.len() {
|
||||||
|
1 => { *b1.first().unwrap() }
|
||||||
|
3 => {
|
||||||
|
if *b2[1] == " to " {
|
||||||
|
bail!("Ignoring ranged listings, range:{:?}", b2);
|
||||||
|
}
|
||||||
|
bail!("Found three elements in pricing but unexpected values:{:?}", b2);
|
||||||
|
}
|
||||||
|
_ => { bail!("Found unexpected pricing: {:?}", b2); }
|
||||||
|
};
|
||||||
|
|
||||||
|
price_str = price_str.trim_start_matches("$");
|
||||||
|
Ok(price_str.parse().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip_all)]
|
||||||
|
fn parse_item_shipping(binding: &ElementRef) -> Result<f64> {
|
||||||
|
let free_x_days_shipping = {
|
||||||
|
let selector = Selector::parse(".s-item__freeXDays").unwrap();
|
||||||
|
|
||||||
|
match binding.select(&selector).count() {
|
||||||
|
0 => { false }
|
||||||
|
1 => { true }
|
||||||
|
unknown => {
|
||||||
|
bail!("Expecting only a single item__freeXDays per result! Found:{}", unknown);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if free_x_days_shipping {
|
||||||
|
return Ok(0.00);
|
||||||
|
}
|
||||||
|
|
||||||
|
let selector = Selector::parse(".s-item__shipping").unwrap();
|
||||||
|
|
||||||
|
let found = binding.select(&selector);
|
||||||
|
let mut iter = found.into_iter();
|
||||||
|
if iter.clone().count() != 1 {
|
||||||
|
bail!("Expecting only a single shipping price per result! Found:{}", iter.clone().count());
|
||||||
|
}
|
||||||
|
|
||||||
|
let b1 = iter.next().unwrap().text().collect::<Vec<_>>();
|
||||||
|
if b1.len() != 1 {
|
||||||
|
bail!("Expected only a single shipping price per result! Found:{:?}", b1);
|
||||||
|
}
|
||||||
|
let price_str = *b1.first().unwrap();
|
||||||
|
if price_str == "Free shipping" {
|
||||||
|
return Ok(0.00);
|
||||||
|
}
|
||||||
|
|
||||||
|
let price_otherstr = Regex::new(r"\d+\.\d+")
|
||||||
|
.unwrap()
|
||||||
|
.find(price_str)
|
||||||
|
.unwrap().as_str();
|
||||||
|
Ok(price_otherstr.parse().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip_all)]
|
||||||
|
fn parse_item_ebay_itm(binding: &ElementRef) -> Result<u64> {
|
||||||
|
let selector = Selector::parse(".s-item__link").unwrap();
|
||||||
|
|
||||||
|
let found = binding.select(&selector);
|
||||||
|
let mut iter = found.into_iter();
|
||||||
|
if iter.clone().count() != 1 {
|
||||||
|
bail!("Expecting only a single item link per result! Found:{}", iter.clone().count());
|
||||||
|
}
|
||||||
|
|
||||||
|
let b1 = iter.next().unwrap().attr("href").into_iter().collect::<Vec<_>>();
|
||||||
|
if b1.len() != 1 {
|
||||||
|
bail!("Expected only a single item link per result! Found:{:?}", b1);
|
||||||
|
}
|
||||||
|
let mut url = *b1.first().unwrap();
|
||||||
|
url = url.trim_start_matches("https://www.ebay.com/itm/");
|
||||||
|
|
||||||
|
let price_otherstr = Regex::new(r"\d+")
|
||||||
|
.unwrap()
|
||||||
|
.find(url)
|
||||||
|
.unwrap().as_str();
|
||||||
|
Ok(price_otherstr.parse().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip_all)]
|
||||||
|
fn item_has_bids(binding: &ElementRef) -> bool {
|
||||||
|
let selector_bids = Selector::parse(".s-item__bids").unwrap();
|
||||||
|
// let selector_bidcount = Selector::parse("s-item__bidCount").unwrap();
|
||||||
|
|
||||||
|
match binding.select(&selector_bids).count() {
|
||||||
|
0 => { false }
|
||||||
|
1 => { true }
|
||||||
|
uhm => { error!("Found {} an unexpected {} times", selector_bids.to_css_string(), uhm); false }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn item_has_bestoffer(binding: &ElementRef) -> bool {
|
||||||
|
let selector_bids = Selector::parse(".s-item__purchaseOptions").unwrap();
|
||||||
|
let elems: Vec<&str> = binding.select(&selector_bids)
|
||||||
|
.map(|e| e.text().collect::<Vec<_>>())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter().flatten()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
elems.contains(&"or Best Offer")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[instrument(skip_all)]
|
||||||
|
fn parse_item(elem: &ElementRef) -> Result<EbayResult> {
|
||||||
|
Ok(EbayResult {
|
||||||
|
price: parse_item_price(elem)?,
|
||||||
|
title: parse_item_title(elem)?,
|
||||||
|
has_bids: item_has_bids(elem),
|
||||||
|
shipping: parse_item_shipping(elem)?,
|
||||||
|
item_num: parse_item_ebay_itm(elem)?,
|
||||||
|
allows_best_offer: item_has_bestoffer(elem)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<()>{
|
||||||
|
tracing::subscriber::set_global_default(
|
||||||
|
FmtSubscriber::builder()
|
||||||
|
.with_max_level(tracing::Level::INFO)
|
||||||
|
.finish()
|
||||||
|
)
|
||||||
|
.expect("setting default subscriber failed");
|
||||||
|
|
||||||
|
let parsed_files = vec![ParsedFile::default(); 0];
|
||||||
|
|
||||||
|
let mut ebay_ctx = ebay_fetcher::Context::default();
|
||||||
|
ebay_ctx.ebay_fetch_html("filename".to_string(), "someurl".to_string(), "somedir".to_string())?;
|
||||||
|
|
||||||
|
for e in parse_ebay_results(PathBuf::from("EbayScrape_ryzen_1713039640.html"))? {
|
||||||
|
info!("{:?}", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user