Added stats, parallel parsing of pages, and filtered fetch of listings
All checks were successful
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 3m34s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 4m3s
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 4m13s
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 9m44s

This commit is contained in:
2025-07-06 23:38:27 -04:00
parent bbca1f3bcb
commit 448933ae67
7 changed files with 528 additions and 206 deletions

View File

@@ -18,7 +18,6 @@ fn parse_price(price_text: &str) -> Option<f64> {
if let Some(first_part) = lower_price_text.split(" to ").next() {
if let Some(caps) = PRICE_REGEX.captures(first_part) {
if let Some(price_match) = caps.get(1) {
-info!("Price string:{:?} parsed!", price_match);
return price_match.as_str().replace(',', "").parse().ok();
}
}
@@ -49,7 +48,7 @@ fn parse_price(price_text: &str) -> Option<f64> {
}
/// Extracts item data from HTML content.
-pub fn extract_data_from_html(
+pub fn parse_from_ebay_page(
html_content: &str,
timestamp: &chrono::DateTime<Utc>,
category: &str,
@@ -98,7 +97,7 @@ pub fn extract_data_from_html(
continue;
}
if id.unwrap() == 123456 {
-info!("Skipping {:?} due to bogus ID of 123456", element);
+info!("Skipping element due to bogus ID of 123456");
continue;
}
@@ -178,7 +177,7 @@ mod tests {
fn parse() {
let timestamp = chrono::DateTime::from_timestamp(1750369463, 0).unwrap();
let html = include_str!("../test_data/scraper/raw_scraped/ssd/1750369463.html");
-let parsed = extract_data_from_html(html, &timestamp, "ssd").unwrap();
+let parsed = parse_from_ebay_page(html, &timestamp, "ssd").unwrap();
// assert_eq!(parsed.len(), 62);
let parsed = parsed.first_chunk::<10>().unwrap();