Added stats, parallel parsing of pages, and filtered fetch of listings

2025-07-06 23:38:27 -04:00
parent bbca1f3bcb
commit 448933ae67
7 changed files with 528 additions and 206 deletions
--- a/src/parser_ebay.rs
+++ b/src/parser_ebay.rs
@@ -18,7 +18,6 @@ fn parse_price(price_text: &str) -> Option<f64> {
        if let Some(first_part) = lower_price_text.split(" to ").next() {
            if let Some(caps) = PRICE_REGEX.captures(first_part) {
                if let Some(price_match) = caps.get(1) {
-                    info!("Price string:{:?} parsed!", price_match);
                    return price_match.as_str().replace(',', "").parse().ok();
                }
            }
@@ -49,7 +48,7 @@ fn parse_price(price_text: &str) -> Option<f64> {
 }

 /// Extracts item data from HTML content.
-pub fn extract_data_from_html(
+pub fn parse_from_ebay_page(
    html_content: &str,
    timestamp: &chrono::DateTime<Utc>,
    category: &str,
@@ -98,7 +97,7 @@ pub fn extract_data_from_html(
            continue;
        }
        if id.unwrap() == 123456 {
-            info!("Skipping {:?} due to bogus ID of 123456", element);
+            info!("Skipping element due to bogus ID of 123456");
            continue;
        }

@@ -178,7 +177,7 @@ mod tests {
    fn parse() {
        let timestamp = chrono::DateTime::from_timestamp(1750369463, 0).unwrap();
        let html = include_str!("../test_data/scraper/raw_scraped/ssd/1750369463.html");
-        let parsed = extract_data_from_html(html, &timestamp, "ssd").unwrap();
+        let parsed = parse_from_ebay_page(html, &timestamp, "ssd").unwrap();
        // assert_eq!(parsed.len(), 62);

        let parsed = parsed.first_chunk::<10>().unwrap();