Add ItemAppearances to track price and page history
All checks were successful
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 3m30s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 3m57s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 4m9s
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 9m50s

This commit is contained in:
2025-06-28 01:00:28 -04:00
parent b9cc62e3dd
commit 817b1d6275
3 changed files with 178 additions and 61 deletions

View File

@ -1,4 +1,4 @@
use crate::db::Listing;
use crate::db::{ItemAppearances, Listing};
use chrono::Utc;
use lazy_static::lazy_static;
use regex::Regex;
@ -52,7 +52,8 @@ fn parse_price(price_text: &str) -> Option<f64> {
pub fn extract_data_from_html(
html_content: &str,
timestamp: &chrono::DateTime<Utc>,
) -> Option<Vec<Listing>> {
category: &str,
) -> Option<Vec<(Listing, ItemAppearances)>> {
let document = Html::parse_document(html_content);
let mut items = Vec::new();
@ -148,16 +149,22 @@ pub fn extract_data_from_html(
.map(|s| s.to_string())
.unwrap();
items.push(Listing {
title,
id: 0,
item_id: id?,
added_time: *timestamp,
current_bid_price,
buy_it_now_price: final_buy_it_now_price,
has_best_offer,
image_url,
});
items.push((
Listing {
title,
id: 0,
item_id: id?,
buy_it_now_price: final_buy_it_now_price,
has_best_offer,
image_url,
},
ItemAppearances {
item: id?,
timestamp: *timestamp,
current_bid_usd_cents: current_bid_price.map(|b| (b * 100.0).round() as i64),
category: category.to_owned(),
},
));
}
Some(items)
}
@ -171,39 +178,48 @@ mod tests {
fn parse() {
let timestamp = chrono::DateTime::from_timestamp(1750369463, 0).unwrap();
let html = include_str!("../test_data/ebay_scraper/raw_scraped/ssd/1750369463.html");
let parsed = extract_data_from_html(html, &timestamp).unwrap();
let parsed = extract_data_from_html(html, &timestamp, "ssd").unwrap();
// assert_eq!(parsed.len(), 62);
let parsed = parsed.first_chunk::<10>().unwrap();
assert_eq!(
parsed[0],
Listing {
id: 0,
item_id: 388484391867,
title: "WD Blue 2.5-Inch 3D NAND SATA SSD 1TB - WDBNCE0010PNC-WRSN".to_string(),
added_time: timestamp,
current_bid_price: None,
buy_it_now_price: Some(59.99),
has_best_offer: true,
image_url: "https://i.ebayimg.com/images/g/wQYAAeSwOTtoN8SC/s-l500.webp"
.to_string()
}
(
Listing {
id: 0,
item_id: 388484391867,
title: "WD Blue 2.5-Inch 3D NAND SATA SSD 1TB - WDBNCE0010PNC-WRSN".to_string(),
buy_it_now_price: Some(59.99),
has_best_offer: true,
image_url: "https://i.ebayimg.com/images/g/wQYAAeSwOTtoN8SC/s-l500.webp"
.to_string()
},
ItemAppearances {
item: 388484391867,
timestamp: timestamp,
category: "ssd".to_owned(),
current_bid_usd_cents: None
}
)
);
assert_eq!(
parsed[4],
Listing {
(Listing {
id: 0,
item_id: 286605201240,
title:
"Fanxiang M.2 SSD 1TB NVMe PCIe Gen 3x 4 M2 Internal Solid State Drive 3500MB/s"
.to_string(),
added_time: timestamp,
current_bid_price: Some(12.60),
buy_it_now_price: None,
has_best_offer: true,
image_url: "https://i.ebayimg.com/images/g/3NoAAeSwPrtoDb1O/s-l500.webp"
.to_string()
}
}, ItemAppearances {
item: 286605201240,
timestamp: timestamp,
category: "ssd".to_owned(),
current_bid_usd_cents: Some(1260)
})
);
}
}