From 817b1d62752ea3143906f0205eb342658e93aeff Mon Sep 17 00:00:00 2001 From: hak8or Date: Sat, 28 Jun 2025 01:00:28 -0400 Subject: [PATCH] Add ItemAppearances to track price and page history --- src/db.rs | 138 ++++++++++++++++++++++++++++++++++++--------- src/main.rs | 29 ++++++++-- src/parser_ebay.rs | 72 ++++++++++++++--------- 3 files changed, 178 insertions(+), 61 deletions(-) diff --git a/src/db.rs b/src/db.rs index 658d991..8717af8 100644 --- a/src/db.rs +++ b/src/db.rs @@ -192,13 +192,81 @@ impl ParsedStorage { } } +#[derive(Serialize, Debug, PartialEq, Clone)] +pub struct ItemAppearances { + pub item: i64, + pub timestamp: DateTime, + pub category: String, + pub current_bid_usd_cents: Option, +} +impl DBTable for ItemAppearances { + const TABLE_NAME: &'static str = "Item_Appearances"; + const TABLE_SCHEMA: &'static str = " + id INTEGER PRIMARY KEY, + item INTEGER NOT NULL, + category TEXT NOT NULL, + timestamp INTEGER NOT NULL, + current_bid_usd_cents INTEGER, + UNIQUE(item, timestamp), + FOREIGN KEY(item) REFERENCES Ebay_Items(item_id), + FOREIGN KEY(category, timestamp) REFERENCES Pages_Parsed(category, timestamp) + "; +} +impl ItemAppearances { + pub fn add_or_update(&self, conn: &Connection) { + let count = conn + .execute( + &format!( + " + INSERT OR REPLACE INTO {} + (item, timestamp, category, current_bid_usd_cents) + VALUES + (?1, ?2, ?3, ?4)", + Self::TABLE_NAME + ), + ( + self.item, + &self.timestamp, + &self.category, + self.current_bid_usd_cents, + ), + ) + .unwrap(); + if count != 1 { + panic!("Expected count to be 1 but got {}", count); + } + } + + pub fn lookup(conn: &Connection, listing_id: i64) -> Vec { + let mut stmt = conn + .prepare(&format!( + " + SELECT * FROM {} + WHERE item IS ?1", + Self::TABLE_NAME, + )) + .ok() + .unwrap(); + stmt.query_map([listing_id], |row| { + Ok(ItemAppearances { + item: row.get(1)?, + category: row.get(2)?, + timestamp: row.get(3)?, + current_bid_usd_cents: row.get(4)?, + }) + }) + .ok() + .unwrap() + 
.map(|e| e.unwrap()) + .collect() + } +} + #[derive(Serialize, Debug, PartialEq, Clone)] pub struct Listing { pub id: i64, pub item_id: i64, pub title: String, - pub added_time: DateTime, - pub current_bid_price: Option, pub buy_it_now_price: Option, pub has_best_offer: bool, pub image_url: String, @@ -209,8 +277,6 @@ impl DBTable for Listing { id INTEGER PRIMARY KEY, item_id INTEGER NOT NULL UNIQUE, title TEXT NOT NULL, - added_time INTEGER NOT NULL, - current_bid_usd_cents INTEGER, buy_it_now_usd_cents INTEGER, has_best_offer INTEGER NOT NULL, image_url TEXT NOT NULL @@ -229,39 +295,43 @@ impl Listing { id: row.get(0)?, item_id: row.get(1)?, title: row.get(2)?, - added_time: row.get(3)?, - current_bid_price: row.get(4)?, - buy_it_now_price: row.get(5)?, - has_best_offer: row.get(6)?, - image_url: row.get(7)?, + buy_it_now_price: row.get(3)?, + has_best_offer: row.get(4)?, + image_url: row.get(5)?, }) }) .ok() } - pub fn lookup_since(conn: &Connection, since: i64, limit: i64) -> Vec { + pub fn lookup_since(conn: &Connection, since: DateTime, limit: i64) -> Vec { let mut stmt = conn .prepare(&format!( - "SELECT * FROM {} - WHERE added_time >= ?1 - ORDER BY added_time + " + SELECT * + FROM {0} + WHERE EXISTS ( + SELECT 1 + FROM {1} + WHERE + {1}.item = {0}.item_id AND + {1}.timestamp >= ?1 + ) LIMIT ?2 ", - Self::TABLE_NAME + Self::TABLE_NAME, + ItemAppearances::TABLE_NAME )) .ok() .unwrap(); - stmt.query_map([since, limit], |row| { + stmt.query_map([since.timestamp(), limit], |row| { Ok(Listing { id: row.get(0)?, item_id: row.get(1)?, title: row.get(2)?, - added_time: row.get(3)?, - current_bid_price: row.get(4)?, - buy_it_now_price: row.get(5)?, - has_best_offer: row.get(6)?, - image_url: row.get(7)?, + buy_it_now_price: row.get(3)?, + has_best_offer: row.get(4)?, + image_url: row.get(5)?, }) }) .ok() @@ -297,20 +367,16 @@ impl Listing { ( item_id, title, - added_time, - current_bid_usd_cents, buy_it_now_usd_cents, has_best_offer, image_url ) - VALUES (?1, ?2, ?3, 
?4, ?5, ?6, ?7)", + VALUES (?1, ?2, ?3, ?4, ?5)", Self::TABLE_NAME ), ( self.item_id, &self.title, - self.added_time, - self.current_bid_price, self.buy_it_now_price, self.has_best_offer, self.image_url.clone(), @@ -334,6 +400,7 @@ pub fn get_initialized(path: Option<&Path>) -> Connection { Listing::initialize(&conn); ParsedStorage::initialize(&conn); ParsedPage::initialize(&conn); + ItemAppearances::initialize(&conn); conn } @@ -357,8 +424,6 @@ mod tests { id: 1, item_id: 1234, title: "Some Title".to_string(), - added_time: std::time::SystemTime::now().into(), - current_bid_price: Some(0.12), buy_it_now_price: Some(1.23), has_best_offer: false, image_url: "google.com".to_string(), @@ -383,6 +448,23 @@ mod tests { timestamp: std::time::SystemTime::now().into(), }; page.add_or_update_db(&db); - assert_eq!(ParsedPage::lookup_db(&db, page.timestamp), Some(page)); + assert_eq!( + ParsedPage::lookup_db(&db, page.timestamp), + Some(page.clone()) + ); + + let apperance = ItemAppearances { + item: listing.item_id, + timestamp: page.timestamp, + category: page.category, + current_bid_usd_cents: Some(1233), + }; + apperance.add_or_update(&db); + assert_eq!( + ItemAppearances::lookup(&db, listing.item_id), + vec![apperance] + ); + + assert_eq!(Listing::lookup_since(&db, page.timestamp, 3), vec![listing]); } } diff --git a/src/main.rs b/src/main.rs index 8364d52..25fddff 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,9 @@ use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data}; +use chrono::DateTime; use clap::Parser; -use ebay_scraper_rust::db::{Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized}; +use ebay_scraper_rust::db::{ + ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized, +}; use ebay_scraper_rust::{parser_ebay, parser_storage}; use std::path::{Path, PathBuf}; use std::sync::Mutex; @@ -27,6 +30,19 @@ async fn page_get( ))) } +#[get("/listing/{id}/history")] +async fn listing_history_get( + db: 
Data>, + id: web::Path, +) -> Result { + let history: Vec<_> = ItemAppearances::lookup(&db.lock().unwrap(), *id) + .iter() + .inspect(|e| info!("got: {:?}", e)) + .filter_map(|e| Some((e.timestamp, e.current_bid_usd_cents?))) + .collect(); + Ok(web::Json(history)) +} + #[get("/listing/{id}")] async fn listing_get( db: Data>, @@ -42,7 +58,7 @@ async fn listing_since_get( ) -> Result { Ok(web::Json(Listing::lookup_since( &db.lock().unwrap(), - req.0, + DateTime::from_timestamp(req.0, 0).unwrap(), req.1, ))) } @@ -147,13 +163,15 @@ async fn parse_post( parser_ebay::extract_data_from_html( &std::fs::read_to_string(dir.join(format!("{t}.html"))).unwrap(), &timestamp, + &category, ) .unwrap() .iter() - .for_each(|l| { + .for_each(|lp| { cnt = cnt + 1; - l.add_or_update(&db.lock().unwrap()); - info!("Inserting id:{}, title:{}", l.item_id, l.title); + lp.0.add_or_update(&db.lock().unwrap()); + lp.1.add_or_update(&db.lock().unwrap()); + info!("Inserting id:{}, title:{}", lp.0.item_id, lp.0.title); }); cnt }) @@ -178,6 +196,7 @@ async fn main() -> std::io::Result<()> { App::new() .service(page_get) .service(listing_get) + .service(listing_history_get) .service(listing_since_get) .service(parse_post) .service(parse_listings) diff --git a/src/parser_ebay.rs b/src/parser_ebay.rs index 76e12a6..fa7241b 100644 --- a/src/parser_ebay.rs +++ b/src/parser_ebay.rs @@ -1,4 +1,4 @@ -use crate::db::Listing; +use crate::db::{ItemAppearances, Listing}; use chrono::Utc; use lazy_static::lazy_static; use regex::Regex; @@ -52,7 +52,8 @@ fn parse_price(price_text: &str) -> Option { pub fn extract_data_from_html( html_content: &str, timestamp: &chrono::DateTime, -) -> Option> { + category: &str, +) -> Option> { let document = Html::parse_document(html_content); let mut items = Vec::new(); @@ -148,16 +149,22 @@ pub fn extract_data_from_html( .map(|s| s.to_string()) .unwrap(); - items.push(Listing { - title, - id: 0, - item_id: id?, - added_time: *timestamp, - current_bid_price, - buy_it_now_price: 
final_buy_it_now_price, - has_best_offer, - image_url, - }); + items.push(( + Listing { + title, + id: 0, + item_id: id?, + buy_it_now_price: final_buy_it_now_price, + has_best_offer, + image_url, + }, + ItemAppearances { + item: id?, + timestamp: *timestamp, + current_bid_usd_cents: current_bid_price.map(|b| (b * 100.0).round() as i64), + category: category.to_owned(), + }, + )); } Some(items) } @@ -171,39 +178,48 @@ mod tests { fn parse() { let timestamp = chrono::DateTime::from_timestamp(1750369463, 0).unwrap(); let html = include_str!("../test_data/ebay_scraper/raw_scraped/ssd/1750369463.html"); - let parsed = extract_data_from_html(html, &timestamp).unwrap(); + let parsed = extract_data_from_html(html, &timestamp, "ssd").unwrap(); // assert_eq!(parsed.len(), 62); let parsed = parsed.first_chunk::<10>().unwrap(); assert_eq!( parsed[0], - Listing { - id: 0, - item_id: 388484391867, - title: "WD Blue 2.5-Inch 3D NAND SATA SSD 1TB - WDBNCE0010PNC-WRSN".to_string(), - added_time: timestamp, - current_bid_price: None, - buy_it_now_price: Some(59.99), - has_best_offer: true, - image_url: "https://i.ebayimg.com/images/g/wQYAAeSwOTtoN8SC/s-l500.webp" - .to_string() - } + ( + Listing { + id: 0, + item_id: 388484391867, + title: "WD Blue 2.5-Inch 3D NAND SATA SSD 1TB - WDBNCE0010PNC-WRSN".to_string(), + buy_it_now_price: Some(59.99), + has_best_offer: true, + image_url: "https://i.ebayimg.com/images/g/wQYAAeSwOTtoN8SC/s-l500.webp" + .to_string() + }, + ItemAppearances { + item: 388484391867, + timestamp: timestamp, + category: "ssd".to_owned(), + current_bid_usd_cents: None + } + ) ); assert_eq!( parsed[4], - Listing { + (Listing { id: 0, item_id: 286605201240, title: "Fanxiang M.2 SSD 1TB NVMe PCIe Gen 3x 4 M2 Internal Solid State Drive 3500MB/s" .to_string(), - added_time: timestamp, - current_bid_price: Some(12.60), buy_it_now_price: None, has_best_offer: true, image_url: "https://i.ebayimg.com/images/g/3NoAAeSwPrtoDb1O/s-l500.webp" .to_string() - } + }, ItemAppearances { + 
item: 286605201240, + timestamp: timestamp, + category: "ssd".to_owned(), + current_bid_usd_cents: Some(1260) + }) ); } }