Add ItemAppearances to track price and page history
All checks were successful
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 3m30s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 3m57s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 4m9s
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 9m50s

This commit is contained in:
2025-06-28 01:00:28 -04:00
parent b9cc62e3dd
commit 817b1d6275
3 changed files with 178 additions and 61 deletions

138
src/db.rs
View File

@ -192,13 +192,81 @@ impl ParsedStorage {
}
}
/// One observation of an eBay item on a parsed search page.
///
/// Each value records which item was seen, when and in which category the page
/// was scraped, and the bid (if any) shown at that moment. A series of these for
/// the same item forms its price/page history.
#[derive(Serialize, Debug, PartialEq, Clone)]
pub struct ItemAppearances {
/// eBay item id this appearance belongs to; the table schema declares it as a
/// foreign key to `Ebay_Items(item_id)`.
pub item: i64,
/// Timestamp of the scraped page the item appeared on (the parser copies the
/// timestamp it was given for the page).
pub timestamp: DateTime<Utc>,
/// Search category of the page the item appeared on (e.g. `"ssd"` in the
/// parser tests).
pub category: String,
/// Current bid in US cents as seen on that page, or `None` when the listing
/// showed no bid price. The parser derives it by rounding `price * 100`.
pub current_bid_usd_cents: Option<i64>,
}
// Table definition used to persist `ItemAppearances` rows.
impl DBTable for ItemAppearances {
// Name of the backing table; also interpolated into the queries built by
// `ItemAppearances::add_or_update`, `ItemAppearances::lookup` and
// `Listing::lookup_since`.
const TABLE_NAME: &'static str = "Item_Appearances";
// Column list and constraints for the table.
//
// * `UNIQUE(item, timestamp)` means one row per item per scraped page; together
//   with `INSERT OR REPLACE` in `add_or_update`, re-adding the same pair
//   replaces the earlier row instead of duplicating it.
// * `item` points at `Ebay_Items(item_id)`, and `(category, timestamp)` points
//   at the parsed page the item was seen on.
//
// NOTE(review): `timestamp` is declared INTEGER, but `add_or_update` binds a
// `DateTime<Utc>` directly — confirm the stored representation matches the
// integer comparison used by `Listing::lookup_since`.
// NOTE(review): whether foreign-key enforcement is enabled on the connection is
// not visible here — confirm if these constraints are expected to be checked.
const TABLE_SCHEMA: &'static str = "
id INTEGER PRIMARY KEY,
item INTEGER NOT NULL,
category TEXT NOT NULL,
timestamp INTEGER NOT NULL,
current_bid_usd_cents INTEGER,
UNIQUE(item, timestamp),
FOREIGN KEY(item) REFERENCES Ebay_Items(item_id),
FOREIGN KEY(category, timestamp) REFERENCES Pages_Parsed(category, timestamp)
";
}
impl ItemAppearances {
    /// Inserts this appearance, replacing any existing row with the same
    /// `(item, timestamp)` pair (the table's uniqueness constraint).
    ///
    /// # Panics
    ///
    /// Panics if the statement fails or if SQLite reports anything other than
    /// exactly one affected row.
    pub fn add_or_update(&self, conn: &Connection) {
        let count = conn
            .execute(
                &format!(
                    "
                    INSERT OR REPLACE INTO {}
                        (item, timestamp, category, current_bid_usd_cents)
                    VALUES
                        (?1, ?2, ?3, ?4)",
                    Self::TABLE_NAME
                ),
                (
                    self.item,
                    &self.timestamp,
                    &self.category,
                    self.current_bid_usd_cents,
                ),
            )
            .unwrap();
        if count != 1 {
            panic!("Expected count to be 1 but got {}", count);
        }
    }

    /// Returns every recorded appearance of the item `listing_id`, ordered by
    /// the stored `timestamp` column so callers get a stable history.
    ///
    /// An item with no recorded appearances yields an empty vector.
    ///
    /// # Panics
    ///
    /// Panics (including the underlying `rusqlite` error in the message) if the
    /// statement cannot be prepared or executed, or if a row cannot be decoded.
    pub fn lookup(conn: &Connection, listing_id: i64) -> Vec<ItemAppearances> {
        // Columns are named explicitly so the indices below do not depend on the
        // physical column order of the table (as `SELECT *` would).
        let mut stmt = conn
            .prepare(&format!(
                "
                SELECT item, category, timestamp, current_bid_usd_cents
                FROM {}
                WHERE item = ?1
                ORDER BY timestamp",
                Self::TABLE_NAME,
            ))
            .expect("failed to prepare Item_Appearances lookup");
        stmt.query_map([listing_id], |row| {
            Ok(ItemAppearances {
                item: row.get(0)?,
                category: row.get(1)?,
                timestamp: row.get(2)?,
                current_bid_usd_cents: row.get(3)?,
            })
        })
        .expect("failed to query Item_Appearances")
        .map(|row| row.expect("failed to decode Item_Appearances row"))
        .collect()
    }
}
#[derive(Serialize, Debug, PartialEq, Clone)]
pub struct Listing {
pub id: i64,
pub item_id: i64,
pub title: String,
pub added_time: DateTime<Utc>,
pub current_bid_price: Option<f64>,
pub buy_it_now_price: Option<f64>,
pub has_best_offer: bool,
pub image_url: String,
@ -209,8 +277,6 @@ impl DBTable for Listing {
id INTEGER PRIMARY KEY,
item_id INTEGER NOT NULL UNIQUE,
title TEXT NOT NULL,
added_time INTEGER NOT NULL,
current_bid_usd_cents INTEGER,
buy_it_now_usd_cents INTEGER,
has_best_offer INTEGER NOT NULL,
image_url TEXT NOT NULL
@ -229,39 +295,43 @@ impl Listing {
id: row.get(0)?,
item_id: row.get(1)?,
title: row.get(2)?,
added_time: row.get(3)?,
current_bid_price: row.get(4)?,
buy_it_now_price: row.get(5)?,
has_best_offer: row.get(6)?,
image_url: row.get(7)?,
buy_it_now_price: row.get(3)?,
has_best_offer: row.get(4)?,
image_url: row.get(5)?,
})
})
.ok()
}
pub fn lookup_since(conn: &Connection, since: i64, limit: i64) -> Vec<Self> {
pub fn lookup_since(conn: &Connection, since: DateTime<Utc>, limit: i64) -> Vec<Self> {
let mut stmt = conn
.prepare(&format!(
"SELECT * FROM {}
WHERE added_time >= ?1
ORDER BY added_time
"
SELECT *
FROM {0}
WHERE EXISTS (
SELECT 1
FROM {1}
WHERE
{1}.item = {0}.item_id AND
{1}.timestamp >= ?1
)
LIMIT ?2
",
Self::TABLE_NAME
Self::TABLE_NAME,
ItemAppearances::TABLE_NAME
))
.ok()
.unwrap();
stmt.query_map([since, limit], |row| {
stmt.query_map([since.timestamp(), limit], |row| {
Ok(Listing {
id: row.get(0)?,
item_id: row.get(1)?,
title: row.get(2)?,
added_time: row.get(3)?,
current_bid_price: row.get(4)?,
buy_it_now_price: row.get(5)?,
has_best_offer: row.get(6)?,
image_url: row.get(7)?,
buy_it_now_price: row.get(3)?,
has_best_offer: row.get(4)?,
image_url: row.get(5)?,
})
})
.ok()
@ -297,20 +367,16 @@ impl Listing {
(
item_id,
title,
added_time,
current_bid_usd_cents,
buy_it_now_usd_cents,
has_best_offer,
image_url
)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
VALUES (?1, ?2, ?3, ?4, ?5)",
Self::TABLE_NAME
),
(
self.item_id,
&self.title,
self.added_time,
self.current_bid_price,
self.buy_it_now_price,
self.has_best_offer,
self.image_url.clone(),
@ -334,6 +400,7 @@ pub fn get_initialized(path: Option<&Path>) -> Connection {
Listing::initialize(&conn);
ParsedStorage::initialize(&conn);
ParsedPage::initialize(&conn);
ItemAppearances::initialize(&conn);
conn
}
@ -357,8 +424,6 @@ mod tests {
id: 1,
item_id: 1234,
title: "Some Title".to_string(),
added_time: std::time::SystemTime::now().into(),
current_bid_price: Some(0.12),
buy_it_now_price: Some(1.23),
has_best_offer: false,
image_url: "google.com".to_string(),
@ -383,6 +448,23 @@ mod tests {
timestamp: std::time::SystemTime::now().into(),
};
page.add_or_update_db(&db);
assert_eq!(ParsedPage::lookup_db(&db, page.timestamp), Some(page));
assert_eq!(
ParsedPage::lookup_db(&db, page.timestamp),
Some(page.clone())
);
let apperance = ItemAppearances {
item: listing.item_id,
timestamp: page.timestamp,
category: page.category,
current_bid_usd_cents: Some(1233),
};
apperance.add_or_update(&db);
assert_eq!(
ItemAppearances::lookup(&db, listing.item_id),
vec![apperance]
);
assert_eq!(Listing::lookup_since(&db, page.timestamp, 3), vec![listing]);
}
}

View File

@ -1,6 +1,9 @@
use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
use chrono::DateTime;
use clap::Parser;
use ebay_scraper_rust::db::{Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized};
use ebay_scraper_rust::db::{
ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
};
use ebay_scraper_rust::{parser_ebay, parser_storage};
use std::path::{Path, PathBuf};
use std::sync::Mutex;
@ -27,6 +30,19 @@ async fn page_get(
)))
}
/// `GET /listing/{id}/history` — bid history for one item.
///
/// Looks up every recorded appearance of item `id`, logs each one, and responds
/// with a JSON array of `(timestamp, current_bid_usd_cents)` pairs. Appearances
/// that carry no bid are logged but left out of the response.
#[get("/listing/{id}/history")]
async fn listing_history_get(
    db: Data<Mutex<rusqlite::Connection>>,
    id: web::Path<i64>,
) -> Result<impl Responder> {
    let appearances = ItemAppearances::lookup(&db.lock().unwrap(), *id);

    let mut history = Vec::new();
    for appearance in &appearances {
        info!("got: {:?}", appearance);
        // Only appearances with a known bid contribute a data point.
        if let Some(bid_cents) = appearance.current_bid_usd_cents {
            history.push((appearance.timestamp, bid_cents));
        }
    }

    Ok(web::Json(history))
}
#[get("/listing/{id}")]
async fn listing_get(
db: Data<Mutex<rusqlite::Connection>>,
@ -42,7 +58,7 @@ async fn listing_since_get(
) -> Result<impl Responder> {
Ok(web::Json(Listing::lookup_since(
&db.lock().unwrap(),
req.0,
DateTime::from_timestamp(req.0, 0).unwrap(),
req.1,
)))
}
@ -147,13 +163,15 @@ async fn parse_post(
parser_ebay::extract_data_from_html(
&std::fs::read_to_string(dir.join(format!("{t}.html"))).unwrap(),
&timestamp,
&category,
)
.unwrap()
.iter()
.for_each(|l| {
.for_each(|lp| {
cnt = cnt + 1;
l.add_or_update(&db.lock().unwrap());
info!("Inserting id:{}, title:{}", l.item_id, l.title);
lp.0.add_or_update(&db.lock().unwrap());
lp.1.add_or_update(&db.lock().unwrap());
info!("Inserting id:{}, title:{}", lp.0.item_id, lp.0.title);
});
cnt
})
@ -178,6 +196,7 @@ async fn main() -> std::io::Result<()> {
App::new()
.service(page_get)
.service(listing_get)
.service(listing_history_get)
.service(listing_since_get)
.service(parse_post)
.service(parse_listings)

View File

@ -1,4 +1,4 @@
use crate::db::Listing;
use crate::db::{ItemAppearances, Listing};
use chrono::Utc;
use lazy_static::lazy_static;
use regex::Regex;
@ -52,7 +52,8 @@ fn parse_price(price_text: &str) -> Option<f64> {
pub fn extract_data_from_html(
html_content: &str,
timestamp: &chrono::DateTime<Utc>,
) -> Option<Vec<Listing>> {
category: &str,
) -> Option<Vec<(Listing, ItemAppearances)>> {
let document = Html::parse_document(html_content);
let mut items = Vec::new();
@ -148,16 +149,22 @@ pub fn extract_data_from_html(
.map(|s| s.to_string())
.unwrap();
items.push(Listing {
title,
id: 0,
item_id: id?,
added_time: *timestamp,
current_bid_price,
buy_it_now_price: final_buy_it_now_price,
has_best_offer,
image_url,
});
items.push((
Listing {
title,
id: 0,
item_id: id?,
buy_it_now_price: final_buy_it_now_price,
has_best_offer,
image_url,
},
ItemAppearances {
item: id?,
timestamp: *timestamp,
current_bid_usd_cents: current_bid_price.map(|b| (b * 100.0).round() as i64),
category: category.to_owned(),
},
));
}
Some(items)
}
@ -171,39 +178,48 @@ mod tests {
fn parse() {
let timestamp = chrono::DateTime::from_timestamp(1750369463, 0).unwrap();
let html = include_str!("../test_data/ebay_scraper/raw_scraped/ssd/1750369463.html");
let parsed = extract_data_from_html(html, &timestamp).unwrap();
let parsed = extract_data_from_html(html, &timestamp, "ssd").unwrap();
// assert_eq!(parsed.len(), 62);
let parsed = parsed.first_chunk::<10>().unwrap();
assert_eq!(
parsed[0],
Listing {
id: 0,
item_id: 388484391867,
title: "WD Blue 2.5-Inch 3D NAND SATA SSD 1TB - WDBNCE0010PNC-WRSN".to_string(),
added_time: timestamp,
current_bid_price: None,
buy_it_now_price: Some(59.99),
has_best_offer: true,
image_url: "https://i.ebayimg.com/images/g/wQYAAeSwOTtoN8SC/s-l500.webp"
.to_string()
}
(
Listing {
id: 0,
item_id: 388484391867,
title: "WD Blue 2.5-Inch 3D NAND SATA SSD 1TB - WDBNCE0010PNC-WRSN".to_string(),
buy_it_now_price: Some(59.99),
has_best_offer: true,
image_url: "https://i.ebayimg.com/images/g/wQYAAeSwOTtoN8SC/s-l500.webp"
.to_string()
},
ItemAppearances {
item: 388484391867,
timestamp: timestamp,
category: "ssd".to_owned(),
current_bid_usd_cents: None
}
)
);
assert_eq!(
parsed[4],
Listing {
(Listing {
id: 0,
item_id: 286605201240,
title:
"Fanxiang M.2 SSD 1TB NVMe PCIe Gen 3x 4 M2 Internal Solid State Drive 3500MB/s"
.to_string(),
added_time: timestamp,
current_bid_price: Some(12.60),
buy_it_now_price: None,
has_best_offer: true,
image_url: "https://i.ebayimg.com/images/g/3NoAAeSwPrtoDb1O/s-l500.webp"
.to_string()
}
}, ItemAppearances {
item: 286605201240,
timestamp: timestamp,
category: "ssd".to_owned(),
current_bid_usd_cents: Some(1260)
})
);
}
}