Add ItemAppearances to track price and page history
All checks were successful
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 3m30s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 3m57s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 4m9s
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 9m50s
All checks were successful
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 3m30s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 3m57s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 4m9s
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 9m50s
This commit is contained in:
138
src/db.rs
138
src/db.rs
@ -192,13 +192,81 @@ impl ParsedStorage {
|
||||
}
|
||||
}
|
||||
|
||||
/// One recorded sighting of an item on a parsed page.
///
/// Each value pairs an item with the page (identified by `category` and
/// `timestamp`) it was seen on, plus the bid observed at that moment, so that
/// a series of these rows forms the price/page history of the item.
#[derive(Serialize, Debug, PartialEq, Clone)]
|
||||
pub struct ItemAppearances {
|
||||
/// Item identifier; the table schema declares it as a foreign key to
/// `Ebay_Items(item_id)`.
pub item: i64,
|
||||
/// Timestamp of the page on which the item was seen; together with
/// `category` it references a row of `Pages_Parsed`.
pub timestamp: DateTime<Utc>,
|
||||
/// Category of the page on which the item was seen.
pub category: String,
|
||||
/// Current bid in US cents at the time of the sighting, or `None` when no
/// bid value was available.
pub current_bid_usd_cents: Option<i64>,
|
||||
}
|
||||
/// Table description for [`ItemAppearances`].
///
/// NOTE(review): presumably `DBTable::initialize` uses these constants to
/// create the table — confirm against the trait definition.
impl DBTable for ItemAppearances {
|
||||
/// Name of the SQLite table that stores the appearances.
const TABLE_NAME: &'static str = "Item_Appearances";
|
||||
/// Column and constraint definitions for the table.
///
/// `(item, timestamp)` is declared `UNIQUE`, so an item has at most one row
/// per timestamp. `item` references `Ebay_Items(item_id)` and
/// `(category, timestamp)` references `Pages_Parsed(category, timestamp)`.
const TABLE_SCHEMA: &'static str = "
|
||||
id INTEGER PRIMARY KEY,
|
||||
item INTEGER NOT NULL,
|
||||
category TEXT NOT NULL,
|
||||
timestamp INTEGER NOT NULL,
|
||||
current_bid_usd_cents INTEGER,
|
||||
UNIQUE(item, timestamp),
|
||||
FOREIGN KEY(item) REFERENCES Ebay_Items(item_id),
|
||||
FOREIGN KEY(category, timestamp) REFERENCES Pages_Parsed(category, timestamp)
|
||||
";
|
||||
}
|
||||
impl ItemAppearances {
|
||||
pub fn add_or_update(&self, conn: &Connection) {
|
||||
let count = conn
|
||||
.execute(
|
||||
&format!(
|
||||
"
|
||||
INSERT OR REPLACE INTO {}
|
||||
(item, timestamp, category, current_bid_usd_cents)
|
||||
VALUES
|
||||
(?1, ?2, ?3, ?4)",
|
||||
Self::TABLE_NAME
|
||||
),
|
||||
(
|
||||
self.item,
|
||||
&self.timestamp,
|
||||
&self.category,
|
||||
self.current_bid_usd_cents,
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
if count != 1 {
|
||||
panic!("Expected count to be 1 but got {}", count);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn lookup(conn: &Connection, listing_id: i64) -> Vec<ItemAppearances> {
|
||||
let mut stmt = conn
|
||||
.prepare(&format!(
|
||||
"
|
||||
SELECT * FROM {}
|
||||
WHERE item IS ?1",
|
||||
Self::TABLE_NAME,
|
||||
))
|
||||
.ok()
|
||||
.unwrap();
|
||||
stmt.query_map([listing_id], |row| {
|
||||
Ok(ItemAppearances {
|
||||
item: row.get(1)?,
|
||||
category: row.get(2)?,
|
||||
timestamp: row.get(3)?,
|
||||
current_bid_usd_cents: row.get(4)?,
|
||||
})
|
||||
})
|
||||
.ok()
|
||||
.unwrap()
|
||||
.map(|e| e.unwrap())
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug, PartialEq, Clone)]
|
||||
pub struct Listing {
|
||||
pub id: i64,
|
||||
pub item_id: i64,
|
||||
pub title: String,
|
||||
pub added_time: DateTime<Utc>,
|
||||
pub current_bid_price: Option<f64>,
|
||||
pub buy_it_now_price: Option<f64>,
|
||||
pub has_best_offer: bool,
|
||||
pub image_url: String,
|
||||
@ -209,8 +277,6 @@ impl DBTable for Listing {
|
||||
id INTEGER PRIMARY KEY,
|
||||
item_id INTEGER NOT NULL UNIQUE,
|
||||
title TEXT NOT NULL,
|
||||
added_time INTEGER NOT NULL,
|
||||
current_bid_usd_cents INTEGER,
|
||||
buy_it_now_usd_cents INTEGER,
|
||||
has_best_offer INTEGER NOT NULL,
|
||||
image_url TEXT NOT NULL
|
||||
@ -229,39 +295,43 @@ impl Listing {
|
||||
id: row.get(0)?,
|
||||
item_id: row.get(1)?,
|
||||
title: row.get(2)?,
|
||||
added_time: row.get(3)?,
|
||||
current_bid_price: row.get(4)?,
|
||||
buy_it_now_price: row.get(5)?,
|
||||
has_best_offer: row.get(6)?,
|
||||
image_url: row.get(7)?,
|
||||
buy_it_now_price: row.get(3)?,
|
||||
has_best_offer: row.get(4)?,
|
||||
image_url: row.get(5)?,
|
||||
})
|
||||
})
|
||||
.ok()
|
||||
}
|
||||
|
||||
pub fn lookup_since(conn: &Connection, since: i64, limit: i64) -> Vec<Self> {
|
||||
pub fn lookup_since(conn: &Connection, since: DateTime<Utc>, limit: i64) -> Vec<Self> {
|
||||
let mut stmt = conn
|
||||
.prepare(&format!(
|
||||
"SELECT * FROM {}
|
||||
WHERE added_time >= ?1
|
||||
ORDER BY added_time
|
||||
"
|
||||
SELECT *
|
||||
FROM {0}
|
||||
WHERE EXISTS (
|
||||
SELECT 1
|
||||
FROM {1}
|
||||
WHERE
|
||||
{1}.item = {0}.item_id AND
|
||||
{1}.timestamp >= ?1
|
||||
)
|
||||
LIMIT ?2
|
||||
",
|
||||
Self::TABLE_NAME
|
||||
Self::TABLE_NAME,
|
||||
ItemAppearances::TABLE_NAME
|
||||
))
|
||||
.ok()
|
||||
.unwrap();
|
||||
|
||||
stmt.query_map([since, limit], |row| {
|
||||
stmt.query_map([since.timestamp(), limit], |row| {
|
||||
Ok(Listing {
|
||||
id: row.get(0)?,
|
||||
item_id: row.get(1)?,
|
||||
title: row.get(2)?,
|
||||
added_time: row.get(3)?,
|
||||
current_bid_price: row.get(4)?,
|
||||
buy_it_now_price: row.get(5)?,
|
||||
has_best_offer: row.get(6)?,
|
||||
image_url: row.get(7)?,
|
||||
buy_it_now_price: row.get(3)?,
|
||||
has_best_offer: row.get(4)?,
|
||||
image_url: row.get(5)?,
|
||||
})
|
||||
})
|
||||
.ok()
|
||||
@ -297,20 +367,16 @@ impl Listing {
|
||||
(
|
||||
item_id,
|
||||
title,
|
||||
added_time,
|
||||
current_bid_usd_cents,
|
||||
buy_it_now_usd_cents,
|
||||
has_best_offer,
|
||||
image_url
|
||||
)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
|
||||
VALUES (?1, ?2, ?3, ?4, ?5)",
|
||||
Self::TABLE_NAME
|
||||
),
|
||||
(
|
||||
self.item_id,
|
||||
&self.title,
|
||||
self.added_time,
|
||||
self.current_bid_price,
|
||||
self.buy_it_now_price,
|
||||
self.has_best_offer,
|
||||
self.image_url.clone(),
|
||||
@ -334,6 +400,7 @@ pub fn get_initialized(path: Option<&Path>) -> Connection {
|
||||
Listing::initialize(&conn);
|
||||
ParsedStorage::initialize(&conn);
|
||||
ParsedPage::initialize(&conn);
|
||||
ItemAppearances::initialize(&conn);
|
||||
|
||||
conn
|
||||
}
|
||||
@ -357,8 +424,6 @@ mod tests {
|
||||
id: 1,
|
||||
item_id: 1234,
|
||||
title: "Some Title".to_string(),
|
||||
added_time: std::time::SystemTime::now().into(),
|
||||
current_bid_price: Some(0.12),
|
||||
buy_it_now_price: Some(1.23),
|
||||
has_best_offer: false,
|
||||
image_url: "google.com".to_string(),
|
||||
@ -383,6 +448,23 @@ mod tests {
|
||||
timestamp: std::time::SystemTime::now().into(),
|
||||
};
|
||||
page.add_or_update_db(&db);
|
||||
assert_eq!(ParsedPage::lookup_db(&db, page.timestamp), Some(page));
|
||||
assert_eq!(
|
||||
ParsedPage::lookup_db(&db, page.timestamp),
|
||||
Some(page.clone())
|
||||
);
|
||||
|
||||
let apperance = ItemAppearances {
|
||||
item: listing.item_id,
|
||||
timestamp: page.timestamp,
|
||||
category: page.category,
|
||||
current_bid_usd_cents: Some(1233),
|
||||
};
|
||||
apperance.add_or_update(&db);
|
||||
assert_eq!(
|
||||
ItemAppearances::lookup(&db, listing.item_id),
|
||||
vec![apperance]
|
||||
);
|
||||
|
||||
assert_eq!(Listing::lookup_since(&db, page.timestamp, 3), vec![listing]);
|
||||
}
|
||||
}
|
||||
|
29
src/main.rs
29
src/main.rs
@ -1,6 +1,9 @@
|
||||
use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
|
||||
use chrono::DateTime;
|
||||
use clap::Parser;
|
||||
use ebay_scraper_rust::db::{Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized};
|
||||
use ebay_scraper_rust::db::{
|
||||
ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
|
||||
};
|
||||
use ebay_scraper_rust::{parser_ebay, parser_storage};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Mutex;
|
||||
@ -27,6 +30,19 @@ async fn page_get(
|
||||
)))
|
||||
}
|
||||
|
||||
#[get("/listing/{id}/history")]
|
||||
async fn listing_history_get(
|
||||
db: Data<Mutex<rusqlite::Connection>>,
|
||||
id: web::Path<i64>,
|
||||
) -> Result<impl Responder> {
|
||||
let history: Vec<_> = ItemAppearances::lookup(&db.lock().unwrap(), *id)
|
||||
.iter()
|
||||
.inspect(|e| info!("got: {:?}", e))
|
||||
.filter_map(|e| Some((e.timestamp, e.current_bid_usd_cents?)))
|
||||
.collect();
|
||||
Ok(web::Json(history))
|
||||
}
|
||||
|
||||
#[get("/listing/{id}")]
|
||||
async fn listing_get(
|
||||
db: Data<Mutex<rusqlite::Connection>>,
|
||||
@ -42,7 +58,7 @@ async fn listing_since_get(
|
||||
) -> Result<impl Responder> {
|
||||
Ok(web::Json(Listing::lookup_since(
|
||||
&db.lock().unwrap(),
|
||||
req.0,
|
||||
DateTime::from_timestamp(req.0, 0).unwrap(),
|
||||
req.1,
|
||||
)))
|
||||
}
|
||||
@ -147,13 +163,15 @@ async fn parse_post(
|
||||
parser_ebay::extract_data_from_html(
|
||||
&std::fs::read_to_string(dir.join(format!("{t}.html"))).unwrap(),
|
||||
&timestamp,
|
||||
&category,
|
||||
)
|
||||
.unwrap()
|
||||
.iter()
|
||||
.for_each(|l| {
|
||||
.for_each(|lp| {
|
||||
cnt = cnt + 1;
|
||||
l.add_or_update(&db.lock().unwrap());
|
||||
info!("Inserting id:{}, title:{}", l.item_id, l.title);
|
||||
lp.0.add_or_update(&db.lock().unwrap());
|
||||
lp.1.add_or_update(&db.lock().unwrap());
|
||||
info!("Inserting id:{}, title:{}", lp.0.item_id, lp.0.title);
|
||||
});
|
||||
cnt
|
||||
})
|
||||
@ -178,6 +196,7 @@ async fn main() -> std::io::Result<()> {
|
||||
App::new()
|
||||
.service(page_get)
|
||||
.service(listing_get)
|
||||
.service(listing_history_get)
|
||||
.service(listing_since_get)
|
||||
.service(parse_post)
|
||||
.service(parse_listings)
|
||||
|
@ -1,4 +1,4 @@
|
||||
use crate::db::Listing;
|
||||
use crate::db::{ItemAppearances, Listing};
|
||||
use chrono::Utc;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
@ -52,7 +52,8 @@ fn parse_price(price_text: &str) -> Option<f64> {
|
||||
pub fn extract_data_from_html(
|
||||
html_content: &str,
|
||||
timestamp: &chrono::DateTime<Utc>,
|
||||
) -> Option<Vec<Listing>> {
|
||||
category: &str,
|
||||
) -> Option<Vec<(Listing, ItemAppearances)>> {
|
||||
let document = Html::parse_document(html_content);
|
||||
let mut items = Vec::new();
|
||||
|
||||
@ -148,16 +149,22 @@ pub fn extract_data_from_html(
|
||||
.map(|s| s.to_string())
|
||||
.unwrap();
|
||||
|
||||
items.push(Listing {
|
||||
items.push((
|
||||
Listing {
|
||||
title,
|
||||
id: 0,
|
||||
item_id: id?,
|
||||
added_time: *timestamp,
|
||||
current_bid_price,
|
||||
buy_it_now_price: final_buy_it_now_price,
|
||||
has_best_offer,
|
||||
image_url,
|
||||
});
|
||||
},
|
||||
ItemAppearances {
|
||||
item: id?,
|
||||
timestamp: *timestamp,
|
||||
current_bid_usd_cents: current_bid_price.map(|b| (b * 100.0).round() as i64),
|
||||
category: category.to_owned(),
|
||||
},
|
||||
));
|
||||
}
|
||||
Some(items)
|
||||
}
|
||||
@ -171,39 +178,48 @@ mod tests {
|
||||
fn parse() {
|
||||
let timestamp = chrono::DateTime::from_timestamp(1750369463, 0).unwrap();
|
||||
let html = include_str!("../test_data/ebay_scraper/raw_scraped/ssd/1750369463.html");
|
||||
let parsed = extract_data_from_html(html, &timestamp).unwrap();
|
||||
let parsed = extract_data_from_html(html, &timestamp, "ssd").unwrap();
|
||||
// assert_eq!(parsed.len(), 62);
|
||||
|
||||
let parsed = parsed.first_chunk::<10>().unwrap();
|
||||
assert_eq!(
|
||||
parsed[0],
|
||||
(
|
||||
Listing {
|
||||
id: 0,
|
||||
item_id: 388484391867,
|
||||
title: "WD Blue 2.5-Inch 3D NAND SATA SSD 1TB - WDBNCE0010PNC-WRSN".to_string(),
|
||||
added_time: timestamp,
|
||||
current_bid_price: None,
|
||||
buy_it_now_price: Some(59.99),
|
||||
has_best_offer: true,
|
||||
image_url: "https://i.ebayimg.com/images/g/wQYAAeSwOTtoN8SC/s-l500.webp"
|
||||
.to_string()
|
||||
},
|
||||
ItemAppearances {
|
||||
item: 388484391867,
|
||||
timestamp: timestamp,
|
||||
category: "ssd".to_owned(),
|
||||
current_bid_usd_cents: None
|
||||
}
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
parsed[4],
|
||||
Listing {
|
||||
(Listing {
|
||||
id: 0,
|
||||
item_id: 286605201240,
|
||||
title:
|
||||
"Fanxiang M.2 SSD 1TB NVMe PCIe Gen 3x 4 M2 Internal Solid State Drive 3500MB/s"
|
||||
.to_string(),
|
||||
added_time: timestamp,
|
||||
current_bid_price: Some(12.60),
|
||||
buy_it_now_price: None,
|
||||
has_best_offer: true,
|
||||
image_url: "https://i.ebayimg.com/images/g/3NoAAeSwPrtoDb1O/s-l500.webp"
|
||||
.to_string()
|
||||
}
|
||||
}, ItemAppearances {
|
||||
item: 286605201240,
|
||||
timestamp: timestamp,
|
||||
category: "ssd".to_owned(),
|
||||
current_bid_usd_cents: Some(1260)
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user