Files
ebay_scraper_rust/src/db.rs
hak8or b9cc62e3dd
All checks were successful
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 3m30s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 4m1s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 4m5s
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 9m41s
Initial rough commit
2025-06-27 21:56:50 -04:00

389 lines
11 KiB
Rust

use chrono::{DateTime, Utc};
use rusqlite::Connection;
use serde::Serialize;
use std::path::Path;
use tracing::info;
/// Shared behaviour for every type that is stored in its own SQLite table.
trait DBTable {
    /// Name of the table backing the implementing type.
    const TABLE_NAME: &'static str;
    /// Column and constraint definitions that go between the parentheses of
    /// the `CREATE TABLE` statement.
    const TABLE_SCHEMA: &'static str;

    /// Creates the table on `conn` unless it already exists, logging the
    /// table name and schema first.
    ///
    /// # Panics
    /// Panics if executing the `CREATE TABLE` statement fails.
    fn initialize(conn: &Connection) {
        info!("Creating table with following schema;");
        info!("{} ({})", Self::TABLE_NAME, Self::TABLE_SCHEMA);

        let sql = format!(
            "CREATE TABLE IF NOT EXISTS {} (\n{}\n)",
            Self::TABLE_NAME,
            Self::TABLE_SCHEMA
        );
        conn.execute(&sql, ()).unwrap();
    }
}
/// A named search URL, persisted as one row of the `SearchURLs` table.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct SearchURL {
    /// The URL itself; written to and read back from the `url` column.
    pub full_url: String,
    /// Name the entry is looked up by; stored in the `name` column.
    pub name: String,
}
/// Table definition for [`SearchURL`] rows.
impl DBTable for SearchURL {
const TABLE_NAME: &'static str = "SearchURLs";
// Both `url` and `name` are declared UNIQUE, so an `INSERT OR REPLACE` that
// collides on either column replaces the existing row.
const TABLE_SCHEMA: &'static str = "
id INTEGER PRIMARY KEY,
url TEXT NOT NULL UNIQUE,
name TEXT NOT NULL UNIQUE";
}
impl SearchURL {
    /// Fetches the entry whose `name` column equals `name`.
    ///
    /// Returns `None` when the statement cannot be prepared or when
    /// `query_one` reports an error (for example because no row matched);
    /// the underlying error is discarded.
    pub fn lookup(conn: &Connection, name: &str) -> Option<Self> {
        let mut stmt = conn
            .prepare(&format!(
                "SELECT * FROM {} WHERE name = ?",
                Self::TABLE_NAME
            ))
            .ok()?;
        // `SELECT *` yields (id, url, name); column 0 is the row id, which
        // this struct does not carry.
        stmt.query_one([name], |row| {
            Ok(SearchURL {
                full_url: row.get(1)?,
                name: row.get(2)?,
            })
        })
        .ok()
    }

    /// Writes this entry with `INSERT OR REPLACE`, so a row that collides on
    /// one of the table's unique columns is replaced instead of rejected.
    ///
    /// # Panics
    /// Panics if the statement fails to execute.
    pub fn add_or_update(&self, conn: &Connection) {
        conn.execute(
            &format!(
                "INSERT OR REPLACE INTO {} (name, url) VALUES (?1, ?2)",
                Self::TABLE_NAME
            ),
            (&self.name, &self.full_url),
        )
        .expect("failed to insert or replace a search URL");
    }

    /// Returns the `name` column of every stored search URL.
    ///
    /// # Panics
    /// Panics if the query cannot be prepared or run, or if a row cannot be
    /// read as a string. Row errors are surfaced rather than silently
    /// skipped, matching the other list queries in this file.
    pub fn names(conn: &Connection) -> Vec<String> {
        let mut stmt = conn
            .prepare(&format!("SELECT name FROM {}", Self::TABLE_NAME))
            .expect("failed to prepare the search-name query");
        stmt.query_map([], |row| row.get::<_, String>(0))
            .expect("failed to run the search-name query")
            .map(|name| name.expect("failed to read a search name"))
            .collect()
    }
}
/// One row of the `Pages_Parsed` table: a `(category, timestamp)` pair.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct ParsedPage {
    /// Stored in the `timestamp` column.
    pub timestamp: DateTime<Utc>,
    /// Stored in the `category` column, which the schema declares as a
    /// foreign key to `SearchURLs(name)`.
    pub category: String,
}
/// Table definition for [`ParsedPage`] rows.
impl DBTable for ParsedPage {
    const TABLE_NAME: &'static str = "Pages_Parsed";
    // The two table constraints are separated by a comma, as the documented
    // CREATE TABLE grammar requires. Because the table is created with
    // `CREATE TABLE IF NOT EXISTS`, this text only affects newly created
    // databases.
    const TABLE_SCHEMA: &'static str = "
id INTEGER PRIMARY KEY,
category TEXT NOT NULL,
timestamp INTEGER NOT NULL,
UNIQUE(category, timestamp),
FOREIGN KEY(category) REFERENCES SearchURLs(name)
";
}
impl ParsedPage {
    /// Looks up the row whose `timestamp` column equals `timestamp`.
    ///
    /// Yields `None` if the statement cannot be prepared or if `query_one`
    /// returns an error; the error itself is dropped.
    pub fn lookup_db(conn: &Connection, timestamp: DateTime<Utc>) -> Option<Self> {
        let sql = format!("SELECT * FROM {} WHERE timestamp = ?", Self::TABLE_NAME);
        let mut stmt = conn.prepare(&sql).ok()?;

        // `SELECT *` yields (id, category, timestamp); the row id in column 0
        // is not part of the struct.
        let found = stmt.query_one([timestamp], |row| {
            let category: String = row.get(1)?;
            let timestamp: DateTime<Utc> = row.get(2)?;
            Ok(ParsedPage {
                timestamp,
                category,
            })
        });
        found.ok()
    }

    /// Stores this page record using `INSERT OR REPLACE`.
    ///
    /// # Panics
    /// Panics if the statement fails to execute.
    pub fn add_or_update_db(&self, conn: &Connection) {
        let sql = format!(
            "INSERT OR REPLACE INTO {} (category, timestamp) VALUES (?1, ?2)",
            Self::TABLE_NAME
        );
        let params = (&self.category, self.timestamp);
        conn.execute(&sql, params).unwrap();
    }
}
/// One row of the `Storage_Parsed` table.
#[derive(Debug, Clone, Copy, PartialEq, Serialize)]
pub struct ParsedStorage {
    /// Row id (`id` column). Read back by `lookup_db`, never written by
    /// `add_or_update_db`.
    pub id: i64,
    /// Stored in the `item` column, which the schema declares as a foreign
    /// key to `Ebay_Items(item_id)`.
    pub item: i64,
    /// Stored in the `total_gigabytes` column.
    pub total_gigabytes: i64,
    /// Stored in the `quantity` column.
    pub quantity: i64,
    /// Stored as decimal text in the `sizes_gigabytes` column.
    pub individual_size_gigabytes: i64,
    /// Stored in the `parse_engine` column; unique together with `item`.
    pub parse_engine: i64,
    /// Stored in the `need_description_check` column.
    pub needed_description_check: bool,
}
/// Table definition for [`ParsedStorage`] rows.
impl DBTable for ParsedStorage {
    const TABLE_NAME: &'static str = "Storage_Parsed";
    // The two table constraints are separated by a comma, as the documented
    // CREATE TABLE grammar requires. Because the table is created with
    // `CREATE TABLE IF NOT EXISTS`, this text only affects newly created
    // databases.
    const TABLE_SCHEMA: &'static str = "
id INTEGER PRIMARY KEY,
item INTEGER,
total_gigabytes INTEGER,
quantity INTEGER,
sizes_gigabytes TEXT,
parse_engine INTEGER,
need_description_check INTEGER,
UNIQUE(item, parse_engine),
FOREIGN KEY(item) REFERENCES Ebay_Items(item_id)
";
}
impl ParsedStorage {
    /// Returns every row whose `id` column equals `item`.
    ///
    /// NOTE(review): the parameter is called `item`, yet the query filters on
    /// the primary key `id` rather than the `item` column. The in-file test
    /// relies on the current behaviour, so it is left as is — confirm which
    /// column was intended.
    ///
    /// # Panics
    /// Panics if the query cannot be prepared or run, if a row cannot be
    /// converted, or if the stored `sizes_gigabytes` text is not an integer.
    pub fn lookup_db(conn: &Connection, item: i64) -> Vec<ParsedStorage> {
        let mut stmt = conn
            .prepare(&format!("SELECT * FROM {} WHERE id = ?", Self::TABLE_NAME))
            .expect("failed to prepare the parsed-storage lookup");
        stmt.query_map([item], |row| {
            Ok(ParsedStorage {
                id: row.get(0)?,
                item: row.get(1)?,
                total_gigabytes: row.get(2)?,
                quantity: row.get(3)?,
                individual_size_gigabytes: {
                    // The column is TEXT, so read a string and parse it back.
                    let size_text: String = row.get(4)?;
                    size_text
                        .parse()
                        .expect("sizes_gigabytes does not hold an integer")
                },
                parse_engine: row.get(5)?,
                needed_description_check: row.get(6)?,
            })
        })
        .expect("failed to run the parsed-storage lookup")
        .map(|parsed| parsed.expect("failed to read a parsed-storage row"))
        .collect()
    }

    /// Stores this record using `INSERT OR REPLACE`.
    ///
    /// `self.id` is not written; only the remaining six fields are bound.
    ///
    /// # Panics
    /// Panics if the statement fails to execute.
    pub fn add_or_update_db(&self, conn: &Connection) {
        conn.execute(
            &format!(
                "\n\
                 INSERT OR REPLACE INTO {}\n\
                 (item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, need_description_check)\n\
                 VALUES\n\
                 (?1, ?2, ?3, ?4, ?5, ?6)",
                Self::TABLE_NAME
            ),
            (
                self.item,
                self.total_gigabytes,
                self.quantity,
                // The `sizes_gigabytes` column is TEXT, so the size is stored as a string.
                self.individual_size_gigabytes.to_string(),
                self.parse_engine,
                self.needed_description_check,
            ),
        )
        .expect("failed to insert or replace a parsed-storage row");
    }
}
/// One row of the `Ebay_Items` table.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct Listing {
    /// Row id (`id` column). Read back by the lookups, never written by
    /// `add_or_update`.
    pub id: i64,
    /// Stored in the `item_id` column, which is declared UNIQUE.
    pub item_id: i64,
    /// Stored in the `title` column.
    pub title: String,
    /// Stored in the `added_time` column.
    pub added_time: DateTime<Utc>,
    /// Bound as-is to the nullable `current_bid_usd_cents` column.
    pub current_bid_price: Option<f64>,
    /// Bound as-is to the nullable `buy_it_now_usd_cents` column.
    pub buy_it_now_price: Option<f64>,
    /// Stored in the `has_best_offer` column.
    pub has_best_offer: bool,
    /// Stored in the `image_url` column.
    pub image_url: String,
}
/// Table definition for [`Listing`] rows.
impl DBTable for Listing {
const TABLE_NAME: &'static str = "Ebay_Items";
// `item_id` is UNIQUE; it is the column `Listing::lookup` filters on. The two
// price columns are the only nullable ones.
// NOTE(review): the price columns are named `*_usd_cents`, but this file binds
// the `Option<f64>` price fields to them without any conversion — confirm the
// intended unit.
const TABLE_SCHEMA: &'static str = "
id INTEGER PRIMARY KEY,
item_id INTEGER NOT NULL UNIQUE,
title TEXT NOT NULL,
added_time INTEGER NOT NULL,
current_bid_usd_cents INTEGER,
buy_it_now_usd_cents INTEGER,
has_best_offer INTEGER NOT NULL,
image_url TEXT NOT NULL
";
}
impl Listing {
pub fn lookup(conn: &Connection, item_id: i64) -> Option<Listing> {
let mut stmt = conn
.prepare(&format!(
"SELECT * FROM {} WHERE item_id = ?",
Self::TABLE_NAME
))
.ok()?;
stmt.query_one([item_id], |row| {
Ok(Listing {
id: row.get(0)?,
item_id: row.get(1)?,
title: row.get(2)?,
added_time: row.get(3)?,
current_bid_price: row.get(4)?,
buy_it_now_price: row.get(5)?,
has_best_offer: row.get(6)?,
image_url: row.get(7)?,
})
})
.ok()
}
pub fn lookup_since(conn: &Connection, since: i64, limit: i64) -> Vec<Self> {
let mut stmt = conn
.prepare(&format!(
"SELECT * FROM {}
WHERE added_time >= ?1
ORDER BY added_time
LIMIT ?2
",
Self::TABLE_NAME
))
.ok()
.unwrap();
stmt.query_map([since, limit], |row| {
Ok(Listing {
id: row.get(0)?,
item_id: row.get(1)?,
title: row.get(2)?,
added_time: row.get(3)?,
current_bid_price: row.get(4)?,
buy_it_now_price: row.get(5)?,
has_best_offer: row.get(6)?,
image_url: row.get(7)?,
})
})
.ok()
.unwrap()
.map(|e| e.unwrap())
.collect()
}
pub fn lookup_non_parsed(conn: &Connection) -> Vec<(i64, String)> {
let mut stmt = conn
.prepare(&format!(
"
SELECT ei.item_id, ei.title FROM {} AS ei
LEFT JOIN {} AS sp ON ei.item_id = sp.item
WHERE sp.item IS NULL",
Self::TABLE_NAME,
ParsedStorage::TABLE_NAME
))
.ok()
.unwrap();
stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
.ok()
.unwrap()
.map(|e| e.unwrap())
.collect()
}
pub fn add_or_update(&self, conn: &Connection) {
let count = conn
.execute(
&format!(
"INSERT OR REPLACE INTO {}
(
item_id,
title,
added_time,
current_bid_usd_cents,
buy_it_now_usd_cents,
has_best_offer,
image_url
)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
Self::TABLE_NAME
),
(
self.item_id,
&self.title,
self.added_time,
self.current_bid_price,
self.buy_it_now_price,
self.has_best_offer,
self.image_url.clone(),
),
)
.unwrap();
if count != 1 {
panic!("Expected count to be 1 but got {}", count);
}
}
}
/// Opens the database at `path`, or an in-memory database when `path` is
/// `None`, and makes sure all four tables exist before returning the
/// connection.
///
/// # Panics
/// Panics if the database cannot be opened or if creating a table fails.
pub fn get_initialized(path: Option<&Path>) -> Connection {
    let opened = if let Some(db_path) = path {
        Connection::open(db_path)
    } else {
        Connection::open_in_memory()
    };
    let conn = opened.unwrap();

    SearchURL::initialize(&conn);
    Listing::initialize(&conn);
    ParsedStorage::initialize(&conn);
    ParsedPage::initialize(&conn);

    conn
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trips one value of each table type through an in-memory database.
    #[test]
    fn sanity_check() {
        let conn = get_initialized(None);

        // Search URL: insert, then read back by name.
        let search = SearchURL {
            full_url: String::from("google"),
            name: String::from("ssd"),
        };
        search.add_or_update(&conn);
        assert_eq!(SearchURL::lookup(&conn, &search.name), Some(search));

        // Listing: insert, then read back by item id. It is the first row in
        // its table, so the expected `id` is 1.
        let item = Listing {
            id: 1,
            item_id: 1234,
            title: String::from("Some Title"),
            added_time: std::time::SystemTime::now().into(),
            current_bid_price: Some(0.12),
            buy_it_now_price: Some(1.23),
            has_best_offer: false,
            image_url: String::from("google.com"),
        };
        item.add_or_update(&conn);
        assert_eq!(Listing::lookup(&conn, item.item_id), Some(item.clone()));

        // Parsed storage: `lookup_db` filters on the row id, so it is handed
        // the listing's row id (1), which is also this row's id.
        let storage = ParsedStorage {
            id: 1,
            item: 1234,
            total_gigabytes: 13,
            quantity: 3,
            individual_size_gigabytes: 13,
            parse_engine: 9,
            needed_description_check: true,
        };
        storage.add_or_update_db(&conn);
        assert_eq!(ParsedStorage::lookup_db(&conn, item.id), vec![storage]);

        // Parsed page: the category matches the search name inserted above.
        let parsed_page = ParsedPage {
            category: String::from("ssd"),
            timestamp: std::time::SystemTime::now().into(),
        };
        parsed_page.add_or_update_db(&conn);
        assert_eq!(
            ParsedPage::lookup_db(&conn, parsed_page.timestamp),
            Some(parsed_page)
        );
    }
}