Added stats, parallel parsing of pages, and filtered fetch of listings
All checks were successful
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 3m34s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 4m3s
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 4m13s
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 9m44s
All checks were successful
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 3m34s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 4m3s
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 4m13s
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 9m44s
This commit is contained in:
175
src/db.rs
175
src/db.rs
@@ -1,8 +1,9 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use rusqlite::Connection;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::path::Path;
|
||||
use tracing::info;
|
||||
use tracing::{error, info};
|
||||
|
||||
pub trait DBTable {
|
||||
const TABLE_NAME: &'static str;
|
||||
@@ -20,6 +21,16 @@ pub trait DBTable {
|
||||
conn.execute(create_table, ()).unwrap();
|
||||
}
|
||||
|
||||
fn get_count(conn: &Connection) -> i64 {
|
||||
let mut stmt = conn
|
||||
.prepare(&format!("SELECT COUNT(*) FROM {}", Self::TABLE_NAME))
|
||||
.ok()
|
||||
.unwrap();
|
||||
stmt.query_one([], |r| r.get(0))
|
||||
.inspect_err(|e| error!("Failed to get count due to error\"{:?}\", returning 0", e))
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>>
|
||||
where
|
||||
Self: Sized;
|
||||
@@ -95,6 +106,38 @@ impl SearchURL {
|
||||
.flatten()
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn scan(conn: &Connection, downloads_dir: &Path, filename: &str) {
|
||||
// Grab all directories.
|
||||
let dirs = std::fs::read_dir(downloads_dir)
|
||||
.unwrap()
|
||||
.filter_map(|e| Some(e.ok()?.path()))
|
||||
.filter(|e| e.is_dir());
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct URLJSON {
|
||||
url: String,
|
||||
}
|
||||
|
||||
// Grab url JSON's.
|
||||
for dir in dirs {
|
||||
let url_fpath = dir.join(filename);
|
||||
if !url_fpath.exists() {
|
||||
info!("Skipping {:?} as file does not exist", url_fpath);
|
||||
continue;
|
||||
}
|
||||
|
||||
let url_contents = std::fs::read_to_string(&url_fpath)
|
||||
.inspect_err(|e| error!("Failed reading {}: {e}", url_fpath.display()))
|
||||
.unwrap();
|
||||
let su = SearchURL {
|
||||
full_url: serde_json::from_str::<URLJSON>(&url_contents).unwrap().url,
|
||||
name: dir.file_name().unwrap().to_str().unwrap().to_owned(),
|
||||
};
|
||||
info!("Adding {:?} to search urls table", su);
|
||||
su.add_or_update(&conn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug, PartialEq, Clone)]
|
||||
@@ -213,7 +256,10 @@ impl DBTable for ParsedStorage {
|
||||
impl ParsedStorage {
|
||||
pub fn lookup(conn: &Connection, item: i64) -> Vec<ParsedStorage> {
|
||||
let mut stmt = conn
|
||||
.prepare(&format!("SELECT * FROM {} WHERE id = ?", Self::TABLE_NAME))
|
||||
.prepare(&format!(
|
||||
"SELECT * FROM {} WHERE item = ?",
|
||||
Self::TABLE_NAME
|
||||
))
|
||||
.ok()
|
||||
.unwrap();
|
||||
stmt.query_map([item], |row| {
|
||||
@@ -414,24 +460,25 @@ impl Listing {
|
||||
let mut stmt = conn
|
||||
.prepare(&format!(
|
||||
"
|
||||
SELECT *
|
||||
FROM {0}
|
||||
WHERE EXISTS (
|
||||
SELECT 1
|
||||
FROM {1}
|
||||
WHERE
|
||||
{1}.item = {0}.item_id AND
|
||||
{1}.timestamp >= ?1
|
||||
)
|
||||
LIMIT ?2
|
||||
",
|
||||
SELECT *
|
||||
FROM {0}
|
||||
WHERE EXISTS (
|
||||
SELECT 1
|
||||
FROM {1}
|
||||
WHERE
|
||||
{1}.item = {0}.item_id AND
|
||||
{1}.timestamp >= ?1
|
||||
)
|
||||
LIMIT {2}
|
||||
",
|
||||
Self::TABLE_NAME,
|
||||
ItemAppearances::TABLE_NAME
|
||||
ItemAppearances::TABLE_NAME,
|
||||
limit
|
||||
))
|
||||
.ok()
|
||||
.unwrap();
|
||||
|
||||
stmt.query_map([since.timestamp(), limit], |row| {
|
||||
stmt.query_map([since], |row| {
|
||||
Ok(Listing {
|
||||
id: row.get(0)?,
|
||||
item_id: row.get(1)?,
|
||||
@@ -496,6 +543,77 @@ impl Listing {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct ListingsFilterResult {
|
||||
listing: Listing,
|
||||
history: Vec<ItemAppearances>,
|
||||
parsed: Vec<ParsedStorage>,
|
||||
}
|
||||
|
||||
pub fn listings_get_filtered(
|
||||
conn: &Connection,
|
||||
since: &DateTime<Utc>,
|
||||
limit: i64,
|
||||
cents_per_tbytes_max: i64,
|
||||
) -> Vec<ListingsFilterResult> {
|
||||
// First grab all appearances seen since the timestamp including their
|
||||
// history and parsings.
|
||||
let listings_recent = Listing::lookup_since(conn, *since, 100_000)
|
||||
.into_iter()
|
||||
.map(|l| ListingsFilterResult {
|
||||
listing: l.clone(),
|
||||
history: ItemAppearances::lookup(conn, l.item_id),
|
||||
parsed: ParsedStorage::lookup(conn, l.item_id),
|
||||
})
|
||||
.filter(|lr| lr.parsed.iter().any(|p| !p.needed_description_check))
|
||||
.collect::<Vec<ListingsFilterResult>>();
|
||||
info!(
|
||||
"Found total {} listings since (str:{} epoch:{})",
|
||||
listings_recent.len(),
|
||||
*since,
|
||||
since.timestamp()
|
||||
);
|
||||
|
||||
// Then for each listing grab if within our price range.
|
||||
let listings: Vec<ListingsFilterResult> = listings_recent
|
||||
.into_iter()
|
||||
.filter_map(|l| {
|
||||
let mut history = l.history.clone();
|
||||
history.sort_by_key(|h| h.timestamp);
|
||||
// info!("item_id:{} history: {:?}", l.listing.item_id, history);
|
||||
let cents = history
|
||||
.last()
|
||||
.map(|h| h.current_bid_usd_cents)
|
||||
.unwrap_or(l.listing.buy_it_now_price_cents)?;
|
||||
// info!("item_id:{} cents: {:?}", l.listing.item_id, cents);
|
||||
let mut parses = l.parsed.clone();
|
||||
parses.sort_by_key(|p| p.parse_engine);
|
||||
// info!("item_id:{} parses: {:?}", l.listing.item_id, parses);
|
||||
let gb = parses.last()?.total_gigabytes;
|
||||
// info!("item_id:{} gb: {:?}", l.listing.item_id, gb);
|
||||
let usd_per_tb = (cents as f64 / 100.0) / (gb as f64 / 1024.0);
|
||||
// info!(
|
||||
// "item_id: {}, gb:{}, cents:{}, usd_per_tb:{}, cents_per_tbytes_max:{}",
|
||||
// l.listing.item_id, gb, cents, usd_per_tb, cents_per_tbytes_max
|
||||
// );
|
||||
if usd_per_tb >= (cents_per_tbytes_max as f64 / 100.0) {
|
||||
None
|
||||
} else {
|
||||
Some(l)
|
||||
}
|
||||
})
|
||||
.take(limit as usize)
|
||||
.collect();
|
||||
info!(
|
||||
"Found total {} listings since (str:{} epoch:{}) filtered by price",
|
||||
listings.len(),
|
||||
*since,
|
||||
since.timestamp()
|
||||
);
|
||||
|
||||
listings
|
||||
}
|
||||
|
||||
pub fn get_initialized(path: Option<&Path>) -> Connection {
|
||||
let conn = match path {
|
||||
Some(p) => Connection::open(&p),
|
||||
@@ -512,11 +630,30 @@ pub fn get_initialized(path: Option<&Path>) -> Connection {
|
||||
conn
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug)]
|
||||
pub struct Stats {
|
||||
rows_search_url: i64,
|
||||
rows_listing: i64,
|
||||
rows_parsed_storage: i64,
|
||||
rows_parsed_page: i64,
|
||||
rows_item_appearances: i64,
|
||||
}
|
||||
|
||||
pub fn get_stats(conn: &Connection) -> Stats {
|
||||
Stats {
|
||||
rows_search_url: SearchURL::get_count(conn),
|
||||
rows_listing: Listing::get_count(conn),
|
||||
rows_parsed_storage: ParsedStorage::get_count(conn),
|
||||
rows_parsed_page: ParsedPage::get_count(conn),
|
||||
rows_item_appearances: ItemAppearances::get_count(conn),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
#[test_log::test]
|
||||
fn sanity_check() {
|
||||
let db = get_initialized(None);
|
||||
|
||||
@@ -548,7 +685,7 @@ mod tests {
|
||||
needed_description_check: true,
|
||||
};
|
||||
parsed.add_or_update(&db);
|
||||
assert_eq!(ParsedStorage::lookup(&db, listing.id), vec![parsed]);
|
||||
assert_eq!(ParsedStorage::lookup(&db, listing.item_id), vec![parsed]);
|
||||
|
||||
let page = ParsedPage {
|
||||
category: "ssd".to_owned(),
|
||||
@@ -570,5 +707,9 @@ mod tests {
|
||||
);
|
||||
|
||||
assert_eq!(Listing::lookup_since(&db, page.timestamp, 3), vec![listing]);
|
||||
assert_eq!(
|
||||
Listing::lookup_since(&db, page.timestamp + chrono::Duration::seconds(1), 3),
|
||||
vec![]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user