use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
use chrono::{DateTime, Utc};
use clap::Parser;
use ebay_scraper_rust::db::{
    DBTable, ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
    get_stats, listings_get_filtered,
};
use ebay_scraper_rust::parser::parse_dir;
use ebay_scraper_rust::{parser_storage_e0, parser_storage_e1};
use rusqlite::Connection; // Concrete DB handle type; assumed to be rusqlite's.
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use std::sync::Mutex;
use std::time::Instant;
use tracing::{info, instrument};
use tracing_subscriber::filter::EnvFilter;
use tracing_subscriber::fmt;
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;

mod xdg_dirs;

#[derive(Parser, Debug)]
#[clap(
    name = "ebay-scraper-rust",
    version = "0.1.0",
    about = "Scrapes eBay search results for homelab purposes"
)]
struct Args {}

/// Query-string filter for `GET /listings`; every field is optional and the
/// handler substitutes defaults for missing values.
#[derive(Deserialize, Debug)]
struct ListingsFilter {
    since: Option<i64>,
    limit: Option<i64>,
    cents_per_tbytes_max: Option<i64>,
}

#[get("/listings")]
async fn listings_filtered_get(
    db: Data<Mutex<Connection>>,
    filter: web::Query<ListingsFilter>,
) -> Result<impl Responder> {
    let start = Instant::now();
    let res = listings_get_filtered(
        &db.lock().unwrap(),
        &DateTime::<Utc>::from_timestamp(filter.since.unwrap_or(0), 0).unwrap(),
        filter.limit.unwrap_or(1_000),
        filter.cents_per_tbytes_max.unwrap_or(100_00),
    );
    let elapsed = start.elapsed().as_micros() as f64 / 1000.0;
    info!(
        "Took {elapsed} milliseconds with {} listings found for a filter of {:?}",
        res.len(),
        filter
    );
    Ok(web::Json(res))
}
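// Example request for the handler above (illustrative values, not from the
// original source):
//
//   GET /listings?since=1700000000&limit=50&cents_per_tbytes_max=1500
//
// returns at most 50 listings recorded since that UNIX timestamp and costing
// no more than 1500 cents ($15.00) per terabyte. Omitted parameters fall back
// to the `unwrap_or` defaults above: since=0, limit=1000,
// cents_per_tbytes_max=10000.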
#[get("/listing/{id}")]
async fn listing_get(
    db: Data<Mutex<Connection>>,
    id: web::Path<i64>,
) -> Result<impl Responder> {
    Ok(web::Json(Listing::lookup(&db.lock().unwrap(), *id)))
}

#[get("/listing/{id}/parsed")]
async fn listing_parse_get(
    db: Data<Mutex<Connection>>,
    id: web::Path<i64>,
) -> Result<impl Responder> {
    Ok(web::Json(ParsedStorage::lookup(&db.lock().unwrap(), *id)))
}

#[derive(Serialize)]
struct APIHistory {
    when: DateTime<Utc>,
    current_bid_usd_cents: i64,
}

#[get("/listing/{id}/history")]
async fn listing_history_get(
    db: Data<Mutex<Connection>>,
    id: web::Path<i64>,
) -> Result<impl Responder> {
    // Appearances without a recorded bid are skipped; the rest become
    // (timestamp, bid) pairs.
    let history: Vec<_> = ItemAppearances::lookup(&db.lock().unwrap(), *id)
        .iter()
        // .inspect(|e| info!("got: {:?}", e))
        .filter_map(|e| {
            Some(APIHistory {
                when: e.timestamp,
                current_bid_usd_cents: e.current_bid_usd_cents?,
            })
        })
        .collect();
    Ok(web::Json(history))
}

#[post("/listing/parse")]
async fn parse_listings(db: Data<Mutex<Connection>>) -> Result<impl Responder> {
    let mut cnt = 0;
    let db_unlocked = db.lock().unwrap();
    Listing::lookup_non_parsed(&db_unlocked)
        .iter()
        .map(|l| parser_storage_e0::parse_size_and_quantity(l.0, &l.1))
        .inspect(|_| cnt += 1)
        .for_each(|ps| ps.add_or_update(&db_unlocked));
    Ok(web::Json(cnt))
}

#[get("/category")]
async fn category_getnames(db: Data<Mutex<Connection>>) -> Result<impl Responder> {
    Ok(web::Json(SearchURL::names(&db.lock().unwrap())))
}

#[post("/category/{category}/parse")]
#[instrument(skip_all)]
async fn category_parse(
    db: Data<Mutex<Connection>>,
    downloaddir: Data<PathBuf>,
    category: web::Path<String>,
) -> Result<impl Responder> {
    let start = Instant::now();
    let count = parse_dir(
        &downloaddir.join(category.clone()),
        &category,
        &db.lock().unwrap(),
    )
    .unwrap();
    let elapsed = start.elapsed().as_micros() as f64 / 1000.0;
    info!("Added {count} listings, took {elapsed} ms.");
    Ok(count.to_string())
}

#[get("/stats")]
async fn stats_get(db: Data<Mutex<Connection>>) -> Result<impl Responder> {
    Ok(web::Json(get_stats(&db.lock().unwrap())))
}

#[get("/admin")]
async fn admin_get(db: Data<Mutex<Connection>>) -> Result<impl Responder> {
    let db = db.lock().unwrap();

    let query_start_time = Instant::now();
    let search_urls = SearchURL::get_all(&db).unwrap_or_default();
    let parsed_pages = ParsedPage::get_all(&db).unwrap_or_default();
    let parsed_storages = ParsedStorage::get_all(&db).unwrap_or_default();
    let item_appearances = ItemAppearances::get_all(&db).unwrap_or_default();
    let listings = Listing::get_all(&db).unwrap_or_default();
    let total_query_time = query_start_time.elapsed().as_micros() as f64 / 1000.0;

    let html_gen_start_time = Instant::now();
    let mut html = String::new();
    html.push_str(
        r#"<!DOCTYPE html>
<html>
<head>
    <title>Database Dump</title>
</head>
<body>
    <h1>Database Dump</h1>
"#,
    );

    html.push_str(&generate_table("SearchURLs", &search_urls));
    html.push_str(&generate_table("Pages_Parsed", &parsed_pages));
    html.push_str(&generate_table("Storage_Parsed", &parsed_storages));
    html.push_str(&generate_table("Item_Appearances", &item_appearances));
    html.push_str(&generate_table("Listings", &listings));

    // Performance Metrics
    let html_gen_time = html_gen_start_time.elapsed().as_micros() as f64 / 1000.0;
    html.push_str(&format!(
        r#"    <div>
        <p>Database query time: {}ms</p>
        <p>HTML generation time: {}ms</p>
    </div>
"#,
        total_query_time, html_gen_time
    ));
    info!("DB Query ms: {total_query_time}, HTML Generation ms: {html_gen_time}");

    // Footer and Scripts
    html.push_str(
        r#"</body>
</html>"#,
    );

    Ok(web::Html::new(html))
}

/// Renders a slice of serializable rows as an HTML heading plus table.
fn generate_table<T: Serialize>(title: &str, data: &[T]) -> String {
    use serde_json::Value;

    if data.is_empty() {
        return format!("<h2>{} (0 rows)</h2>\n", title);
    }

    let mut headers: Vec<String> = serde_json::to_value(&data[0])
        .unwrap_or(Value::Null)
        .as_object()
        .map_or(Vec::new(), |obj| obj.keys().cloned().collect());

    // Define the desired order for specific columns.
    let desired_order = ["id", "item", "item_id", "timestamp"];

    // Sort the headers. Columns in `desired_order` come first,
    // in that order. The rest are sorted alphabetically.
    headers.sort_by(|a, b| {
        let a_pos = desired_order
            .iter()
            .position(|&p| p == a)
            .unwrap_or(usize::MAX);
        let b_pos = desired_order
            .iter()
            .position(|&p| p == b)
            .unwrap_or(usize::MAX);
        a_pos.cmp(&b_pos).then_with(|| a.cmp(b))
    });

    // Create the HTML for the table header row.
    let header_html = headers
        .iter()
        .map(|header| format!("<th>{}</th>", header))
        .collect::<String>();

    // Create the HTML for all the table body rows.
    let body_html = data
        .iter()
        .map(|item| {
            let item_json = serde_json::to_value(item).unwrap_or(Value::Null);
            let obj = item_json.as_object();

            // Create all cells for a single row.
            let cells_html = headers
                .iter()
                .map(|header| {
                    let value = obj.and_then(|o| o.get(header)).unwrap_or(&Value::Null);
                    // Remove quotes from the resulting JSON string value for cleaner output.
                    format!("<td>{}</td>", value.to_string().replace('"', ""))
                })
                .collect::<String>();
            format!("<tr>{}</tr>", cells_html)
        })
        .collect::<String>();

    // Assemble the final table HTML.
    format!(
        "<h2>{} ({} rows)</h2>\n<table>\n<thead><tr>{}</tr></thead>\n<tbody>{}</tbody>\n</table>\n",
        title,
        data.len(),
        header_html,
        body_html
    )
}
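// A minimal test sketch for `generate_table`: the `Row` struct and the expected
// markup snippets are illustrative assumptions layered on the HTML emitted
// above, not part of the original code. It relies only on `serde`/`serde_json`,
// which this file already uses.
#[cfg(test)]
mod generate_table_tests {
    use super::generate_table;
    use serde::Serialize;

    #[derive(Serialize)]
    struct Row {
        id: i64,
        timestamp: i64,
        price: i64,
    }

    #[test]
    fn known_columns_come_first_then_alphabetical() {
        let rows = vec![Row { id: 7, timestamp: 123, price: 999 }];
        let html = generate_table("Rows", &rows);

        // `id` and `timestamp` are in the preferred-order list, so they should
        // precede the alphabetically sorted remainder (`price`).
        let id_pos = html.find("<th>id</th>").unwrap();
        let ts_pos = html.find("<th>timestamp</th>").unwrap();
        let price_pos = html.find("<th>price</th>").unwrap();
        assert!(id_pos < ts_pos && ts_pos < price_pos);

        // One <td> per column, and the row count shows up in the heading.
        assert!(html.contains("<td>7</td>"));
        assert!(html.contains("Rows (1 rows)"));
    }

    #[test]
    fn empty_input_renders_a_zero_row_heading() {
        let rows: Vec<Row> = Vec::new();
        assert!(generate_table("Empty", &rows).contains("Empty (0 rows)"));
    }
}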
#[actix_web::main]
async fn main() -> std::io::Result<()> {
    tracing_subscriber::registry()
        .with(fmt::layer())
        .with(EnvFilter::from_default_env())
        .init();
    let _ = Args::parse();

    let scrapedatadir = xdg_dirs::ensure_scrapedata_dir_exists("scraper", None);
    info!(
        "Starting with scraped data dir of \"{}\".",
        scrapedatadir.to_str().unwrap()
    );

    let db_mutex = Data::new(Mutex::new(get_initialized(None)));

    // Prepare our backend via pulling in what categories we are preconfigured with.
    SearchURL::scan(&db_mutex.lock().unwrap(), &scrapedatadir, "url.json");

    HttpServer::new(move || {
        App::new()
            // Listing handlers
            .service(listing_get)
            .service(listings_filtered_get)
            .service(listing_parse_get)
            .service(listing_history_get)
            // Category handlers
            .service(parse_listings)
            .service(category_parse)
            .service(category_getnames)
            // Gnarly info dump
            .service(admin_get)
            .service(stats_get)
            // Stuff which is passed into every request.
            .app_data(db_mutex.clone())
            .app_data(Data::new(scrapedatadir.clone()))
    })
    .bind(("0.0.0.0", 9876))?
    .run()
    .await
}