//! HTTP server exposing scraped eBay data: lookup endpoints for pages and
//! listings, plus endpoints that parse downloaded search-result pages into
//! the database.

use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
use chrono::DateTime;
use clap::Parser;
use ebay_scraper_rust::db::{
    ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
};
use ebay_scraper_rust::{parser_ebay, parser_storage};
// The concrete connection type behind the `Data<Mutex<_>>` handles was lost
// from the original source; rusqlite's `Connection` is assumed here, with
// `get_initialized` taken to return one.
use rusqlite::Connection;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use tracing::info;

mod xdg_dirs;

#[derive(Parser, Debug)]
#[clap(
    name = "ebay-scraper-rust",
    version = "0.1.0",
    about = "Scrapes eBay search results for homelab purposes"
)]
struct Args {}

#[get("/page/{timestamp}")]
async fn page_get(
    db: Data<Mutex<Connection>>,
    timestamp: web::Path<i64>,
) -> Result<impl Responder> {
    Ok(web::Json(ParsedPage::lookup(
        &db.lock().unwrap(),
        chrono::DateTime::from_timestamp(*timestamp, 0).unwrap(),
    )))
}

// Listing ids are assumed to be numeric (i64) throughout.
#[get("/listing/{id}/history")]
async fn listing_history_get(
    db: Data<Mutex<Connection>>,
    id: web::Path<i64>,
) -> Result<impl Responder> {
    // Keep only the appearances that actually carry a bid.
    let history: Vec<_> = ItemAppearances::lookup(&db.lock().unwrap(), *id)
        .iter()
        .inspect(|e| info!("got: {:?}", e))
        .filter_map(|e| Some((e.timestamp, e.current_bid_usd_cents?)))
        .collect();
    Ok(web::Json(history))
}

#[get("/listing/{id}")]
async fn listing_get(
    db: Data<Mutex<Connection>>,
    id: web::Path<i64>,
) -> Result<impl Responder> {
    Ok(web::Json(Listing::lookup(&db.lock().unwrap(), *id)))
}

#[get("/listing/since/{timestamp}/{limit}")]
async fn listing_since_get(
    db: Data<Mutex<Connection>>,
    req: web::Path<(i64, i64)>,
) -> Result<impl Responder> {
    Ok(web::Json(Listing::lookup_since(
        &db.lock().unwrap(),
        DateTime::from_timestamp(req.0, 0).unwrap(),
        req.1,
    )))
}

#[post("/listing/parse")]
async fn parse_listings(db: Data<Mutex<Connection>>) -> Result<impl Responder> {
    let mut cnt = 0;
    let db_unlocked = db.lock().unwrap();
    Listing::lookup_non_parsed(&db_unlocked)
        .iter()
        .map(|l| parser_storage::parse_size_and_quantity(l.0, &l.1))
        .inspect(|_| cnt += 1)
        .for_each(|ps| ps.add_or_update(&db_unlocked));
    Ok(web::Json(cnt))
}

#[get("/listing/parse/{id}")]
async fn listing_parse_get(
    db: Data<Mutex<Connection>>,
    id: web::Path<i64>,
) -> Result<impl Responder> {
    Ok(web::Json(ParsedStorage::lookup(&db.lock().unwrap(), *id)))
}

pub fn timestamps_from_dir(path: &Path) -> Vec<i64> {
    if !std::fs::exists(path).expect("Directory must exist") {
        panic!(
            "Directory {:?} does not exist, cannot grab timestamps from there.",
            path
        );
    }

    // Each downloaded page is stored as <timestamp>.html; recover the
    // timestamps from the file stems.
    std::fs::read_dir(path)
        .unwrap()
        .inspect(|fpath| info!("Found {:?}", fpath))
        .map(|fpath| fpath.unwrap().path())
        .filter_map(|fstem| {
            fstem
                .file_stem()
                .and_then(|s| s.to_str())
                .expect("Invalid file name")
                .parse()
                .ok()
        })
        .collect()
}

#[post("/page/parse/{category}")]
async fn parse_post(
    db: Data<Mutex<Connection>>,
    downloaddir: Data<PathBuf>,
    category: web::Path<String>,
) -> Result<impl Responder> {
    let dir = &downloaddir.join(category.clone());

    // Ensure the category is created.
    let url: serde_json::Value =
        serde_json::from_str(&std::fs::read_to_string(dir.join("url.json")).unwrap()).unwrap();
    let su = SearchURL {
        full_url: url.to_string(),
        name: category.to_string(),
    };
    su.add_or_update(&db.lock().unwrap());

    // Find all pages.
    let pages = timestamps_from_dir(dir);

    // See which pages haven't been seen before.
    let to_parse = pages.iter().filter(|t| {
        let ts = chrono::DateTime::from_timestamp(**t, 0).unwrap();
        info!("Checking if page with a timestamp of {ts} and category of {category} exists");
        let p = ParsedPage::lookup(&db.lock().unwrap(), ts);

        // Timestamp never seen before; pass it on.
        if p.is_none() {
            return true;
        }

        // Timestamp seen before *and* from the same category; don't pass it
        // on.
        if p.unwrap().category == *category {
            return false;
        }
        true
    });

    let mut added_count = 0;
    for p in to_parse {
        let ts = chrono::DateTime::from_timestamp(*p, 0).unwrap();
        info!("Adding page with a timestamp of {ts} and category of {category} to db");
        ParsedPage {
            timestamp: ts,
            category: category.to_string(),
        }
        .add_or_update(&db.lock().unwrap());

        let elements = parser_ebay::extract_data_from_html(
            &std::fs::read_to_string(dir.join(format!("{ts}.html"))).unwrap(),
            &ts,
            &category,
        )
        .unwrap();
        added_count += elements.len();
        for e in elements {
            e.0.add_or_update(&db.lock().unwrap());
            e.1.add_or_update(&db.lock().unwrap());
            info!("Inserting id:{}, title:{}", e.0.item_id, e.0.title);
        }
    }
    Ok(added_count.to_string())
}

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    // `info!` events come from `tracing`; they reach env_logger's output only
    // when the `tracing` crate's "log" feature is enabled (assumed here).
    env_logger::init_from_env(env_logger::Env::new().default_filter_or("info"));
    let _ = Args::parse();

    let scrapedatadir = xdg_dirs::ensure_scrapedata_dir_exists("ebay_scraper", None);
    info!(
        "Starting with scraped data dir of \"{}\".",
        scrapedatadir.to_str().unwrap()
    );

    let db_mutex = Data::new(Mutex::new(get_initialized(None)));
    HttpServer::new(move || {
        App::new()
            .service(page_get)
            .service(listing_get)
            .service(listing_history_get)
            .service(listing_since_get)
            .service(listing_parse_get)
            .service(parse_post)
            .service(parse_listings)
            .app_data(db_mutex.clone())
            .app_data(Data::new(scrapedatadir.clone()))
    })
    .bind(("127.0.0.1", 8080))?
    .run()
    .await
}
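// A sketch of exercising the service once it is running; the "ssd" category
// name is hypothetical and stands for any directory under the scrape data dir
// that holds a url.json plus <timestamp>.html page snapshots:
//
//   curl -X POST http://127.0.0.1:8080/page/parse/ssd
//   curl -X POST http://127.0.0.1:8080/listing/parse
//   curl http://127.0.0.1:8080/listing/since/0/10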