diff --git a/src/db.rs b/src/db.rs
index 91b41fd..f1ee7ee 100644
--- a/src/db.rs
+++ b/src/db.rs
@@ -93,7 +93,7 @@ impl DBTable for ParsedPage {
     ";
 }
 impl ParsedPage {
-    pub fn lookup_db(conn: &Connection, timestamp: DateTime<Utc>) -> Option<Self> {
+    pub fn lookup(conn: &Connection, timestamp: DateTime<Utc>) -> Option<Self> {
         let mut stmt = conn
             .prepare(&format!(
                 "SELECT * FROM {} WHERE timestamp = ?",
@@ -110,7 +110,7 @@ impl ParsedPage {
             .ok()
     }
 
-    pub fn add_or_update_db(&self, conn: &Connection) {
+    pub fn add_or_update(&self, conn: &Connection) {
         let _ = conn
             .execute(
                 &format!(
@@ -148,7 +148,7 @@ impl DBTable for ParsedStorage {
     ";
 }
 impl ParsedStorage {
-    pub fn lookup_db(conn: &Connection, item: i64) -> Vec<Self> {
+    pub fn lookup(conn: &Connection, item: i64) -> Vec<Self> {
         let mut stmt = conn
             .prepare(&format!("SELECT * FROM {} WHERE id = ?", Self::TABLE_NAME))
             .ok()
@@ -173,7 +173,7 @@ impl ParsedStorage {
             .collect()
     }
 
-    pub fn add_or_update_db(&self, conn: &Connection) {
+    pub fn add_or_update(&self, conn: &Connection) {
         let _ = conn.execute(&format!("
             INSERT OR REPLACE INTO {}
             (item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, need_description_check)
@@ -440,18 +440,15 @@ mod tests {
             parse_engine: 9,
             needed_description_check: true,
         };
-        parsed.add_or_update_db(&db);
-        assert_eq!(ParsedStorage::lookup_db(&db, listing.id), vec![parsed]);
+        parsed.add_or_update(&db);
+        assert_eq!(ParsedStorage::lookup(&db, listing.id), vec![parsed]);
 
         let page = ParsedPage {
             category: "ssd".to_owned(),
             timestamp: std::time::SystemTime::now().into(),
         };
-        page.add_or_update_db(&db);
-        assert_eq!(
-            ParsedPage::lookup_db(&db, page.timestamp),
-            Some(page.clone())
-        );
+        page.add_or_update(&db);
+        assert_eq!(ParsedPage::lookup(&db, page.timestamp), Some(page.clone()));
 
         let apperance = ItemAppearances {
             item: listing.item_id,
diff --git a/src/main.rs b/src/main.rs
index 25fddff..f4223a8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -24,7 +24,7 @@ async fn page_get(
     db: Data<Mutex<Connection>>,
     timestamp: web::Path<i64>,
 ) -> Result {
-    Ok(web::Json(ParsedPage::lookup_db(
+    Ok(web::Json(ParsedPage::lookup(
         &db.lock().unwrap(),
         chrono::DateTime::from_timestamp(*timestamp, 0).unwrap(),
     )))
@@ -71,7 +71,7 @@ async fn parse_listings(db: Data<Mutex<Connection>>) -> Result
     db: Data<Mutex<Connection>>,
     id: web::Path<i64>,
 ) -> Result {
-    Ok(web::Json(ParsedStorage::lookup_db(
-        &db.lock().unwrap(),
-        *id,
-    )))
+    Ok(web::Json(ParsedStorage::lookup(&db.lock().unwrap(), *id)))
 }
 
 pub fn timestamps_from_dir(path: &Path) -> Vec<i64> {
@@ -121,63 +118,60 @@ async fn parse_post(
     // Ensure the category is created.
     let url: serde_json::Value =
         serde_json::from_str(&std::fs::read_to_string(dir.join("url.json")).unwrap()).unwrap();
-    info!("{:?}", url);
     let su = SearchURL {
         full_url: url.to_string(),
         name: category.to_string(),
     };
-    info!("{:?}", su);
     su.add_or_update(&db.lock().unwrap());
 
-    let added: u64 = timestamps_from_dir(dir)
-        .iter()
-        .filter(|t| {
-            info!("Checking for the existance of page {t}");
-            let p = ParsedPage::lookup_db(
-                &db.lock().unwrap(),
-                chrono::DateTime::from_timestamp(**t, 0).unwrap(),
-            );
+    // Find all pages.
+    let pages = timestamps_from_dir(dir);
 
-            // Timestamp never seen before, lets pass it on.
-            if p.is_none() {
-                return true;
-            }
+    // See what pages haven't been seen before.
+    let to_parse = pages.iter().filter(|t| {
+        let ts = chrono::DateTime::from_timestamp(**t, 0).unwrap();
+        info!("Checking if page with a timestamp of {ts} and category of {category} exists");
+        let p = ParsedPage::lookup(&db.lock().unwrap(), ts);
 
-            // Timestamp was seen before *and* from the same catagory, don't pass
-            // it on.
-            if p.unwrap().category == *category {
-                return false;
-            }
+        // Timestamp never seen before, let's pass it on.
+        if p.is_none() {
             return true;
-        })
-        .map(|t| {
-            let timestamp = chrono::DateTime::from_timestamp(*t, 0).unwrap();
-            info!("Adding or updating db with timestamp:{timestamp} catagory:{category}");
-            ParsedPage {
-                timestamp: timestamp,
-                category: category.to_string(),
-            }
-            .add_or_update_db(&db.lock().unwrap());
+        }
 
-            let mut cnt = 0;
-            parser_ebay::extract_data_from_html(
-                &std::fs::read_to_string(dir.join(format!("{t}.html"))).unwrap(),
-                &timestamp,
-                &category,
-            )
-            .unwrap()
-            .iter()
-            .for_each(|lp| {
-                cnt = cnt + 1;
-                lp.0.add_or_update(&db.lock().unwrap());
-                lp.1.add_or_update(&db.lock().unwrap());
-                info!("Inserting id:{}, title:{}", lp.0.item_id, lp.0.title);
-            });
-            cnt
-        })
-        .sum();
+        // Timestamp was seen before *and* from the same category, don't pass
+        // it on.
+        if p.unwrap().category == *category {
+            return false;
+        }
+        return true;
+    });
 
-    Ok(added.to_string())
+    let mut added_count = 0;
+    for p in to_parse {
+        let ts = chrono::DateTime::from_timestamp(*p, 0).unwrap();
+        info!("Adding page with a timestamp of {ts} and category of {category} to db");
+        ParsedPage {
+            timestamp: ts,
+            category: category.to_string(),
+        }
+        .add_or_update(&db.lock().unwrap());
+
+        let elements = parser_ebay::extract_data_from_html(
+            &std::fs::read_to_string(dir.join(format!("{p}.html"))).unwrap(),
+            &ts,
+            &category,
+        )
+        .unwrap();
+
+        added_count += elements.len();
+        for e in elements {
+            e.0.add_or_update(&db.lock().unwrap());
+            e.1.add_or_update(&db.lock().unwrap());
+            info!("Inserting id:{}, title:{}", e.0.item_id, e.0.title);
+        }
+    }
+
+    Ok(added_count.to_string())
 }
 
 #[actix_web::main]