From bbca1f3bcb2f533fce8cfb074871485e38865c4a Mon Sep 17 00:00:00 2001 From: hak8or Date: Sat, 28 Jun 2025 15:49:26 -0400 Subject: [PATCH] Add an admin based table dump and change buy_it_now_price to cents --- src/db.rs | 119 +++++++++++++- src/main.rs | 153 ++++++++++++++++-- src/parser_ebay.rs | 8 +- .../raw_scraped/minipc/1750637320.html | 0 .../raw_scraped/minipc/1750637335.html | 59 ------- .../raw_scraped/ssd/1750637297.html | 63 -------- .../raw_scraped/ssd/1750637334.html | 63 -------- .../raw_scraped/minipc/url.json | 0 .../raw_scraped/ssd/1750369463.html | 0 .../raw_scraped/ssd/url.json | 0 10 files changed, 256 insertions(+), 209 deletions(-) delete mode 100644 test_data/ebay_scraper/raw_scraped/minipc/1750637320.html delete mode 100644 test_data/ebay_scraper/raw_scraped/minipc/1750637335.html delete mode 100644 test_data/ebay_scraper/raw_scraped/ssd/1750637297.html delete mode 100644 test_data/ebay_scraper/raw_scraped/ssd/1750637334.html rename test_data/{ebay_scraper => scraper}/raw_scraped/minipc/url.json (100%) rename test_data/{ebay_scraper => scraper}/raw_scraped/ssd/1750369463.html (100%) rename test_data/{ebay_scraper => scraper}/raw_scraped/ssd/url.json (100%) diff --git a/src/db.rs b/src/db.rs index f1ee7ee..8d035c2 100644 --- a/src/db.rs +++ b/src/db.rs @@ -4,7 +4,7 @@ use serde::Serialize; use std::path::Path; use tracing::info; -trait DBTable { +pub trait DBTable { const TABLE_NAME: &'static str; const TABLE_SCHEMA: &'static str; fn initialize(conn: &Connection) { @@ -19,6 +19,10 @@ trait DBTable { info!("{} ({})", Self::TABLE_NAME, Self::TABLE_SCHEMA); conn.execute(create_table, ()).unwrap(); } + + fn get_all(conn: &Connection) -> rusqlite::Result> + where + Self: Sized; } #[derive(Serialize, Debug, PartialEq, Clone)] @@ -32,6 +36,22 @@ impl DBTable for SearchURL { id INTEGER PRIMARY KEY, url TEXT NOT NULL UNIQUE, name TEXT NOT NULL UNIQUE"; + + fn get_all(conn: &Connection) -> rusqlite::Result> { + let mut stmt = conn.prepare(&format!("SELECT url, name FROM {}", Self::TABLE_NAME))?; + let iter = stmt.query_map([], |row| { + Ok(SearchURL { + full_url: row.get(0)?, + name: row.get(1)?, + }) + })?; + + let mut result = Vec::new(); + for item in iter { + result.push(item?); + } + Ok(result) + } } impl SearchURL { pub fn lookup(conn: &Connection, name: &str) -> Option { @@ -91,6 +111,25 @@ impl DBTable for ParsedPage { UNIQUE(category, timestamp) FOREIGN KEY(category) REFERENCES SearchURLs(name) "; + + fn get_all(conn: &Connection) -> rusqlite::Result> { + let mut stmt = conn.prepare(&format!( + "SELECT category, timestamp FROM {}", + Self::TABLE_NAME + ))?; + let iter = stmt.query_map([], |row| { + Ok(ParsedPage { + category: row.get(0)?, + timestamp: row.get(1)?, + }) + })?; + + let mut result = Vec::new(); + for item in iter { + result.push(item?); + } + Ok(result) + } } impl ParsedPage { pub fn lookup(conn: &Connection, timestamp: DateTime) -> Option { @@ -146,6 +185,30 @@ impl DBTable for ParsedStorage { UNIQUE(item, parse_engine) FOREIGN KEY(item) REFERENCES Listings(item_id) "; + + fn get_all(conn: &Connection) -> rusqlite::Result> { + let mut stmt = conn.prepare(&format!("SELECT id, item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, need_description_check FROM {}", Self::TABLE_NAME))?; + let iter = stmt.query_map([], |row| { + Ok(ParsedStorage { + id: row.get(0)?, + item: row.get(1)?, + total_gigabytes: row.get(2)?, + quantity: row.get(3)?, + individual_size_gigabytes: { + let r: String = row.get(4)?; + r.parse().unwrap_or(0) + }, + parse_engine: row.get(5)?, + needed_description_check: row.get(6)?, + }) + })?; + + let mut result = Vec::new(); + for item in iter { + result.push(item?); + } + Ok(result) + } } impl ParsedStorage { pub fn lookup(conn: &Connection, item: i64) -> Vec { @@ -211,6 +274,27 @@ impl DBTable for ItemAppearances { FOREIGN KEY(item) REFERENCES Listings(item_id), FOREIGN KEY(category, timestamp) REFERENCES Pages_Parsed(category, timestamp) "; + + fn get_all(conn: &Connection) -> rusqlite::Result> { + let mut stmt = conn.prepare(&format!( + "SELECT item, category, timestamp, current_bid_usd_cents FROM {}", + Self::TABLE_NAME + ))?; + let iter = stmt.query_map([], |row| { + Ok(ItemAppearances { + item: row.get(0)?, + category: row.get(1)?, + timestamp: row.get(2)?, + current_bid_usd_cents: row.get(3)?, + }) + })?; + + let mut result = Vec::new(); + for item in iter { + result.push(item?); + } + Ok(result) + } } impl ItemAppearances { pub fn add_or_update(&self, conn: &Connection) { @@ -267,7 +351,7 @@ pub struct Listing { pub id: i64, pub item_id: i64, pub title: String, - pub buy_it_now_price: Option, + pub buy_it_now_price_cents: Option, pub has_best_offer: bool, pub image_url: String, } @@ -281,6 +365,29 @@ impl DBTable for Listing { has_best_offer INTEGER NOT NULL, image_url TEXT NOT NULL "; + + fn get_all(conn: &Connection) -> rusqlite::Result> { + let mut stmt = conn.prepare(&format!( + "SELECT id, item_id, title, buy_it_now_usd_cents, has_best_offer, image_url FROM {}", + Self::TABLE_NAME + ))?; + let iter = stmt.query_map([], |row| { + Ok(Listing { + id: row.get(0)?, + item_id: row.get(1)?, + title: row.get(2)?, + buy_it_now_price_cents: row.get(3)?, + has_best_offer: row.get(4)?, + image_url: row.get(5)?, + }) + })?; + + let mut result = Vec::new(); + for item in iter { + result.push(item?); + } + Ok(result) + } } impl Listing { pub fn lookup(conn: &Connection, item_id: i64) -> Option { @@ -295,7 +402,7 @@ impl Listing { id: row.get(0)?, item_id: row.get(1)?, title: row.get(2)?, - buy_it_now_price: row.get(3)?, + buy_it_now_price_cents: row.get(3)?, has_best_offer: row.get(4)?, image_url: row.get(5)?, }) @@ -329,7 +436,7 @@ impl Listing { id: row.get(0)?, item_id: row.get(1)?, title: row.get(2)?, - buy_it_now_price: row.get(3)?, + buy_it_now_price_cents: row.get(3)?, has_best_offer: row.get(4)?, image_url: row.get(5)?, }) @@ -377,7 +484,7 @@ impl Listing { ( self.item_id, &self.title, - self.buy_it_now_price, + self.buy_it_now_price_cents, self.has_best_offer, self.image_url.clone(), ), @@ -424,7 +531,7 @@ mod tests { id: 1, item_id: 1234, title: "Some Title".to_string(), - buy_it_now_price: Some(1.23), + buy_it_now_price_cents: Some(123), has_best_offer: false, image_url: "google.com".to_string(), }; diff --git a/src/main.rs b/src/main.rs index 4f43aca..156bed3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,9 +2,10 @@ use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data}; use chrono::DateTime; use clap::Parser; use ebay_scraper_rust::db::{ - ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized, + DBTable, ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized, }; use ebay_scraper_rust::{parser_ebay, parser_storage}; +use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; use std::sync::Mutex; use std::time::Instant; @@ -122,10 +123,15 @@ async fn parse_post( let dir = &downloaddir.join(category.clone()); // Ensure the category is created. - let url: serde_json::Value = - serde_json::from_str(&std::fs::read_to_string(dir.join("url.json")).unwrap()).unwrap(); + let url_fpath = dir.join("url.json"); + let url_contents = std::fs::read_to_string(&url_fpath) + .inspect_err(|e| error!("Failed reading {}: {e}", url_fpath.display()))?; + #[derive(Deserialize)] + struct URLJSON { + url: String, + } let su = SearchURL { - full_url: url.to_string(), + full_url: serde_json::from_str::(&url_contents).unwrap().url, name: category.to_string(), }; su.add_or_update(&db.lock().unwrap()); @@ -136,7 +142,6 @@ async fn parse_post( // See what pages haven't been seen before. let to_parse = pages.iter().filter(|t| { let ts = chrono::DateTime::from_timestamp(**t, 0).unwrap(); - info!("Checking if page with a timestamp of {ts} and catagory of {category} exists"); let p = ParsedPage::lookup(&db.lock().unwrap(), ts); // Timestamp never seen before, lets pass it on. @@ -174,18 +179,26 @@ async fn parse_post( } .add_or_update(&db.lock().unwrap()); - let elements = parser_ebay::extract_data_from_html( - &std::fs::read_to_string(dir.join(format!("{ts}.html"))).unwrap(), - &ts, - &category, - ) - .unwrap(); + let page_path = dir.join(format!("{}.html", ts.timestamp())); + let page_contents = std::fs::read_to_string(&page_path) + .inspect_err(|e| error!("Failed reading {}, error:{e}", page_path.display()))?; + let elements = parser_ebay::extract_data_from_html(&page_contents, &ts, &category).unwrap(); + info!( + "Page {} contains {} elements", + ts.timestamp(), + elements.len() + ); added_count += elements.len(); for e in elements { e.0.add_or_update(&db.lock().unwrap()); e.1.add_or_update(&db.lock().unwrap()); - info!("Inserting id:{}, title:{}", e.0.item_id, e.0.title); + info!( + "From page {}, inserting id:{}, title:{}", + ts.timestamp(), + e.0.item_id, + e.0.title + ); } } @@ -193,6 +206,117 @@ async fn parse_post( Ok(added_count.to_string()) } +#[get("admin")] +async fn admin_get(db: Data>) -> Result { + let db = db.lock().unwrap(); + let query_start_time = Instant::now(); + let search_urls = SearchURL::get_all(&db).unwrap_or_default(); + let parsed_pages = ParsedPage::get_all(&db).unwrap_or_default(); + let parsed_storages = ParsedStorage::get_all(&db).unwrap_or_default(); + let item_appearances = ItemAppearances::get_all(&db).unwrap_or_default(); + let listings = Listing::get_all(&db).unwrap_or_default(); + let total_query_time = query_start_time.elapsed().as_micros() as f64 / 1000.0; + + let html_gen_start_time = Instant::now(); + let mut html = String::new(); + html.push_str( + r#" + + + + + Database Dump + + + + + + +

Database Dump

+"#, + ); + + // Performance Metrics + let html_gen_time = html_gen_start_time.elapsed().as_micros() as f64 / 1000.0; + html.push_str(&format!( + r#"

+ Database query time: {}ms
+ HTML generation time: {}ms +

"#, + total_query_time, html_gen_time + )); + info!("DB Query ms: {total_query_time}, HTML Generation ms:{html_gen_time}"); + + // --- Tables --- + + // SearchURLs + html.push_str(&generate_table("SearchURLs", &search_urls)); + + // ParsedPages + html.push_str(&generate_table("Pages_Parsed", &parsed_pages)); + + // ParsedStorage + html.push_str(&generate_table("Storage_Parsed", &parsed_storages)); + + // ItemAppearances + html.push_str(&generate_table("Item_Appearances", &item_appearances)); + + // Listings + html.push_str(&generate_table("Listings", &listings)); + + // Footer and Scripts + html.push_str( + r#" + + +"#, + ); + Ok(web::Html::new(&html)) +} + +fn generate_table(title: &str, data: &[T]) -> String { + let mut table_html = format!( + "

{} ({} rows)

", + title, + data.len() + ); + + if data.len() > 0 { + for header in serde_json::to_value(&data[0]) + .unwrap() + .as_object() + .unwrap() + .keys() + { + table_html.push_str(&format!("", header)); + } + table_html.push_str(""); + + for item in data { + table_html.push_str(""); + let item_json = serde_json::to_value(item).unwrap(); + if let Some(obj) = item_json.as_object() { + for (_key, value) in obj.iter() { + table_html + .push_str(&format!("", value.to_string().replace("\"", ""))); + } + } + table_html.push_str(""); + } + } + + table_html.push_str("
{}
{}
"); + table_html +} + #[actix_web::main] async fn main() -> std::io::Result<()> { tracing_subscriber::registry() @@ -201,7 +325,7 @@ async fn main() -> std::io::Result<()> { .init(); let _ = Args::parse(); - let scrapedatadir = xdg_dirs::ensure_scrapedata_dir_exists("ebay_scraper", None); + let scrapedatadir = xdg_dirs::ensure_scrapedata_dir_exists("scraper", None); info!( "Starting with scraped data dir of \"{}\".", scrapedatadir.to_str().unwrap() @@ -216,10 +340,11 @@ async fn main() -> std::io::Result<()> { .service(listing_since_get) .service(parse_post) .service(parse_listings) + .service(admin_get) .app_data(db_mutex.clone()) .app_data(Data::new(scrapedatadir.clone())) }) - .bind(("127.0.0.1", 8080))? + .bind(("0.0.0.0", 9876))? .run() .await } diff --git a/src/parser_ebay.rs b/src/parser_ebay.rs index fa7241b..26b78b5 100644 --- a/src/parser_ebay.rs +++ b/src/parser_ebay.rs @@ -154,7 +154,7 @@ pub fn extract_data_from_html( title, id: 0, item_id: id?, - buy_it_now_price: final_buy_it_now_price, + buy_it_now_price_cents: final_buy_it_now_price.map(|b| (b * 100.0).round() as i64), has_best_offer, image_url, }, @@ -177,7 +177,7 @@ mod tests { #[test_log::test] fn parse() { let timestamp = chrono::DateTime::from_timestamp(1750369463, 0).unwrap(); - let html = include_str!("../test_data/ebay_scraper/raw_scraped/ssd/1750369463.html"); + let html = include_str!("../test_data/scraper/raw_scraped/ssd/1750369463.html"); let parsed = extract_data_from_html(html, ×tamp, "ssd").unwrap(); // assert_eq!(parsed.len(), 62); @@ -189,7 +189,7 @@ mod tests { id: 0, item_id: 388484391867, title: "WD Blue 2.5-Inch 3D NAND SATA SSD 1TB - WDBNCE0010PNC-WRSN".to_string(), - buy_it_now_price: Some(59.99), + buy_it_now_price_cents: Some(5999), has_best_offer: true, image_url: "https://i.ebayimg.com/images/g/wQYAAeSwOTtoN8SC/s-l500.webp" .to_string() @@ -210,7 +210,7 @@ mod tests { title: "Fanxiang M.2 SSD 1TB NVMe PCIe Gen 3x 4 M2 Internal Solid State Drive 3500MB/s" .to_string(), - buy_it_now_price: None, + buy_it_now_price_cents: None, has_best_offer: true, image_url: "https://i.ebayimg.com/images/g/3NoAAeSwPrtoDb1O/s-l500.webp" .to_string() diff --git a/test_data/ebay_scraper/raw_scraped/minipc/1750637320.html b/test_data/ebay_scraper/raw_scraped/minipc/1750637320.html deleted file mode 100644 index e69de29..0000000 diff --git a/test_data/ebay_scraper/raw_scraped/minipc/1750637335.html b/test_data/ebay_scraper/raw_scraped/minipc/1750637335.html deleted file mode 100644 index 7abd0b5..0000000 --- a/test_data/ebay_scraper/raw_scraped/minipc/1750637335.html +++ /dev/null @@ -1,59 +0,0 @@ ---2025-06-22 20:08:55-- https://www.ebay.com/sch/i.html?&_nkw=&_sacat=179&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240 -Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt' -Resolving www.ebay.com (www.ebay.com)... 23.56.163.160 -Connecting to www.ebay.com (www.ebay.com)|23.56.163.160|:443... connected. -HTTP request sent, awaiting response... 200 OK -Length: unspecified [text/html] -Saving to: ‘i.html?&_nkw=&_sacat=179&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240’ - - 0K .......... .......... .......... .......... .......... 6.28M - 50K .......... .......... .......... .......... .......... 76.1K - 100K .......... .......... .......... .......... .......... 18.6M - 150K .......... .......... .......... .......... .......... 12.7M - 200K .......... .......... .......... .......... .......... 34.4M - 250K .......... .......... .......... .......... .......... 25.0M - 300K .......... .......... .......... .......... .......... 41.3M - 350K .......... .......... .......... .......... .......... 114M - 400K .......... .......... .......... .......... .......... 73.4M - 450K .......... .......... .......... .......... .......... 33.5M - 500K .......... .......... .......... .......... .......... 50.2M - 550K .......... .......... .......... .......... .......... 76.2M - 600K .......... .......... .......... .......... .......... 109M - 650K .......... .......... .......... .......... .......... 61.5M - 700K .......... .......... .......... .......... .......... 81.1M - 750K .......... .......... .......... .......... .......... 337M - 800K .......... .......... .......... .......... .......... 118M - 850K .......... .......... .......... .......... .......... 85.5M - 900K .......... .......... .......... .......... .......... 92.6M - 950K .......... .......... .......... .......... .......... 96.7M - 1000K .......... .......... .......... .......... .......... 84.6M - 1050K .......... .......... .......... .......... .......... 500M - 1100K .......... .......... .......... .......... .......... 109M - 1150K .......... .......... .......... .......... .......... 83.5M - 1200K .......... .......... .......... .......... .......... 160M - 1250K .......... .......... .......... .......... .......... 141M - 1300K .......... .......... .......... .......... .......... 41.7M - 1350K .......... .......... .......... .......... .......... 96.4M - 1400K .......... .......... .......... .......... .......... 2.47M - 1450K .......... .......... .......... .......... .......... 36.6M - 1500K .......... .......... .......... .......... .......... 83.5M - 1550K .......... .......... .......... .......... .......... 71.7M - 1600K .......... .......... .......... .......... .......... 37.7M - 1650K .......... .......... .......... .......... .......... 104M - 1700K .......... .......... .......... .......... .......... 73.7M - 1750K .......... .......... .......... .......... .......... 115M - 1800K .......... .......... .......... .......... .......... 85.3M - 1850K .......... .......... .......... .......... .......... 140M - 1900K .......... .......... .......... .......... .......... 71.1M - 1950K .......... .......... .......... .......... .......... 112M - 2000K .......... .......... .......... .......... .......... 75.4M - 2050K .......... .......... .......... .......... .......... 120M - 2100K .......... .......... .......... .......... .......... 112M - 2150K .......... .......... .......... .......... .......... 117M - 2200K .......... .......... .......... .......... .......... 108M - 2250K .......... .......... .......... .......... .......... 97.1M - 2300K .......... .......... .......... .......... .......... 31.8M - 2350K ...... 11.4T=0.7s - -2025-06-22 20:08:56 (3.20 MB/s) - ‘i.html?&_nkw=&_sacat=179&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240’ saved [2412662] - diff --git a/test_data/ebay_scraper/raw_scraped/ssd/1750637297.html b/test_data/ebay_scraper/raw_scraped/ssd/1750637297.html deleted file mode 100644 index 59065fd..0000000 --- a/test_data/ebay_scraper/raw_scraped/ssd/1750637297.html +++ /dev/null @@ -1,63 +0,0 @@ ---2025-06-22 20:08:17-- https://www.ebay.com/sch/i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240 -Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt' -Resolving www.ebay.com (www.ebay.com)... 23.56.163.160 -Connecting to www.ebay.com (www.ebay.com)|23.56.163.160|:443... connected. -HTTP request sent, awaiting response... 200 OK -Length: unspecified [text/html] -Saving to: ‘i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240’ - - 0K .......... .......... .......... .......... .......... 8.04M - 50K .......... .......... .......... .......... .......... 83.3K - 100K .......... .......... .......... .......... .......... 1.38M - 150K .......... .......... .......... .......... .......... 7.12M - 200K .......... .......... .......... .......... .......... 18.8M - 250K .......... .......... .......... .......... .......... 18.0M - 300K .......... .......... .......... .......... .......... 19.4M - 350K .......... .......... .......... .......... .......... 48.4M - 400K .......... .......... .......... .......... .......... 45.9M - 450K .......... .......... .......... .......... .......... 50.4M - 500K .......... .......... .......... .......... .......... 50.1M - 550K .......... .......... .......... .......... .......... 119M - 600K .......... .......... .......... .......... .......... 45.4M - 650K .......... .......... .......... .......... .......... 44.1M - 700K .......... .......... .......... .......... .......... 59.1M - 750K .......... .......... .......... .......... .......... 84.0M - 800K .......... .......... .......... .......... .......... 167M - 850K .......... .......... .......... .......... .......... 76.6M - 900K .......... .......... .......... .......... .......... 59.4M - 950K .......... .......... .......... .......... .......... 60.3M - 1000K .......... .......... .......... .......... .......... 113M - 1050K .......... .......... .......... .......... .......... 592M - 1100K .......... .......... .......... .......... .......... 53.9M - 1150K .......... .......... .......... .......... .......... 101M - 1200K .......... .......... .......... .......... .......... 91.9M - 1250K .......... .......... .......... .......... .......... 108M - 1300K .......... .......... .......... .......... .......... 85.2M - 1350K .......... .......... .......... .......... .......... 96.9M - 1400K .......... .......... .......... .......... .......... 93.5M - 1450K .......... .......... .......... .......... .......... 51.2M - 1500K .......... .......... .......... .......... .......... 69.9M - 1550K .......... .......... .......... .......... .......... 654M - 1600K .......... .......... .......... .......... .......... 185M - 1650K .......... .......... .......... .......... .......... 9.94M - 1700K .......... .......... .......... .......... .......... 27.5M - 1750K .......... .......... .......... .......... .......... 613M - 1800K .......... .......... .......... .......... .......... 659M - 1850K .......... .......... .......... .......... .......... 21.3M - 1900K .......... .......... .......... .......... .......... 107M - 1950K .......... .......... .......... .......... .......... 158M - 2000K .......... .......... .......... .......... .......... 37.8M - 2050K .......... .......... .......... .......... .......... 85.2M - 2100K .......... .......... .......... .......... .......... 26.0M - 2150K .......... .......... .......... .......... .......... 57.1M - 2200K .......... .......... .......... .......... .......... 114M - 2250K .......... .......... .......... .......... .......... 117M - 2300K .......... .......... .......... .......... .......... 57.9M - 2350K .......... .......... .......... .......... .......... 127M - 2400K .......... .......... .......... .......... .......... 118M - 2450K .......... .......... .......... .......... .......... 62.1M - 2500K .......... .......... .......... .......... .......... 157M - 2550K 723G=0.7s - -2025-06-22 20:08:18 (3.60 MB/s) - ‘i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240’ saved [2611588] - diff --git a/test_data/ebay_scraper/raw_scraped/ssd/1750637334.html b/test_data/ebay_scraper/raw_scraped/ssd/1750637334.html deleted file mode 100644 index 4f3f257..0000000 --- a/test_data/ebay_scraper/raw_scraped/ssd/1750637334.html +++ /dev/null @@ -1,63 +0,0 @@ ---2025-06-22 20:08:54-- https://www.ebay.com/sch/i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240 -Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt' -Resolving www.ebay.com (www.ebay.com)... 23.56.163.160 -Connecting to www.ebay.com (www.ebay.com)|23.56.163.160|:443... connected. -HTTP request sent, awaiting response... 200 OK -Length: unspecified [text/html] -Saving to: ‘i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240.2’ - - 0K .......... .......... .......... .......... .......... 4.98M - 50K .......... .......... .......... .......... .......... 75.7K - 100K .......... .......... .......... .......... .......... 4.26M - 150K .......... .......... .......... .......... .......... 10.1M - 200K .......... .......... .......... .......... .......... 25.3M - 250K .......... .......... .......... .......... .......... 27.1M - 300K .......... .......... .......... .......... .......... 37.0M - 350K .......... .......... .......... .......... .......... 31.8M - 400K .......... .......... .......... .......... .......... 58.2M - 450K .......... .......... .......... .......... .......... 44.6M - 500K .......... .......... .......... .......... .......... 40.7M - 550K .......... .......... .......... .......... .......... 48.7M - 600K .......... .......... .......... .......... .......... 719M - 650K .......... .......... .......... .......... .......... 62.0M - 700K .......... .......... .......... .......... .......... 61.0M - 750K .......... .......... .......... .......... .......... 144M - 800K .......... .......... .......... .......... .......... 270M - 850K .......... .......... .......... .......... .......... 36.5M - 900K .......... .......... .......... .......... .......... 64.1M - 950K .......... .......... .......... .......... .......... 204M - 1000K .......... .......... .......... .......... .......... 102M - 1050K .......... .......... .......... .......... .......... 90.0M - 1100K .......... .......... .......... .......... .......... 179M - 1150K .......... .......... .......... .......... .......... 132M - 1200K .......... .......... .......... .......... .......... 84.9M - 1250K .......... .......... .......... .......... .......... 90.3M - 1300K .......... .......... .......... .......... .......... 141M - 1350K .......... .......... .......... .......... .......... 187M - 1400K .......... .......... .......... .......... .......... 116M - 1450K .......... .......... .......... .......... .......... 86.2M - 1500K .......... .......... .......... .......... .......... 118M - 1550K .......... .......... .......... .......... .......... 113M - 1600K .......... .......... .......... .......... .......... 120M - 1650K .......... .......... .......... .......... .......... 113M - 1700K .......... .......... .......... .......... .......... 113M - 1750K .......... .......... .......... .......... .......... 107M - 1800K .......... .......... .......... .......... .......... 113M - 1850K .......... .......... .......... .......... .......... 5.40M - 1900K .......... .......... .......... .......... .......... 93.9M - 1950K .......... .......... .......... .......... .......... 104M - 2000K .......... .......... .......... .......... .......... 85.4M - 2050K .......... .......... .......... .......... .......... 126M - 2100K .......... .......... .......... .......... .......... 27.8M - 2150K .......... .......... .......... .......... .......... 9.09M - 2200K .......... .......... .......... .......... .......... 119M - 2250K .......... .......... .......... .......... .......... 17.0M - 2300K .......... .......... .......... .......... .......... 21.5M - 2350K .......... .......... .......... .......... .......... 128M - 2400K .......... .......... .......... .......... .......... 117M - 2450K .......... .......... .......... .......... .......... 88.9M - 2500K .......... .......... .......... .......... .......... 16.9M - 2550K .. 5.53T=0.7s - -2025-06-22 20:08:55 (3.38 MB/s) - ‘i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240.2’ saved [2614240] - diff --git a/test_data/ebay_scraper/raw_scraped/minipc/url.json b/test_data/scraper/raw_scraped/minipc/url.json similarity index 100% rename from test_data/ebay_scraper/raw_scraped/minipc/url.json rename to test_data/scraper/raw_scraped/minipc/url.json diff --git a/test_data/ebay_scraper/raw_scraped/ssd/1750369463.html b/test_data/scraper/raw_scraped/ssd/1750369463.html similarity index 100% rename from test_data/ebay_scraper/raw_scraped/ssd/1750369463.html rename to test_data/scraper/raw_scraped/ssd/1750369463.html diff --git a/test_data/ebay_scraper/raw_scraped/ssd/url.json b/test_data/scraper/raw_scraped/ssd/url.json similarity index 100% rename from test_data/ebay_scraper/raw_scraped/ssd/url.json rename to test_data/scraper/raw_scraped/ssd/url.json