Compare commits

...

4 Commits

Author SHA1 Message Date
bbca1f3bcb Add an admin based table dump and change buy_it_now_price to cents
All checks were successful
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 3m32s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 3m57s
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 4m2s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 9m17s
2025-06-28 15:49:26 -04:00
e3b018b046 Add simple readme 2025-06-28 15:48:28 -04:00
91d8efe5bb Fix tracing logging to use only tracing and get instrument/spans going 2025-06-28 15:48:09 -04:00
6c6b766b72 Remove _db in func names of db interactions 2025-06-28 13:13:58 -04:00
13 changed files with 341 additions and 314 deletions

43
Cargo.lock generated
View File

@@ -654,9 +654,7 @@ dependencies = [
"chrono",
"clap",
"dirs",
"env_logger",
"lazy_static",
"log",
"regex",
"rusqlite",
"scraper",
@@ -696,7 +694,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
dependencies = [
"log",
"regex",
]
[[package]]
@@ -708,7 +705,6 @@ dependencies = [
"anstream",
"anstyle",
"env_filter",
"jiff",
"log",
]
@@ -1095,30 +1091,6 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jiff"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
dependencies = [
"jiff-static",
"log",
"portable-atomic",
"portable-atomic-util",
"serde",
]
[[package]]
name = "jiff-static"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "jobserver"
version = "0.1.33"
@@ -1453,21 +1425,6 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "portable-atomic"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
[[package]]
name = "portable-atomic-util"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
dependencies = [
"portable-atomic",
]
[[package]]
name = "potential_utf"
version = "0.1.2"

View File

@@ -8,17 +8,15 @@ actix-web = "4.11.0"
chrono = { version = "0.4.41", features = ["serde"] }
clap = { version = "4.5.40", features = ["derive"] }
dirs = "6.0.0"
env_logger = "0.11.8"
lazy_static = "1.5.0"
log = "0.4.27"
regex = "1.11.1"
rusqlite = { version = "0.36.0", features = ["bundled", "chrono"] }
scraper = "0.23.1"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
test-log = { version = "0.2.17", features = ["trace"] }
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
tracing = { version = "0.1.41", features = ["attributes"] }
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
[dev-dependencies]
similar-asserts = "1.7.0"

12
readme.md Normal file
View File

@@ -0,0 +1,12 @@
# Scraper Web API
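This is a dumb little tool which ingests raw HTML files, does some parsing on them, and serves the results over a web API.
Example session against a running instance (requests are made with the [HTTPie](https://httpie.io/) `http` command):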
```bash
export URL_BASE="http://scraper.homelab.hak8or.com:8080"; \
echo run0 && http POST "$URL_BASE/page/parse/ssd" && \
echo run1 && http POST "$URL_BASE/listing/parse" && \
echo run2 && http GET "$URL_BASE/listing/since/12345678/2" && \
echo run3 && http GET "$URL_BASE/listing/388484391867" && \
echo run4 && http GET "$URL_BASE/listing/286605201240/history"
```

138
src/db.rs
View File

@@ -4,7 +4,7 @@ use serde::Serialize;
use std::path::Path;
use tracing::info;
trait DBTable {
pub trait DBTable {
const TABLE_NAME: &'static str;
const TABLE_SCHEMA: &'static str;
fn initialize(conn: &Connection) {
@@ -19,6 +19,10 @@ trait DBTable {
info!("{} ({})", Self::TABLE_NAME, Self::TABLE_SCHEMA);
conn.execute(create_table, ()).unwrap();
}
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>>
where
Self: Sized;
}
#[derive(Serialize, Debug, PartialEq, Clone)]
@@ -32,6 +36,22 @@ impl DBTable for SearchURL {
id INTEGER PRIMARY KEY,
url TEXT NOT NULL UNIQUE,
name TEXT NOT NULL UNIQUE";
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
let mut stmt = conn.prepare(&format!("SELECT url, name FROM {}", Self::TABLE_NAME))?;
let iter = stmt.query_map([], |row| {
Ok(SearchURL {
full_url: row.get(0)?,
name: row.get(1)?,
})
})?;
let mut result = Vec::new();
for item in iter {
result.push(item?);
}
Ok(result)
}
}
impl SearchURL {
pub fn lookup(conn: &Connection, name: &str) -> Option<Self> {
@@ -91,9 +111,28 @@ impl DBTable for ParsedPage {
UNIQUE(category, timestamp)
FOREIGN KEY(category) REFERENCES SearchURLs(name)
";
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
let mut stmt = conn.prepare(&format!(
"SELECT category, timestamp FROM {}",
Self::TABLE_NAME
))?;
let iter = stmt.query_map([], |row| {
Ok(ParsedPage {
category: row.get(0)?,
timestamp: row.get(1)?,
})
})?;
let mut result = Vec::new();
for item in iter {
result.push(item?);
}
Ok(result)
}
}
impl ParsedPage {
pub fn lookup_db(conn: &Connection, timestamp: DateTime<Utc>) -> Option<Self> {
pub fn lookup(conn: &Connection, timestamp: DateTime<Utc>) -> Option<Self> {
let mut stmt = conn
.prepare(&format!(
"SELECT * FROM {} WHERE timestamp = ?",
@@ -110,7 +149,7 @@ impl ParsedPage {
.ok()
}
pub fn add_or_update_db(&self, conn: &Connection) {
pub fn add_or_update(&self, conn: &Connection) {
let _ = conn
.execute(
&format!(
@@ -146,9 +185,33 @@ impl DBTable for ParsedStorage {
UNIQUE(item, parse_engine)
FOREIGN KEY(item) REFERENCES Listings(item_id)
";
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
let mut stmt = conn.prepare(&format!("SELECT id, item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, need_description_check FROM {}", Self::TABLE_NAME))?;
let iter = stmt.query_map([], |row| {
Ok(ParsedStorage {
id: row.get(0)?,
item: row.get(1)?,
total_gigabytes: row.get(2)?,
quantity: row.get(3)?,
individual_size_gigabytes: {
let r: String = row.get(4)?;
r.parse().unwrap_or(0)
},
parse_engine: row.get(5)?,
needed_description_check: row.get(6)?,
})
})?;
let mut result = Vec::new();
for item in iter {
result.push(item?);
}
Ok(result)
}
}
impl ParsedStorage {
pub fn lookup_db(conn: &Connection, item: i64) -> Vec<ParsedStorage> {
pub fn lookup(conn: &Connection, item: i64) -> Vec<ParsedStorage> {
let mut stmt = conn
.prepare(&format!("SELECT * FROM {} WHERE id = ?", Self::TABLE_NAME))
.ok()
@@ -173,7 +236,7 @@ impl ParsedStorage {
.collect()
}
pub fn add_or_update_db(&self, conn: &Connection) {
pub fn add_or_update(&self, conn: &Connection) {
let _ = conn.execute(&format!("
INSERT OR REPLACE INTO {}
(item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, need_description_check)
@@ -211,6 +274,27 @@ impl DBTable for ItemAppearances {
FOREIGN KEY(item) REFERENCES Listings(item_id),
FOREIGN KEY(category, timestamp) REFERENCES Pages_Parsed(category, timestamp)
";
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
let mut stmt = conn.prepare(&format!(
"SELECT item, category, timestamp, current_bid_usd_cents FROM {}",
Self::TABLE_NAME
))?;
let iter = stmt.query_map([], |row| {
Ok(ItemAppearances {
item: row.get(0)?,
category: row.get(1)?,
timestamp: row.get(2)?,
current_bid_usd_cents: row.get(3)?,
})
})?;
let mut result = Vec::new();
for item in iter {
result.push(item?);
}
Ok(result)
}
}
impl ItemAppearances {
pub fn add_or_update(&self, conn: &Connection) {
@@ -267,7 +351,7 @@ pub struct Listing {
pub id: i64,
pub item_id: i64,
pub title: String,
pub buy_it_now_price: Option<f64>,
pub buy_it_now_price_cents: Option<i64>,
pub has_best_offer: bool,
pub image_url: String,
}
@@ -281,6 +365,29 @@ impl DBTable for Listing {
has_best_offer INTEGER NOT NULL,
image_url TEXT NOT NULL
";
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
let mut stmt = conn.prepare(&format!(
"SELECT id, item_id, title, buy_it_now_usd_cents, has_best_offer, image_url FROM {}",
Self::TABLE_NAME
))?;
let iter = stmt.query_map([], |row| {
Ok(Listing {
id: row.get(0)?,
item_id: row.get(1)?,
title: row.get(2)?,
buy_it_now_price_cents: row.get(3)?,
has_best_offer: row.get(4)?,
image_url: row.get(5)?,
})
})?;
let mut result = Vec::new();
for item in iter {
result.push(item?);
}
Ok(result)
}
}
impl Listing {
pub fn lookup(conn: &Connection, item_id: i64) -> Option<Listing> {
@@ -295,7 +402,7 @@ impl Listing {
id: row.get(0)?,
item_id: row.get(1)?,
title: row.get(2)?,
buy_it_now_price: row.get(3)?,
buy_it_now_price_cents: row.get(3)?,
has_best_offer: row.get(4)?,
image_url: row.get(5)?,
})
@@ -329,7 +436,7 @@ impl Listing {
id: row.get(0)?,
item_id: row.get(1)?,
title: row.get(2)?,
buy_it_now_price: row.get(3)?,
buy_it_now_price_cents: row.get(3)?,
has_best_offer: row.get(4)?,
image_url: row.get(5)?,
})
@@ -377,7 +484,7 @@ impl Listing {
(
self.item_id,
&self.title,
self.buy_it_now_price,
self.buy_it_now_price_cents,
self.has_best_offer,
self.image_url.clone(),
),
@@ -424,7 +531,7 @@ mod tests {
id: 1,
item_id: 1234,
title: "Some Title".to_string(),
buy_it_now_price: Some(1.23),
buy_it_now_price_cents: Some(123),
has_best_offer: false,
image_url: "google.com".to_string(),
};
@@ -440,18 +547,15 @@ mod tests {
parse_engine: 9,
needed_description_check: true,
};
parsed.add_or_update_db(&db);
assert_eq!(ParsedStorage::lookup_db(&db, listing.id), vec![parsed]);
parsed.add_or_update(&db);
assert_eq!(ParsedStorage::lookup(&db, listing.id), vec![parsed]);
let page = ParsedPage {
category: "ssd".to_owned(),
timestamp: std::time::SystemTime::now().into(),
};
page.add_or_update_db(&db);
assert_eq!(
ParsedPage::lookup_db(&db, page.timestamp),
Some(page.clone())
);
page.add_or_update(&db);
assert_eq!(ParsedPage::lookup(&db, page.timestamp), Some(page.clone()));
let apperance = ItemAppearances {
item: listing.item_id,

View File

@@ -2,12 +2,19 @@ use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
use chrono::DateTime;
use clap::Parser;
use ebay_scraper_rust::db::{
ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
DBTable, ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
};
use ebay_scraper_rust::{parser_ebay, parser_storage};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use tracing::info;
use std::time::Instant;
use tracing::{error, info, instrument};
use tracing_subscriber::filter::EnvFilter;
use tracing_subscriber::fmt;
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
mod xdg_dirs;
@@ -24,7 +31,7 @@ async fn page_get(
db: Data<Mutex<rusqlite::Connection>>,
timestamp: web::Path<i64>,
) -> Result<impl Responder> {
Ok(web::Json(ParsedPage::lookup_db(
Ok(web::Json(ParsedPage::lookup(
&db.lock().unwrap(),
chrono::DateTime::from_timestamp(*timestamp, 0).unwrap(),
)))
@@ -71,7 +78,7 @@ async fn parse_listings(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Re
.iter()
.map(|l| parser_storage::parse_size_and_quantity(l.0, &l.1))
.inspect(|_| cnt = cnt + 1)
.for_each(|ps| ps.add_or_update_db(&db_unlocked));
.for_each(|ps| ps.add_or_update(&db_unlocked));
Ok(web::Json(cnt))
}
@@ -81,10 +88,7 @@ async fn listing_parse_get(
db: Data<Mutex<rusqlite::Connection>>,
id: web::Path<i64>,
) -> Result<impl Responder> {
Ok(web::Json(ParsedStorage::lookup_db(
&db.lock().unwrap(),
*id,
)))
Ok(web::Json(ParsedStorage::lookup(&db.lock().unwrap(), *id)))
}
pub fn timestamps_from_dir(path: &Path) -> Vec<i64> {
@@ -97,7 +101,6 @@ pub fn timestamps_from_dir(path: &Path) -> Vec<i64> {
std::fs::read_dir(path)
.unwrap()
.inspect(|fpath| info!("Found {:?}", fpath))
.map(|fpath| fpath.unwrap().path())
.filter_map(|fstem| {
fstem
@@ -111,6 +114,7 @@ pub fn timestamps_from_dir(path: &Path) -> Vec<i64> {
}
#[post("page/parse/{category}")]
#[instrument(skip_all)]
async fn parse_post(
db: Data<Mutex<rusqlite::Connection>>,
downloaddir: Data<PathBuf>,
@@ -119,73 +123,209 @@ async fn parse_post(
let dir = &downloaddir.join(category.clone());
// Ensure the category is created.
let url: serde_json::Value =
serde_json::from_str(&std::fs::read_to_string(dir.join("url.json")).unwrap()).unwrap();
info!("{:?}", url);
let url_fpath = dir.join("url.json");
let url_contents = std::fs::read_to_string(&url_fpath)
.inspect_err(|e| error!("Failed reading {}: {e}", url_fpath.display()))?;
#[derive(Deserialize)]
struct URLJSON {
url: String,
}
let su = SearchURL {
full_url: url.to_string(),
full_url: serde_json::from_str::<URLJSON>(&url_contents).unwrap().url,
name: category.to_string(),
};
info!("{:?}", su);
su.add_or_update(&db.lock().unwrap());
let added: u64 = timestamps_from_dir(dir)
.iter()
.filter(|t| {
info!("Checking for the existance of page {t}");
let p = ParsedPage::lookup_db(
&db.lock().unwrap(),
chrono::DateTime::from_timestamp(**t, 0).unwrap(),
// Find all pages.
let pages = timestamps_from_dir(dir);
// See what pages haven't been seen before.
let to_parse = pages.iter().filter(|t| {
let ts = chrono::DateTime::from_timestamp(**t, 0).unwrap();
let p = ParsedPage::lookup(&db.lock().unwrap(), ts);
// Timestamp never seen before, lets pass it on.
if p.is_none() {
info!(
"Page of timestamp:{} and catagory:{category} never seen before, processing ...",
ts.timestamp()
);
// Timestamp never seen before, lets pass it on.
if p.is_none() {
return true;
}
// Timestamp was seen before *and* from the same catagory, don't pass
// it on.
if p.unwrap().category == *category {
return false;
}
return true;
})
.map(|t| {
let timestamp = chrono::DateTime::from_timestamp(*t, 0).unwrap();
info!("Adding or updating db with timestamp:{timestamp} catagory:{category}");
ParsedPage {
timestamp: timestamp,
category: category.to_string(),
}
.add_or_update_db(&db.lock().unwrap());
}
let mut cnt = 0;
parser_ebay::extract_data_from_html(
&std::fs::read_to_string(dir.join(format!("{t}.html"))).unwrap(),
&timestamp,
&category,
)
// Timestamp was seen before *and* from the same catagory, don't pass
// it on.
if p.unwrap().category == *category {
info!(
"Page of timestamp:{} and catagory:{category} seen before, skipping ...",
ts.timestamp()
);
return false;
}
info!(
"Page of timestamp:{} seen before, but not of catagory:{category}, processing ...",
ts.timestamp()
);
return true;
});
let mut added_count = 0;
for p in to_parse {
let ts = chrono::DateTime::from_timestamp(*p, 0).unwrap();
ParsedPage {
timestamp: ts,
category: category.to_string(),
}
.add_or_update(&db.lock().unwrap());
let page_path = dir.join(format!("{}.html", ts.timestamp()));
let page_contents = std::fs::read_to_string(&page_path)
.inspect_err(|e| error!("Failed reading {}, error:{e}", page_path.display()))?;
let elements = parser_ebay::extract_data_from_html(&page_contents, &ts, &category).unwrap();
info!(
"Page {} contains {} elements",
ts.timestamp(),
elements.len()
);
added_count += elements.len();
for e in elements {
e.0.add_or_update(&db.lock().unwrap());
e.1.add_or_update(&db.lock().unwrap());
info!(
"From page {}, inserting id:{}, title:{}",
ts.timestamp(),
e.0.item_id,
e.0.title
);
}
}
info!("Added {added_count} listings");
Ok(added_count.to_string())
}
#[get("admin")]
async fn admin_get(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
let db = db.lock().unwrap();
let query_start_time = Instant::now();
let search_urls = SearchURL::get_all(&db).unwrap_or_default();
let parsed_pages = ParsedPage::get_all(&db).unwrap_or_default();
let parsed_storages = ParsedStorage::get_all(&db).unwrap_or_default();
let item_appearances = ItemAppearances::get_all(&db).unwrap_or_default();
let listings = Listing::get_all(&db).unwrap_or_default();
let total_query_time = query_start_time.elapsed().as_micros() as f64 / 1000.0;
let html_gen_start_time = Instant::now();
let mut html = String::new();
html.push_str(
r#"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Database Dump</title>
<link rel="stylesheet" href="https://unpkg.com/purecss@2.0.6/build/pure-min.css" xintegrity="sha384-Uu6IeWbM+gzNVXJcM9XV3SohHtmWE+3VGi496jvgX1jyvDTWuaAUiIEoIeVVERG" crossorigin="anonymous">
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.tablesorter/2.31.3/js/jquery.tablesorter.min.js"></script>
<style>
body { padding: 1em; }
.pure-table { margin-bottom: 2em; }
th { cursor: pointer; }
</style>
</head>
<body>
<h1>Database Dump</h1>
"#,
);
// Performance Metrics
let html_gen_time = html_gen_start_time.elapsed().as_micros() as f64 / 1000.0;
html.push_str(&format!(
r#"<p>
Database query time: <strong>{}ms</strong><br>
HTML generation time: <strong>{}ms</strong>
</p>"#,
total_query_time, html_gen_time
));
info!("DB Query ms: {total_query_time}, HTML Generation ms:{html_gen_time}");
// --- Tables ---
// SearchURLs
html.push_str(&generate_table("SearchURLs", &search_urls));
// ParsedPages
html.push_str(&generate_table("Pages_Parsed", &parsed_pages));
// ParsedStorage
html.push_str(&generate_table("Storage_Parsed", &parsed_storages));
// ItemAppearances
html.push_str(&generate_table("Item_Appearances", &item_appearances));
// Listings
html.push_str(&generate_table("Listings", &listings));
// Footer and Scripts
html.push_str(
r#"
<script>
$(function() {
$(".sortable-table").tablesorter();
});
</script>
</body>
</html>"#,
);
Ok(web::Html::new(&html))
}
fn generate_table<T: Serialize>(title: &str, data: &[T]) -> String {
let mut table_html = format!(
"<h2>{} ({} rows)</h2><table class='pure-table pure-table-bordered sortable-table'><thead><tr>",
title,
data.len()
);
if data.len() > 0 {
for header in serde_json::to_value(&data[0])
.unwrap()
.iter()
.for_each(|lp| {
cnt = cnt + 1;
lp.0.add_or_update(&db.lock().unwrap());
lp.1.add_or_update(&db.lock().unwrap());
info!("Inserting id:{}, title:{}", lp.0.item_id, lp.0.title);
});
cnt
})
.sum();
.as_object()
.unwrap()
.keys()
{
table_html.push_str(&format!("<th>{}</th>", header));
}
table_html.push_str("</tr></thead><tbody>");
Ok(added.to_string())
for item in data {
table_html.push_str("<tr>");
let item_json = serde_json::to_value(item).unwrap();
if let Some(obj) = item_json.as_object() {
for (_key, value) in obj.iter() {
table_html
.push_str(&format!("<td>{}</td>", value.to_string().replace("\"", "")));
}
}
table_html.push_str("</tr>");
}
}
table_html.push_str("</tbody></table>");
table_html
}
#[actix_web::main]
async fn main() -> std::io::Result<()> {
env_logger::init_from_env(env_logger::Env::new().default_filter_or("info"));
tracing_subscriber::registry()
.with(fmt::layer())
.with(EnvFilter::from_default_env())
.init();
let _ = Args::parse();
let scrapedatadir = xdg_dirs::ensure_scrapedata_dir_exists("ebay_scraper", None);
let scrapedatadir = xdg_dirs::ensure_scrapedata_dir_exists("scraper", None);
info!(
"Starting with scraped data dir of \"{}\".",
scrapedatadir.to_str().unwrap()
@@ -200,10 +340,11 @@ async fn main() -> std::io::Result<()> {
.service(listing_since_get)
.service(parse_post)
.service(parse_listings)
.service(admin_get)
.app_data(db_mutex.clone())
.app_data(Data::new(scrapedatadir.clone()))
})
.bind(("127.0.0.1", 8080))?
.bind(("0.0.0.0", 9876))?
.run()
.await
}

View File

@@ -154,7 +154,7 @@ pub fn extract_data_from_html(
title,
id: 0,
item_id: id?,
buy_it_now_price: final_buy_it_now_price,
buy_it_now_price_cents: final_buy_it_now_price.map(|b| (b * 100.0).round() as i64),
has_best_offer,
image_url,
},
@@ -177,7 +177,7 @@ mod tests {
#[test_log::test]
fn parse() {
let timestamp = chrono::DateTime::from_timestamp(1750369463, 0).unwrap();
let html = include_str!("../test_data/ebay_scraper/raw_scraped/ssd/1750369463.html");
let html = include_str!("../test_data/scraper/raw_scraped/ssd/1750369463.html");
let parsed = extract_data_from_html(html, &timestamp, "ssd").unwrap();
// assert_eq!(parsed.len(), 62);
@@ -189,7 +189,7 @@ mod tests {
id: 0,
item_id: 388484391867,
title: "WD Blue 2.5-Inch 3D NAND SATA SSD 1TB - WDBNCE0010PNC-WRSN".to_string(),
buy_it_now_price: Some(59.99),
buy_it_now_price_cents: Some(5999),
has_best_offer: true,
image_url: "https://i.ebayimg.com/images/g/wQYAAeSwOTtoN8SC/s-l500.webp"
.to_string()
@@ -210,7 +210,7 @@ mod tests {
title:
"Fanxiang M.2 SSD 1TB NVMe PCIe Gen 3x 4 M2 Internal Solid State Drive 3500MB/s"
.to_string(),
buy_it_now_price: None,
buy_it_now_price_cents: None,
has_best_offer: true,
image_url: "https://i.ebayimg.com/images/g/3NoAAeSwPrtoDb1O/s-l500.webp"
.to_string()

View File

@@ -1,59 +0,0 @@
--2025-06-22 20:08:55-- https://www.ebay.com/sch/i.html?&_nkw=&_sacat=179&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240
Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'
Resolving www.ebay.com (www.ebay.com)... 23.56.163.160
Connecting to www.ebay.com (www.ebay.com)|23.56.163.160|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: i.html?&_nkw=&_sacat=179&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240
0K .......... .......... .......... .......... .......... 6.28M
50K .......... .......... .......... .......... .......... 76.1K
100K .......... .......... .......... .......... .......... 18.6M
150K .......... .......... .......... .......... .......... 12.7M
200K .......... .......... .......... .......... .......... 34.4M
250K .......... .......... .......... .......... .......... 25.0M
300K .......... .......... .......... .......... .......... 41.3M
350K .......... .......... .......... .......... .......... 114M
400K .......... .......... .......... .......... .......... 73.4M
450K .......... .......... .......... .......... .......... 33.5M
500K .......... .......... .......... .......... .......... 50.2M
550K .......... .......... .......... .......... .......... 76.2M
600K .......... .......... .......... .......... .......... 109M
650K .......... .......... .......... .......... .......... 61.5M
700K .......... .......... .......... .......... .......... 81.1M
750K .......... .......... .......... .......... .......... 337M
800K .......... .......... .......... .......... .......... 118M
850K .......... .......... .......... .......... .......... 85.5M
900K .......... .......... .......... .......... .......... 92.6M
950K .......... .......... .......... .......... .......... 96.7M
1000K .......... .......... .......... .......... .......... 84.6M
1050K .......... .......... .......... .......... .......... 500M
1100K .......... .......... .......... .......... .......... 109M
1150K .......... .......... .......... .......... .......... 83.5M
1200K .......... .......... .......... .......... .......... 160M
1250K .......... .......... .......... .......... .......... 141M
1300K .......... .......... .......... .......... .......... 41.7M
1350K .......... .......... .......... .......... .......... 96.4M
1400K .......... .......... .......... .......... .......... 2.47M
1450K .......... .......... .......... .......... .......... 36.6M
1500K .......... .......... .......... .......... .......... 83.5M
1550K .......... .......... .......... .......... .......... 71.7M
1600K .......... .......... .......... .......... .......... 37.7M
1650K .......... .......... .......... .......... .......... 104M
1700K .......... .......... .......... .......... .......... 73.7M
1750K .......... .......... .......... .......... .......... 115M
1800K .......... .......... .......... .......... .......... 85.3M
1850K .......... .......... .......... .......... .......... 140M
1900K .......... .......... .......... .......... .......... 71.1M
1950K .......... .......... .......... .......... .......... 112M
2000K .......... .......... .......... .......... .......... 75.4M
2050K .......... .......... .......... .......... .......... 120M
2100K .......... .......... .......... .......... .......... 112M
2150K .......... .......... .......... .......... .......... 117M
2200K .......... .......... .......... .......... .......... 108M
2250K .......... .......... .......... .......... .......... 97.1M
2300K .......... .......... .......... .......... .......... 31.8M
2350K ...... 11.4T=0.7s
2025-06-22 20:08:56 (3.20 MB/s) - i.html?&_nkw=&_sacat=179&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240 saved [2412662]

View File

@@ -1,63 +0,0 @@
--2025-06-22 20:08:17-- https://www.ebay.com/sch/i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240
Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'
Resolving www.ebay.com (www.ebay.com)... 23.56.163.160
Connecting to www.ebay.com (www.ebay.com)|23.56.163.160|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240
0K .......... .......... .......... .......... .......... 8.04M
50K .......... .......... .......... .......... .......... 83.3K
100K .......... .......... .......... .......... .......... 1.38M
150K .......... .......... .......... .......... .......... 7.12M
200K .......... .......... .......... .......... .......... 18.8M
250K .......... .......... .......... .......... .......... 18.0M
300K .......... .......... .......... .......... .......... 19.4M
350K .......... .......... .......... .......... .......... 48.4M
400K .......... .......... .......... .......... .......... 45.9M
450K .......... .......... .......... .......... .......... 50.4M
500K .......... .......... .......... .......... .......... 50.1M
550K .......... .......... .......... .......... .......... 119M
600K .......... .......... .......... .......... .......... 45.4M
650K .......... .......... .......... .......... .......... 44.1M
700K .......... .......... .......... .......... .......... 59.1M
750K .......... .......... .......... .......... .......... 84.0M
800K .......... .......... .......... .......... .......... 167M
850K .......... .......... .......... .......... .......... 76.6M
900K .......... .......... .......... .......... .......... 59.4M
950K .......... .......... .......... .......... .......... 60.3M
1000K .......... .......... .......... .......... .......... 113M
1050K .......... .......... .......... .......... .......... 592M
1100K .......... .......... .......... .......... .......... 53.9M
1150K .......... .......... .......... .......... .......... 101M
1200K .......... .......... .......... .......... .......... 91.9M
1250K .......... .......... .......... .......... .......... 108M
1300K .......... .......... .......... .......... .......... 85.2M
1350K .......... .......... .......... .......... .......... 96.9M
1400K .......... .......... .......... .......... .......... 93.5M
1450K .......... .......... .......... .......... .......... 51.2M
1500K .......... .......... .......... .......... .......... 69.9M
1550K .......... .......... .......... .......... .......... 654M
1600K .......... .......... .......... .......... .......... 185M
1650K .......... .......... .......... .......... .......... 9.94M
1700K .......... .......... .......... .......... .......... 27.5M
1750K .......... .......... .......... .......... .......... 613M
1800K .......... .......... .......... .......... .......... 659M
1850K .......... .......... .......... .......... .......... 21.3M
1900K .......... .......... .......... .......... .......... 107M
1950K .......... .......... .......... .......... .......... 158M
2000K .......... .......... .......... .......... .......... 37.8M
2050K .......... .......... .......... .......... .......... 85.2M
2100K .......... .......... .......... .......... .......... 26.0M
2150K .......... .......... .......... .......... .......... 57.1M
2200K .......... .......... .......... .......... .......... 114M
2250K .......... .......... .......... .......... .......... 117M
2300K .......... .......... .......... .......... .......... 57.9M
2350K .......... .......... .......... .......... .......... 127M
2400K .......... .......... .......... .......... .......... 118M
2450K .......... .......... .......... .......... .......... 62.1M
2500K .......... .......... .......... .......... .......... 157M
2550K 723G=0.7s
2025-06-22 20:08:18 (3.60 MB/s) - i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240 saved [2611588]

View File

@@ -1,63 +0,0 @@
--2025-06-22 20:08:54-- https://www.ebay.com/sch/i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240
Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'
Resolving www.ebay.com (www.ebay.com)... 23.56.163.160
Connecting to www.ebay.com (www.ebay.com)|23.56.163.160|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240.2
0K .......... .......... .......... .......... .......... 4.98M
50K .......... .......... .......... .......... .......... 75.7K
100K .......... .......... .......... .......... .......... 4.26M
150K .......... .......... .......... .......... .......... 10.1M
200K .......... .......... .......... .......... .......... 25.3M
250K .......... .......... .......... .......... .......... 27.1M
300K .......... .......... .......... .......... .......... 37.0M
350K .......... .......... .......... .......... .......... 31.8M
400K .......... .......... .......... .......... .......... 58.2M
450K .......... .......... .......... .......... .......... 44.6M
500K .......... .......... .......... .......... .......... 40.7M
550K .......... .......... .......... .......... .......... 48.7M
600K .......... .......... .......... .......... .......... 719M
650K .......... .......... .......... .......... .......... 62.0M
700K .......... .......... .......... .......... .......... 61.0M
750K .......... .......... .......... .......... .......... 144M
800K .......... .......... .......... .......... .......... 270M
850K .......... .......... .......... .......... .......... 36.5M
900K .......... .......... .......... .......... .......... 64.1M
950K .......... .......... .......... .......... .......... 204M
1000K .......... .......... .......... .......... .......... 102M
1050K .......... .......... .......... .......... .......... 90.0M
1100K .......... .......... .......... .......... .......... 179M
1150K .......... .......... .......... .......... .......... 132M
1200K .......... .......... .......... .......... .......... 84.9M
1250K .......... .......... .......... .......... .......... 90.3M
1300K .......... .......... .......... .......... .......... 141M
1350K .......... .......... .......... .......... .......... 187M
1400K .......... .......... .......... .......... .......... 116M
1450K .......... .......... .......... .......... .......... 86.2M
1500K .......... .......... .......... .......... .......... 118M
1550K .......... .......... .......... .......... .......... 113M
1600K .......... .......... .......... .......... .......... 120M
1650K .......... .......... .......... .......... .......... 113M
1700K .......... .......... .......... .......... .......... 113M
1750K .......... .......... .......... .......... .......... 107M
1800K .......... .......... .......... .......... .......... 113M
1850K .......... .......... .......... .......... .......... 5.40M
1900K .......... .......... .......... .......... .......... 93.9M
1950K .......... .......... .......... .......... .......... 104M
2000K .......... .......... .......... .......... .......... 85.4M
2050K .......... .......... .......... .......... .......... 126M
2100K .......... .......... .......... .......... .......... 27.8M
2150K .......... .......... .......... .......... .......... 9.09M
2200K .......... .......... .......... .......... .......... 119M
2250K .......... .......... .......... .......... .......... 17.0M
2300K .......... .......... .......... .......... .......... 21.5M
2350K .......... .......... .......... .......... .......... 128M
2400K .......... .......... .......... .......... .......... 117M
2450K .......... .......... .......... .......... .......... 88.9M
2500K .......... .......... .......... .......... .......... 16.9M
2550K .. 5.53T=0.7s
2025-06-22 20:08:55 (3.38 MB/s) - i.html?&_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10&_ipg=240.2 saved [2614240]