All checks were successful
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 3m49s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 4m2s
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 4m17s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 9m36s
316 lines
10 KiB
Rust
316 lines
10 KiB
Rust
use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
|
|
use chrono::{DateTime, Utc};
|
|
use clap::Parser;
|
|
use ebay_scraper_rust::db::{
|
|
DBTable, ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
|
|
get_stats, listings_get_filtered,
|
|
};
|
|
use ebay_scraper_rust::parser::parse_dir;
|
|
use ebay_scraper_rust::{parser_storage_e0, parser_storage_e1};
|
|
use serde::{Deserialize, Serialize};
|
|
use std::path::PathBuf;
|
|
use std::sync::Mutex;
|
|
use std::time::Instant;
|
|
use tracing::{info, instrument};
|
|
|
|
use tracing_subscriber::filter::EnvFilter;
|
|
use tracing_subscriber::fmt;
|
|
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;
|
|
use tracing_subscriber::util::SubscriberInitExt;
|
|
|
|
mod xdg_dirs;
|
|
|
|
/// Command-line arguments. Currently empty — `clap` still provides
/// `--help`/`--version` handling from the metadata below.
#[derive(Parser, Debug)]
#[clap(
    name = "ebay-scraper-rust",
    version = "0.1.0",
    about = "Scrapes eBay search results for homelab purposes"
)]
struct Args {}
|
|
|
|
/// Query-string parameters accepted by `GET /listings`.
#[derive(Deserialize, Debug)]
struct ListingsFilter {
    // Only return listings seen at or after this UNIX timestamp (seconds).
    // Defaults to 0 (the epoch) when absent.
    since: Option<i64>,
    // Maximum number of rows to return; defaults to 1_000 when absent.
    limit: Option<i64>,
    // Upper bound on price per terabyte, in US cents; defaults to 100_00.
    cents_per_tbytes_max: Option<i64>,
}
|
|
|
|
#[get("/listings")]
|
|
async fn listings_filtered_get(
|
|
db: Data<Mutex<rusqlite::Connection>>,
|
|
filter: web::Query<ListingsFilter>,
|
|
) -> Result<impl Responder> {
|
|
let start = Instant::now();
|
|
let res = listings_get_filtered(
|
|
&db.lock().unwrap(),
|
|
&DateTime::<Utc>::from_timestamp(filter.since.unwrap_or(0), 0).unwrap(),
|
|
filter.limit.unwrap_or(1_000),
|
|
filter.cents_per_tbytes_max.unwrap_or(100_00),
|
|
);
|
|
let elapsed = start.elapsed().as_micros() as f64 / 1000.0;
|
|
info!(
|
|
"Took {elapsed} milliseconds with {} listings found for a filter of {:?}",
|
|
res.len(),
|
|
filter
|
|
);
|
|
Ok(web::Json(res))
|
|
}
|
|
|
|
#[get("/listing/{id}")]
|
|
async fn listing_get(
|
|
db: Data<Mutex<rusqlite::Connection>>,
|
|
id: web::Path<i64>,
|
|
) -> Result<impl Responder> {
|
|
Ok(web::Json(Listing::lookup(&db.lock().unwrap(), *id)))
|
|
}
|
|
|
|
#[get("/listing/{id}/parsed")]
|
|
async fn listing_parse_get(
|
|
db: Data<Mutex<rusqlite::Connection>>,
|
|
id: web::Path<i64>,
|
|
) -> Result<impl Responder> {
|
|
Ok(web::Json(ParsedStorage::lookup(&db.lock().unwrap(), *id)))
|
|
}
|
|
|
|
/// One point in a listing's bid history, as serialized by
/// `GET /listing/{id}/history`.
#[derive(Serialize)]
struct APIHistory {
    // When this appearance of the listing was recorded.
    when: DateTime<Utc>,
    // The bid observed at that time, in US cents.
    current_bid_usd_cents: i64,
}
|
|
|
|
#[get("/listing/{id}/history")]
|
|
async fn listing_history_get(
|
|
db: Data<Mutex<rusqlite::Connection>>,
|
|
id: web::Path<i64>,
|
|
) -> Result<impl Responder> {
|
|
let history: Vec<_> = ItemAppearances::lookup(&db.lock().unwrap(), *id)
|
|
.iter()
|
|
// .inspect(|e| info!("got: {:?}", e))
|
|
.filter_map(|e| {
|
|
Some(APIHistory {
|
|
when: e.timestamp,
|
|
current_bid_usd_cents: e.current_bid_usd_cents?,
|
|
})
|
|
})
|
|
.collect();
|
|
Ok(web::Json(history))
|
|
}
|
|
|
|
#[post("/listing/parse")]
|
|
async fn parse_listings(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
|
let mut cnt = 0;
|
|
let db_unlocked = db.lock().unwrap();
|
|
Listing::lookup_non_parsed(&db_unlocked)
|
|
.iter()
|
|
.map(|l| parser_storage_e0::parse_size_and_quantity(l.0, &l.1))
|
|
.inspect(|_| cnt = cnt + 1)
|
|
.for_each(|ps| ps.add_or_update(&db_unlocked));
|
|
|
|
Ok(web::Json(cnt))
|
|
}
|
|
|
|
#[get("/category")]
|
|
async fn category_getnames(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
|
Ok(web::Json(SearchURL::names(&db.lock().unwrap())))
|
|
}
|
|
|
|
#[post("/category/{category}/parse")]
|
|
#[instrument(skip_all)]
|
|
async fn category_parse(
|
|
db: Data<Mutex<rusqlite::Connection>>,
|
|
downloaddir: Data<PathBuf>,
|
|
category: web::Path<String>,
|
|
) -> Result<impl Responder> {
|
|
let start = Instant::now();
|
|
let count = parse_dir(
|
|
&downloaddir.join(category.clone()),
|
|
&category,
|
|
&db.lock().unwrap(),
|
|
)
|
|
.unwrap();
|
|
let elapsed = start.elapsed().as_micros() as f64 / 1000.0;
|
|
|
|
info!("Added {count} listings, took {elapsed} ms.");
|
|
Ok(count.to_string())
|
|
}
|
|
|
|
#[get("/stats")]
|
|
async fn stats_get(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
|
Ok(web::Json(get_stats(&db.lock().unwrap())))
|
|
}
|
|
|
|
/// `GET /admin` — dump every database table as a sortable HTML table, with
/// query/render timings appended. A debugging page, not a production UI.
#[get("/admin")]
async fn admin_get(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
    let db = db.lock().unwrap();
    // Pull every table into memory first so DB query time can be reported
    // separately from HTML generation time.
    let query_start_time = Instant::now();
    let search_urls = SearchURL::get_all(&db).unwrap_or_default();
    let parsed_pages = ParsedPage::get_all(&db).unwrap_or_default();
    let parsed_storages = ParsedStorage::get_all(&db).unwrap_or_default();
    let item_appearances = ItemAppearances::get_all(&db).unwrap_or_default();
    let listings = Listing::get_all(&db).unwrap_or_default();
    let total_query_time = query_start_time.elapsed().as_micros() as f64 / 1000.0;

    let html_gen_start_time = Instant::now();
    let mut html = String::new();
    // Static page head: PureCSS for styling, jQuery + tablesorter for
    // client-side column sorting.
    // NOTE(review): the stylesheet link uses `xintegrity`, which browsers
    // ignore, so no subresource-integrity check actually happens — confirm
    // whether `integrity` (with a correct hash) was intended.
    html.push_str(
        r#"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Database Dump</title>
<link rel="stylesheet" href="https://unpkg.com/purecss@2.0.6/build/pure-min.css" xintegrity="sha384-Uu6IeWbM+gzNVXJcM9XV3SohHtmWE+3VGi496jvgX1jyvDTWuaAUiIEoIeVVERG" crossorigin="anonymous">
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.tablesorter/2.31.3/js/jquery.tablesorter.min.js"></script>
<style>
body { padding: 1em; }
.pure-table { margin-bottom: 2em; }
th { cursor: pointer; }
</style>
</head>
<body>
<h1>Database Dump</h1>
"#,
    );

    // One sortable table per database table.
    html.push_str(&generate_table("SearchURLs", &search_urls));
    html.push_str(&generate_table("Pages_Parsed", &parsed_pages));
    html.push_str(&generate_table("Storage_Parsed", &parsed_storages));
    html.push_str(&generate_table("Item_Appearances", &item_appearances));
    html.push_str(&generate_table("Listings", &listings));

    // Performance Metrics
    let html_gen_time = html_gen_start_time.elapsed().as_micros() as f64 / 1000.0;
    html.push_str(&format!(
        r#"<p>
Database query time: <strong>{}ms</strong><br>
HTML generation time: <strong>{}ms</strong>
</p>"#,
        total_query_time, html_gen_time
    ));
    info!("DB Query ms: {total_query_time}, HTML Generation ms:{html_gen_time}");

    // Footer and Scripts
    html.push_str(
        r#"
<script>
$(function() {
$(".sortable-table").tablesorter();
});
</script>
</body>
</html>"#,
    );

    Ok(web::Html::new(&html))
}
|
|
|
|
fn generate_table<T: Serialize>(title: &str, data: &[T]) -> String {
|
|
use serde_json::Value;
|
|
|
|
if data.is_empty() {
|
|
return format!(
|
|
"<h2>{} (0 rows)</h2><table class='pure-table pure-table-bordered pure-table-striped sortable-table'><thead><tr></tr></thead><tbody></tbody></table>",
|
|
title
|
|
);
|
|
}
|
|
|
|
let mut headers: Vec<String> = serde_json::to_value(&data[0])
|
|
.unwrap_or(Value::Null)
|
|
.as_object()
|
|
.map_or(Vec::new(), |obj| obj.keys().cloned().collect());
|
|
|
|
// Define the desired order for specific columns.
|
|
let desired_order = ["id", "item", "item_id", "timestamp"];
|
|
|
|
// Sort the headers. Columns in `desired_order` come first,
|
|
// in that order. The rest are sorted alphabetically.
|
|
headers.sort_by(|a, b| {
|
|
let a_pos = desired_order
|
|
.iter()
|
|
.position(|&p| p == a)
|
|
.unwrap_or(usize::MAX);
|
|
let b_pos = desired_order
|
|
.iter()
|
|
.position(|&p| p == b)
|
|
.unwrap_or(usize::MAX);
|
|
a_pos.cmp(&b_pos).then_with(|| a.cmp(b))
|
|
});
|
|
|
|
// Create the HTML for the table header row.
|
|
let header_html = headers
|
|
.iter()
|
|
.map(|header| format!("<th>{}</th>", header))
|
|
.collect::<String>();
|
|
|
|
// Create the HTML for all the table body rows.
|
|
let body_html = data
|
|
.iter()
|
|
.map(|item| {
|
|
let item_json = serde_json::to_value(item).unwrap_or(Value::Null);
|
|
let obj = item_json.as_object();
|
|
|
|
// Create all cells for a single row.
|
|
let cells_html = headers
|
|
.iter()
|
|
.map(|header| {
|
|
let value = obj.and_then(|o| o.get(header)).unwrap_or(&Value::Null);
|
|
// Remove quotes from the resulting JSON string value for cleaner output.
|
|
format!("<td>{}</td>", value.to_string().replace('"', ""))
|
|
})
|
|
.collect::<String>();
|
|
|
|
format!("<tr>{}</tr>", cells_html)
|
|
})
|
|
.collect::<String>();
|
|
|
|
// Assemble the final table HTML.
|
|
format!(
|
|
"<h2>{} ({} rows)</h2><table class='pure-table pure-table-bordered pure-table-striped sortable-table'><thead><tr>{}</tr></thead><tbody>{}</tbody></table>",
|
|
title,
|
|
data.len(),
|
|
header_html,
|
|
body_html
|
|
)
|
|
}
|
|
|
|
/// Entry point: set up tracing, locate the scraped-data directory, open the
/// database, and serve the HTTP API on 0.0.0.0:9876.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
    // Formatted log output, filtered via the standard env-var mechanism.
    tracing_subscriber::registry()
        .with(fmt::layer())
        .with(EnvFilter::from_default_env())
        .init();
    // Parsed only for its --help/--version side effects; no flags defined yet.
    let _ = Args::parse();

    let scrapedatadir = xdg_dirs::ensure_scrapedata_dir_exists("scraper", None);
    info!(
        "Starting with scraped data dir of \"{}\".",
        scrapedatadir.to_str().unwrap()
    );
    // A single connection shared across all worker threads behind a mutex.
    let db_mutex = Data::new(Mutex::new(get_initialized(None)));

    // Prepare our backend via pulling in what categories we are preconfigured with.
    SearchURL::scan(&db_mutex.lock().unwrap(), &scrapedatadir, "url.json");

    HttpServer::new(move || {
        App::new()
            // Listing handlers
            .service(listing_get)
            .service(listings_filtered_get)
            .service(listing_history_get)
            // Category handlers
            .service(parse_listings)
            .service(category_parse)
            .service(category_getnames)
            // Gnarly info dump
            .service(admin_get)
            .service(stats_get)
            // Stuff which is passed into every request.
            .app_data(db_mutex.clone())
            .app_data(Data::new(scrapedatadir.clone()))
    })
    .bind(("0.0.0.0", 9876))?
    .run()
    .await
}
|