Add an admin-based table dump and change buy_it_now_price to cents
All checks were successful
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 3m32s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 3m57s
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 4m2s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 9m17s
All checks were successful
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 3m32s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 3m57s
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 4m2s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 9m17s
This commit is contained in:
153
src/main.rs
153
src/main.rs
@@ -2,9 +2,10 @@ use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
|
||||
use chrono::DateTime;
|
||||
use clap::Parser;
|
||||
use ebay_scraper_rust::db::{
|
||||
ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
|
||||
DBTable, ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
|
||||
};
|
||||
use ebay_scraper_rust::{parser_ebay, parser_storage};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Mutex;
|
||||
use std::time::Instant;
|
||||
@@ -122,10 +123,15 @@ async fn parse_post(
|
||||
let dir = &downloaddir.join(category.clone());
|
||||
|
||||
// Ensure the category is created.
|
||||
let url: serde_json::Value =
|
||||
serde_json::from_str(&std::fs::read_to_string(dir.join("url.json")).unwrap()).unwrap();
|
||||
let url_fpath = dir.join("url.json");
|
||||
let url_contents = std::fs::read_to_string(&url_fpath)
|
||||
.inspect_err(|e| error!("Failed reading {}: {e}", url_fpath.display()))?;
|
||||
#[derive(Deserialize)]
|
||||
struct URLJSON {
|
||||
url: String,
|
||||
}
|
||||
let su = SearchURL {
|
||||
full_url: url.to_string(),
|
||||
full_url: serde_json::from_str::<URLJSON>(&url_contents).unwrap().url,
|
||||
name: category.to_string(),
|
||||
};
|
||||
su.add_or_update(&db.lock().unwrap());
|
||||
@@ -136,7 +142,6 @@ async fn parse_post(
|
||||
// See what pages haven't been seen before.
|
||||
let to_parse = pages.iter().filter(|t| {
|
||||
let ts = chrono::DateTime::from_timestamp(**t, 0).unwrap();
|
||||
info!("Checking if page with a timestamp of {ts} and catagory of {category} exists");
|
||||
let p = ParsedPage::lookup(&db.lock().unwrap(), ts);
|
||||
|
||||
// Timestamp never seen before, lets pass it on.
|
||||
@@ -174,18 +179,26 @@ async fn parse_post(
|
||||
}
|
||||
.add_or_update(&db.lock().unwrap());
|
||||
|
||||
let elements = parser_ebay::extract_data_from_html(
|
||||
&std::fs::read_to_string(dir.join(format!("{ts}.html"))).unwrap(),
|
||||
&ts,
|
||||
&category,
|
||||
)
|
||||
.unwrap();
|
||||
let page_path = dir.join(format!("{}.html", ts.timestamp()));
|
||||
let page_contents = std::fs::read_to_string(&page_path)
|
||||
.inspect_err(|e| error!("Failed reading {}, error:{e}", page_path.display()))?;
|
||||
let elements = parser_ebay::extract_data_from_html(&page_contents, &ts, &category).unwrap();
|
||||
info!(
|
||||
"Page {} contains {} elements",
|
||||
ts.timestamp(),
|
||||
elements.len()
|
||||
);
|
||||
|
||||
added_count += elements.len();
|
||||
for e in elements {
|
||||
e.0.add_or_update(&db.lock().unwrap());
|
||||
e.1.add_or_update(&db.lock().unwrap());
|
||||
info!("Inserting id:{}, title:{}", e.0.item_id, e.0.title);
|
||||
info!(
|
||||
"From page {}, inserting id:{}, title:{}",
|
||||
ts.timestamp(),
|
||||
e.0.item_id,
|
||||
e.0.title
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -193,6 +206,117 @@ async fn parse_post(
|
||||
Ok(added_count.to_string())
|
||||
}
|
||||
|
||||
#[get("admin")]
|
||||
async fn admin_get(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
||||
let db = db.lock().unwrap();
|
||||
let query_start_time = Instant::now();
|
||||
let search_urls = SearchURL::get_all(&db).unwrap_or_default();
|
||||
let parsed_pages = ParsedPage::get_all(&db).unwrap_or_default();
|
||||
let parsed_storages = ParsedStorage::get_all(&db).unwrap_or_default();
|
||||
let item_appearances = ItemAppearances::get_all(&db).unwrap_or_default();
|
||||
let listings = Listing::get_all(&db).unwrap_or_default();
|
||||
let total_query_time = query_start_time.elapsed().as_micros() as f64 / 1000.0;
|
||||
|
||||
let html_gen_start_time = Instant::now();
|
||||
let mut html = String::new();
|
||||
html.push_str(
|
||||
r#"<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Database Dump</title>
|
||||
<link rel="stylesheet" href="https://unpkg.com/purecss@2.0.6/build/pure-min.css" xintegrity="sha384-Uu6IeWbM+gzNVXJcM9XV3SohHtmWE+3VGi496jvgX1jyvDTWuaAUiIEoIeVVERG" crossorigin="anonymous">
|
||||
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.tablesorter/2.31.3/js/jquery.tablesorter.min.js"></script>
|
||||
<style>
|
||||
body { padding: 1em; }
|
||||
.pure-table { margin-bottom: 2em; }
|
||||
th { cursor: pointer; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Database Dump</h1>
|
||||
"#,
|
||||
);
|
||||
|
||||
// Performance Metrics
|
||||
let html_gen_time = html_gen_start_time.elapsed().as_micros() as f64 / 1000.0;
|
||||
html.push_str(&format!(
|
||||
r#"<p>
|
||||
Database query time: <strong>{}ms</strong><br>
|
||||
HTML generation time: <strong>{}ms</strong>
|
||||
</p>"#,
|
||||
total_query_time, html_gen_time
|
||||
));
|
||||
info!("DB Query ms: {total_query_time}, HTML Generation ms:{html_gen_time}");
|
||||
|
||||
// --- Tables ---
|
||||
|
||||
// SearchURLs
|
||||
html.push_str(&generate_table("SearchURLs", &search_urls));
|
||||
|
||||
// ParsedPages
|
||||
html.push_str(&generate_table("Pages_Parsed", &parsed_pages));
|
||||
|
||||
// ParsedStorage
|
||||
html.push_str(&generate_table("Storage_Parsed", &parsed_storages));
|
||||
|
||||
// ItemAppearances
|
||||
html.push_str(&generate_table("Item_Appearances", &item_appearances));
|
||||
|
||||
// Listings
|
||||
html.push_str(&generate_table("Listings", &listings));
|
||||
|
||||
// Footer and Scripts
|
||||
html.push_str(
|
||||
r#"
|
||||
<script>
|
||||
$(function() {
|
||||
$(".sortable-table").tablesorter();
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>"#,
|
||||
);
|
||||
Ok(web::Html::new(&html))
|
||||
}
|
||||
|
||||
fn generate_table<T: Serialize>(title: &str, data: &[T]) -> String {
|
||||
let mut table_html = format!(
|
||||
"<h2>{} ({} rows)</h2><table class='pure-table pure-table-bordered sortable-table'><thead><tr>",
|
||||
title,
|
||||
data.len()
|
||||
);
|
||||
|
||||
if data.len() > 0 {
|
||||
for header in serde_json::to_value(&data[0])
|
||||
.unwrap()
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.keys()
|
||||
{
|
||||
table_html.push_str(&format!("<th>{}</th>", header));
|
||||
}
|
||||
table_html.push_str("</tr></thead><tbody>");
|
||||
|
||||
for item in data {
|
||||
table_html.push_str("<tr>");
|
||||
let item_json = serde_json::to_value(item).unwrap();
|
||||
if let Some(obj) = item_json.as_object() {
|
||||
for (_key, value) in obj.iter() {
|
||||
table_html
|
||||
.push_str(&format!("<td>{}</td>", value.to_string().replace("\"", "")));
|
||||
}
|
||||
}
|
||||
table_html.push_str("</tr>");
|
||||
}
|
||||
}
|
||||
|
||||
table_html.push_str("</tbody></table>");
|
||||
table_html
|
||||
}
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> std::io::Result<()> {
|
||||
tracing_subscriber::registry()
|
||||
@@ -201,7 +325,7 @@ async fn main() -> std::io::Result<()> {
|
||||
.init();
|
||||
let _ = Args::parse();
|
||||
|
||||
let scrapedatadir = xdg_dirs::ensure_scrapedata_dir_exists("ebay_scraper", None);
|
||||
let scrapedatadir = xdg_dirs::ensure_scrapedata_dir_exists("scraper", None);
|
||||
info!(
|
||||
"Starting with scraped data dir of \"{}\".",
|
||||
scrapedatadir.to_str().unwrap()
|
||||
@@ -216,10 +340,11 @@ async fn main() -> std::io::Result<()> {
|
||||
.service(listing_since_get)
|
||||
.service(parse_post)
|
||||
.service(parse_listings)
|
||||
.service(admin_get)
|
||||
.app_data(db_mutex.clone())
|
||||
.app_data(Data::new(scrapedatadir.clone()))
|
||||
})
|
||||
.bind(("127.0.0.1", 8080))?
|
||||
.bind(("0.0.0.0", 9876))?
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
Reference in New Issue
Block a user