Rust cleanup (separate files, idiomatic rust, etc)
ebay_storage/rust/Cargo.lock | 699 (generated; diff suppressed because it is too large)
ebay_storage/rust/Cargo.toml
@@ -1,19 +1,21 @@
 [package]
 name = "ebay_scraper_rust"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 
 [dependencies]
-clap = { version = "4.4", features = ["derive"] }
-reqwest = { version = "0.11", features = ["json", "stream"] } # Removed "blocking" as we use tokio
-scraper = "0.18"
-serde = { version = "1.0", features = ["derive"] }
-serde_json = "1.0"
-regex = "1.10"
-tokio = { version = "1", features = ["full"] }
-url = "2.5"
+clap = { version = "4.5.39", features = ["derive"] }
+reqwest = { version = "0.12.18", features = ["json", "stream"] } # Removed "blocking" as we use tokio
+scraper = "0.23.1"
+serde = { version = "1.0.219", features = ["derive"] }
+serde_json = "1.0.140"
+regex = "1.11.1"
+tokio = { version = "1.45.1", features = ["full"] }
+url = "2.5.4"
 # path-slash is not strictly needed if using std::path::PathBuf correctly
-bytes = "1.5"
-chrono = { version = "0.4", features = ["serde"] }
-lazy_static = "1.4.0"
-futures = "0.3" # For join_all on async tasks
+bytes = "1.10.1"
+chrono = { version = "0.4.41", features = ["serde"] }
+lazy_static = "1.5.0"
+futures = "0.3.31" # For join_all on async tasks
+tracing = "0.1.41"
+tracing-subscriber = { version = "0.3.19", features = ["fmt"] }
ebay_storage/rust/src/cli.rs | 45 (new file)
@@ -0,0 +1,45 @@
+// src/cli.rs
+use clap::Parser;
+
+#[derive(Parser, Debug)]
+#[clap(
+    name = "ebay-scraper-rust",
+    version = "0.1.0",
+    about = "Scrapes eBay search results for SSD/HDD cost per TB."
+)]
+pub struct Cli {
+    #[clap(subcommand)]
+    pub command: Option<Commands>,
+
+    /// The full eBay search URL to scrape.
+    pub url: Option<String>,
+
+    /// Save scraped HTML to a file (and download images if fetching from URL).
+    #[clap(long)]
+    pub save: Option<String>,
+
+    /// Load HTML from a file (disables network). Image download will not occur with --load.
+    #[clap(long)]
+    pub load: Option<String>,
+
+    /// Suppress informational logs, output only final JSON.
+    #[clap(long)]
+    pub only_json: bool,
+}
+
+#[derive(Parser, Debug)]
+pub enum Commands {
+    /// Scrapes latest listings.
+    Latest(LatestArgs),
+}
+
+#[derive(Parser, Debug)]
+pub struct LatestArgs {
+    /// Items per page (60, 120, or 240)
+    #[clap(long, default_value = "60")]
+    pub per_page: String,
+
+    /// Minimum cost (e.g., 50.00)
+    #[clap(long, default_value = "0.00")]
+    pub minimum_cost: f64,
+}
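
A quick sanity check of how these definitions parse (a sketch, not part of the commit; the argument vector is made up, and it assumes clap 4's default kebab-case renaming, so only_json becomes --only-json):

// Sketch: exercising the Cli definition above via clap::Parser::try_parse_from.
#[cfg(test)]
mod cli_sketch {
    use super::{Cli, Commands};
    use clap::Parser;

    #[test]
    fn parses_latest_subcommand() {
        // The first element of the iterator is the binary name.
        let cli = Cli::try_parse_from([
            "ebay-scraper-rust", "--only-json", "latest", "--per-page", "120",
        ])
        .expect("arguments should parse");
        assert!(cli.only_json);
        match cli.command {
            Some(Commands::Latest(args)) => assert_eq!(args.per_page, "120"),
            _ => panic!("expected the Latest subcommand"),
        }
    }
}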
ebay_storage/rust/src/html_utils.rs | 250 (new file)
@@ -0,0 +1,250 @@
+// src/html_utils.rs
+use chrono::Utc;
+use lazy_static::lazy_static;
+use regex::Regex;
+use scraper::{Html, Selector};
+use std::error::Error as StdError; // Use the same alias as main
+use std::fs::{self, File};
+use std::io::Write;
+use std::path::Path;
+use tracing::{error, info, warn};
+use url::Url;
+
+use super::item::{EbayItem, ParsedItemData};
+use super::parser_utils;
+
+// Define or import AppError to match main.rs
+type AppError = Box<dyn StdError + Send + Sync + 'static>;
+
+const PARSER_ENGINE_VERSION: i32 = 1;
+const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36";
+
+lazy_static! {
+    static ref ITEM_ID_REGEX: Regex = Regex::new(r"/itm/(\d+)").unwrap();
+    static ref NEW_LISTING_REGEX: Regex = Regex::new(r"(?i)^\s*NEW LISTING\s*[:\-\s]*").unwrap();
+}
+
+/// Fetches HTML content from a URL.
+pub async fn fetch_html(url: &str) -> Result<String, AppError> {
+    info!(target_url = url, "Navigating to URL");
+    let client = reqwest::Client::builder().user_agent(USER_AGENT).build()?;
+    let response = client.get(url).send().await?;
+    if !response.status().is_success() {
+        let err_msg = format!(
+            "Failed to fetch URL: {} - Status: {}",
+            url,
+            response.status()
+        );
+        error!(error_message = %err_msg, "URL fetch failed");
+        return Err(err_msg.into());
+    }
+    let html_content = response.text().await?;
+    info!(
+        target_url = url,
+        "Navigation successful. Page content retrieved."
+    );
+    Ok(html_content)
+}
+
+/// Extracts item data from HTML content.
+pub fn extract_data_from_html(html_content: &str) -> Result<Vec<EbayItem>, AppError> {
+    let document = Html::parse_document(html_content);
+    let mut items = Vec::new();
+    let today = Utc::now();
+
+    // MODIFIED: Using .unwrap() for Selector::parse() calls as requested
+    let item_selector =
+        Selector::parse("li.s-item, li.srp-results__item, div.s-item[role='listitem']").unwrap();
+    let title_selector = Selector::parse(".s-item__title, .srp-results__title").unwrap();
+    let price_selector = Selector::parse(".s-item__price").unwrap();
+    let image_selector =
+        Selector::parse(".s-item__image-wrapper img.s-item__image-img, .s-item__image img")
+            .unwrap();
+    let link_selector =
+        Selector::parse("a.s-item__link[href*='/itm/'], .s-item__info > a[href*='/itm/']").unwrap();
+    let bid_count_selector = Selector::parse(".s-item__bid-count").unwrap();
+    let best_offer_selector =
+        Selector::parse(".s-item__purchase-options--bo, .s-item__best-offer").unwrap();
+    let secondary_info_selector =
+        Selector::parse(".s-item__subtitle, .s-item__secondary-text, .s-item__detail--secondary")
+            .unwrap();
+    let auction_bin_price_selector = Selector::parse(".s-item__buy-it-now-price").unwrap();
+
+    for element in document.select(&item_selector) {
+        let raw_title_text = element
+            .select(&title_selector)
+            .next()
+            .map(|el| el.text().collect::<String>().trim().to_string());
+        let price_text = element
+            .select(&price_selector)
+            .next()
+            .map(|el| el.text().collect::<String>().trim().to_string());
+
+        let item_id = element
+            .select(&link_selector)
+            .next()
+            .and_then(|link_el| link_el.value().attr("href"))
+            .and_then(|href| ITEM_ID_REGEX.captures(href))
+            .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string()));
+
+        if raw_title_text.is_none() || price_text.is_none() || item_id.is_none() {
+            warn!("Skipping item due to missing title, price, or item ID.");
+            continue;
+        }
+        let raw_title = raw_title_text.unwrap();
+        let price_text = price_text.unwrap();
+        let item_id = item_id.unwrap();
+
+        let cleaned_title = NEW_LISTING_REGEX.replace(&raw_title, "").trim().to_string();
+
+        let primary_display_price = parser_utils::parse_price(&price_text);
+
+        let mut current_bid_price: Option<f64> = None;
+        let mut final_buy_it_now_price: Option<f64> = None;
+        let mut has_best_offer = false;
+        let mut item_is_auction = false;
+
+        if let Some(bid_el) = element.select(&bid_count_selector).next() {
+            if bid_el
+                .text()
+                .collect::<String>()
+                .to_lowercase()
+                .contains("bid")
+            {
+                item_is_auction = true;
+            }
+        }
+
+        if element.select(&best_offer_selector).next().is_some() {
+            has_best_offer = true;
+        } else {
+            for el in element.select(&secondary_info_selector) {
+                if el
+                    .text()
+                    .collect::<String>()
+                    .to_lowercase()
+                    .contains("or best offer")
+                {
+                    has_best_offer = true;
+                    break;
+                }
+            }
+        }
+
+        if item_is_auction {
+            current_bid_price = primary_display_price;
+            if let Some(bin_el) = element.select(&auction_bin_price_selector).next() {
+                final_buy_it_now_price =
+                    parser_utils::parse_price(&bin_el.text().collect::<String>());
+            }
+        } else {
+            final_buy_it_now_price = primary_display_price;
+        }
+
+        let image_url_val = element
+            .select(&image_selector)
+            .next()
+            .and_then(|img_el| {
+                img_el
+                    .value()
+                    .attr("data-src")
+                    .or(img_el.value().attr("src"))
+            })
+            .map(|s| s.to_string());
+
+        let parsed_size_info = parser_utils::parse_size_and_quantity(&cleaned_title);
+
+        let cost_per_tb = if let Some(price) = primary_display_price {
+            if parsed_size_info.total_tb > 0.0 {
+                Some(((price / parsed_size_info.total_tb) * 100.0).round() / 100.0)
+            } else {
+                None
+            }
+        } else {
+            None
+        };
+
+        let parsed_data = ParsedItemData {
+            item_count: parsed_size_info.quantity,
+            size_per_item_tb: if parsed_size_info.individual_size_tb > 0.0 {
+                Some(parsed_size_info.individual_size_tb)
+            } else {
+                None
+            },
+            total_tb: if parsed_size_info.total_tb > 0.0 {
+                Some(parsed_size_info.total_tb)
+            } else {
+                None
+            },
+            cost_per_tb,
+            needed_description_check: parsed_size_info.needed_description_check,
+            parser_engine: PARSER_ENGINE_VERSION,
+        };
+
+        items.push(EbayItem {
+            title: cleaned_title,
+            item_id,
+            date_found: today,
+            current_bid_price,
+            buy_it_now_price: final_buy_it_now_price,
+            has_best_offer,
+            image_url: image_url_val,
+            parsed: parsed_data,
+        });
+    }
+    Ok(items)
+}
+
+/// Downloads an image from a URL and saves it, preserving path structure.
+pub async fn download_image(
+    image_url_str: &str,
+    base_save_directory: &Path,
+) -> Result<(), AppError> {
+    if image_url_str.is_empty() {
+        return Ok(());
+    }
+
+    let parsed_url = Url::parse(image_url_str).map_err(|e| Box::new(e) as AppError)?;
+
+    let image_path_from_url = parsed_url.path().trim_start_matches('/');
+    if image_path_from_url.is_empty() {
+        return Err(Box::from("Image URL has no path component") as AppError);
+    }
+
+    let full_local_image_path = base_save_directory.join(image_path_from_url);
+
+    if let Some(parent_dir) = full_local_image_path.parent() {
+        fs::create_dir_all(parent_dir).map_err(|e| Box::new(e) as AppError)?;
+        info!(path = %parent_dir.display(), "Ensured image directory exists");
+    }
+
+    let client = reqwest::Client::builder()
+        .user_agent(USER_AGENT)
+        .build()
+        .map_err(|e| Box::new(e) as AppError)?;
+    let response = client
+        .get(image_url_str)
+        .send()
+        .await
+        .map_err(|e| Box::new(e) as AppError)?;
+
+    if !response.status().is_success() {
+        let err_msg = format!(
+            "Failed to download image {}. Status: {}",
+            image_url_str,
+            response.status()
+        );
+        return Err(Box::from(err_msg) as AppError);
+    }
+
+    let mut file = File::create(&full_local_image_path).map_err(|e| Box::new(e) as AppError)?;
+    let content = response
+        .bytes()
+        .await
+        .map_err(|e| Box::new(e) as AppError)?;
+    file.write_all(&content)
+        .map_err(|e| Box::new(e) as AppError)?;
+
+    info!(path = %full_local_image_path.display(), "Downloaded image");
+    Ok(())
+}
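
To see the selector pipeline above end to end, here is a minimal sketch (not part of the commit) that feeds a hand-written fragment through extract_data_from_html; the HTML is hypothetical, shaped only to satisfy the title, price, and link selectors:

// Sketch: a hypothetical listing fragment matching the selectors above.
#[cfg(test)]
mod extraction_sketch {
    #[test]
    fn extracts_a_minimal_listing() {
        let html = r#"
            <ul>
              <li class="s-item">
                <div class="s-item__title">LOT OF 2 4TB HDD</div>
                <span class="s-item__price">$100.00</span>
                <a class="s-item__link" href="https://www.ebay.com/itm/123456789012">link</a>
              </li>
            </ul>"#;
        let items = super::extract_data_from_html(html).expect("parsing should succeed");
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].item_id, "123456789012");
        // "LOT OF 2" x 4TB => 8 TB total, so $100 / 8 TB = $12.50 per TB.
        assert_eq!(items[0].parsed.cost_per_tb, Some(12.5));
    }
}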
ebay_storage/rust/src/item.rs | 45 (new file)
@@ -0,0 +1,45 @@
+// src/item.rs
+use chrono::{DateTime, Utc};
+use serde::Serialize;
+
+#[derive(Serialize, Debug)]
+pub struct EbayItem {
+    pub title: String,
+    #[serde(rename = "itemId")]
+    pub item_id: String,
+    #[serde(rename = "dateFound")]
+    pub date_found: DateTime<Utc>,
+    #[serde(rename = "currentBidPrice")]
+    pub current_bid_price: Option<f64>,
+    #[serde(rename = "buyItNowPrice", skip_serializing_if = "Option::is_none")]
+    pub buy_it_now_price: Option<f64>,
+    #[serde(rename = "hasBestOffer")]
+    pub has_best_offer: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub image_url: Option<String>,
+    pub parsed: ParsedItemData,
+}
+
+#[derive(Serialize, Debug)]
+pub struct ParsedItemData {
+    #[serde(rename = "itemCount")]
+    pub item_count: i32,
+    #[serde(rename = "sizePerItemTB")]
+    pub size_per_item_tb: Option<f64>,
+    #[serde(rename = "totalTB")]
+    pub total_tb: Option<f64>,
+    #[serde(rename = "costPerTB")]
+    pub cost_per_tb: Option<f64>,
+    #[serde(rename = "needed_description_check")]
+    pub needed_description_check: bool,
+    #[serde(rename = "parser_engine")]
+    pub parser_engine: i32,
+}
+
+#[derive(Debug)]
+pub struct SizeQuantityInfo {
+    pub total_tb: f64,
+    pub quantity: i32,
+    pub individual_size_tb: f64,
+    pub needed_description_check: bool,
+}
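
For reference, the serde attributes above imply roughly this wire format (a sketch with made-up values; buyItNowPrice and image_url are omitted entirely when None):

// Illustrative output shape (hypothetical values, not from a real listing):
// {
//   "title": "4TB HDD",
//   "itemId": "123456789012",
//   "dateFound": "2025-01-01T00:00:00Z",
//   "currentBidPrice": null,
//   "hasBestOffer": false,
//   "parsed": {
//     "itemCount": 1,
//     "sizePerItemTB": 4.0,
//     "totalTB": 4.0,
//     "costPerTB": 25.0,
//     "needed_description_check": false,
//     "parser_engine": 1
//   }
// }
// Note the mixed convention: most fields are renamed to camelCase, while
// needed_description_check, parser_engine, and image_url stay snake_case.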
ebay_storage/rust/src/main.rs
@@ -1,426 +1,66 @@
-// main.rs
+// src/main.rs
+mod cli;
+mod item;
+mod parser_utils;
+mod html_utils;
 
-// Import necessary crates
 use clap::Parser;
-use regex::Regex;
-use scraper::{Html, Selector};
-use serde::Serialize;
-use std::fs::{self, File};
+use std::fs;
 use std::io::Write;
-use std::path::{Path, PathBuf};
-use std::error::Error;
-use chrono::{DateTime, Utc};
-use lazy_static::lazy_static;
-use url::Url;
+use std::path::PathBuf;
+use std::error::Error as StdError;
+use tracing::{info, error, warn, Level};
+use tracing_subscriber;
 
-// Define constants
-const PARSER_ENGINE_VERSION: i32 = 1;
-const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36";
+use cli::{Cli, Commands};
+use item::EbayItem;
 
-// --- Lazy static Regex definitions ---
-lazy_static! {
-    // Regex for parsing quantity from title (e.g., "LOT OF 10", "5-PACK")
-    static ref EXPLICIT_QTY_PATTERNS: Vec<Regex> = vec![
-        Regex::new(r"\b(?:LOT\s+OF|LOT)\s*\(?\s*(\d+)\s*\)?").unwrap(),
-        Regex::new(r"\b(?:LOT\s+OF|LOT)\s*\*\s*(\d+)").unwrap(),
-        Regex::new(r"\b(?:PACK\s+OF|PACK|BULK)\s*\(?\s*(\d+)\s*\)?").unwrap(),
-        Regex::new(r"\b(\d+)\s*-\s*PACK\b").unwrap(),
-        Regex::new(r"\b(\d+)\s*COUNT\b").unwrap(),
-    ];
-    // Regex for parsing size from title (e.g., "500GB", "2TB")
-    static ref SIZE_REGEX: Regex = Regex::new(r"(\d+(?:\.\d+)?)\s*(TB|GB)\b").unwrap();
-    // Regex for titles indicating a range of sizes or mixed items
-    static ref SIZE_RANGE_REGEX: Regex = Regex::new(r"\d+(?:\.\d+)?\s*(?:GB|TB)\s*(?:-|&|OR|TO)\s*\d+(?:\.\d+)?\s*(?:GB|TB)").unwrap();
-    // Regex for extracting item ID from URL
-    static ref ITEM_ID_REGEX: Regex = Regex::new(r"/itm/(\d+)").unwrap();
-    // Regex for parsing price, potentially a range
-    static ref PRICE_REGEX: Regex = Regex::new(r"\$?([\d,]+\.?\d*)").unwrap();
-    // Regex for "NEW LISTING" prefix - case-insensitive to better match JS /i flag
-    static ref NEW_LISTING_REGEX: Regex = Regex::new(r"(?i)^\s*NEW LISTING\s*[:\-\s]*").unwrap();
-}
+// Define a more specific error type for the application
+// This type is now implicitly used by html_utils.rs as well due to function signatures.
+type AppError = Box<dyn StdError + Send + Sync + 'static>;
 
-// --- Command Line Argument Parsing (using clap) ---
-#[derive(Parser, Debug)]
-#[clap(name = "ebay-scraper-rust", version = "0.1.0", about = "Scrapes eBay search results for SSD/HDD cost per TB.")]
-struct Cli {
-    #[clap(subcommand)]
-    command: Option<Commands>,
-
-    /// The full eBay search URL to scrape.
-    url: Option<String>,
-
-    /// Save scraped HTML to a file (and download images if fetching from URL).
-    #[clap(long)]
-    save: Option<String>,
-
-    /// Load HTML from a file (disables network). Image download will not occur with --load.
-    #[clap(long)]
-    load: Option<String>,
-
-    /// Suppress informational logs, output only final JSON.
-    #[clap(long)]
-    only_json: bool,
-}
-
-#[derive(Parser, Debug)]
-enum Commands {
-    /// Scrapes latest listings.
-    Latest(LatestArgs),
-}
-
-#[derive(Parser, Debug)]
-struct LatestArgs {
-    /// Items per page (60, 120, or 240)
-    #[clap(long, default_value = "60")]
-    per_page: String, // Keep as string for validation
-
-    /// Minimum cost (e.g., 50.00)
-    #[clap(long, default_value = "0.00")]
-    minimum_cost: f64,
-}
-
-// --- Data Structures for Scraped Items (using serde) ---
-#[derive(Serialize, Debug)]
-struct EbayItem {
-    title: String,
-    #[serde(rename = "itemId")]
-    item_id: String,
-    #[serde(rename = "dateFound")]
-    date_found: DateTime<Utc>,
-    #[serde(rename = "currentBidPrice")]
-    current_bid_price: Option<f64>,
-    #[serde(rename = "buyItNowPrice", skip_serializing_if = "Option::is_none")] // Keep skip for this one if JS does it
-    buy_it_now_price: Option<f64>,
-    #[serde(rename = "hasBestOffer")]
-    has_best_offer: bool,
-    #[serde(skip_serializing_if = "Option::is_none")] // Keep skip for this one if JS does it
-    image_url: Option<String>,
-    parsed: ParsedItemData,
-}
-
-#[derive(Serialize, Debug)]
-struct ParsedItemData {
-    #[serde(rename = "itemCount")]
-    item_count: i32,
-    // MODIFIED: Removed skip_serializing_if to always include the field, even if null
-    #[serde(rename = "sizePerItemTB")]
-    size_per_item_tb: Option<f64>,
-    #[serde(rename = "totalTB")]
-    total_tb: Option<f64>,
-    #[serde(rename = "costPerTB")]
-    cost_per_tb: Option<f64>,
-    #[serde(rename = "needed_description_check")]
-    needed_description_check: bool,
-    #[serde(rename = "parser_engine")]
-    parser_engine: i32,
-}
-
-#[derive(Debug)]
-struct SizeQuantityInfo {
-    total_tb: f64,
-    quantity: i32,
-    individual_size_tb: f64,
-    needed_description_check: bool,
-}
-
-// --- Logging ---
-fn log_message(message: &str, quiet_mode: bool) {
-    if !quiet_mode {
-        eprintln!("{}", message);
-    }
-}
-
-fn log_error(message: &str, quiet_mode: bool) {
-    if !quiet_mode {
-        eprintln!("ERROR: {}", message);
-    }
-}
-
-// --- Parsing Logic ---
-mod parser {
-    use super::*;
-
-    /// Parses size and quantity information from an item title.
-    pub fn parse_size_and_quantity(title: &str) -> SizeQuantityInfo {
-        let upper_title = title.to_uppercase();
-        let mut total_tb = 0.0;
-        let mut quantity = 1;
-        let mut needed_description_check = false;
-        let mut individual_size_tb = 0.0;
-
-        for pattern in EXPLICIT_QTY_PATTERNS.iter() {
-            if let Some(caps) = pattern.captures(&upper_title) {
-                if let Some(qty_match) = caps.get(1) {
-                    if let Ok(parsed_qty) = qty_match.as_str().parse::<i32>() {
-                        if parsed_qty > 0 && parsed_qty < 500 {
-                            quantity = parsed_qty;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-        let mut size_matches: Vec<(f64, String)> = Vec::new();
-        for caps in SIZE_REGEX.captures_iter(&upper_title) {
-            if let (Some(val_str), Some(unit_str)) = (caps.get(1), caps.get(2)) {
-                if let Ok(val) = val_str.as_str().parse::<f64>() {
-                    size_matches.push((val, unit_str.as_str().to_string()));
-                }
-            }
-        }
-
-        if !size_matches.is_empty() {
-            let mut unique_sizes_tb: Vec<f64> = size_matches.iter()
-                .map(|(val, unit)| if unit == "GB" { *val / 1000.0 } else { *val })
-                .collect();
-            unique_sizes_tb.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
-            unique_sizes_tb.dedup();
-
-            if !unique_sizes_tb.is_empty() {
-                individual_size_tb = unique_sizes_tb[0];
-                if unique_sizes_tb.len() > 1 {
-                    needed_description_check = true;
-                }
-            }
-        }
-
-        if SIZE_RANGE_REGEX.is_match(&upper_title) {
-            needed_description_check = true;
-        }
-        if quantity > 1 && upper_title.contains("MIXED") {
-            needed_description_check = true;
-        }
-        if upper_title.contains("CHECK THE DESCRIPTION") || upper_title.contains("CHECK DESCRIPTION") || upper_title.contains("SEE DESCRIPTION") {
-            if quantity > 1 || size_matches.is_empty() || size_matches.len() > 1 {
-                needed_description_check = true;
-            }
-        }
-
-        if individual_size_tb > 0.0 {
-            total_tb = individual_size_tb * quantity as f64;
-        }
-
-        if quantity > 1 && total_tb == 0.0 && !size_matches.is_empty() {
-            needed_description_check = true;
-        }
-
-        if quantity == 1 && size_matches.len() == 1 && !needed_description_check {
-            // This condition is implicitly handled
-        }
-
-        SizeQuantityInfo {
-            total_tb: (total_tb * 10000.0).round() / 10000.0,
-            quantity,
-            individual_size_tb: (individual_size_tb * 10000.0).round() / 10000.0,
-            needed_description_check,
-        }
-    }
-
-    /// Parses price from a string, taking the first price if it's a range.
-    pub fn parse_price(price_text: &str) -> Option<f64> {
-        let lower_price_text = price_text.to_lowercase();
-        if lower_price_text.contains(" to ") {
-            if let Some(first_part) = lower_price_text.split(" to ").next() {
-                if let Some(caps) = PRICE_REGEX.captures(first_part) {
-                    if let Some(price_match) = caps.get(1) {
-                        return price_match.as_str().replace(',', "").parse().ok();
-                    }
-                }
-            }
-            return None;
-        }
-
-        if let Some(caps) = PRICE_REGEX.captures(price_text) {
-            if let Some(price_match) = caps.get(1) {
-                return price_match.as_str().replace(',', "").parse().ok();
-            }
-        }
-        None
-    }
-}
-
-// --- HTML Scraping Logic ---
-mod html_scraper {
-    use super::*;
-
-    /// Extracts item data from HTML content.
-    pub fn extract_data_from_html(html_content: &str, quiet_mode: bool) -> Result<Vec<EbayItem>, Box<dyn Error>> {
-        let document = Html::parse_document(html_content);
-        let mut items = Vec::new();
-        let today = Utc::now();
-
-        let item_selector = Selector::parse("li.s-item, li.srp-results__item, div.s-item[role='listitem']").unwrap();
-        let title_selector = Selector::parse(".s-item__title, .srp-results__title").unwrap();
-        let price_selector = Selector::parse(".s-item__price").unwrap();
-        let image_selector = Selector::parse(".s-item__image-wrapper img.s-item__image-img, .s-item__image img").unwrap();
-        let link_selector = Selector::parse("a.s-item__link[href*='/itm/'], .s-item__info > a[href*='/itm/']").unwrap();
-        let bid_count_selector = Selector::parse(".s-item__bid-count").unwrap();
-        let best_offer_selector = Selector::parse(".s-item__purchase-options--bo, .s-item__best-offer").unwrap();
-        let secondary_info_selector = Selector::parse(".s-item__subtitle, .s-item__secondary-text, .s-item__detail--secondary").unwrap();
-        let auction_bin_price_selector = Selector::parse(".s-item__buy-it-now-price").unwrap();
-
-        for element in document.select(&item_selector) {
-            let raw_title_text = element.select(&title_selector).next().map(|el| el.text().collect::<String>().trim().to_string());
-            let price_text = element.select(&price_selector).next().map(|el| el.text().collect::<String>().trim().to_string());
-
-            let item_id = element.select(&link_selector).next()
-                .and_then(|link_el| link_el.value().attr("href"))
-                .and_then(|href| ITEM_ID_REGEX.captures(href))
-                .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string()));
-
-            if raw_title_text.is_none() || price_text.is_none() || item_id.is_none() {
-                log_message("Skipping item due to missing title, price, or item ID.", quiet_mode);
-                continue;
-            }
-            let raw_title = raw_title_text.unwrap();
-            let price_text = price_text.unwrap();
-            let item_id = item_id.unwrap();
-
-            let cleaned_title = NEW_LISTING_REGEX.replace(&raw_title, "").trim().to_string();
-
-            let primary_display_price = parser::parse_price(&price_text);
-
-            let mut current_bid_price: Option<f64> = None;
-            let mut final_buy_it_now_price: Option<f64> = None;
-            let mut has_best_offer = false;
-            let mut item_is_auction = false;
-
-            if let Some(bid_el) = element.select(&bid_count_selector).next() {
-                if bid_el.text().collect::<String>().to_lowercase().contains("bid") {
-                    item_is_auction = true;
-                }
-            }
-
-            if element.select(&best_offer_selector).next().is_some() {
-                has_best_offer = true;
-            } else {
-                for el in element.select(&secondary_info_selector) {
-                    if el.text().collect::<String>().to_lowercase().contains("or best offer") {
-                        has_best_offer = true;
-                        break;
-                    }
-                }
-            }
-
-            if item_is_auction {
-                current_bid_price = primary_display_price;
-                if let Some(bin_el) = element.select(&auction_bin_price_selector).next() {
-                    final_buy_it_now_price = parser::parse_price(&bin_el.text().collect::<String>());
-                }
-            } else {
-                final_buy_it_now_price = primary_display_price;
-            }
-
-            let image_url_val = element.select(&image_selector).next()
-                .and_then(|img_el| {
-                    img_el.value().attr("data-src").or(img_el.value().attr("src"))
-                })
-                .map(|s| s.to_string());
-
-            let parsed_size_info = parser::parse_size_and_quantity(&cleaned_title);
-
-            let cost_per_tb = if let Some(price) = primary_display_price {
-                if parsed_size_info.total_tb > 0.0 {
-                    Some(((price / parsed_size_info.total_tb) * 100.0).round() / 100.0)
-                } else { None }
-            } else { None };
-
-            let parsed_data = ParsedItemData {
-                item_count: parsed_size_info.quantity,
-                size_per_item_tb: if parsed_size_info.individual_size_tb > 0.0 { Some(parsed_size_info.individual_size_tb) } else { None },
-                total_tb: if parsed_size_info.total_tb > 0.0 { Some(parsed_size_info.total_tb) } else { None },
-                cost_per_tb, // This will be None if conditions aren't met, and serialized as null
-                needed_description_check: parsed_size_info.needed_description_check,
-                parser_engine: PARSER_ENGINE_VERSION,
-            };
-
-            items.push(EbayItem {
-                title: cleaned_title,
-                item_id,
-                date_found: today,
-                current_bid_price,
-                buy_it_now_price: final_buy_it_now_price,
-                has_best_offer,
-                image_url: image_url_val,
-                parsed: parsed_data,
-            });
-        }
-        Ok(items)
-    }
-
-    /// Downloads an image from a URL and saves it, preserving path structure.
-    pub async fn download_image(image_url_str: &str, base_save_directory: &Path, quiet_mode: bool) -> Result<(), Box<dyn Error>> {
-        if image_url_str.is_empty() {
-            return Ok(());
-        }
-
-        let parsed_url = Url::parse(image_url_str)?;
-
-        let image_path_from_url = parsed_url.path().trim_start_matches('/');
-        if image_path_from_url.is_empty() {
-            return Err("Image URL has no path component".into());
-        }
-
-        let full_local_image_path = base_save_directory.join(image_path_from_url);
-
-        if let Some(parent_dir) = full_local_image_path.parent() {
-            fs::create_dir_all(parent_dir)?;
-            log_message(&format!("Ensured image directory exists: {}", parent_dir.display()), quiet_mode);
-        }
-
-        let client = reqwest::Client::builder().user_agent(USER_AGENT).build()?;
-        let response = client.get(image_url_str).send().await?;
-
-        if !response.status().is_success() {
-            return Err(format!("Failed to download image {}. Status: {}", image_url_str, response.status()).into());
-        }
-
-        let mut file = File::create(&full_local_image_path)?;
-        let content = response.bytes().await?;
-        file.write_all(&content)?;
-
-        log_message(&format!("Downloaded image: {}", full_local_image_path.display()), quiet_mode);
-        Ok(())
-    }
-}
-
-// --- Main Application Logic ---
 #[tokio::main]
-async fn main() -> Result<(), Box<dyn Error>> {
-    let cli = Cli::parse();
-    let quiet_mode = cli.only_json;
-
-    log_message("Starting scraping process...", quiet_mode);
+async fn main() -> Result<(), AppError> {
+    let cli_args = Cli::parse();
+
+    let subscriber_builder = tracing_subscriber::fmt().with_writer(std::io::stderr);
+    if cli_args.only_json {
+        subscriber_builder
+            .with_max_level(Level::ERROR)
+            .try_init()?;
+    } else {
+        subscriber_builder
+            .with_max_level(Level::INFO)
+            .try_init()?;
+    }
+
+    info!("Starting scraping process...");
 
     let html_content_to_parse: String;
     let mut should_download_images = false;
     let mut image_base_save_dir: Option<PathBuf> = None;
 
-    if let Some(html_file) = &cli.load {
-        log_message(&format!("Loading HTML from {}...", html_file), quiet_mode);
-        html_content_to_parse = fs::read_to_string(html_file)?;
-        log_message("HTML loaded. Network requests for page content disabled.", quiet_mode);
+    if let Some(html_file) = &cli_args.load {
+        info!(file_path = %html_file, "Loading HTML from file");
+        html_content_to_parse = fs::read_to_string(html_file)?; // std::io::Error converts to AppError via ?
+        info!("HTML loaded. Network requests for page content disabled.");
     } else {
-        let url_to_fetch = match (&cli.command, &cli.url) {
+        let url_to_fetch = match (&cli_args.command, &cli_args.url) {
             (Some(Commands::Latest(latest_args)), _) => {
                 let valid_per_page = ["60", "120", "240"];
                 if !valid_per_page.contains(&latest_args.per_page.as_str()) {
                     let err_msg = format!("--per_page must be one of {}, got {}", valid_per_page.join(", "), latest_args.per_page);
-                    log_error(&err_msg, quiet_mode);
-                    return Err(err_msg.into());
+                    error!(error_message = %err_msg, "Invalid per_page argument");
+                    return Err(err_msg.into()); // String converts to AppError
                 }
                 if latest_args.minimum_cost < 0.0 {
                     let err_msg = "--minimum_cost must be a non-negative number.";
-                    log_error(err_msg, quiet_mode);
-                    return Err(err_msg.into());
+                    error!(error_message = %err_msg, "Invalid minimum_cost argument");
+                    return Err(err_msg.into()); // String converts to AppError
                 }
                 let base_url = "https://www.ebay.com/sch/i.html?_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10";
                 let url = format!("{}&_ipg={}&_udlo={:.2}", base_url, latest_args.per_page, latest_args.minimum_cost);
-                log_message(&format!("Constructed URL for 'latest': {}", url), quiet_mode);
+                info!(constructed_url = %url, "Constructed URL for 'latest' command");
                 url
             }
             (None, Some(url_arg)) => {
@@ -428,27 +68,18 @@ async fn main() -> Result<(), Box<dyn Error>> {
             }
             (None, None) => {
                 let err_msg = "No URL provided and no command specified. Use --help for usage.";
-                log_error(err_msg, true);
-                return Err(err_msg.into());
+                eprintln!("ERROR: {}", err_msg);
+                return Err(err_msg.into()); // String converts to AppError
             }
         };
 
-        log_message(&format!("Navigating to {}...", url_to_fetch), quiet_mode);
-        let client = reqwest::Client::builder().user_agent(USER_AGENT).build()?;
-        let response = client.get(&url_to_fetch).send().await?;
-        if !response.status().is_success() {
-            let err_msg = format!("Failed to fetch URL: {} - Status: {}", url_to_fetch, response.status());
-            log_error(&err_msg, quiet_mode);
-            return Err(err_msg.into());
-        }
-        html_content_to_parse = response.text().await?;
-        log_message("Navigation successful. Page content retrieved.", quiet_mode);
+        html_content_to_parse = html_utils::fetch_html(&url_to_fetch).await?; // This now returns Result<String, AppError>
 
-        if let Some(save_path_str) = &cli.save {
-            log_message(&format!("Saving HTML to {}...", save_path_str), quiet_mode);
-            let mut file = File::create(save_path_str)?;
-            file.write_all(html_content_to_parse.as_bytes())?;
-            log_message("HTML saved.", quiet_mode);
+        if let Some(save_path_str) = &cli_args.save {
+            info!(file_path = %save_path_str, "Saving HTML to file");
+            let mut file = fs::File::create(save_path_str)?; // std::io::Error converts
+            file.write_all(html_content_to_parse.as_bytes())?; // std::io::Error converts
+            info!("HTML saved.");
 
             should_download_images = true;
             let save_file_path = PathBuf::from(save_path_str);
@@ -461,13 +92,13 @@ async fn main() -> Result<(), Box<dyn Error>> {
        }
     }
 
-    log_message("Extracting data...", quiet_mode);
-    let extracted_results = html_scraper::extract_data_from_html(&html_content_to_parse, quiet_mode)?;
-    log_message(&format!("Data extraction complete. Found {} items.", extracted_results.len()), quiet_mode);
+    info!("Extracting data...");
+    let extracted_results: Vec<EbayItem> = html_utils::extract_data_from_html(&html_content_to_parse)?; // This now returns Result<Vec<EbayItem>, AppError>
+    info!(item_count = extracted_results.len(), "Data extraction complete.");
 
     if should_download_images && !extracted_results.is_empty() {
         if let Some(img_base_dir) = image_base_save_dir {
-            log_message(&format!("Downloading images into subdirectories of {}...", img_base_dir.display()), quiet_mode);
+            info!(directory = %img_base_dir.display(), "Downloading images");
 
             let mut download_futures = Vec::new();
             for item in &extracted_results {
@@ -477,31 +108,30 @@ async fn main() -> Result<(), Box<dyn Error>> {
                let item_id_clone = item.item_id.clone();
 
                download_futures.push(async move {
-                    if let Err(e) = html_scraper::download_image(&img_url_clone, &img_base_dir_clone, quiet_mode).await {
-                        log_error(&format!("Skipping image download for item ID {} (URL: {}) due to error: {}", item_id_clone, img_url_clone, e), quiet_mode);
+                    if let Err(e) = html_utils::download_image(&img_url_clone, &img_base_dir_clone).await { // This now returns Result<(), AppError>
+                        warn!(item_id = %item_id_clone, image_url = %img_url_clone, error = %e, "Skipping image download due to error");
                     }
                });
            }
        }
        futures::future::join_all(download_futures).await;
-        log_message("Image download process finished.", quiet_mode);
+        info!("Image download process finished.");
        }
    }
 
-    if quiet_mode {
-        println!("{}", serde_json::to_string(&extracted_results)?);
+    if cli_args.only_json {
+        println!("{}", serde_json::to_string(&extracted_results)?); // serde_json::Error converts
     } else {
-        println!("{}", serde_json::to_string_pretty(&extracted_results)?);
+        println!("{}", serde_json::to_string_pretty(&extracted_results)?); // serde_json::Error converts
     }
 
     Ok(())
 }
 
-// --- Unit tests for parser functions (optional, but good practice) ---
 #[cfg(test)]
 mod tests {
-    use super::parser::*;
-    use super::SizeQuantityInfo;
+    use super::parser_utils::*;
+    use super::item::SizeQuantityInfo;
 
     fn assert_sq_info_eq(actual: SizeQuantityInfo, expected_total_tb: f64, expected_quantity: i32, expected_ind_size_tb: f64, expected_check: bool) {
         assert!((actual.total_tb - expected_total_tb).abs() < 0.0001, "TotalTB mismatch. Expected: {}, Got: {}", expected_total_tb, actual.total_tb);
@@ -527,7 +157,7 @@ mod tests {
        ];
 
        for (title, total_tb, quantity, ind_size_tb, check) in test_cases {
-            println!("Testing title: {}", title);
+            tracing::debug!(testing_title = %title, "Running test_parse_size_and_quantity");
            let result = parse_size_and_quantity(title);
            assert_sq_info_eq(result, total_tb, quantity, ind_size_tb, check);
        }
@@ -544,4 +174,3 @@ mod tests {
        assert_eq!(parse_price("25.50"), Some(25.50));
    }
 }
-
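
For orientation, typical invocations of the refactored binary might look like the following (a sketch, not from the commit; flag names assume clap's default kebab-case renaming):

// Hypothetical usage:
//   ebay_scraper_rust "https://www.ebay.com/sch/i.html?_nkw=ssd" --save page.html
//       fetch a search page, save the HTML, and download listing images
//   ebay_scraper_rust --load page.html --only-json
//       re-parse a saved page offline and print compact JSON to stdout
//   ebay_scraper_rust latest --per-page 120 --minimum-cost 50.00
//       scrape the newest listings via the constructed search URL
// Logs go to stderr (tracing_subscriber is built with std::io::stderr as its
// writer), so stdout stays clean JSON for piping.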
ebay_storage/rust/src/parser_utils.rs | 122 (new file)
@@ -0,0 +1,122 @@
+// src/parser_utils.rs
+use super::item::SizeQuantityInfo; // Assuming item.rs is in the same directory (src)
+use lazy_static::lazy_static;
+use regex::Regex;
+
+lazy_static! {
+    static ref EXPLICIT_QTY_PATTERNS: Vec<Regex> = vec![
+        Regex::new(r"\b(?:LOT\s+OF|LOT)\s*\(?\s*(\d+)\s*\)?").unwrap(),
+        Regex::new(r"\b(?:LOT\s+OF|LOT)\s*\*\s*(\d+)").unwrap(),
+        Regex::new(r"\b(?:PACK\s+OF|PACK|BULK)\s*\(?\s*(\d+)\s*\)?").unwrap(),
+        Regex::new(r"\b(\d+)\s*-\s*PACK\b").unwrap(),
+        Regex::new(r"\b(\d+)\s*COUNT\b").unwrap(),
+    ];
+    static ref SIZE_REGEX: Regex = Regex::new(r"(\d+(?:\.\d+)?)\s*(TB|GB)\b").unwrap();
+    static ref SIZE_RANGE_REGEX: Regex =
+        Regex::new(r"\d+(?:\.\d+)?\s*(?:GB|TB)\s*(?:-|&|OR|TO)\s*\d+(?:\.\d+)?\s*(?:GB|TB)")
+            .unwrap();
+    static ref PRICE_REGEX: Regex = Regex::new(r"\$?([\d,]+\.?\d*)").unwrap();
+}
+
+/// Parses size and quantity information from an item title.
+pub fn parse_size_and_quantity(title: &str) -> SizeQuantityInfo {
+    let upper_title = title.to_uppercase();
+    let mut total_tb = 0.0;
+    let mut quantity = 1;
+    let mut needed_description_check = false;
+    let mut individual_size_tb = 0.0;
+
+    for pattern in EXPLICIT_QTY_PATTERNS.iter() {
+        if let Some(caps) = pattern.captures(&upper_title) {
+            if let Some(qty_match) = caps.get(1) {
+                if let Ok(parsed_qty) = qty_match.as_str().parse::<i32>() {
+                    if parsed_qty > 0 && parsed_qty < 500 {
+                        quantity = parsed_qty;
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    let mut size_matches: Vec<(f64, String)> = Vec::new();
+    for caps in SIZE_REGEX.captures_iter(&upper_title) {
+        if let (Some(val_str), Some(unit_str)) = (caps.get(1), caps.get(2)) {
+            if let Ok(val) = val_str.as_str().parse::<f64>() {
+                size_matches.push((val, unit_str.as_str().to_string()));
+            }
+        }
+    }
+
+    if !size_matches.is_empty() {
+        let mut unique_sizes_tb: Vec<f64> = size_matches
+            .iter()
+            .map(|(val, unit)| if unit == "GB" { *val / 1000.0 } else { *val })
+            .collect();
+        unique_sizes_tb.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
+        unique_sizes_tb.dedup();
+
+        if !unique_sizes_tb.is_empty() {
+            individual_size_tb = unique_sizes_tb[0];
+            if unique_sizes_tb.len() > 1 {
+                needed_description_check = true;
+            }
+        }
+    }
+
+    if SIZE_RANGE_REGEX.is_match(&upper_title) {
+        needed_description_check = true;
+    }
+    if quantity > 1 && upper_title.contains("MIXED") {
+        needed_description_check = true;
+    }
+    if upper_title.contains("CHECK THE DESCRIPTION")
+        || upper_title.contains("CHECK DESCRIPTION")
+        || upper_title.contains("SEE DESCRIPTION")
+    {
+        if quantity > 1 || size_matches.is_empty() || size_matches.len() > 1 {
+            needed_description_check = true;
+        }
+    }
+
+    if individual_size_tb > 0.0 {
+        total_tb = individual_size_tb * quantity as f64;
+    }
+
+    if quantity > 1 && total_tb == 0.0 && !size_matches.is_empty() {
+        needed_description_check = true;
+    }
+
+    if quantity == 1 && size_matches.len() == 1 && !needed_description_check {
+        // This condition is implicitly handled
+    }
+
+    SizeQuantityInfo {
+        total_tb: (total_tb * 10000.0).round() / 10000.0,
+        quantity,
+        individual_size_tb: (individual_size_tb * 10000.0).round() / 10000.0,
+        needed_description_check,
+    }
+}
+
+/// Parses price from a string, taking the first price if it's a range.
+pub fn parse_price(price_text: &str) -> Option<f64> {
+    let lower_price_text = price_text.to_lowercase();
+    if lower_price_text.contains(" to ") {
+        if let Some(first_part) = lower_price_text.split(" to ").next() {
+            if let Some(caps) = PRICE_REGEX.captures(first_part) {
+                if let Some(price_match) = caps.get(1) {
+                    return price_match.as_str().replace(',', "").parse().ok();
+                }
+            }
+        }
+        return None;
+    }
+
+    if let Some(caps) = PRICE_REGEX.captures(price_text) {
+        if let Some(price_match) = caps.get(1) {
+            return price_match.as_str().replace(',', "").parse().ok();
+        }
+    }
+    None
+}