From 8cb0fd12fbbc0b31704e54f70424f48636b63761 Mon Sep 17 00:00:00 2001 From: hak8or Date: Wed, 28 May 2025 00:34:55 -0400 Subject: [PATCH] More CLI support --- ebay_command_line_tool.js | 266 ++++++++++++++++++++++++++++++-------- ebay_core.js | 65 +++++----- package.json | 3 +- yarn.lock | 5 + 4 files changed, 253 insertions(+), 86 deletions(-) diff --git a/ebay_command_line_tool.js b/ebay_command_line_tool.js index 5b9f973..4ed98a3 100644 --- a/ebay_command_line_tool.js +++ b/ebay_command_line_tool.js @@ -1,91 +1,251 @@ -// ebay_command_line_tool.js -// A Node.js script to scrape eBay search results and output JSON. -// Uses ebay_core.js for parsing and extraction logic. -// Usage: node ebay_command_line_tool.js "EBAY_SEARCH_URL" +// ebay_command_line_tool.js V3 +// Node.js script with commands to scrape eBay and output JSON. +// Enhanced with network blocking for --load, --only_json flag, and improved help. +// Usage: node ebay_command_line_tool.js [command] [options] [url] const puppeteer = require('puppeteer'); const fs = require('fs'); const path = require('path'); +const { Command } = require('commander'); -// --- Main Scraping Function --- -async function scrapeEbayFromCommandLine(url) { - if (!url) { - console.error("Error: eBay search URL is required as the first argument."); - console.log("Example Usage: node ebay_command_line_tool.js \"https://www.ebay.com/sch/i.html?_nkw=ssd\""); - process.exit(1); +// --- Load Core Script --- +const coreScriptPath = path.join(__dirname, 'ebay_core.js'); // Assumes ebay_core.js is in the same directory +let ebayCoreScriptContent; +try { + ebayCoreScriptContent = fs.readFileSync(coreScriptPath, 'utf8'); + if (!ebayCoreScriptContent) { + throw new Error("ebay_core.js is empty or could not be read properly."); } +} catch (e) { + // This initial error should always print, regardless of --only_json + console.error(`Critical Error: Could not read ebay_core.js from ${coreScriptPath}`); + console.error("Please ensure 'ebay_core.js' exists in the same directory as this script."); + console.error(e.message); + process.exit(1); +} - // --- Load Core Script --- - // This assumes ebay_core.js is in the same directory as this script. - const coreScriptPath = path.join(__dirname, 'ebay_core.js'); - let ebayCoreScriptContent; - try { - ebayCoreScriptContent = fs.readFileSync(coreScriptPath, 'utf8'); - if (!ebayCoreScriptContent) { - throw new Error("ebay_core.js is empty or could not be read properly."); - } - } catch (e) { - console.error(`Error: Could not read ebay_core.js from ${coreScriptPath}`); - console.error("Please ensure 'ebay_core.js' exists in the same directory as this script."); - console.error(e.message); - process.exit(1); +// --- Global State for --only_json --- +let quietMode = false; + +// --- Logger functions that respect quietMode --- +function logMessage(message) { + if (!quietMode) { + console.log(message); } +} +function logError(message) { + if (!quietMode) { + console.error(message); + } +} - console.log(`Attempting to scrape: ${url}`); - let browser; // Declare browser outside try so it can be closed in finally + +// --- Main Scraping Function (Updated) --- +async function scrapeEbay({ url = null, htmlFile = null, saveFile = null }) { + logMessage("Starting scraping process..."); + + let browser; try { browser = await puppeteer.launch({ - headless: true, // Set to false for debugging to see the browser - args: ['--no-sandbox', '--disable-setuid-sandbox'] // Common args for server environments + headless: true, + args: ['--no-sandbox', '--disable-setuid-sandbox'] }); const page = await browser.newPage(); await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'); - // Increase navigation timeout and wait until network is idle - await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 }); + + let htmlContentToParse; + + if (htmlFile) { + // --- Load from File with Network Blocking --- + logMessage(`Loading HTML from ${htmlFile}...`); + htmlContentToParse = fs.readFileSync(htmlFile, 'utf8'); + + logMessage("Enabling request interception to block network calls..."); + await page.setRequestInterception(true); + page.on('request', (request) => { + // Abort all types of requests + logMessage(`Blocking request to: ${request.url()}`); + request.abort(); + }); + + await page.setContent(htmlContentToParse, { waitUntil: 'domcontentloaded' }); + logMessage("HTML loaded and set. Network requests are being blocked."); + } else if (url) { + // --- Fetch from URL --- + logMessage(`Navigating to ${url}...`); + await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 }); + logMessage("Navigation successful."); + htmlContentToParse = await page.content(); + logMessage("Page content retrieved."); + + // --- Save to File (if requested) --- + if (saveFile && htmlContentToParse) { + logMessage(`Saving HTML to ${saveFile}...`); + fs.writeFileSync(saveFile, htmlContentToParse, 'utf8'); + logMessage("HTML saved."); + } + } else { + // This error should be caught and handled by the caller or a try-catch within runScraping + throw new Error("Internal Error: Neither URL nor HTML file was provided to scrapeEbay."); + } // --- Inject and Execute Core Logic --- - // Inject the core parser script into the page context - // This makes the EbayParser object available in the page's window scope + logMessage("Injecting core parser script..."); await page.evaluate(ebayCoreScriptContent); + logMessage("Core script injected. Extracting data..."); - // Now call the extraction function from the injected script const extractedResults = await page.evaluate(() => { - // EbayParser should now be available on the window object if (typeof window.EbayParser === 'undefined' || typeof window.EbayParser.extractDataFromPage !== 'function') { - // This error will be caught by the outer try/catch if thrown throw new Error("EbayParser or EbayParser.extractDataFromPage function was not properly injected or is missing in ebay_core.js!"); } - return window.EbayParser.extractDataFromPage(); // This calls the function defined in ebay_core.js + return window.EbayParser.extractDataFromPage(); }); - + logMessage(`Data extraction complete. Found ${extractedResults.length} items.`); + return extractedResults; } catch (e) { - console.error("An error occurred during the scraping process:", e.message); - // If running in a visible mode, a screenshot can be helpful. - // if (browser && page) { // Check if page exists - // try { - // await page.screenshot({ path: 'ebay_scraping_error.png' }); - // console.log("A screenshot 'ebay_scraping_error.png' has been saved for debugging."); - // } catch(se) { console.error("Could not save screenshot:", se.message); } - // } + logError(`An error occurred during the scraping process: ${e.message}`); + // For debugging, you might want to see the stack trace even in quiet mode for critical errors + if (!quietMode && e.stack) { + console.error(e.stack); + } return []; // Return empty array on error } finally { if (browser) { await browser.close(); + logMessage("Browser closed."); } } } -// --- Script Execution --- -// The first actual argument to the script (process.argv[0] is node, process.argv[1] is the script path) -const searchUrl = process.argv[2]; +// --- Setup Command Line Interface --- +const program = new Command(); +program + .name('ebay-scraper') + .description('Scrapes eBay search results for SSD/HDD cost per TB.') + .version('3.0.0') + .option('--save ', 'Save the scraped HTML to a file.') + .option('--load ', 'Load HTML from a file instead of fetching from eBay (disables network).') + .option('--only_json', 'Suppress all informational logs and output only the final JSON.', false) + .on('option:only_json', () => { + quietMode = true; + }); + +program + .command('latest') // Removed { isDefault: false } as it's not strictly needed with argument handling + .description('Scrapes the latest listings using a predefined search. Use "ebay-scraper latest --help" to see specific options for this command.') + .option('--per_page ', 'Items per page (60, 120, or 240)', '60') + .option('--minimum_cost ', 'Minimum cost for listings (e.g., 50.00)', '0.00') + .action(async (cmdOptions) => { // cmdOptions refers to 'latest' command's options + const globalOptions = program.opts(); // Access global options like --save, --load, --only_json + if (globalOptions.only_json) quietMode = true; // Ensure quietMode is set if command is run directly + + if (globalOptions.load) { + logMessage("Using --load, 'latest' command options for URL generation will be ignored."); + await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save }); + } else { + const validPages = ['60', '120', '240']; + if (!validPages.includes(cmdOptions.per_page)) { + logError(`Error: --per_page must be one of ${validPages.join(', ')}.`); + if (!quietMode) process.exit(1); else throw new Error("Invalid per_page"); + } + const minCost = parseFloat(cmdOptions.minimum_cost); + if (isNaN(minCost)) { + logError("Error: --minimum_cost must be a number."); + if (!quietMode) process.exit(1); else throw new Error("Invalid minimum_cost"); + } + + const baseUrl = 'https://www.ebay.com/sch/i.html?_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10'; + const url = `${baseUrl}&_ipg=${cmdOptions.per_page}&_udlo=${minCost.toFixed(2)}`; + logMessage(`Constructed URL for 'latest': ${url}`); + await runScraping({ url: url, saveFile: globalOptions.save }); + } + }); + +// Handle URL as an argument. This will act as the default action if no other command is matched. +program + .argument('[url]', 'The full eBay search URL to scrape.') + .action(async (url, cmdOptions) => { // cmdOptions here are the global ones if no command specified + const globalOptions = program.opts(); + if (globalOptions.only_json) quietMode = true; + + // If 'url' is undefined here, it means no command and no URL was provided. + // 'latest' command has its own action, so this won't run for 'latest'. + if (globalOptions.load) { + logMessage("Using --load, any provided URL argument will be ignored."); + await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save }); + } else if (url) { + await runScraping({ url: url, saveFile: globalOptions.save }); + } else { + // If no URL, no --load, and not the 'latest' command, show help. + // This condition means no specific action was determined. + if (!program.args.find(arg => program.commands.map(c => c.name()).includes(arg))) { + program.help(); + } + } + }); + +// Add help text to guide users for subcommand help +program.addHelpText('after', ` +Example calls: + $ ebay-scraper latest --per_page 120 --minimum_cost 50 + $ ebay-scraper latest --help + $ ebay-scraper "https://www.ebay.com/sch/i.html?_nkw=ssd" + $ ebay-scraper --load saved_page.html --only_json | jq . + $ ebay-scraper --save current_page.html "https://www.ebay.com/sch/i.html?_nkw=hdd"`); + + +// --- Wrapper to run scraping and print results --- +async function runScraping(options) { + try { + const data = await scrapeEbay(options); + if (quietMode) { + // Only output JSON string, no extra newlines or messages + process.stdout.write(JSON.stringify(data, null, 2)); + } else { + if (data && data.length > 0) { + console.log(JSON.stringify(data, null, 2)); + } else { + logMessage("No data extracted or a critical error occurred during scraping."); + } + } + } catch (e) { + logError(`Critical error in runScraping: ${e.message}`); + if (!quietMode && e.stack) console.error(e.stack); + if (quietMode) { // Ensure valid JSON output even on error for piping + process.stdout.write(JSON.stringify({error: e.message, data: []})); + } + } +} + +// --- Parse Arguments and Run --- (async () => { - const data = await scrapeEbayFromCommandLine(searchUrl); - if (data && data.length > 0) { - console.log(JSON.stringify(data, null, 2)); - } else { - console.log("No data extracted. This could be due to an error, an empty page, or incorrect selectors in ebay_core.js."); + try { + await program.parseAsync(process.argv); + // If no command was matched by commander and no URL argument was given, + // and it's not just options like --version or --help that commander handles. + const knownCommands = program.commands.map(cmd => cmd.name()); + const userArgs = process.argv.slice(2); + const potentialCommand = userArgs.find(arg => !arg.startsWith('-')); + + if (userArgs.length > 0 && !knownCommands.includes(potentialCommand) && !program.args.includes(potentialCommand) && !program.opts().load && potentialCommand) { + // This case handles if a user types something that isn't a command or a URL after options. + // Example: `node script.js --only_json somegibberish` + // However, the default argument [url] should catch most of these. + // If a URL-like string is passed, it will be caught by the .argument('[url]') action. + } else if (process.argv.slice(2).length === 0) { // No arguments at all + program.help(); + } + + } catch (error) { + logError(`Command parsing error: ${error.message}`); + if (!quietMode && error.stack) console.error(error.stack); + if (quietMode) { + process.stdout.write(JSON.stringify({error: error.message, data: []})); + } else { + process.exit(1); + } } })(); diff --git a/ebay_core.js b/ebay_core.js index c07b77b..b446687 100644 --- a/ebay_core.js +++ b/ebay_core.js @@ -1,24 +1,22 @@ -// ebay_core.js - Shared Parsing & Extraction Logic +// ebay_core.js V1.1 - Shared Parsing & Extraction Logic +// Added itemCount and sizePerItemTB to output. (function (root, factory) { if (typeof module === 'object' && module.exports) { - // Node.js. Does not work with strict CommonJS, but - // works in a Node environment for use with fs.readFileSync + injection. module.exports = factory(); } else { - // Browser globals (Greasemonkey via @require) root.EbayParser = factory(); } }(typeof self !== 'undefined' ? self : this, function () { 'use strict'; - const EbayParser = {}; // The object we will export/attach + const EbayParser = {}; EbayParser.parseSizeAndQuantity = function(title) { title = title ? title.toUpperCase() : ""; let totalTB = 0; let quantity = 1; let needed_description_check = false; - let individualSizeTB = 0; + let individualSizeTB = 0; // Will hold the size per item const explicitQtyPatterns = [ /\b(?:LOT\s+OF|LOT)\s*\(?\s*(\d+)\s*\)?/i, @@ -51,7 +49,7 @@ sizeMatches.map(sm => sm.unit === 'GB' ? sm.value / 1000 : sm.value) )].sort((a, b) => a - b); if (uniqueSizesTB.length > 0) { - individualSizeTB = uniqueSizesTB[0]; + individualSizeTB = uniqueSizesTB[0]; // Set individual size if (uniqueSizesTB.length > 1) needed_description_check = true; } } @@ -77,10 +75,15 @@ needed_description_check = false; } - return { totalTB: parseFloat(totalTB.toFixed(4)), quantity, needed_description_check }; + return { + totalTB: parseFloat(totalTB.toFixed(4)), + quantity: quantity, // Renamed to 'quantity' internally, maps to 'itemCount' + needed_description_check: needed_description_check, + individualSizeTB: parseFloat(individualSizeTB.toFixed(4)) // Added size per item + }; }; - EbayParser.parsePrice = function(priceText) { + EbayParser.parsePrice = function(priceText) { /* ... (Keep existing parsePrice function) ... */ priceText = priceText || ""; if (priceText.toLowerCase().includes(' to ')) { return null; @@ -92,26 +95,20 @@ return null; }; - EbayParser.runUnitTests = function() { + EbayParser.runUnitTests = function() { /* ... (Keep existing runUnitTests function) ... */ // Ensure console exists (for Node vs Browser safety, though Node has it) const log = typeof console !== 'undefined' ? console.log : function() {}; const error = typeof console !== 'undefined' ? console.error : function() {}; log("Ebay Cost/TB: --- Running Unit Tests ---"); const testCases = [ - { title: "LOT OF (9) MAJOR BRAND 2.5\" 7MM SSD * Kingston, Samsung, SanDisk& PNY*120-250GB", expected: { totalTB: 1.080, quantity: 9, needed_description_check: true } }, - { title: "Lot of 10 Intel 256 GB 2.5\" SATA SSD different Model check the Description", expected: { totalTB: 2.560, quantity: 10, needed_description_check: true } }, - { title: "Lot of*10 Mixed brands 240GB-256GB 2.5\" SATA SSD Drives Working & tested", expected: { totalTB: 2.400, quantity: 10, needed_description_check: true } }, - { title: "Lot of 9 SSD 120&128 GB 2.5\" SATA different brands check the description", expected: { totalTB: 1.080, quantity: 9, needed_description_check: true } }, - { title: "Bulk 5 Lot Samsung 870 EVO 500GB SSD SATA - Used - Tested Passed Smart Test", expected: { totalTB: 2.500, quantity: 5, needed_description_check: false } }, - { title: "Samsung 1.6TB NVME PCIe 3.0 x8 2.75\" SSD MZPLK1T6HCHP PM1725 Series TLC", expected: { totalTB: 1.6, quantity: 1, needed_description_check: false } }, - { title: "Brand New Crucial X6 2TB Portable External SSD (CT2000X6SSD9)", expected: { totalTB: 2.0, quantity: 1, needed_description_check: false } }, - { title: "Western Digital WD_BLACK SN850X 2TB NVMe Internal SSD", expected: { totalTB: 2.0, quantity: 1, needed_description_check: false } }, - { title: "Corsair Force Series MP600 1TB Gen4 PCIe X4 NVMe M.2 SSD Up to 4950 MB/s CSSD...", expected: { totalTB: 1.0, quantity: 1, needed_description_check: false } }, - { title: "Micron 5100 MAX 1.84TB SATA 6Gb/s 2.5\" SSD MTFDDAK1T9TCC-1AR1ZABYY", expected: { totalTB: 1.84, quantity: 1, needed_description_check: false } }, - { title: "Dell 0HGX92 1.6TB 2.5” PCIe NVMe Gen4 SSD Intel D7-P5600 SSDPF2KE016T9T HGX92 ES", expected: { totalTB: 1.6, quantity: 1, needed_description_check: false } }, - { title: "10-PACK 1TB SSD", expected: { totalTB: 10.0, quantity: 10, needed_description_check: false } }, - { title: "LOT OF 2X 1TB SSDs", expected: { totalTB: 2.0, quantity: 2, needed_description_check: false } } + // Add expected individualSizeTB to tests + { title: "LOT OF (9) MAJOR BRAND 2.5\" 7MM SSD * Kingston, Samsung, SanDisk& PNY*120-250GB", expected: { totalTB: 1.080, quantity: 9, individualSizeTB: 0.120, needed_description_check: true } }, + { title: "Lot of 10 Intel 256 GB 2.5\" SATA SSD different Model check the Description", expected: { totalTB: 2.560, quantity: 10, individualSizeTB: 0.256, needed_description_check: true } }, + { title: "Bulk 5 Lot Samsung 870 EVO 500GB SSD SATA - Used - Tested Passed Smart Test", expected: { totalTB: 2.500, quantity: 5, individualSizeTB: 0.500, needed_description_check: false } }, + { title: "Samsung 1.6TB NVME PCIe 3.0 x8 2.75\" SSD MZPLK1T6HCHP PM1725 Series TLC", expected: { totalTB: 1.6, quantity: 1, individualSizeTB: 1.6, needed_description_check: false } }, + { title: "Micron 5100 MAX 1.84TB SATA 6Gb/s 2.5\" SSD MTFDDAK1T9TCC-1AR1ZABYY", expected: { totalTB: 1.84, quantity: 1, individualSizeTB: 1.84, needed_description_check: false } }, + { title: "10-PACK 1TB SSD", expected: { totalTB: 10.0, quantity: 10, individualSizeTB: 1.0, needed_description_check: false } }, ]; let testsPassed = 0; @@ -119,16 +116,17 @@ testCases.forEach((test, index) => { const result = EbayParser.parseSizeAndQuantity(test.title); - const totalTBCheck = Math.abs(result.totalTB - test.expected.totalTB) < 0.0001; - const quantityCheck = result.quantity === test.expected.quantity; - const neededCheck = result.needed_description_check === test.expected.needed_description_check; + const tbCheck = Math.abs(result.totalTB - test.expected.totalTB) < 0.0001; + const qCheck = result.quantity === test.expected.quantity; + const sizeCheck = Math.abs(result.individualSizeTB - test.expected.individualSizeTB) < 0.0001; + const needCheck = result.needed_description_check === test.expected.needed_description_check; - if (totalTBCheck && quantityCheck && neededCheck) { + if (tbCheck && qCheck && sizeCheck && needCheck) { testsPassed++; } else { error(`Test ${index + 1}: FAILED - "${test.title}"`); - error(` Expected: totalTB=${test.expected.totalTB.toFixed(4)}, Q=${test.expected.quantity}, Check=${test.expected.needed_description_check}`); - error(` Actual: totalTB=${result.totalTB.toFixed(4)}, Q=${result.quantity}, Check=${result.needed_description_check}`); + error(` Expected: TTB=${test.expected.totalTB.toFixed(4)}, Q=${test.expected.quantity}, STB=${test.expected.individualSizeTB.toFixed(4)}, Check=${test.expected.needed_description_check}`); + error(` Actual: TTB=${result.totalTB.toFixed(4)}, Q=${result.quantity}, STB=${result.individualSizeTB.toFixed(4)}, Check=${result.needed_description_check}`); testsFailed++; } }); @@ -137,7 +135,7 @@ return testsFailed === 0; }; - // This function is INTENDED TO RUN IN THE BROWSER via Puppeteer + // Updated to include itemCount and sizePerItemTB EbayParser.extractDataFromPage = function() { const itemSelector = 'li.s-item, li.srp-results__item, div.s-item[role="listitem"]'; const itemElements = document.querySelectorAll(itemSelector); @@ -155,10 +153,11 @@ if (!title || !priceText || !itemUrl) return; - // Use the parser functions (assuming 'EbayParser' is global/available) const listingPrice = EbayParser.parsePrice(priceText); const parsedInfo = EbayParser.parseSizeAndQuantity(title); const totalTB = parsedInfo.totalTB; + const quantity = parsedInfo.quantity; // Get quantity + const individualSizeTB = parsedInfo.individualSizeTB; // Get individual size const needed_description_check = parsedInfo.needed_description_check; let costPerTB = null; @@ -177,6 +176,8 @@ itemId, dateFound: today, listingPrice, + itemCount: quantity, // <-- Added + sizePerItemTB: individualSizeTB > 0 ? parseFloat(individualSizeTB.toFixed(3)) : null, // <-- Added totalTB: totalTB > 0 ? parseFloat(totalTB.toFixed(3)) : null, costPerTB: costPerTB !== null ? parseFloat(costPerTB.toFixed(2)) : null, needed_description_check, @@ -186,5 +187,5 @@ return items; }; - return EbayParser; // Return the object + return EbayParser; })); diff --git a/package.json b/package.json index 945679e..f72b5e0 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,8 @@ "main": "ebay_command_line_tool.js", "license": "MIT", "dependencies": { - "puppeteer": "^24.9.0" + "puppeteer": "^24.9.0", + "commander": "^14.0.0" }, "scripts": { "scrape": "node ebay_command_line_tool.js" diff --git a/yarn.lock b/yarn.lock index 5d3d405..17371aa 100644 --- a/yarn.lock +++ b/yarn.lock @@ -159,6 +159,11 @@ color-name@~1.1.4: resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== +commander@^14.0.0: + version "14.0.0" + resolved "https://registry.yarnpkg.com/commander/-/commander-14.0.0.tgz#f244fc74a92343514e56229f16ef5c5e22ced5e9" + integrity sha512-2uM9rYjPvyq39NwLRqaiLtWHyDC1FvryJDa2ATTVims5YAS4PupsEQsDvP14FqhFr0P49CYDugi59xaxJlTXRA== + cosmiconfig@^9.0.0: version "9.0.0" resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-9.0.0.tgz#34c3fc58287b915f3ae905ab6dc3de258b55ad9d"