// ebay_command_line_tool.js V3
// Node.js script with commands to scrape eBay and output JSON.
// Enhanced with network blocking for --load, --only_json flag, and improved help.
// Usage: node ebay_command_line_tool.js [command] [options] [url]

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');
const { Command } = require('commander');

// --- Load Core Script ---
// The parser source is read once at startup and later injected into the page as a string.
const coreScriptPath = path.join(__dirname, 'ebay_core.js'); // Assumes ebay_core.js is in the same directory
let ebayCoreScriptContent;
try {
  ebayCoreScriptContent = fs.readFileSync(coreScriptPath, 'utf8');
  if (!ebayCoreScriptContent) {
    throw new Error("ebay_core.js is empty or could not be read properly.");
  }
} catch (e) {
  // This initial error should always print, regardless of --only_json
  console.error(`Critical Error: Could not read ebay_core.js from ${coreScriptPath}`);
  console.error("Please ensure 'ebay_core.js' exists in the same directory as this script.");
  console.error(e.message);
  process.exit(1);
}

// --- Global State for --only_json ---
let quietMode = false;

// --- Logger functions that respect quietMode ---

/**
 * Writes an informational message to stdout unless quiet mode is active.
 * @param {string} message - Text to print.
 */
function logMessage(message) {
  if (!quietMode) {
    console.log(message);
  }
}

/**
 * Writes an error message to stderr unless quiet mode is active.
 * @param {string} message - Text to print.
 */
function logError(message) {
  if (!quietMode) {
    console.error(message);
  }
}

/**
 * Reports an invalid command-line value.
 * Outside quiet mode the message is logged and the process exits with status 1.
 * In quiet mode nothing is logged and an Error is thrown instead, so the
 * top-level handler can emit a JSON error object.
 * @param {string} message - Human-readable message for the log.
 * @param {string} reason - Short message carried by the thrown Error.
 * @throws {Error} Always, when quiet mode is active.
 */
function failValidation(message, reason) {
  logError(message);
  if (!quietMode) {
    process.exit(1);
  }
  throw new Error(reason);
}

// --- Main Scraping Function (Updated) ---

/**
 * Loads a page (from a saved HTML file or a live URL), injects the core
 * parser script and returns whatever `window.EbayParser.extractDataFromPage()`
 * produces in the page.
 *
 * Any error raised while scraping is logged and converted into an empty
 * array; the browser is always closed before returning.
 *
 * @param {object} options
 * @param {?string} [options.url] - URL to navigate to (used when no htmlFile is given).
 * @param {?string} [options.htmlFile] - Path of a saved HTML file; takes precedence over url.
 * @param {?string} [options.saveFile] - Path to write the fetched HTML to (URL mode only).
 * @returns {Promise<Array>} Extracted results, or [] when an error occurred.
 */
async function scrapeEbay({ url = null, htmlFile = null, saveFile = null }) {
  logMessage("Starting scraping process...");
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36');

    let htmlContentToParse;
    if (htmlFile) {
      // --- Load from File with Network Blocking ---
      logMessage(`Loading HTML from ${htmlFile}...`);
      htmlContentToParse = fs.readFileSync(htmlFile, 'utf8');

      logMessage("Enabling request interception to block network calls...");
      await page.setRequestInterception(true);
      page.on('request', (request) => {
        // Abort all types of requests
        logMessage(`Blocking request to: ${request.url()}`);
        // abort() returns a promise; handle a rejection here so it cannot
        // surface as an unhandled rejection outside the surrounding try/catch.
        request.abort().catch((abortError) => {
          logError(`Failed to block request to ${request.url()}: ${abortError.message}`);
        });
      });

      await page.setContent(htmlContentToParse, { waitUntil: 'domcontentloaded' });
      logMessage("HTML loaded and set. Network requests are being blocked.");
    } else if (url) {
      // --- Fetch from URL ---
      logMessage(`Navigating to ${url}...`);
      await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 });
      logMessage("Navigation successful.");
      htmlContentToParse = await page.content();
      logMessage("Page content retrieved.");

      // --- Save to File (if requested) ---
      if (saveFile && htmlContentToParse) {
        logMessage(`Saving HTML to ${saveFile}...`);
        fs.writeFileSync(saveFile, htmlContentToParse, 'utf8');
        logMessage("HTML saved.");
      }
    } else {
      // Caught by the catch block below, so callers receive [] for this case too.
      throw new Error("Internal Error: Neither URL nor HTML file was provided to scrapeEbay.");
    }

    // --- Inject and Execute Core Logic ---
    logMessage("Injecting core parser script...");
    await page.evaluate(ebayCoreScriptContent);
    logMessage("Core script injected. Extracting data...");

    const extractedResults = await page.evaluate(() => {
      if (typeof window.EbayParser === 'undefined' || typeof window.EbayParser.extractDataFromPage !== 'function') {
        throw new Error("EbayParser or EbayParser.extractDataFromPage function was not properly injected or is missing in ebay_core.js!");
      }
      return window.EbayParser.extractDataFromPage();
    });

    logMessage(`Data extraction complete. Found ${extractedResults.length} items.`);
    return extractedResults;
  } catch (e) {
    logError(`An error occurred during the scraping process: ${e.message}`);
    // The stack trace is a debugging aid and is only printed outside quiet mode.
    if (!quietMode && e.stack) {
      console.error(e.stack);
    }
    return []; // Return empty array on error
  } finally {
    if (browser) {
      await browser.close();
      logMessage("Browser closed.");
    }
  }
}

// --- Wrapper to run scraping and print results ---

/**
 * Runs scrapeEbay with the given options and prints the result.
 * In quiet mode only the JSON document is written to stdout (no trailing
 * newline); otherwise the JSON is printed when there is data and an
 * informational message is logged when there is none.
 * @param {object} options - Passed through to scrapeEbay unchanged.
 * @returns {Promise<void>}
 */
async function runScraping(options) {
  try {
    const data = await scrapeEbay(options);
    if (quietMode) {
      // Only output JSON string, no extra newlines or messages
      process.stdout.write(JSON.stringify(data, null, 2));
    } else if (data && data.length > 0) {
      console.log(JSON.stringify(data, null, 2));
    } else {
      logMessage("No data extracted or a critical error occurred during scraping.");
    }
  } catch (e) {
    logError(`Critical error in runScraping: ${e.message}`);
    if (!quietMode && e.stack) {
      console.error(e.stack);
    }
    if (quietMode) {
      // Ensure valid JSON output even on error for piping
      process.stdout.write(JSON.stringify({ error: e.message, data: [] }));
    }
  }
}

// --- Setup Command Line Interface ---
const program = new Command();

// The <file> placeholders make --save and --load take a value. Without a
// placeholder commander does not read the following argument as the option's value.
program
  .name('ebay-scraper')
  .description('Scrapes eBay search results for SSD/HDD cost per TB.')
  .version('3.0.0')
  .option('--save <file>', 'Save the scraped HTML to a file.')
  .option('--load <file>', 'Load HTML from a file instead of fetching from eBay (disables network).')
  .option('--only_json', 'Suppress all informational logs and output only the final JSON.', false)
  .on('option:only_json', () => {
    quietMode = true;
  });

program
  .command('latest')
  .description('Scrapes the latest listings using a predefined search. Use "ebay-scraper latest --help" to see specific options for this command.')
  .option('--per_page <count>', 'Items per page (60, 120, or 240)', '60')
  .option('--minimum_cost <amount>', 'Minimum cost for listings (e.g., 50.00)', '0.00')
  .action(async (cmdOptions) => {
    // cmdOptions holds the 'latest' command's own options; --save, --load and
    // --only_json live on the root program.
    const globalOptions = program.opts();
    if (globalOptions.only_json) {
      quietMode = true; // Ensure quietMode is set if command is run directly
    }

    if (globalOptions.load) {
      logMessage("Using --load, 'latest' command options for URL generation will be ignored.");
      await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save });
      return;
    }

    // Option values arrive as strings, so the allowed page sizes are compared as strings.
    const validPages = ['60', '120', '240'];
    if (!validPages.includes(cmdOptions.per_page)) {
      failValidation(`Error: --per_page must be one of ${validPages.join(', ')}.`, "Invalid per_page");
    }

    // isFinite rejects NaN as well as Infinity, which would otherwise be
    // formatted into the URL as "_udlo=Infinity".
    const minCost = Number.parseFloat(cmdOptions.minimum_cost);
    if (!Number.isFinite(minCost)) {
      failValidation("Error: --minimum_cost must be a number.", "Invalid minimum_cost");
    }

    const baseUrl = 'https://www.ebay.com/sch/i.html?_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10';
    const url = `${baseUrl}&_ipg=${cmdOptions.per_page}&_udlo=${minCost.toFixed(2)}`;
    logMessage(`Constructed URL for 'latest': ${url}`);
    await runScraping({ url: url, saveFile: globalOptions.save });
  });

// Handle URL as an argument. This will act as the default action if no other command is matched.
program
  .argument('[url]', 'The full eBay search URL to scrape.')
  .action(async (url) => {
    const globalOptions = program.opts();
    if (globalOptions.only_json) {
      quietMode = true;
    }

    // If 'url' is undefined here, it means no command and no URL was provided.
    // 'latest' command has its own action, so this won't run for 'latest'.
    if (globalOptions.load) {
      logMessage("Using --load, any provided URL argument will be ignored.");
      await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save });
    } else if (url) {
      await runScraping({ url: url, saveFile: globalOptions.save });
    } else {
      // If no URL, no --load, and not the 'latest' command, show help.
      const commandNames = program.commands.map((c) => c.name());
      if (!program.args.some((arg) => commandNames.includes(arg))) {
        program.help();
      }
    }
  });

// Add help text to guide users for subcommand help
program.addHelpText('after', `

Example calls:
  $ ebay-scraper latest --per_page 120 --minimum_cost 50
  $ ebay-scraper latest --help
  $ ebay-scraper "https://www.ebay.com/sch/i.html?_nkw=ssd"
  $ ebay-scraper --load saved_page.html --only_json | jq .
  $ ebay-scraper --save current_page.html "https://www.ebay.com/sch/i.html?_nkw=hdd"`);

// --- Parse Arguments and Run ---
(async () => {
  try {
    await program.parseAsync(process.argv);

    // No arguments at all: make sure the user sees the help text.
    if (process.argv.slice(2).length === 0) {
      program.help();
    }
  } catch (error) {
    logError(`Command parsing error: ${error.message}`);
    if (!quietMode && error.stack) {
      console.error(error.stack);
    }
    if (quietMode) {
      // Keep stdout valid JSON for consumers that pipe the output.
      process.stdout.write(JSON.stringify({ error: error.message, data: [] }));
    } else {
      process.exit(1);
    }
  }
})();