More CLI support
ebay_command_line_tool.js
@@ -1,91 +1,251 @@
// ebay_command_line_tool.js
// A Node.js script to scrape eBay search results and output JSON.
// Uses ebay_core.js for parsing and extraction logic.
// Usage: node ebay_command_line_tool.js "EBAY_SEARCH_URL"
// ebay_command_line_tool.js V3
// Node.js script with commands to scrape eBay and output JSON.
// Enhanced with network blocking for --load, --only_json flag, and improved help.
// Usage: node ebay_command_line_tool.js [command] [options] [url]

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');
const { Command } = require('commander');

// --- Main Scraping Function ---
async function scrapeEbayFromCommandLine(url) {
if (!url) {
console.error("Error: eBay search URL is required as the first argument.");
console.log("Example Usage: node ebay_command_line_tool.js \"https://www.ebay.com/sch/i.html?_nkw=ssd\"");
process.exit(1);
// --- Load Core Script ---
const coreScriptPath = path.join(__dirname, 'ebay_core.js'); // Assumes ebay_core.js is in the same directory
let ebayCoreScriptContent;
try {
ebayCoreScriptContent = fs.readFileSync(coreScriptPath, 'utf8');
if (!ebayCoreScriptContent) {
throw new Error("ebay_core.js is empty or could not be read properly.");
}
} catch (e) {
// This initial error should always print, regardless of --only_json
console.error(`Critical Error: Could not read ebay_core.js from ${coreScriptPath}`);
console.error("Please ensure 'ebay_core.js' exists in the same directory as this script.");
console.error(e.message);
process.exit(1);
}

// --- Load Core Script ---
// This assumes ebay_core.js is in the same directory as this script.
const coreScriptPath = path.join(__dirname, 'ebay_core.js');
let ebayCoreScriptContent;
try {
ebayCoreScriptContent = fs.readFileSync(coreScriptPath, 'utf8');
if (!ebayCoreScriptContent) {
throw new Error("ebay_core.js is empty or could not be read properly.");
}
} catch (e) {
console.error(`Error: Could not read ebay_core.js from ${coreScriptPath}`);
console.error("Please ensure 'ebay_core.js' exists in the same directory as this script.");
console.error(e.message);
process.exit(1);
// --- Global State for --only_json ---
let quietMode = false;

// --- Logger functions that respect quietMode ---
function logMessage(message) {
if (!quietMode) {
console.log(message);
}
}
function logError(message) {
if (!quietMode) {
console.error(message);
}
}

console.log(`Attempting to scrape: ${url}`);
let browser; // Declare browser outside try so it can be closed in finally

// --- Main Scraping Function (Updated) ---
async function scrapeEbay({ url = null, htmlFile = null, saveFile = null }) {
logMessage("Starting scraping process...");

let browser;
try {
browser = await puppeteer.launch({
headless: true, // Set to false for debugging to see the browser
args: ['--no-sandbox', '--disable-setuid-sandbox'] // Common args for server environments
headless: true,
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
const page = await browser.newPage();
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36');
// Increase navigation timeout and wait until network is idle
await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 });

let htmlContentToParse;

if (htmlFile) {
// --- Load from File with Network Blocking ---
logMessage(`Loading HTML from ${htmlFile}...`);
htmlContentToParse = fs.readFileSync(htmlFile, 'utf8');

logMessage("Enabling request interception to block network calls...");
await page.setRequestInterception(true);
page.on('request', (request) => {
// Abort all types of requests
logMessage(`Blocking request to: ${request.url()}`);
request.abort();
});

await page.setContent(htmlContentToParse, { waitUntil: 'domcontentloaded' });
logMessage("HTML loaded and set. Network requests are being blocked.");
} else if (url) {
// --- Fetch from URL ---
logMessage(`Navigating to ${url}...`);
await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 });
logMessage("Navigation successful.");
htmlContentToParse = await page.content();
logMessage("Page content retrieved.");

// --- Save to File (if requested) ---
if (saveFile && htmlContentToParse) {
logMessage(`Saving HTML to ${saveFile}...`);
fs.writeFileSync(saveFile, htmlContentToParse, 'utf8');
logMessage("HTML saved.");
}
} else {
// This error should be caught and handled by the caller or a try-catch within runScraping
throw new Error("Internal Error: Neither URL nor HTML file was provided to scrapeEbay.");
}

// --- Inject and Execute Core Logic ---
// Inject the core parser script into the page context
// This makes the EbayParser object available in the page's window scope
logMessage("Injecting core parser script...");
await page.evaluate(ebayCoreScriptContent);
logMessage("Core script injected. Extracting data...");

// Now call the extraction function from the injected script
const extractedResults = await page.evaluate(() => {
// EbayParser should now be available on the window object
if (typeof window.EbayParser === 'undefined' || typeof window.EbayParser.extractDataFromPage !== 'function') {
// This error will be caught by the outer try/catch if thrown
throw new Error("EbayParser or EbayParser.extractDataFromPage function was not properly injected or is missing in ebay_core.js!");
}
return window.EbayParser.extractDataFromPage(); // This calls the function defined in ebay_core.js
return window.EbayParser.extractDataFromPage();
});
logMessage(`Data extraction complete. Found ${extractedResults.length} items.`);

return extractedResults;

} catch (e) {
console.error("An error occurred during the scraping process:", e.message);
// If running in a visible mode, a screenshot can be helpful.
// if (browser && page) { // Check if page exists
// try {
// await page.screenshot({ path: 'ebay_scraping_error.png' });
// console.log("A screenshot 'ebay_scraping_error.png' has been saved for debugging.");
// } catch(se) { console.error("Could not save screenshot:", se.message); }
// }
logError(`An error occurred during the scraping process: ${e.message}`);
// For debugging, you might want to see the stack trace even in quiet mode for critical errors
if (!quietMode && e.stack) {
console.error(e.stack);
}
return []; // Return empty array on error
} finally {
if (browser) {
await browser.close();
logMessage("Browser closed.");
}
}
}

// --- Script Execution ---
// The first actual argument to the script (process.argv[0] is node, process.argv[1] is the script path)
const searchUrl = process.argv[2];
// --- Setup Command Line Interface ---
const program = new Command();

program
.name('ebay-scraper')
.description('Scrapes eBay search results for SSD/HDD cost per TB.')
.version('3.0.0')
.option('--save <filename>', 'Save the scraped HTML to a file.')
.option('--load <filename>', 'Load HTML from a file instead of fetching from eBay (disables network).')
.option('--only_json', 'Suppress all informational logs and output only the final JSON.', false)
.on('option:only_json', () => {
quietMode = true;
});

program
.command('latest') // Removed { isDefault: false } as it's not strictly needed with argument handling
.description('Scrapes the latest listings using a predefined search. Use "ebay-scraper latest --help" to see specific options for this command.')
.option('--per_page <number>', 'Items per page (60, 120, or 240)', '60')
.option('--minimum_cost <number>', 'Minimum cost for listings (e.g., 50.00)', '0.00')
.action(async (cmdOptions) => { // cmdOptions refers to 'latest' command's options
const globalOptions = program.opts(); // Access global options like --save, --load, --only_json
if (globalOptions.only_json) quietMode = true; // Ensure quietMode is set if command is run directly

if (globalOptions.load) {
logMessage("Using --load, 'latest' command options for URL generation will be ignored.");
await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save });
} else {
const validPages = ['60', '120', '240'];
if (!validPages.includes(cmdOptions.per_page)) {
logError(`Error: --per_page must be one of ${validPages.join(', ')}.`);
if (!quietMode) process.exit(1); else throw new Error("Invalid per_page");
}
const minCost = parseFloat(cmdOptions.minimum_cost);
if (isNaN(minCost)) {
logError("Error: --minimum_cost must be a number.");
if (!quietMode) process.exit(1); else throw new Error("Invalid minimum_cost");
}

const baseUrl = 'https://www.ebay.com/sch/i.html?_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10';
const url = `${baseUrl}&_ipg=${cmdOptions.per_page}&_udlo=${minCost.toFixed(2)}`;
logMessage(`Constructed URL for 'latest': ${url}`);
await runScraping({ url: url, saveFile: globalOptions.save });
}
});

// Handle URL as an argument. This will act as the default action if no other command is matched.
program
.argument('[url]', 'The full eBay search URL to scrape.')
.action(async (url, cmdOptions) => { // cmdOptions here are the global ones if no command specified
const globalOptions = program.opts();
if (globalOptions.only_json) quietMode = true;

// If 'url' is undefined here, it means no command and no URL was provided.
// 'latest' command has its own action, so this won't run for 'latest'.
if (globalOptions.load) {
logMessage("Using --load, any provided URL argument will be ignored.");
await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save });
} else if (url) {
await runScraping({ url: url, saveFile: globalOptions.save });
} else {
// If no URL, no --load, and not the 'latest' command, show help.
// This condition means no specific action was determined.
if (!program.args.find(arg => program.commands.map(c => c.name()).includes(arg))) {
program.help();
}
}
});

// Add help text to guide users for subcommand help
program.addHelpText('after', `
Example calls:
$ ebay-scraper latest --per_page 120 --minimum_cost 50
$ ebay-scraper latest --help
$ ebay-scraper "https://www.ebay.com/sch/i.html?_nkw=ssd"
$ ebay-scraper --load saved_page.html --only_json | jq .
$ ebay-scraper --save current_page.html "https://www.ebay.com/sch/i.html?_nkw=hdd"`);


// --- Wrapper to run scraping and print results ---
async function runScraping(options) {
try {
const data = await scrapeEbay(options);
if (quietMode) {
// Only output JSON string, no extra newlines or messages
process.stdout.write(JSON.stringify(data, null, 2));
} else {
if (data && data.length > 0) {
console.log(JSON.stringify(data, null, 2));
} else {
logMessage("No data extracted or a critical error occurred during scraping.");
}
}
} catch (e) {
logError(`Critical error in runScraping: ${e.message}`);
if (!quietMode && e.stack) console.error(e.stack);
if (quietMode) { // Ensure valid JSON output even on error for piping
process.stdout.write(JSON.stringify({error: e.message, data: []}));
}
}
}

// --- Parse Arguments and Run ---
(async () => {
const data = await scrapeEbayFromCommandLine(searchUrl);
if (data && data.length > 0) {
console.log(JSON.stringify(data, null, 2));
} else {
console.log("No data extracted. This could be due to an error, an empty page, or incorrect selectors in ebay_core.js.");
try {
await program.parseAsync(process.argv);
// If no command was matched by commander and no URL argument was given,
// and it's not just options like --version or --help that commander handles.
const knownCommands = program.commands.map(cmd => cmd.name());
const userArgs = process.argv.slice(2);
const potentialCommand = userArgs.find(arg => !arg.startsWith('-'));

if (userArgs.length > 0 && !knownCommands.includes(potentialCommand) && !program.args.includes(potentialCommand) && !program.opts().load && potentialCommand) {
// This case handles if a user types something that isn't a command or a URL after options.
// Example: `node script.js --only_json somegibberish`
// However, the default argument [url] should catch most of these.
// If a URL-like string is passed, it will be caught by the .argument('[url]') action.
} else if (process.argv.slice(2).length === 0) { // No arguments at all
program.help();
}

} catch (error) {
logError(`Command parsing error: ${error.message}`);
if (!quietMode && error.stack) console.error(error.stack);
if (quietMode) {
process.stdout.write(JSON.stringify({error: error.message, data: []}));
} else {
process.exit(1);
}
}
})();

65
ebay_core.js
@@ -1,24 +1,22 @@
// ebay_core.js - Shared Parsing & Extraction Logic
// ebay_core.js V1.1 - Shared Parsing & Extraction Logic
// Added itemCount and sizePerItemTB to output.
(function (root, factory) {
if (typeof module === 'object' && module.exports) {
// Node.js. Does not work with strict CommonJS, but
// works in a Node environment for use with fs.readFileSync + injection.
module.exports = factory();
} else {
// Browser globals (Greasemonkey via @require)
root.EbayParser = factory();
}
}(typeof self !== 'undefined' ? self : this, function () {
'use strict';

const EbayParser = {}; // The object we will export/attach
const EbayParser = {};

EbayParser.parseSizeAndQuantity = function(title) {
title = title ? title.toUpperCase() : "";
let totalTB = 0;
let quantity = 1;
let needed_description_check = false;
let individualSizeTB = 0;
let individualSizeTB = 0; // Will hold the size per item

const explicitQtyPatterns = [
/\b(?:LOT\s+OF|LOT)\s*\(?\s*(\d+)\s*\)?/i,
@@ -51,7 +49,7 @@
sizeMatches.map(sm => sm.unit === 'GB' ? sm.value / 1000 : sm.value)
)].sort((a, b) => a - b);
if (uniqueSizesTB.length > 0) {
individualSizeTB = uniqueSizesTB[0];
individualSizeTB = uniqueSizesTB[0]; // Set individual size
if (uniqueSizesTB.length > 1) needed_description_check = true;
}
}
@@ -77,10 +75,15 @@
needed_description_check = false;
}

return { totalTB: parseFloat(totalTB.toFixed(4)), quantity, needed_description_check };
return {
totalTB: parseFloat(totalTB.toFixed(4)),
quantity: quantity, // Renamed to 'quantity' internally, maps to 'itemCount'
needed_description_check: needed_description_check,
individualSizeTB: parseFloat(individualSizeTB.toFixed(4)) // Added size per item
};
};

EbayParser.parsePrice = function(priceText) {
EbayParser.parsePrice = function(priceText) { /* ... (Keep existing parsePrice function) ... */
priceText = priceText || "";
if (priceText.toLowerCase().includes(' to ')) {
return null;
@@ -92,26 +95,20 @@
return null;
};

EbayParser.runUnitTests = function() {
EbayParser.runUnitTests = function() { /* ... (Keep existing runUnitTests function) ... */
// Ensure console exists (for Node vs Browser safety, though Node has it)
const log = typeof console !== 'undefined' ? console.log : function() {};
const error = typeof console !== 'undefined' ? console.error : function() {};

log("Ebay Cost/TB: --- Running Unit Tests ---");
const testCases = [
{ title: "LOT OF (9) MAJOR BRAND 2.5\" 7MM SSD * Kingston, Samsung, SanDisk& PNY*120-250GB", expected: { totalTB: 1.080, quantity: 9, needed_description_check: true } },
{ title: "Lot of 10 Intel 256 GB 2.5\" SATA SSD different Model check the Description", expected: { totalTB: 2.560, quantity: 10, needed_description_check: true } },
{ title: "Lot of*10 Mixed brands 240GB-256GB 2.5\" SATA SSD Drives Working & tested", expected: { totalTB: 2.400, quantity: 10, needed_description_check: true } },
{ title: "Lot of 9 SSD 120&128 GB 2.5\" SATA different brands check the description", expected: { totalTB: 1.080, quantity: 9, needed_description_check: true } },
{ title: "Bulk 5 Lot Samsung 870 EVO 500GB SSD SATA - Used - Tested Passed Smart Test", expected: { totalTB: 2.500, quantity: 5, needed_description_check: false } },
{ title: "Samsung 1.6TB NVME PCIe 3.0 x8 2.75\" SSD MZPLK1T6HCHP PM1725 Series TLC", expected: { totalTB: 1.6, quantity: 1, needed_description_check: false } },
{ title: "Brand New Crucial X6 2TB Portable External SSD (CT2000X6SSD9)", expected: { totalTB: 2.0, quantity: 1, needed_description_check: false } },
{ title: "Western Digital WD_BLACK SN850X 2TB NVMe Internal SSD", expected: { totalTB: 2.0, quantity: 1, needed_description_check: false } },
{ title: "Corsair Force Series MP600 1TB Gen4 PCIe X4 NVMe M.2 SSD Up to 4950 MB/s CSSD...", expected: { totalTB: 1.0, quantity: 1, needed_description_check: false } },
{ title: "Micron 5100 MAX 1.84TB SATA 6Gb/s 2.5\" SSD MTFDDAK1T9TCC-1AR1ZABYY", expected: { totalTB: 1.84, quantity: 1, needed_description_check: false } },
{ title: "Dell 0HGX92 1.6TB 2.5” PCIe NVMe Gen4 SSD Intel D7-P5600 SSDPF2KE016T9T HGX92 ES", expected: { totalTB: 1.6, quantity: 1, needed_description_check: false } },
{ title: "10-PACK 1TB SSD", expected: { totalTB: 10.0, quantity: 10, needed_description_check: false } },
{ title: "LOT OF 2X 1TB SSDs", expected: { totalTB: 2.0, quantity: 2, needed_description_check: false } }
// Add expected individualSizeTB to tests
{ title: "LOT OF (9) MAJOR BRAND 2.5\" 7MM SSD * Kingston, Samsung, SanDisk& PNY*120-250GB", expected: { totalTB: 1.080, quantity: 9, individualSizeTB: 0.120, needed_description_check: true } },
{ title: "Lot of 10 Intel 256 GB 2.5\" SATA SSD different Model check the Description", expected: { totalTB: 2.560, quantity: 10, individualSizeTB: 0.256, needed_description_check: true } },
{ title: "Bulk 5 Lot Samsung 870 EVO 500GB SSD SATA - Used - Tested Passed Smart Test", expected: { totalTB: 2.500, quantity: 5, individualSizeTB: 0.500, needed_description_check: false } },
{ title: "Samsung 1.6TB NVME PCIe 3.0 x8 2.75\" SSD MZPLK1T6HCHP PM1725 Series TLC", expected: { totalTB: 1.6, quantity: 1, individualSizeTB: 1.6, needed_description_check: false } },
{ title: "Micron 5100 MAX 1.84TB SATA 6Gb/s 2.5\" SSD MTFDDAK1T9TCC-1AR1ZABYY", expected: { totalTB: 1.84, quantity: 1, individualSizeTB: 1.84, needed_description_check: false } },
{ title: "10-PACK 1TB SSD", expected: { totalTB: 10.0, quantity: 10, individualSizeTB: 1.0, needed_description_check: false } },
];

let testsPassed = 0;
@@ -119,16 +116,17 @@

testCases.forEach((test, index) => {
const result = EbayParser.parseSizeAndQuantity(test.title);
const totalTBCheck = Math.abs(result.totalTB - test.expected.totalTB) < 0.0001;
const quantityCheck = result.quantity === test.expected.quantity;
const neededCheck = result.needed_description_check === test.expected.needed_description_check;
const tbCheck = Math.abs(result.totalTB - test.expected.totalTB) < 0.0001;
const qCheck = result.quantity === test.expected.quantity;
const sizeCheck = Math.abs(result.individualSizeTB - test.expected.individualSizeTB) < 0.0001;
const needCheck = result.needed_description_check === test.expected.needed_description_check;

if (totalTBCheck && quantityCheck && neededCheck) {
if (tbCheck && qCheck && sizeCheck && needCheck) {
testsPassed++;
} else {
error(`Test ${index + 1}: FAILED - "${test.title}"`);
error(` Expected: totalTB=${test.expected.totalTB.toFixed(4)}, Q=${test.expected.quantity}, Check=${test.expected.needed_description_check}`);
error(` Actual: totalTB=${result.totalTB.toFixed(4)}, Q=${result.quantity}, Check=${result.needed_description_check}`);
error(` Expected: TTB=${test.expected.totalTB.toFixed(4)}, Q=${test.expected.quantity}, STB=${test.expected.individualSizeTB.toFixed(4)}, Check=${test.expected.needed_description_check}`);
error(` Actual: TTB=${result.totalTB.toFixed(4)}, Q=${result.quantity}, STB=${result.individualSizeTB.toFixed(4)}, Check=${result.needed_description_check}`);
testsFailed++;
}
});
@@ -137,7 +135,7 @@
return testsFailed === 0;
};

// This function is INTENDED TO RUN IN THE BROWSER via Puppeteer
// Updated to include itemCount and sizePerItemTB
EbayParser.extractDataFromPage = function() {
const itemSelector = 'li.s-item, li.srp-results__item, div.s-item[role="listitem"]';
const itemElements = document.querySelectorAll(itemSelector);
@@ -155,10 +153,11 @@

if (!title || !priceText || !itemUrl) return;

// Use the parser functions (assuming 'EbayParser' is global/available)
const listingPrice = EbayParser.parsePrice(priceText);
const parsedInfo = EbayParser.parseSizeAndQuantity(title);
const totalTB = parsedInfo.totalTB;
const quantity = parsedInfo.quantity; // Get quantity
const individualSizeTB = parsedInfo.individualSizeTB; // Get individual size
const needed_description_check = parsedInfo.needed_description_check;

let costPerTB = null;
@@ -177,6 +176,8 @@
itemId,
dateFound: today,
listingPrice,
itemCount: quantity, // <-- Added
sizePerItemTB: individualSizeTB > 0 ? parseFloat(individualSizeTB.toFixed(3)) : null, // <-- Added
totalTB: totalTB > 0 ? parseFloat(totalTB.toFixed(3)) : null,
costPerTB: costPerTB !== null ? parseFloat(costPerTB.toFixed(2)) : null,
needed_description_check,
@@ -186,5 +187,5 @@
return items;
};

return EbayParser; // Return the object
return EbayParser;
}));

package.json
@@ -4,7 +4,8 @@
"main": "ebay_command_line_tool.js",
"license": "MIT",
"dependencies": {
"puppeteer": "^24.9.0"
"puppeteer": "^24.9.0",
"commander": "^14.0.0"
},
"scripts": {
"scrape": "node ebay_command_line_tool.js"

48
readme.md
Normal file
@@ -0,0 +1,48 @@
# Greasemonkey scripts

## Ebay Scraper (Storage)

A truly awful, *very* LLM-generated scraping tool that helps me find good deals on eBay for storage. This is basically vibe coded, so the code quality has had virtually zero oversight. It was generated with Gemini 2.5 Pro; see the [first](https://g.co/gemini/share/bf17780ad083) and [second](https://g.co/gemini/share/3d80b96e42e9) conversations used to produce it.

```bash
greasemonkey on master is 📦 v1.0.0 via ⬢ v23.11.1 at ☸ default took 7s 593ms
➜ yarn --silent scrape --help
Usage: ebay-scraper [options] [command] [url]

Scrapes eBay search results for SSD/HDD cost per TB.

Arguments:
  url                The full eBay search URL to scrape.

Options:
  -V, --version      output the version number
  --save <filename>  Save the scraped HTML to a file.
  --load <filename>  Load HTML from a file instead of fetching from eBay (disables network).
  --only_json        Suppress all informational logs and output only the final JSON. (default: false)
  -h, --help         display help for command

Commands:
  latest [options]   Scrapes the latest listings using a predefined search. Use "ebay-scraper latest --help" to see specific options for this
                     command.

Example calls:
  $ ebay-scraper latest --per_page 120 --minimum_cost 50
  $ ebay-scraper latest --help
  $ ebay-scraper "https://www.ebay.com/sch/i.html?_nkw=ssd"
  $ ebay-scraper --load saved_page.html --only_json | jq .
  $ ebay-scraper --save current_page.html "https://www.ebay.com/sch/i.html?_nkw=hdd"
```


```bash
greasemonkey on master is 📦 v1.0.0 via ⬢ v23.11.1 at ☸ default
➜ yarn --silent scrape latest --help
Usage: ebay-scraper latest [options]

Scrapes the latest listings using a predefined search. Use "ebay-scraper latest --help" to see specific options for this command.

Options:
  --per_page <number>      Items per page (60, 120, or 240) (default: "60")
  --minimum_cost <number>  Minimum cost for listings (e.g., 50.00) (default: "0.00")
  -h, --help               display help for command
```
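
The `--only_json` output is intended to be piped into other tools. As a rough sketch of post-processing (the field names `costPerTB`, `itemCount`, `sizePerItemTB`, and `listingPrice` come from `ebay_core.js`; the `cheapest.js` filename is just an example), something like this could rank the cheapest listings per TB:

```js
// cheapest.js — read the scraper's --only_json output from stdin and
// print the ten cheapest listings by cost per TB. (Sketch, not part of the repo.)
let raw = '';
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => { raw += chunk; });
process.stdin.on('end', () => {
  const parsed = JSON.parse(raw);
  // On a scraping error in quiet mode the tool emits {error, data: []} instead of an array.
  const items = Array.isArray(parsed) ? parsed : (parsed.data || []);
  items
    .filter((item) => item.costPerTB !== null)   // drop listings with no parsable size/price
    .sort((a, b) => a.costPerTB - b.costPerTB)   // cheapest $/TB first
    .slice(0, 10)
    .forEach((item) => {
      console.log(`$${item.costPerTB}/TB  (${item.itemCount} x ${item.sizePerItemTB} TB @ $${item.listingPrice})`);
    });
});
```

For example: `yarn --silent scrape latest --only_json | node cheapest.js`.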
yarn.lock
@@ -159,6 +159,11 @@ color-name@~1.1.4:
  resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
  integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==

commander@^14.0.0:
  version "14.0.0"
  resolved "https://registry.yarnpkg.com/commander/-/commander-14.0.0.tgz#f244fc74a92343514e56229f16ef5c5e22ced5e9"
  integrity sha512-2uM9rYjPvyq39NwLRqaiLtWHyDC1FvryJDa2ATTVims5YAS4PupsEQsDvP14FqhFr0P49CYDugi59xaxJlTXRA==

cosmiconfig@^9.0.0:
  version "9.0.0"
  resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-9.0.0.tgz#34c3fc58287b915f3ae905ab6dc3de258b55ad9d"