Even more updates (mostly CLI focused)
This commit is contained in:
@ -1,116 +1,171 @@
|
|||||||
// ebay_command_line_tool.js V3
|
// ebay_command_line_tool.js V4.1
|
||||||
// Node.js script with commands to scrape eBay and output JSON.
|
// Node.js script with commands to scrape eBay and output JSON.
|
||||||
// Enhanced with network blocking for --load, --only_json flag, and improved help.
|
// Images are now saved preserving their URL path structure within the save directory.
|
||||||
// Usage: node ebay_command_line_tool.js [command] [options] [url]
|
|
||||||
|
|
||||||
const puppeteer = require('puppeteer');
|
const puppeteer = require('puppeteer');
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
const { Command } = require('commander');
|
const { Command } = require('commander');
|
||||||
|
const https = require('https'); // For downloading images
|
||||||
|
const http = require('http'); // For downloading images (fallback)
|
||||||
|
const { URL } = require('url'); // For parsing image URLs
|
||||||
|
|
||||||
// --- Load Core Script ---
|
// --- Load Core Script ---
|
||||||
const coreScriptPath = path.join(__dirname, 'ebay_core.js'); // Assumes ebay_core.js is in the same directory
|
const coreScriptPath = path.join(__dirname, 'ebay_core.js');
|
||||||
let ebayCoreScriptContent;
|
let ebayCoreScriptContent;
|
||||||
try {
|
try {
|
||||||
ebayCoreScriptContent = fs.readFileSync(coreScriptPath, 'utf8');
|
ebayCoreScriptContent = fs.readFileSync(coreScriptPath, 'utf8');
|
||||||
if (!ebayCoreScriptContent) {
|
if (!ebayCoreScriptContent) throw new Error("ebay_core.js is empty.");
|
||||||
throw new Error("ebay_core.js is empty or could not be read properly.");
|
|
||||||
}
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
// This initial error should always print, regardless of --only_json
|
console.error(`Critical Error: Could not read ebay_core.js: ${e.message}`);
|
||||||
console.error(`Critical Error: Could not read ebay_core.js from ${coreScriptPath}`);
|
|
||||||
console.error("Please ensure 'ebay_core.js' exists in the same directory as this script.");
|
|
||||||
console.error(e.message);
|
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Global State for --only_json ---
|
|
||||||
let quietMode = false;
|
let quietMode = false;
|
||||||
|
function logMessage(message) { if (!quietMode) console.log(message); }
|
||||||
|
function logError(message) { if (!quietMode) console.error(message); }
|
||||||
|
|
||||||
// --- Logger functions that respect quietMode ---
|
// --- Image Downloading Function (Updated) ---
|
||||||
function logMessage(message) {
|
async function downloadImage(imageUrl, baseSaveDirectory) {
|
||||||
if (!quietMode) {
|
if (!imageUrl) return;
|
||||||
console.log(message);
|
try {
|
||||||
}
|
const parsedUrl = new URL(imageUrl);
|
||||||
}
|
|
||||||
function logError(message) {
|
// Get the full path from the URL (e.g., /images/g/5okAAeSwIGdoN8Ed/s-l500.webp)
|
||||||
if (!quietMode) {
|
// Ensure leading slash is removed for path.join to work as expected relative to baseSaveDirectory
|
||||||
console.error(message);
|
const imagePathFromUrl = parsedUrl.pathname.startsWith('/') ? parsedUrl.pathname.substring(1) : parsedUrl.pathname;
|
||||||
|
|
||||||
|
// Separate the directory part and the filename part from the URL path
|
||||||
|
const imageName = path.basename(imagePathFromUrl);
|
||||||
|
const imageSubdirectory = path.dirname(imagePathFromUrl);
|
||||||
|
|
||||||
|
// Construct the full local directory path
|
||||||
|
const fullLocalDirectory = path.join(baseSaveDirectory, imageSubdirectory);
|
||||||
|
const fullLocalImagePath = path.join(fullLocalDirectory, imageName);
|
||||||
|
|
||||||
|
// Ensure directory exists
|
||||||
|
if (!fs.existsSync(fullLocalDirectory)) {
|
||||||
|
fs.mkdirSync(fullLocalDirectory, { recursive: true });
|
||||||
|
logMessage(`Created image directory: ${fullLocalDirectory}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if file already exists to avoid re-downloading (optional, can be useful)
|
||||||
|
// if (fs.existsSync(fullLocalImagePath)) {
|
||||||
|
// logMessage(`Image already exists, skipping: ${fullLocalImagePath}`);
|
||||||
|
// return Promise.resolve();
|
||||||
|
// }
|
||||||
|
|
||||||
|
const fileStream = fs.createWriteStream(fullLocalImagePath);
|
||||||
|
const protocol = parsedUrl.protocol === 'https:' ? https : http;
|
||||||
|
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const request = protocol.get(imageUrl, (response) => {
|
||||||
|
if (response.statusCode !== 200) {
|
||||||
|
logError(`Failed to download image ${imageUrl}. Status: ${response.statusCode}`);
|
||||||
|
response.resume(); // Consume response data to free up resources
|
||||||
|
reject(new Error(`Status code ${response.statusCode} for ${imageUrl}`));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
response.pipe(fileStream);
|
||||||
|
fileStream.on('finish', () => {
|
||||||
|
fileStream.close(); // close() is async, call resolve after it's done
|
||||||
|
logMessage(`Downloaded image: ${fullLocalImagePath}`);
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
fileStream.on('error', (err) => { // Handle stream errors
|
||||||
|
logError(`Error writing image file ${fullLocalImagePath}: ${err.message}`);
|
||||||
|
fs.unlink(fullLocalImagePath, () => {}); // Attempt to delete partial file
|
||||||
|
reject(err);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
request.on('error', (err) => { // Handle request errors
|
||||||
|
logError(`Error downloading image ${imageUrl}: ${err.message}`);
|
||||||
|
// No partial file to unlink here as the request itself failed
|
||||||
|
reject(err);
|
||||||
|
});
|
||||||
|
// Set a timeout for the request
|
||||||
|
request.setTimeout(30000, () => { // 30 seconds timeout
|
||||||
|
request.destroy(); // Destroy the request object on timeout
|
||||||
|
logError(`Timeout downloading image ${imageUrl}`);
|
||||||
|
reject(new Error(`Timeout downloading image ${imageUrl}`));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
logError(`Error processing image URL ${imageUrl}: ${error.message}`);
|
||||||
|
return Promise.reject(error); // Propagate the error
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// --- Main Scraping Function (Updated) ---
|
// --- Main Scraping Function ---
|
||||||
async function scrapeEbay({ url = null, htmlFile = null, saveFile = null }) {
|
async function scrapeEbay({ url = null, htmlFile = null, saveFile = null }) {
|
||||||
logMessage("Starting scraping process...");
|
logMessage("Starting scraping process...");
|
||||||
|
|
||||||
let browser;
|
let browser;
|
||||||
try {
|
try {
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox', '--disable-setuid-sandbox'] });
|
||||||
headless: true,
|
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
|
||||||
});
|
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36');
|
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36');
|
||||||
|
|
||||||
let htmlContentToParse;
|
let htmlContentToParse;
|
||||||
|
|
||||||
if (htmlFile) {
|
if (htmlFile) {
|
||||||
// --- Load from File with Network Blocking ---
|
|
||||||
logMessage(`Loading HTML from ${htmlFile}...`);
|
logMessage(`Loading HTML from ${htmlFile}...`);
|
||||||
htmlContentToParse = fs.readFileSync(htmlFile, 'utf8');
|
htmlContentToParse = fs.readFileSync(htmlFile, 'utf8');
|
||||||
|
|
||||||
logMessage("Enabling request interception to block network calls...");
|
|
||||||
await page.setRequestInterception(true);
|
await page.setRequestInterception(true);
|
||||||
page.on('request', (request) => {
|
page.on('request', (request) => { request.abort(); });
|
||||||
// Abort all types of requests
|
|
||||||
logMessage(`Blocking request to: ${request.url()}`);
|
|
||||||
request.abort();
|
|
||||||
});
|
|
||||||
|
|
||||||
await page.setContent(htmlContentToParse, { waitUntil: 'domcontentloaded' });
|
await page.setContent(htmlContentToParse, { waitUntil: 'domcontentloaded' });
|
||||||
logMessage("HTML loaded and set. Network requests are being blocked.");
|
logMessage("HTML loaded. Network requests blocked.");
|
||||||
} else if (url) {
|
} else if (url) {
|
||||||
// --- Fetch from URL ---
|
|
||||||
logMessage(`Navigating to ${url}...`);
|
logMessage(`Navigating to ${url}...`);
|
||||||
await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 });
|
await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 });
|
||||||
logMessage("Navigation successful.");
|
logMessage("Navigation successful.");
|
||||||
htmlContentToParse = await page.content();
|
htmlContentToParse = await page.content();
|
||||||
logMessage("Page content retrieved.");
|
logMessage("Page content retrieved.");
|
||||||
|
|
||||||
// --- Save to File (if requested) ---
|
|
||||||
if (saveFile && htmlContentToParse) {
|
if (saveFile && htmlContentToParse) {
|
||||||
logMessage(`Saving HTML to ${saveFile}...`);
|
logMessage(`Saving HTML to ${saveFile}...`);
|
||||||
fs.writeFileSync(saveFile, htmlContentToParse, 'utf8');
|
fs.writeFileSync(saveFile, htmlContentToParse, 'utf8');
|
||||||
logMessage("HTML saved.");
|
logMessage("HTML saved.");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// This error should be caught and handled by the caller or a try-catch within runScraping
|
throw new Error("Internal Error: Neither URL nor HTML file was provided.");
|
||||||
throw new Error("Internal Error: Neither URL nor HTML file was provided to scrapeEbay.");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Inject and Execute Core Logic ---
|
|
||||||
logMessage("Injecting core parser script...");
|
logMessage("Injecting core parser script...");
|
||||||
await page.evaluate(ebayCoreScriptContent);
|
await page.evaluate(ebayCoreScriptContent);
|
||||||
logMessage("Core script injected. Extracting data...");
|
logMessage("Core script injected. Extracting data...");
|
||||||
|
|
||||||
const extractedResults = await page.evaluate(() => {
|
const extractedResults = await page.evaluate(() => {
|
||||||
if (typeof window.EbayParser === 'undefined' || typeof window.EbayParser.extractDataFromPage !== 'function') {
|
if (typeof window.EbayParser === 'undefined' || typeof window.EbayParser.extractDataFromPage !== 'function') {
|
||||||
throw new Error("EbayParser or EbayParser.extractDataFromPage function was not properly injected or is missing in ebay_core.js!");
|
throw new Error("EbayParser not found!");
|
||||||
}
|
}
|
||||||
return window.EbayParser.extractDataFromPage();
|
return window.EbayParser.extractDataFromPage();
|
||||||
});
|
});
|
||||||
logMessage(`Data extraction complete. Found ${extractedResults.length} items.`);
|
logMessage(`Data extraction complete. Found ${extractedResults.length} items.`);
|
||||||
|
|
||||||
return extractedResults;
|
// If HTML was fetched and --save was used, now download images
|
||||||
|
if (url && saveFile && extractedResults.length > 0) {
|
||||||
} catch (e) {
|
const baseSaveName = path.parse(saveFile).name; // e.g., "foo2"
|
||||||
logError(`An error occurred during the scraping process: ${e.message}`);
|
// The main directory for this save operation (e.g., "foo2/")
|
||||||
// For debugging, you might want to see the stack trace even in quiet mode for critical errors
|
const mainImageSaveDirectory = path.join(path.dirname(saveFile), baseSaveName);
|
||||||
if (!quietMode && e.stack) {
|
logMessage(`Downloading images for ${baseSaveName} into subdirectories of ${mainImageSaveDirectory}...`);
|
||||||
console.error(e.stack);
|
|
||||||
|
const downloadPromises = [];
|
||||||
|
for (const item of extractedResults) {
|
||||||
|
if (item.image_url) {
|
||||||
|
// Pass the mainImageSaveDirectory as the base for creating nested structure
|
||||||
|
downloadPromises.push(
|
||||||
|
downloadImage(item.image_url, mainImageSaveDirectory).catch(e => {
|
||||||
|
logError(`Skipping image download for item ID ${item.itemId || 'unknown'} (URL: ${item.image_url}) due to error: ${e.message}`);
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
await Promise.all(downloadPromises); // Wait for all image downloads to attempt completion
|
||||||
|
logMessage("Image download process finished.");
|
||||||
}
|
}
|
||||||
return []; // Return empty array on error
|
return extractedResults;
|
||||||
|
} catch (e) {
|
||||||
|
logError(`Scraping process error: ${e.message}`);
|
||||||
|
if (!quietMode && e.stack) console.error(e.stack);
|
||||||
|
return [];
|
||||||
} finally {
|
} finally {
|
||||||
if (browser) {
|
if (browser) {
|
||||||
await browser.close();
|
await browser.close();
|
||||||
@ -119,31 +174,26 @@ async function scrapeEbay({ url = null, htmlFile = null, saveFile = null }) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Setup Command Line Interface ---
|
|
||||||
const program = new Command();
|
const program = new Command();
|
||||||
|
|
||||||
program
|
program
|
||||||
.name('ebay-scraper')
|
.name('ebay-scraper')
|
||||||
.description('Scrapes eBay search results for SSD/HDD cost per TB.')
|
.description('Scrapes eBay search results.')
|
||||||
.version('3.0.0')
|
.version('4.1.0') // Version bump
|
||||||
.option('--save <filename>', 'Save the scraped HTML to a file.')
|
.option('--save <filename>', 'Save scraped HTML to a file (and download images if fetching from URL).')
|
||||||
.option('--load <filename>', 'Load HTML from a file instead of fetching from eBay (disables network).')
|
.option('--load <filename>', 'Load HTML from a file (disables network). Image download will not occur with --load.')
|
||||||
.option('--only_json', 'Suppress all informational logs and output only the final JSON.', false)
|
.option('--only_json', 'Suppress informational logs, output only final JSON.', false)
|
||||||
.on('option:only_json', () => {
|
.on('option:only_json', () => { quietMode = true; });
|
||||||
quietMode = true;
|
|
||||||
});
|
|
||||||
|
|
||||||
program
|
program
|
||||||
.command('latest') // Removed { isDefault: false } as it's not strictly needed with argument handling
|
.command('latest')
|
||||||
.description('Scrapes the latest listings using a predefined search. Use "ebay-scraper latest --help" to see specific options for this command.')
|
.description('Scrapes latest listings. Use "ebay-scraper latest --help" for options.')
|
||||||
.option('--per_page <number>', 'Items per page (60, 120, or 240)', '60')
|
.option('--per_page <number>', 'Items per page (60, 120, or 240)', '60')
|
||||||
.option('--minimum_cost <number>', 'Minimum cost for listings (e.g., 50.00)', '0.00')
|
.option('--minimum_cost <number>', 'Minimum cost (e.g., 50.00)', '0.00')
|
||||||
.action(async (cmdOptions) => { // cmdOptions refers to 'latest' command's options
|
.action(async (cmdOptions) => {
|
||||||
const globalOptions = program.opts(); // Access global options like --save, --load, --only_json
|
const globalOptions = program.opts();
|
||||||
if (globalOptions.only_json) quietMode = true; // Ensure quietMode is set if command is run directly
|
if (globalOptions.only_json) quietMode = true;
|
||||||
|
|
||||||
if (globalOptions.load) {
|
if (globalOptions.load) {
|
||||||
logMessage("Using --load, 'latest' command options for URL generation will be ignored.");
|
logMessage("Using --load for 'latest'. URL generation options ignored. Images will not be downloaded.");
|
||||||
await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save });
|
await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save });
|
||||||
} else {
|
} else {
|
||||||
const validPages = ['60', '120', '240'];
|
const validPages = ['60', '120', '240'];
|
||||||
@ -156,7 +206,6 @@ program
|
|||||||
logError("Error: --minimum_cost must be a number.");
|
logError("Error: --minimum_cost must be a number.");
|
||||||
if (!quietMode) process.exit(1); else throw new Error("Invalid minimum_cost");
|
if (!quietMode) process.exit(1); else throw new Error("Invalid minimum_cost");
|
||||||
}
|
}
|
||||||
|
|
||||||
const baseUrl = 'https://www.ebay.com/sch/i.html?_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10';
|
const baseUrl = 'https://www.ebay.com/sch/i.html?_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10';
|
||||||
const url = `${baseUrl}&_ipg=${cmdOptions.per_page}&_udlo=${minCost.toFixed(2)}`;
|
const url = `${baseUrl}&_ipg=${cmdOptions.per_page}&_udlo=${minCost.toFixed(2)}`;
|
||||||
logMessage(`Constructed URL for 'latest': ${url}`);
|
logMessage(`Constructed URL for 'latest': ${url}`);
|
||||||
@ -164,88 +213,67 @@ program
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Handle URL as an argument. This will act as the default action if no other command is matched.
|
|
||||||
program
|
program
|
||||||
.argument('[url]', 'The full eBay search URL to scrape.')
|
.argument('[url]', 'The full eBay search URL to scrape.')
|
||||||
.action(async (url, cmdOptions) => { // cmdOptions here are the global ones if no command specified
|
.action(async (url) => {
|
||||||
const globalOptions = program.opts();
|
const globalOptions = program.opts();
|
||||||
if (globalOptions.only_json) quietMode = true;
|
if (globalOptions.only_json) quietMode = true;
|
||||||
|
|
||||||
// If 'url' is undefined here, it means no command and no URL was provided.
|
|
||||||
// 'latest' command has its own action, so this won't run for 'latest'.
|
|
||||||
if (globalOptions.load) {
|
if (globalOptions.load) {
|
||||||
logMessage("Using --load, any provided URL argument will be ignored.");
|
logMessage("Using --load. Provided URL argument ignored. Images will not be downloaded.");
|
||||||
await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save });
|
await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save });
|
||||||
} else if (url) {
|
} else if (url) {
|
||||||
await runScraping({ url: url, saveFile: globalOptions.save });
|
await runScraping({ url: url, saveFile: globalOptions.save });
|
||||||
} else {
|
} else {
|
||||||
// If no URL, no --load, and not the 'latest' command, show help.
|
// If no URL, no --load, and not the 'latest' command, show help.
|
||||||
// This condition means no specific action was determined.
|
// Check if 'latest' was an argument. If so, commander handles its action.
|
||||||
if (!program.args.find(arg => program.commands.map(c => c.name()).includes(arg))) {
|
// If not, and no URL, then show help.
|
||||||
|
const isLatestCommand = process.argv.includes('latest');
|
||||||
|
if (!isLatestCommand) {
|
||||||
program.help();
|
program.help();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Add help text to guide users for subcommand help
|
|
||||||
program.addHelpText('after', `
|
program.addHelpText('after', `
|
||||||
Example calls:
|
Example calls:
|
||||||
$ ebay-scraper latest --per_page 120 --minimum_cost 50
|
$ ebay-scraper latest --per_page 120
|
||||||
$ ebay-scraper latest --help
|
|
||||||
$ ebay-scraper "https://www.ebay.com/sch/i.html?_nkw=ssd"
|
$ ebay-scraper "https://www.ebay.com/sch/i.html?_nkw=ssd"
|
||||||
$ ebay-scraper --load saved_page.html --only_json | jq .
|
$ ebay-scraper --load page.html --only_json | jq .
|
||||||
$ ebay-scraper --save current_page.html "https://www.ebay.com/sch/i.html?_nkw=hdd"`);
|
$ ebay-scraper --save page.html "https://www.ebay.com/sch/i.html?_nkw=hdd"`);
|
||||||
|
|
||||||
|
|
||||||
// --- Wrapper to run scraping and print results ---
|
|
||||||
async function runScraping(options) {
|
async function runScraping(options) {
|
||||||
try {
|
try {
|
||||||
const data = await scrapeEbay(options);
|
const data = await scrapeEbay(options);
|
||||||
if (quietMode) {
|
if (quietMode) {
|
||||||
// Only output JSON string, no extra newlines or messages
|
|
||||||
process.stdout.write(JSON.stringify(data, null, 2));
|
process.stdout.write(JSON.stringify(data, null, 2));
|
||||||
} else {
|
} else {
|
||||||
if (data && data.length > 0) {
|
if (data && data.length > 0) console.log(JSON.stringify(data, null, 2));
|
||||||
console.log(JSON.stringify(data, null, 2));
|
else logMessage("No data extracted or a critical error occurred.");
|
||||||
} else {
|
|
||||||
logMessage("No data extracted or a critical error occurred during scraping.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
logError(`Critical error in runScraping: ${e.message}`);
|
logError(`Critical error in runScraping: ${e.message}`);
|
||||||
if (!quietMode && e.stack) console.error(e.stack);
|
if (!quietMode && e.stack) console.error(e.stack);
|
||||||
if (quietMode) { // Ensure valid JSON output even on error for piping
|
if (quietMode) process.stdout.write(JSON.stringify({error: e.message, data: []}));
|
||||||
process.stdout.write(JSON.stringify({error: e.message, data: []}));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Parse Arguments and Run ---
|
|
||||||
(async () => {
|
(async () => {
|
||||||
try {
|
try {
|
||||||
await program.parseAsync(process.argv);
|
await program.parseAsync(process.argv);
|
||||||
// If no command was matched by commander and no URL argument was given,
|
// If no command was specified and no URL, Commander's default help might not trigger if only options are present.
|
||||||
// and it's not just options like --version or --help that commander handles.
|
// This ensures help is shown if no actionable arguments are given.
|
||||||
const knownCommands = program.commands.map(cmd => cmd.name());
|
const args = process.argv.slice(2);
|
||||||
const userArgs = process.argv.slice(2);
|
const hasActionableArg = args.some(arg => !arg.startsWith('-') || program.commands.some(cmd => cmd.name() === arg));
|
||||||
const potentialCommand = userArgs.find(arg => !arg.startsWith('-'));
|
if (args.length > 0 && !hasActionableArg && !program.opts().load) { // If only options like --only_json but no command/url/load
|
||||||
|
program.help();
|
||||||
if (userArgs.length > 0 && !knownCommands.includes(potentialCommand) && !program.args.includes(potentialCommand) && !program.opts().load && potentialCommand) {
|
} else if (args.length === 0) { // No arguments at all
|
||||||
// This case handles if a user types something that isn't a command or a URL after options.
|
|
||||||
// Example: `node script.js --only_json somegibberish`
|
|
||||||
// However, the default argument [url] should catch most of these.
|
|
||||||
// If a URL-like string is passed, it will be caught by the .argument('[url]') action.
|
|
||||||
} else if (process.argv.slice(2).length === 0) { // No arguments at all
|
|
||||||
program.help();
|
program.help();
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logError(`Command parsing error: ${error.message}`);
|
logError(`Command parsing error: ${error.message}`);
|
||||||
if (!quietMode && error.stack) console.error(error.stack);
|
if (!quietMode && error.stack) console.error(error.stack);
|
||||||
if (quietMode) {
|
if (quietMode) process.stdout.write(JSON.stringify({error: error.message, data: []}));
|
||||||
process.stdout.write(JSON.stringify({error: error.message, data: []}));
|
else process.exit(1);
|
||||||
} else {
|
|
||||||
process.exit(1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
})();
|
})();
|
||||||
|
145
ebay_core.js
145
ebay_core.js
@ -1,5 +1,7 @@
|
|||||||
// ebay_core.js V1.1 - Shared Parsing & Extraction Logic
|
// ebay_core.js V1.4 - Shared Parsing & Extraction Logic
|
||||||
// Added itemCount and sizePerItemTB to output.
|
// - Restructured JSON output with a "parsed" sub-object.
|
||||||
|
// - Added parser_engine version.
|
||||||
|
// - Removed itemUrl, added image_url.
|
||||||
(function (root, factory) {
|
(function (root, factory) {
|
||||||
if (typeof module === 'object' && module.exports) {
|
if (typeof module === 'object' && module.exports) {
|
||||||
module.exports = factory();
|
module.exports = factory();
|
||||||
@ -10,13 +12,14 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const EbayParser = {};
|
const EbayParser = {};
|
||||||
|
const PARSER_ENGINE_VERSION = 1;
|
||||||
|
|
||||||
EbayParser.parseSizeAndQuantity = function(title) {
|
EbayParser.parseSizeAndQuantity = function(title) {
|
||||||
title = title ? title.toUpperCase() : "";
|
title = title ? title.toUpperCase() : "";
|
||||||
let totalTB = 0;
|
let totalTB = 0;
|
||||||
let quantity = 1;
|
let quantity = 1;
|
||||||
let needed_description_check = false;
|
let needed_description_check = false;
|
||||||
let individualSizeTB = 0; // Will hold the size per item
|
let individualSizeTB = 0;
|
||||||
|
|
||||||
const explicitQtyPatterns = [
|
const explicitQtyPatterns = [
|
||||||
/\b(?:LOT\s+OF|LOT)\s*\(?\s*(\d+)\s*\)?/i,
|
/\b(?:LOT\s+OF|LOT)\s*\(?\s*(\d+)\s*\)?/i,
|
||||||
@ -49,7 +52,7 @@
|
|||||||
sizeMatches.map(sm => sm.unit === 'GB' ? sm.value / 1000 : sm.value)
|
sizeMatches.map(sm => sm.unit === 'GB' ? sm.value / 1000 : sm.value)
|
||||||
)].sort((a, b) => a - b);
|
)].sort((a, b) => a - b);
|
||||||
if (uniqueSizesTB.length > 0) {
|
if (uniqueSizesTB.length > 0) {
|
||||||
individualSizeTB = uniqueSizesTB[0]; // Set individual size
|
individualSizeTB = uniqueSizesTB[0];
|
||||||
if (uniqueSizesTB.length > 1) needed_description_check = true;
|
if (uniqueSizesTB.length > 1) needed_description_check = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -71,21 +74,26 @@
|
|||||||
if (quantity > 1 && totalTB === 0) {
|
if (quantity > 1 && totalTB === 0) {
|
||||||
needed_description_check = true;
|
needed_description_check = true;
|
||||||
}
|
}
|
||||||
if (quantity === 1 && sizeMatches.length === 1 && !needed_description_check) {
|
if (quantity === 1 && sizeMatches.length === 1 && !needed_description_check) {
|
||||||
needed_description_check = false;
|
needed_description_check = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
totalTB: parseFloat(totalTB.toFixed(4)),
|
totalTB: parseFloat(totalTB.toFixed(4)),
|
||||||
quantity: quantity, // Renamed to 'quantity' internally, maps to 'itemCount'
|
quantity: quantity,
|
||||||
needed_description_check: needed_description_check,
|
needed_description_check: needed_description_check,
|
||||||
individualSizeTB: parseFloat(individualSizeTB.toFixed(4)) // Added size per item
|
individualSizeTB: parseFloat(individualSizeTB.toFixed(4))
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
EbayParser.parsePrice = function(priceText) { /* ... (Keep existing parsePrice function) ... */
|
EbayParser.parsePrice = function(priceText) {
|
||||||
priceText = priceText || "";
|
priceText = priceText || "";
|
||||||
if (priceText.toLowerCase().includes(' to ')) {
|
if (priceText.toLowerCase().includes(' to ')) {
|
||||||
|
const rangeParts = priceText.split(/to/i);
|
||||||
|
const firstPriceMatch = rangeParts[0] ? rangeParts[0].match(/\$?([\d,]+\.?\d*)/) : null;
|
||||||
|
if (firstPriceMatch) {
|
||||||
|
return parseFloat(firstPriceMatch[1].replace(/,/g, ''));
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
const priceMatch = priceText.match(/\$?([\d,]+\.?\d*)/);
|
const priceMatch = priceText.match(/\$?([\d,]+\.?\d*)/);
|
||||||
@ -95,14 +103,11 @@
|
|||||||
return null;
|
return null;
|
||||||
};
|
};
|
||||||
|
|
||||||
EbayParser.runUnitTests = function() { /* ... (Keep existing runUnitTests function) ... */
|
EbayParser.runUnitTests = function() {
|
||||||
// Ensure console exists (for Node vs Browser safety, though Node has it)
|
|
||||||
const log = typeof console !== 'undefined' ? console.log : function() {};
|
const log = typeof console !== 'undefined' ? console.log : function() {};
|
||||||
const error = typeof console !== 'undefined' ? console.error : function() {};
|
const error = typeof console !== 'undefined' ? console.error : function() {};
|
||||||
|
|
||||||
log("Ebay Cost/TB: --- Running Unit Tests ---");
|
log("Ebay Cost/TB: --- Running Unit Tests ---");
|
||||||
const testCases = [
|
const testCases = [
|
||||||
// Add expected individualSizeTB to tests
|
|
||||||
{ title: "LOT OF (9) MAJOR BRAND 2.5\" 7MM SSD * Kingston, Samsung, SanDisk& PNY*120-250GB", expected: { totalTB: 1.080, quantity: 9, individualSizeTB: 0.120, needed_description_check: true } },
|
{ title: "LOT OF (9) MAJOR BRAND 2.5\" 7MM SSD * Kingston, Samsung, SanDisk& PNY*120-250GB", expected: { totalTB: 1.080, quantity: 9, individualSizeTB: 0.120, needed_description_check: true } },
|
||||||
{ title: "Lot of 10 Intel 256 GB 2.5\" SATA SSD different Model check the Description", expected: { totalTB: 2.560, quantity: 10, individualSizeTB: 0.256, needed_description_check: true } },
|
{ title: "Lot of 10 Intel 256 GB 2.5\" SATA SSD different Model check the Description", expected: { totalTB: 2.560, quantity: 10, individualSizeTB: 0.256, needed_description_check: true } },
|
||||||
{ title: "Bulk 5 Lot Samsung 870 EVO 500GB SSD SATA - Used - Tested Passed Smart Test", expected: { totalTB: 2.500, quantity: 5, individualSizeTB: 0.500, needed_description_check: false } },
|
{ title: "Bulk 5 Lot Samsung 870 EVO 500GB SSD SATA - Used - Tested Passed Smart Test", expected: { totalTB: 2.500, quantity: 5, individualSizeTB: 0.500, needed_description_check: false } },
|
||||||
@ -110,32 +115,26 @@
|
|||||||
{ title: "Micron 5100 MAX 1.84TB SATA 6Gb/s 2.5\" SSD MTFDDAK1T9TCC-1AR1ZABYY", expected: { totalTB: 1.84, quantity: 1, individualSizeTB: 1.84, needed_description_check: false } },
|
{ title: "Micron 5100 MAX 1.84TB SATA 6Gb/s 2.5\" SSD MTFDDAK1T9TCC-1AR1ZABYY", expected: { totalTB: 1.84, quantity: 1, individualSizeTB: 1.84, needed_description_check: false } },
|
||||||
{ title: "10-PACK 1TB SSD", expected: { totalTB: 10.0, quantity: 10, individualSizeTB: 1.0, needed_description_check: false } },
|
{ title: "10-PACK 1TB SSD", expected: { totalTB: 10.0, quantity: 10, individualSizeTB: 1.0, needed_description_check: false } },
|
||||||
];
|
];
|
||||||
|
|
||||||
let testsPassed = 0;
|
let testsPassed = 0;
|
||||||
let testsFailed = 0;
|
let testsFailed = 0;
|
||||||
|
|
||||||
testCases.forEach((test, index) => {
|
testCases.forEach((test, index) => {
|
||||||
const result = EbayParser.parseSizeAndQuantity(test.title);
|
const result = EbayParser.parseSizeAndQuantity(test.title);
|
||||||
const tbCheck = Math.abs(result.totalTB - test.expected.totalTB) < 0.0001;
|
const tbCheck = Math.abs(result.totalTB - test.expected.totalTB) < 0.0001;
|
||||||
const qCheck = result.quantity === test.expected.quantity;
|
const qCheck = result.quantity === test.expected.quantity;
|
||||||
const sizeCheck = Math.abs(result.individualSizeTB - test.expected.individualSizeTB) < 0.0001;
|
const sizeCheck = Math.abs(result.individualSizeTB - test.expected.individualSizeTB) < 0.0001;
|
||||||
const needCheck = result.needed_description_check === test.expected.needed_description_check;
|
const needCheck = result.needed_description_check === test.expected.needed_description_check;
|
||||||
|
if (tbCheck && qCheck && sizeCheck && needCheck) testsPassed++;
|
||||||
if (tbCheck && qCheck && sizeCheck && needCheck) {
|
else {
|
||||||
testsPassed++;
|
|
||||||
} else {
|
|
||||||
error(`Test ${index + 1}: FAILED - "${test.title}"`);
|
error(`Test ${index + 1}: FAILED - "${test.title}"`);
|
||||||
error(` Expected: TTB=${test.expected.totalTB.toFixed(4)}, Q=${test.expected.quantity}, STB=${test.expected.individualSizeTB.toFixed(4)}, Check=${test.expected.needed_description_check}`);
|
error(` Expected: TTB=${test.expected.totalTB.toFixed(4)}, Q=${test.expected.quantity}, STB=${test.expected.individualSizeTB.toFixed(4)}, Check=${test.expected.needed_description_check}`);
|
||||||
error(` Actual: TTB=${result.totalTB.toFixed(4)}, Q=${result.quantity}, STB=${result.individualSizeTB.toFixed(4)}, Check=${result.needed_description_check}`);
|
error(` Actual: TTB=${result.totalTB.toFixed(4)}, Q=${result.quantity}, STB=${result.individualSizeTB.toFixed(4)}, Check=${result.needed_description_check}`);
|
||||||
testsFailed++;
|
testsFailed++;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
log(`--- Unit Test Summary: ${testsPassed} Passed, ${testsFailed} Failed ---`);
|
log(`--- Unit Test Summary: ${testsPassed} Passed, ${testsFailed} Failed ---`);
|
||||||
return testsFailed === 0;
|
return testsFailed === 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Updated to include itemCount and sizePerItemTB
|
|
||||||
EbayParser.extractDataFromPage = function() {
|
EbayParser.extractDataFromPage = function() {
|
||||||
const itemSelector = 'li.s-item, li.srp-results__item, div.s-item[role="listitem"]';
|
const itemSelector = 'li.s-item, li.srp-results__item, div.s-item[role="listitem"]';
|
||||||
const itemElements = document.querySelectorAll(itemSelector);
|
const itemElements = document.querySelectorAll(itemSelector);
|
||||||
@ -144,44 +143,104 @@
|
|||||||
|
|
||||||
itemElements.forEach(item => {
|
itemElements.forEach(item => {
|
||||||
const titleElement = item.querySelector('.s-item__title, .srp-results__title');
|
const titleElement = item.querySelector('.s-item__title, .srp-results__title');
|
||||||
const priceElement = item.querySelector('.s-item__price, .srp-results__price');
|
const priceElement = item.querySelector('.s-item__price');
|
||||||
const linkElement = item.querySelector('.s-item__link, a[href*="/itm/"]');
|
// const linkElement = item.querySelector('.s-item__link, a[href*="/itm/"]'); // Not used for itemUrl anymore
|
||||||
|
const imageElement = item.querySelector('.s-item__image-wrapper img.s-item__image-img, .s-item__image img'); // Common image selectors
|
||||||
|
|
||||||
const title = titleElement ? titleElement.innerText.trim() : null;
|
let rawTitle = titleElement ? titleElement.innerText.trim() : null;
|
||||||
const priceText = priceElement ? priceElement.innerText.trim() : null;
|
const priceText = priceElement ? priceElement.innerText.trim() : null;
|
||||||
const itemUrl = linkElement ? linkElement.href : null;
|
// const itemUrl = linkElement ? linkElement.href : null; // Removed
|
||||||
|
|
||||||
if (!title || !priceText || !itemUrl) return;
|
// Try to get image URL, prefer data-src for lazy-loaded images, fallback to src
|
||||||
|
let imageUrl = null;
|
||||||
|
if (imageElement) {
|
||||||
|
imageUrl = imageElement.dataset.src || imageElement.getAttribute('src');
|
||||||
|
}
|
||||||
|
|
||||||
const listingPrice = EbayParser.parsePrice(priceText);
|
|
||||||
const parsedInfo = EbayParser.parseSizeAndQuantity(title);
|
if (!rawTitle || !priceText) return; // Item ID is now critical, URL was for item ID
|
||||||
|
|
||||||
|
let cleanedTitle = rawTitle;
|
||||||
|
const newListingRegex = /^\s*NEW LISTING\s*[:\-\s]*/i;
|
||||||
|
if (newListingRegex.test(cleanedTitle)) {
|
||||||
|
cleanedTitle = rawTitle.replace(newListingRegex, "").trim();
|
||||||
|
} else if (newListingRegex.test(rawTitle)) {
|
||||||
|
cleanedTitle = rawTitle.replace(newListingRegex, "").trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
const primaryDisplayPrice = EbayParser.parsePrice(priceText);
|
||||||
|
|
||||||
|
let currentBidPrice = null;
|
||||||
|
let finalBuyItNowPrice = null;
|
||||||
|
let hasBestOffer = false;
|
||||||
|
let itemIsAuction = false;
|
||||||
|
|
||||||
|
const bidCountElement = item.querySelector('.s-item__bid-count');
|
||||||
|
if (bidCountElement && bidCountElement.innerText.toLowerCase().includes('bid')) {
|
||||||
|
itemIsAuction = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const bestOfferElement = item.querySelector('.s-item__purchase-options--bo, .s-item__best-offer');
|
||||||
|
if (bestOfferElement) {
|
||||||
|
hasBestOffer = true;
|
||||||
|
} else {
|
||||||
|
const secondaryInfoElements = item.querySelectorAll('.s-item__subtitle, .s-item__secondary-text, .s-item__detail--secondary');
|
||||||
|
secondaryInfoElements.forEach(el => {
|
||||||
|
if (el.innerText.toLowerCase().includes('or best offer')) {
|
||||||
|
hasBestOffer = true;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (itemIsAuction) {
|
||||||
|
currentBidPrice = primaryDisplayPrice;
|
||||||
|
const auctionBinPriceElement = item.querySelector('.s-item__buy-it-now-price');
|
||||||
|
if (auctionBinPriceElement) {
|
||||||
|
finalBuyItNowPrice = EbayParser.parsePrice(auctionBinPriceElement.innerText);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
finalBuyItNowPrice = primaryDisplayPrice;
|
||||||
|
}
|
||||||
|
|
||||||
|
const parsedInfo = EbayParser.parseSizeAndQuantity(cleanedTitle);
|
||||||
const totalTB = parsedInfo.totalTB;
|
const totalTB = parsedInfo.totalTB;
|
||||||
const quantity = parsedInfo.quantity; // Get quantity
|
const quantity = parsedInfo.quantity;
|
||||||
const individualSizeTB = parsedInfo.individualSizeTB; // Get individual size
|
const individualSizeTB = parsedInfo.individualSizeTB;
|
||||||
const needed_description_check = parsedInfo.needed_description_check;
|
const needed_description_check = parsedInfo.needed_description_check;
|
||||||
|
|
||||||
let costPerTB = null;
|
let costPerTB = null;
|
||||||
if (listingPrice !== null && totalTB > 0) {
|
if (primaryDisplayPrice !== null && totalTB > 0) {
|
||||||
costPerTB = listingPrice / totalTB;
|
costPerTB = primaryDisplayPrice / totalTB;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extract Item ID from the item's link (still need a link element for this)
|
||||||
let itemId = null;
|
let itemId = null;
|
||||||
const itemMatch = itemUrl.match(/\/itm\/(\d+)/);
|
const linkForIdElement = item.querySelector('a.s-item__link[href*="/itm/"], .s-item__info > a[href*="/itm/"]');
|
||||||
if (itemMatch && itemMatch[1]) {
|
if (linkForIdElement && linkForIdElement.href) {
|
||||||
itemId = itemMatch[1];
|
const itemMatch = linkForIdElement.href.match(/\/itm\/(\d+)/);
|
||||||
|
if (itemMatch && itemMatch[1]) {
|
||||||
|
itemId = itemMatch[1];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if(!itemId) return; // Skip if no item ID can be found, as it's crucial
|
||||||
|
|
||||||
items.push({
|
items.push({
|
||||||
title,
|
title: cleanedTitle,
|
||||||
itemId,
|
itemId: itemId, // Crucial
|
||||||
dateFound: today,
|
dateFound: today,
|
||||||
listingPrice,
|
currentBidPrice: currentBidPrice,
|
||||||
itemCount: quantity, // <-- Added
|
buyItNowPrice: finalBuyItNowPrice,
|
||||||
sizePerItemTB: individualSizeTB > 0 ? parseFloat(individualSizeTB.toFixed(3)) : null, // <-- Added
|
hasBestOffer: hasBestOffer,
|
||||||
totalTB: totalTB > 0 ? parseFloat(totalTB.toFixed(3)) : null,
|
image_url: imageUrl, // <-- Added
|
||||||
costPerTB: costPerTB !== null ? parseFloat(costPerTB.toFixed(2)) : null,
|
parsed: { // <-- Nested object
|
||||||
needed_description_check,
|
itemCount: quantity,
|
||||||
itemUrl
|
sizePerItemTB: individualSizeTB > 0 ? parseFloat(individualSizeTB.toFixed(3)) : null,
|
||||||
|
totalTB: totalTB > 0 ? parseFloat(totalTB.toFixed(3)) : null,
|
||||||
|
costPerTB: costPerTB !== null ? parseFloat(costPerTB.toFixed(2)) : null,
|
||||||
|
needed_description_check: needed_description_check,
|
||||||
|
parser_engine: PARSER_ENGINE_VERSION // <-- Added
|
||||||
|
}
|
||||||
|
// itemUrl: itemUrl, // <-- Removed
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
return items;
|
return items;
|
||||||
|
Reference in New Issue
Block a user