Initial CLI support

This commit is contained in:
2025-05-27 23:54:20 -04:00
parent 8043c10df2
commit 411e21ca7a
6 changed files with 1035 additions and 362 deletions

91
ebay_command_line_tool.js Normal file
View File

@ -0,0 +1,91 @@
// ebay_command_line_tool.js
// A Node.js script to scrape eBay search results and output JSON.
// Uses ebay_core.js for parsing and extraction logic.
// Usage: node ebay_command_line_tool.js "EBAY_SEARCH_URL"
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');
// --- Main Scraping Function ---
/**
 * Loads an eBay search-results page in headless Chromium and returns whatever
 * the injected parser extracts from it.
 *
 * The parser source is read from `ebay_core.js` in this script's directory,
 * evaluated inside the loaded page, and then
 * `window.EbayParser.extractDataFromPage()` is invoked in the page context.
 *
 * This function writes all of its diagnostics to stderr, leaving stdout free
 * for the caller's output.
 *
 * Terminates the process with exit code 1 when `url` is missing or when
 * `ebay_core.js` cannot be read.
 *
 * @param {string} url - eBay search URL to load.
 * @returns {Promise<*>} The value returned by
 *   `EbayParser.extractDataFromPage()`, or an empty array if anything failed
 *   while launching the browser, navigating, or extracting.
 */
async function scrapeEbayFromCommandLine(url) {
  if (!url) {
    console.error("Error: eBay search URL is required as the first argument.");
    console.error("Example Usage: node ebay_command_line_tool.js \"https://www.ebay.com/sch/i.html?_nkw=ssd\"");
    process.exit(1);
  }

  // --- Load Core Script ---
  // This assumes ebay_core.js is in the same directory as this script.
  const coreScriptPath = path.join(__dirname, 'ebay_core.js');
  let ebayCoreScriptContent;
  try {
    ebayCoreScriptContent = fs.readFileSync(coreScriptPath, 'utf8');
    if (!ebayCoreScriptContent) {
      // An empty file would inject nothing; report it the same way as a read failure.
      throw new Error("ebay_core.js is empty or could not be read properly.");
    }
  } catch (e) {
    console.error(`Error: Could not read ebay_core.js from ${coreScriptPath}`);
    console.error("Please ensure 'ebay_core.js' exists in the same directory as this script.");
    console.error(e.message);
    process.exit(1);
  }

  // Progress output goes to stderr so it never mixes with JSON on stdout.
  console.error(`Attempting to scrape: ${url}`);

  let browser; // Declared outside try so the finally block can close it.
  try {
    browser = await puppeteer.launch({
      headless: true, // Set to false for debugging to see the browser
      args: ['--no-sandbox', '--disable-setuid-sandbox'], // Common args for server environments
    });
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36');

    // Generous navigation timeout; wait until the network is (almost) idle.
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 });

    // --- Inject and Execute Core Logic ---
    // Evaluate the parser source in the page; it is expected to define
    // window.EbayParser, which is verified in the next step.
    await page.evaluate(ebayCoreScriptContent);

    const extractedResults = await page.evaluate(() => {
      if (typeof window.EbayParser === 'undefined' || typeof window.EbayParser.extractDataFromPage !== 'function') {
        // Thrown inside the page; surfaces as a rejection handled by the catch below.
        throw new Error("EbayParser or EbayParser.extractDataFromPage function was not properly injected or is missing in ebay_core.js!");
      }
      return window.EbayParser.extractDataFromPage();
    });
    return extractedResults;
  } catch (e) {
    console.error("An error occurred during the scraping process:", e.message);
    // Tip: when debugging, capture a page screenshot before the browser closes.
    return []; // Return empty array on error
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        // A throw from finally would replace the value returned above, so
        // report the cleanup failure instead of propagating it.
        console.error("Warning: could not close the browser cleanly:", closeError.message);
      }
    }
  }
}
// --- Script Execution ---
// process.argv[0] is the node binary and process.argv[1] is the script path,
// so the first user-supplied argument is process.argv[2].
const searchUrl = process.argv[2];
(async () => {
  const data = await scrapeEbayFromCommandLine(searchUrl);
  if (data && data.length > 0) {
    console.log(JSON.stringify(data, null, 2));
  } else {
    // Human-readable notice: keep it off stdout so it is not mistaken for JSON output.
    console.error("No data extracted. This could be due to an error, an empty page, or incorrect selectors in ebay_core.js.");
  }
})().catch((err) => {
  // Never leave the top-level promise floating: report the failure and
  // signal it to the calling shell through a non-zero exit code.
  console.error("Unexpected failure:", err);
  process.exitCode = 1;
});