// ebay_command_line_tool.js
// A Node.js script to scrape eBay search results and output JSON.
// Uses ebay_core.js for parsing and extraction logic.
// Usage: node ebay_command_line_tool.js "EBAY_SEARCH_URL"

const puppeteer = require('puppeteer');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// --- Main Scraping Function ---
|
|
/**
 * Loads an eBay search results page in headless Chromium (via Puppeteer),
 * injects ebay_core.js into the page and returns whatever
 * `window.EbayParser.extractDataFromPage()` produces there.
 *
 * Every progress and diagnostic message is written to stderr so that stdout
 * stays reserved for the JSON payload printed by the caller.
 *
 * Terminates the process with exit code 1 when `url` is missing or when
 * ebay_core.js cannot be read from this script's directory.
 *
 * @param {string} url - eBay search URL to load.
 * @returns {Promise<*>} The value returned by the injected extractor, or an
 *   empty array when launching, navigating or extracting fails.
 */
async function scrapeEbayFromCommandLine(url) {
  if (!url) {
    console.error("Error: eBay search URL is required as the first argument.");
    // Usage help is a diagnostic, so it goes to stderr rather than stdout.
    console.error("Example Usage: node ebay_command_line_tool.js \"https://www.ebay.com/sch/i.html?_nkw=ssd\"");
    process.exit(1);
  }

  // --- Load Core Script ---
  // This assumes ebay_core.js is in the same directory as this script.
  const coreScriptPath = path.join(__dirname, 'ebay_core.js');
  let ebayCoreScriptContent;
  try {
    ebayCoreScriptContent = fs.readFileSync(coreScriptPath, 'utf8');
    if (!ebayCoreScriptContent) {
      // An empty file is reported through the same path as a read failure.
      throw new Error("ebay_core.js is empty or could not be read properly.");
    }
  } catch (e) {
    console.error(`Error: Could not read ebay_core.js from ${coreScriptPath}`);
    console.error("Please ensure 'ebay_core.js' exists in the same directory as this script.");
    console.error(e.message);
    process.exit(1);
  }

  // Progress output must not pollute stdout, which carries the JSON result.
  console.error(`Attempting to scrape: ${url}`);

  let browser; // Declared outside try so it can be closed in finally
  try {
    browser = await puppeteer.launch({
      headless: true, // Set to false for debugging to see the browser
      args: ['--no-sandbox', '--disable-setuid-sandbox'] // Common args for server environments
    });
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36');

    // Generous navigation timeout; wait until the network is (nearly) idle.
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 });

    // --- Inject and Execute Core Logic ---
    // Evaluate the core parser source inside the page context. The script is
    // expected to expose an EbayParser object on the page's window.
    await page.evaluate(ebayCoreScriptContent);

    // Call the extraction function provided by the injected script.
    const extractedResults = await page.evaluate(() => {
      if (typeof window.EbayParser === 'undefined' || typeof window.EbayParser.extractDataFromPage !== 'function') {
        // Thrown inside the page; surfaces in the catch block below.
        throw new Error("EbayParser or EbayParser.extractDataFromPage function was not properly injected or is missing in ebay_core.js!");
      }
      return window.EbayParser.extractDataFromPage();
    });

    return extractedResults;
  } catch (e) {
    console.error("An error occurred during the scraping process:", e.message);
    // Debugging tip: launch with `headless: false` above to watch the browser.
    return []; // Best effort: report the failure and hand back no results
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        // A failed shutdown must not replace the value already being returned.
        console.error("Warning: failed to close the browser:", closeError.message);
      }
    }
  }
}
|
|
|
|
// --- Script Execution ---
// process.argv[0] is the node binary and process.argv[1] is the script path,
// so the first user-supplied argument is process.argv[2].
const searchUrl = process.argv[2];

// Entry point: scrape the requested URL and print the results as JSON.
// Only the JSON payload is written to stdout; the "nothing extracted" notice
// and unexpected failures go to stderr so the output can be piped safely.
(async () => {
  const data = await scrapeEbayFromCommandLine(searchUrl);
  if (data && data.length > 0) {
    console.log(JSON.stringify(data, null, 2));
  } else {
    console.error("No data extracted. This could be due to an error, an empty page, or incorrect selectors in ebay_core.js.");
  }
})().catch((err) => {
  // Never leave the promise floating: report the failure and signal it
  // through the exit code instead of an unhandled rejection.
  console.error("Unexpected error:", err && err.message ? err.message : err);
  process.exitCode = 1;
});
|