From 89bd668a9cb831637a35656b14095531ddfcc03d Mon Sep 17 00:00:00 2001 From: hak8or Date: Wed, 28 May 2025 23:04:08 -0400 Subject: [PATCH] Even more updates (mostly CLI focused) --- ebay_command_line_tool.js | 258 +++++++++++++++++++++----------------- ebay_core.js | 145 ++++++++++++++------- 2 files changed, 245 insertions(+), 158 deletions(-) diff --git a/ebay_command_line_tool.js b/ebay_command_line_tool.js index 4ed98a3..91282e6 100644 --- a/ebay_command_line_tool.js +++ b/ebay_command_line_tool.js @@ -1,116 +1,171 @@ -// ebay_command_line_tool.js V3 +// ebay_command_line_tool.js V4.1 // Node.js script with commands to scrape eBay and output JSON. -// Enhanced with network blocking for --load, --only_json flag, and improved help. -// Usage: node ebay_command_line_tool.js [command] [options] [url] +// Images are now saved preserving their URL path structure within the save directory. const puppeteer = require('puppeteer'); const fs = require('fs'); const path = require('path'); const { Command } = require('commander'); +const https = require('https'); // For downloading images +const http = require('http'); // For downloading images (fallback) +const { URL } = require('url'); // For parsing image URLs // --- Load Core Script --- -const coreScriptPath = path.join(__dirname, 'ebay_core.js'); // Assumes ebay_core.js is in the same directory +const coreScriptPath = path.join(__dirname, 'ebay_core.js'); let ebayCoreScriptContent; try { ebayCoreScriptContent = fs.readFileSync(coreScriptPath, 'utf8'); - if (!ebayCoreScriptContent) { - throw new Error("ebay_core.js is empty or could not be read properly."); - } + if (!ebayCoreScriptContent) throw new Error("ebay_core.js is empty."); } catch (e) { - // This initial error should always print, regardless of --only_json - console.error(`Critical Error: Could not read ebay_core.js from ${coreScriptPath}`); - console.error("Please ensure 'ebay_core.js' exists in the same directory as this script."); - console.error(e.message); + console.error(`Critical Error: Could not read ebay_core.js: ${e.message}`); process.exit(1); } -// --- Global State for --only_json --- let quietMode = false; +function logMessage(message) { if (!quietMode) console.log(message); } +function logError(message) { if (!quietMode) console.error(message); } -// --- Logger functions that respect quietMode --- -function logMessage(message) { - if (!quietMode) { - console.log(message); - } -} -function logError(message) { - if (!quietMode) { - console.error(message); +// --- Image Downloading Function (Updated) --- +async function downloadImage(imageUrl, baseSaveDirectory) { + if (!imageUrl) return; + try { + const parsedUrl = new URL(imageUrl); + + // Get the full path from the URL (e.g., /images/g/5okAAeSwIGdoN8Ed/s-l500.webp) + // Ensure leading slash is removed for path.join to work as expected relative to baseSaveDirectory + const imagePathFromUrl = parsedUrl.pathname.startsWith('/') ? parsedUrl.pathname.substring(1) : parsedUrl.pathname; + + // Separate the directory part and the filename part from the URL path + const imageName = path.basename(imagePathFromUrl); + const imageSubdirectory = path.dirname(imagePathFromUrl); + + // Construct the full local directory path + const fullLocalDirectory = path.join(baseSaveDirectory, imageSubdirectory); + const fullLocalImagePath = path.join(fullLocalDirectory, imageName); + + // Ensure directory exists + if (!fs.existsSync(fullLocalDirectory)) { + fs.mkdirSync(fullLocalDirectory, { recursive: true }); + logMessage(`Created image directory: ${fullLocalDirectory}`); + } + + // Check if file already exists to avoid re-downloading (optional, can be useful) + // if (fs.existsSync(fullLocalImagePath)) { + // logMessage(`Image already exists, skipping: ${fullLocalImagePath}`); + // return Promise.resolve(); + // } + + const fileStream = fs.createWriteStream(fullLocalImagePath); + const protocol = parsedUrl.protocol === 'https:' ? https : http; + + return new Promise((resolve, reject) => { + const request = protocol.get(imageUrl, (response) => { + if (response.statusCode !== 200) { + logError(`Failed to download image ${imageUrl}. Status: ${response.statusCode}`); + response.resume(); // Consume response data to free up resources + reject(new Error(`Status code ${response.statusCode} for ${imageUrl}`)); + return; + } + response.pipe(fileStream); + fileStream.on('finish', () => { + fileStream.close(); // close() is async, call resolve after it's done + logMessage(`Downloaded image: ${fullLocalImagePath}`); + resolve(); + }); + fileStream.on('error', (err) => { // Handle stream errors + logError(`Error writing image file ${fullLocalImagePath}: ${err.message}`); + fs.unlink(fullLocalImagePath, () => {}); // Attempt to delete partial file + reject(err); + }); + }); + request.on('error', (err) => { // Handle request errors + logError(`Error downloading image ${imageUrl}: ${err.message}`); + // No partial file to unlink here as the request itself failed + reject(err); + }); + // Set a timeout for the request + request.setTimeout(30000, () => { // 30 seconds timeout + request.destroy(); // Destroy the request object on timeout + logError(`Timeout downloading image ${imageUrl}`); + reject(new Error(`Timeout downloading image ${imageUrl}`)); + }); + }); + } catch (error) { + logError(`Error processing image URL ${imageUrl}: ${error.message}`); + return Promise.reject(error); // Propagate the error } } -// --- Main Scraping Function (Updated) --- +// --- Main Scraping Function --- async function scrapeEbay({ url = null, htmlFile = null, saveFile = null }) { logMessage("Starting scraping process..."); - let browser; try { - browser = await puppeteer.launch({ - headless: true, - args: ['--no-sandbox', '--disable-setuid-sandbox'] - }); + browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox', '--disable-setuid-sandbox'] }); const page = await browser.newPage(); await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'); let htmlContentToParse; - if (htmlFile) { - // --- Load from File with Network Blocking --- logMessage(`Loading HTML from ${htmlFile}...`); htmlContentToParse = fs.readFileSync(htmlFile, 'utf8'); - - logMessage("Enabling request interception to block network calls..."); await page.setRequestInterception(true); - page.on('request', (request) => { - // Abort all types of requests - logMessage(`Blocking request to: ${request.url()}`); - request.abort(); - }); - + page.on('request', (request) => { request.abort(); }); await page.setContent(htmlContentToParse, { waitUntil: 'domcontentloaded' }); - logMessage("HTML loaded and set. Network requests are being blocked."); + logMessage("HTML loaded. Network requests blocked."); } else if (url) { - // --- Fetch from URL --- logMessage(`Navigating to ${url}...`); await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 }); logMessage("Navigation successful."); htmlContentToParse = await page.content(); logMessage("Page content retrieved."); - - // --- Save to File (if requested) --- if (saveFile && htmlContentToParse) { logMessage(`Saving HTML to ${saveFile}...`); fs.writeFileSync(saveFile, htmlContentToParse, 'utf8'); logMessage("HTML saved."); } } else { - // This error should be caught and handled by the caller or a try-catch within runScraping - throw new Error("Internal Error: Neither URL nor HTML file was provided to scrapeEbay."); + throw new Error("Internal Error: Neither URL nor HTML file was provided."); } - // --- Inject and Execute Core Logic --- logMessage("Injecting core parser script..."); await page.evaluate(ebayCoreScriptContent); logMessage("Core script injected. Extracting data..."); - const extractedResults = await page.evaluate(() => { if (typeof window.EbayParser === 'undefined' || typeof window.EbayParser.extractDataFromPage !== 'function') { - throw new Error("EbayParser or EbayParser.extractDataFromPage function was not properly injected or is missing in ebay_core.js!"); + throw new Error("EbayParser not found!"); } return window.EbayParser.extractDataFromPage(); }); logMessage(`Data extraction complete. Found ${extractedResults.length} items.`); - return extractedResults; - - } catch (e) { - logError(`An error occurred during the scraping process: ${e.message}`); - // For debugging, you might want to see the stack trace even in quiet mode for critical errors - if (!quietMode && e.stack) { - console.error(e.stack); + // If HTML was fetched and --save was used, now download images + if (url && saveFile && extractedResults.length > 0) { + const baseSaveName = path.parse(saveFile).name; // e.g., "foo2" + // The main directory for this save operation (e.g., "foo2/") + const mainImageSaveDirectory = path.join(path.dirname(saveFile), baseSaveName); + logMessage(`Downloading images for ${baseSaveName} into subdirectories of ${mainImageSaveDirectory}...`); + + const downloadPromises = []; + for (const item of extractedResults) { + if (item.image_url) { + // Pass the mainImageSaveDirectory as the base for creating nested structure + downloadPromises.push( + downloadImage(item.image_url, mainImageSaveDirectory).catch(e => { + logError(`Skipping image download for item ID ${item.itemId || 'unknown'} (URL: ${item.image_url}) due to error: ${e.message}`); + }) + ); + } + } + await Promise.all(downloadPromises); // Wait for all image downloads to attempt completion + logMessage("Image download process finished."); } - return []; // Return empty array on error + return extractedResults; + } catch (e) { + logError(`Scraping process error: ${e.message}`); + if (!quietMode && e.stack) console.error(e.stack); + return []; } finally { if (browser) { await browser.close(); @@ -119,31 +174,26 @@ async function scrapeEbay({ url = null, htmlFile = null, saveFile = null }) { } } -// --- Setup Command Line Interface --- const program = new Command(); - program .name('ebay-scraper') - .description('Scrapes eBay search results for SSD/HDD cost per TB.') - .version('3.0.0') - .option('--save ', 'Save the scraped HTML to a file.') - .option('--load ', 'Load HTML from a file instead of fetching from eBay (disables network).') - .option('--only_json', 'Suppress all informational logs and output only the final JSON.', false) - .on('option:only_json', () => { - quietMode = true; - }); + .description('Scrapes eBay search results.') + .version('4.1.0') // Version bump + .option('--save ', 'Save scraped HTML to a file (and download images if fetching from URL).') + .option('--load ', 'Load HTML from a file (disables network). Image download will not occur with --load.') + .option('--only_json', 'Suppress informational logs, output only final JSON.', false) + .on('option:only_json', () => { quietMode = true; }); program - .command('latest') // Removed { isDefault: false } as it's not strictly needed with argument handling - .description('Scrapes the latest listings using a predefined search. Use "ebay-scraper latest --help" to see specific options for this command.') + .command('latest') + .description('Scrapes latest listings. Use "ebay-scraper latest --help" for options.') .option('--per_page ', 'Items per page (60, 120, or 240)', '60') - .option('--minimum_cost ', 'Minimum cost for listings (e.g., 50.00)', '0.00') - .action(async (cmdOptions) => { // cmdOptions refers to 'latest' command's options - const globalOptions = program.opts(); // Access global options like --save, --load, --only_json - if (globalOptions.only_json) quietMode = true; // Ensure quietMode is set if command is run directly - + .option('--minimum_cost ', 'Minimum cost (e.g., 50.00)', '0.00') + .action(async (cmdOptions) => { + const globalOptions = program.opts(); + if (globalOptions.only_json) quietMode = true; if (globalOptions.load) { - logMessage("Using --load, 'latest' command options for URL generation will be ignored."); + logMessage("Using --load for 'latest'. URL generation options ignored. Images will not be downloaded."); await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save }); } else { const validPages = ['60', '120', '240']; @@ -156,7 +206,6 @@ program logError("Error: --minimum_cost must be a number."); if (!quietMode) process.exit(1); else throw new Error("Invalid minimum_cost"); } - const baseUrl = 'https://www.ebay.com/sch/i.html?_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10'; const url = `${baseUrl}&_ipg=${cmdOptions.per_page}&_udlo=${minCost.toFixed(2)}`; logMessage(`Constructed URL for 'latest': ${url}`); @@ -164,88 +213,67 @@ program } }); -// Handle URL as an argument. This will act as the default action if no other command is matched. program .argument('[url]', 'The full eBay search URL to scrape.') - .action(async (url, cmdOptions) => { // cmdOptions here are the global ones if no command specified + .action(async (url) => { const globalOptions = program.opts(); if (globalOptions.only_json) quietMode = true; - - // If 'url' is undefined here, it means no command and no URL was provided. - // 'latest' command has its own action, so this won't run for 'latest'. if (globalOptions.load) { - logMessage("Using --load, any provided URL argument will be ignored."); + logMessage("Using --load. Provided URL argument ignored. Images will not be downloaded."); await runScraping({ htmlFile: globalOptions.load, saveFile: globalOptions.save }); } else if (url) { await runScraping({ url: url, saveFile: globalOptions.save }); } else { // If no URL, no --load, and not the 'latest' command, show help. - // This condition means no specific action was determined. - if (!program.args.find(arg => program.commands.map(c => c.name()).includes(arg))) { + // Check if 'latest' was an argument. If so, commander handles its action. + // If not, and no URL, then show help. + const isLatestCommand = process.argv.includes('latest'); + if (!isLatestCommand) { program.help(); } } }); -// Add help text to guide users for subcommand help program.addHelpText('after', ` Example calls: - $ ebay-scraper latest --per_page 120 --minimum_cost 50 - $ ebay-scraper latest --help + $ ebay-scraper latest --per_page 120 $ ebay-scraper "https://www.ebay.com/sch/i.html?_nkw=ssd" - $ ebay-scraper --load saved_page.html --only_json | jq . - $ ebay-scraper --save current_page.html "https://www.ebay.com/sch/i.html?_nkw=hdd"`); + $ ebay-scraper --load page.html --only_json | jq . + $ ebay-scraper --save page.html "https://www.ebay.com/sch/i.html?_nkw=hdd"`); - -// --- Wrapper to run scraping and print results --- async function runScraping(options) { try { const data = await scrapeEbay(options); if (quietMode) { - // Only output JSON string, no extra newlines or messages process.stdout.write(JSON.stringify(data, null, 2)); } else { - if (data && data.length > 0) { - console.log(JSON.stringify(data, null, 2)); - } else { - logMessage("No data extracted or a critical error occurred during scraping."); - } + if (data && data.length > 0) console.log(JSON.stringify(data, null, 2)); + else logMessage("No data extracted or a critical error occurred."); } } catch (e) { logError(`Critical error in runScraping: ${e.message}`); if (!quietMode && e.stack) console.error(e.stack); - if (quietMode) { // Ensure valid JSON output even on error for piping - process.stdout.write(JSON.stringify({error: e.message, data: []})); - } + if (quietMode) process.stdout.write(JSON.stringify({error: e.message, data: []})); } } -// --- Parse Arguments and Run --- (async () => { try { await program.parseAsync(process.argv); - // If no command was matched by commander and no URL argument was given, - // and it's not just options like --version or --help that commander handles. - const knownCommands = program.commands.map(cmd => cmd.name()); - const userArgs = process.argv.slice(2); - const potentialCommand = userArgs.find(arg => !arg.startsWith('-')); - - if (userArgs.length > 0 && !knownCommands.includes(potentialCommand) && !program.args.includes(potentialCommand) && !program.opts().load && potentialCommand) { - // This case handles if a user types something that isn't a command or a URL after options. - // Example: `node script.js --only_json somegibberish` - // However, the default argument [url] should catch most of these. - // If a URL-like string is passed, it will be caught by the .argument('[url]') action. - } else if (process.argv.slice(2).length === 0) { // No arguments at all + // If no command was specified and no URL, Commander's default help might not trigger if only options are present. + // This ensures help is shown if no actionable arguments are given. + const args = process.argv.slice(2); + const hasActionableArg = args.some(arg => !arg.startsWith('-') || program.commands.some(cmd => cmd.name() === arg)); + if (args.length > 0 && !hasActionableArg && !program.opts().load) { // If only options like --only_json but no command/url/load + program.help(); + } else if (args.length === 0) { // No arguments at all program.help(); } } catch (error) { logError(`Command parsing error: ${error.message}`); if (!quietMode && error.stack) console.error(error.stack); - if (quietMode) { - process.stdout.write(JSON.stringify({error: error.message, data: []})); - } else { - process.exit(1); - } + if (quietMode) process.stdout.write(JSON.stringify({error: error.message, data: []})); + else process.exit(1); } })(); diff --git a/ebay_core.js b/ebay_core.js index b446687..94809ca 100644 --- a/ebay_core.js +++ b/ebay_core.js @@ -1,5 +1,7 @@ -// ebay_core.js V1.1 - Shared Parsing & Extraction Logic -// Added itemCount and sizePerItemTB to output. +// ebay_core.js V1.4 - Shared Parsing & Extraction Logic +// - Restructured JSON output with a "parsed" sub-object. +// - Added parser_engine version. +// - Removed itemUrl, added image_url. (function (root, factory) { if (typeof module === 'object' && module.exports) { module.exports = factory(); @@ -10,13 +12,14 @@ 'use strict'; const EbayParser = {}; + const PARSER_ENGINE_VERSION = 1; EbayParser.parseSizeAndQuantity = function(title) { title = title ? title.toUpperCase() : ""; let totalTB = 0; let quantity = 1; let needed_description_check = false; - let individualSizeTB = 0; // Will hold the size per item + let individualSizeTB = 0; const explicitQtyPatterns = [ /\b(?:LOT\s+OF|LOT)\s*\(?\s*(\d+)\s*\)?/i, @@ -49,7 +52,7 @@ sizeMatches.map(sm => sm.unit === 'GB' ? sm.value / 1000 : sm.value) )].sort((a, b) => a - b); if (uniqueSizesTB.length > 0) { - individualSizeTB = uniqueSizesTB[0]; // Set individual size + individualSizeTB = uniqueSizesTB[0]; if (uniqueSizesTB.length > 1) needed_description_check = true; } } @@ -71,21 +74,26 @@ if (quantity > 1 && totalTB === 0) { needed_description_check = true; } - if (quantity === 1 && sizeMatches.length === 1 && !needed_description_check) { + if (quantity === 1 && sizeMatches.length === 1 && !needed_description_check) { needed_description_check = false; } return { totalTB: parseFloat(totalTB.toFixed(4)), - quantity: quantity, // Renamed to 'quantity' internally, maps to 'itemCount' + quantity: quantity, needed_description_check: needed_description_check, - individualSizeTB: parseFloat(individualSizeTB.toFixed(4)) // Added size per item + individualSizeTB: parseFloat(individualSizeTB.toFixed(4)) }; }; - EbayParser.parsePrice = function(priceText) { /* ... (Keep existing parsePrice function) ... */ + EbayParser.parsePrice = function(priceText) { priceText = priceText || ""; if (priceText.toLowerCase().includes(' to ')) { + const rangeParts = priceText.split(/to/i); + const firstPriceMatch = rangeParts[0] ? rangeParts[0].match(/\$?([\d,]+\.?\d*)/) : null; + if (firstPriceMatch) { + return parseFloat(firstPriceMatch[1].replace(/,/g, '')); + } return null; } const priceMatch = priceText.match(/\$?([\d,]+\.?\d*)/); @@ -95,14 +103,11 @@ return null; }; - EbayParser.runUnitTests = function() { /* ... (Keep existing runUnitTests function) ... */ - // Ensure console exists (for Node vs Browser safety, though Node has it) + EbayParser.runUnitTests = function() { const log = typeof console !== 'undefined' ? console.log : function() {}; const error = typeof console !== 'undefined' ? console.error : function() {}; - log("Ebay Cost/TB: --- Running Unit Tests ---"); const testCases = [ - // Add expected individualSizeTB to tests { title: "LOT OF (9) MAJOR BRAND 2.5\" 7MM SSD * Kingston, Samsung, SanDisk& PNY*120-250GB", expected: { totalTB: 1.080, quantity: 9, individualSizeTB: 0.120, needed_description_check: true } }, { title: "Lot of 10 Intel 256 GB 2.5\" SATA SSD different Model check the Description", expected: { totalTB: 2.560, quantity: 10, individualSizeTB: 0.256, needed_description_check: true } }, { title: "Bulk 5 Lot Samsung 870 EVO 500GB SSD SATA - Used - Tested Passed Smart Test", expected: { totalTB: 2.500, quantity: 5, individualSizeTB: 0.500, needed_description_check: false } }, @@ -110,32 +115,26 @@ { title: "Micron 5100 MAX 1.84TB SATA 6Gb/s 2.5\" SSD MTFDDAK1T9TCC-1AR1ZABYY", expected: { totalTB: 1.84, quantity: 1, individualSizeTB: 1.84, needed_description_check: false } }, { title: "10-PACK 1TB SSD", expected: { totalTB: 10.0, quantity: 10, individualSizeTB: 1.0, needed_description_check: false } }, ]; - let testsPassed = 0; let testsFailed = 0; - testCases.forEach((test, index) => { const result = EbayParser.parseSizeAndQuantity(test.title); const tbCheck = Math.abs(result.totalTB - test.expected.totalTB) < 0.0001; const qCheck = result.quantity === test.expected.quantity; const sizeCheck = Math.abs(result.individualSizeTB - test.expected.individualSizeTB) < 0.0001; const needCheck = result.needed_description_check === test.expected.needed_description_check; - - if (tbCheck && qCheck && sizeCheck && needCheck) { - testsPassed++; - } else { + if (tbCheck && qCheck && sizeCheck && needCheck) testsPassed++; + else { error(`Test ${index + 1}: FAILED - "${test.title}"`); error(` Expected: TTB=${test.expected.totalTB.toFixed(4)}, Q=${test.expected.quantity}, STB=${test.expected.individualSizeTB.toFixed(4)}, Check=${test.expected.needed_description_check}`); error(` Actual: TTB=${result.totalTB.toFixed(4)}, Q=${result.quantity}, STB=${result.individualSizeTB.toFixed(4)}, Check=${result.needed_description_check}`); testsFailed++; } }); - log(`--- Unit Test Summary: ${testsPassed} Passed, ${testsFailed} Failed ---`); return testsFailed === 0; }; - // Updated to include itemCount and sizePerItemTB EbayParser.extractDataFromPage = function() { const itemSelector = 'li.s-item, li.srp-results__item, div.s-item[role="listitem"]'; const itemElements = document.querySelectorAll(itemSelector); @@ -144,44 +143,104 @@ itemElements.forEach(item => { const titleElement = item.querySelector('.s-item__title, .srp-results__title'); - const priceElement = item.querySelector('.s-item__price, .srp-results__price'); - const linkElement = item.querySelector('.s-item__link, a[href*="/itm/"]'); + const priceElement = item.querySelector('.s-item__price'); + // const linkElement = item.querySelector('.s-item__link, a[href*="/itm/"]'); // Not used for itemUrl anymore + const imageElement = item.querySelector('.s-item__image-wrapper img.s-item__image-img, .s-item__image img'); // Common image selectors - const title = titleElement ? titleElement.innerText.trim() : null; + let rawTitle = titleElement ? titleElement.innerText.trim() : null; const priceText = priceElement ? priceElement.innerText.trim() : null; - const itemUrl = linkElement ? linkElement.href : null; + // const itemUrl = linkElement ? linkElement.href : null; // Removed - if (!title || !priceText || !itemUrl) return; + // Try to get image URL, prefer data-src for lazy-loaded images, fallback to src + let imageUrl = null; + if (imageElement) { + imageUrl = imageElement.dataset.src || imageElement.getAttribute('src'); + } - const listingPrice = EbayParser.parsePrice(priceText); - const parsedInfo = EbayParser.parseSizeAndQuantity(title); + + if (!rawTitle || !priceText) return; // Item ID is now critical, URL was for item ID + + let cleanedTitle = rawTitle; + const newListingRegex = /^\s*NEW LISTING\s*[:\-\s]*/i; + if (newListingRegex.test(cleanedTitle)) { + cleanedTitle = rawTitle.replace(newListingRegex, "").trim(); + } else if (newListingRegex.test(rawTitle)) { + cleanedTitle = rawTitle.replace(newListingRegex, "").trim(); + } + + const primaryDisplayPrice = EbayParser.parsePrice(priceText); + + let currentBidPrice = null; + let finalBuyItNowPrice = null; + let hasBestOffer = false; + let itemIsAuction = false; + + const bidCountElement = item.querySelector('.s-item__bid-count'); + if (bidCountElement && bidCountElement.innerText.toLowerCase().includes('bid')) { + itemIsAuction = true; + } + + const bestOfferElement = item.querySelector('.s-item__purchase-options--bo, .s-item__best-offer'); + if (bestOfferElement) { + hasBestOffer = true; + } else { + const secondaryInfoElements = item.querySelectorAll('.s-item__subtitle, .s-item__secondary-text, .s-item__detail--secondary'); + secondaryInfoElements.forEach(el => { + if (el.innerText.toLowerCase().includes('or best offer')) { + hasBestOffer = true; + } + }); + } + + if (itemIsAuction) { + currentBidPrice = primaryDisplayPrice; + const auctionBinPriceElement = item.querySelector('.s-item__buy-it-now-price'); + if (auctionBinPriceElement) { + finalBuyItNowPrice = EbayParser.parsePrice(auctionBinPriceElement.innerText); + } + } else { + finalBuyItNowPrice = primaryDisplayPrice; + } + + const parsedInfo = EbayParser.parseSizeAndQuantity(cleanedTitle); const totalTB = parsedInfo.totalTB; - const quantity = parsedInfo.quantity; // Get quantity - const individualSizeTB = parsedInfo.individualSizeTB; // Get individual size + const quantity = parsedInfo.quantity; + const individualSizeTB = parsedInfo.individualSizeTB; const needed_description_check = parsedInfo.needed_description_check; let costPerTB = null; - if (listingPrice !== null && totalTB > 0) { - costPerTB = listingPrice / totalTB; + if (primaryDisplayPrice !== null && totalTB > 0) { + costPerTB = primaryDisplayPrice / totalTB; } + // Extract Item ID from the item's link (still need a link element for this) let itemId = null; - const itemMatch = itemUrl.match(/\/itm\/(\d+)/); - if (itemMatch && itemMatch[1]) { - itemId = itemMatch[1]; + const linkForIdElement = item.querySelector('a.s-item__link[href*="/itm/"], .s-item__info > a[href*="/itm/"]'); + if (linkForIdElement && linkForIdElement.href) { + const itemMatch = linkForIdElement.href.match(/\/itm\/(\d+)/); + if (itemMatch && itemMatch[1]) { + itemId = itemMatch[1]; + } } + if(!itemId) return; // Skip if no item ID can be found, as it's crucial items.push({ - title, - itemId, + title: cleanedTitle, + itemId: itemId, // Crucial dateFound: today, - listingPrice, - itemCount: quantity, // <-- Added - sizePerItemTB: individualSizeTB > 0 ? parseFloat(individualSizeTB.toFixed(3)) : null, // <-- Added - totalTB: totalTB > 0 ? parseFloat(totalTB.toFixed(3)) : null, - costPerTB: costPerTB !== null ? parseFloat(costPerTB.toFixed(2)) : null, - needed_description_check, - itemUrl + currentBidPrice: currentBidPrice, + buyItNowPrice: finalBuyItNowPrice, + hasBestOffer: hasBestOffer, + image_url: imageUrl, // <-- Added + parsed: { // <-- Nested object + itemCount: quantity, + sizePerItemTB: individualSizeTB > 0 ? parseFloat(individualSizeTB.toFixed(3)) : null, + totalTB: totalTB > 0 ? parseFloat(totalTB.toFixed(3)) : null, + costPerTB: costPerTB !== null ? parseFloat(costPerTB.toFixed(2)) : null, + needed_description_check: needed_description_check, + parser_engine: PARSER_ENGINE_VERSION // <-- Added + } + // itemUrl: itemUrl, // <-- Removed }); }); return items;