Files
ebay_scraper_rust/systemd/scraper_fetch.sh
hak8or b538dd8012
All checks were successful
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 3m46s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 4m3s
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 4m14s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 9m19s
Allow saving and ingesting from zstd compressed scrapes
2025-09-01 14:45:21 -04:00

99 lines
5.7 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Downloads eBay search-result pages and stores them zstd-compressed under
# "$XDG_DATA_HOME/scraper/raw_scraped/<category>/<unix-epoch>.html.zst".
# Meant to run under systemd (its output ends up in journald).
#
# Requires: curl, jq and zstd on PATH, and XDG_DATA_HOME set.

# Query-string fragments that are appended to URL_BASE_LISTING to build a
# search URL. Each fragment carries its own leading '&'.
# URL_PER_PAGE_60 and URL_MIN_PRICE_USD_60 are not referenced further down in
# this script; they are kept as ready-made alternatives.
# shellcheck disable=SC2034
readonly URL_PER_PAGE_60='&_ipg=60'
readonly URL_PER_PAGE_240='&_ipg=240'
# shellcheck disable=SC2034
readonly URL_MIN_PRICE_USD_60='&_udlo=60.00'
readonly URL_SEARCHTERM_NONE='&_nkw='
readonly URL_LOCATION_NORTHAMERICA='&LH_PrefLoc=3'
readonly URL_SORTBY_NEWLY_LISTED='&_sop=10'
readonly URL_SORTBY_ENDING_SOONEST='&_sop=1'
readonly URL_BASE_LISTING='https://www.ebay.com/sch/i.html?'
# Every output path is rooted at XDG_DATA_HOME, so refuse to run without it
# instead of silently writing to "/scraper/...". Exit non-zero so the caller
# (e.g. the systemd unit) sees the run as failed rather than successful.
if [[ -z "${XDG_DATA_HOME:-}" ]]; then
  echo "XDG_DATA_HOME was not set, bailing!" >&2
  exit 1
fi
#######################################
# Download one listing page and store it zstd-compressed.
# The archive is first written under a ".partial" name and only renamed to
# its final name once both curl and zstd succeeded, so a failed download never
# leaves an empty or truncated "<epoch>.html.zst" behind.
# Arguments: $1 - complete listing URL (sort parameter already appended)
#            $2 - directory the archive is written to
# Outputs:   "$2/<unix-epoch>.html.zst"; diagnostics go to stderr
# Returns:   0 on success, 1 if the download or the compression failed
#######################################
fetch_listing() {
  local url=$1 out_dir=$2
  local out_file partial_file
  local -a stage_status

  out_file="$out_dir/$(date +%s).html.zst"
  partial_file="$out_file.partial"

  # --fail makes curl exit non-zero on HTTP errors instead of passing the
  # server's error page on as if it were a scrape; --silent/--show-error drop
  # the progress meter but keep real error messages.
  curl --fail --silent --show-error "$url" \
    | zstd -z --ultra -19 -o "$partial_file"
  # Must be read immediately: any further command overwrites PIPESTATUS.
  stage_status=("${PIPESTATUS[@]}")

  if (( stage_status[0] != 0 || stage_status[1] != 0 )); then
    printf 'Fetch failed (curl=%s, zstd=%s) for %s\n' \
      "${stage_status[0]}" "${stage_status[1]}" "$url" >&2
    rm -f -- "$partial_file"
    return 1
  fi

  mv -- "$partial_file" "$out_file"
}

#######################################
# Scrape one category twice: sorted by newly listed, then by ending soonest.
# On the first run the search URL is written to "<dir>/url.json"; on every run
# the URL actually fetched is the one read back from that file.
# Globals:   URL_BASE_LISTING, URL_SEARCHTERM_NONE, URL_LOCATION_NORTHAMERICA,
#            URL_PER_PAGE_240, URL_SORTBY_NEWLY_LISTED,
#            URL_SORTBY_ENDING_SOONEST (all read)
# Arguments: $1 - directory holding url.json and the archives
#            $2 - category query fragment, e.g. "&_sacat=175669"
# Returns:   0 if both pages were stored, 1 otherwise
#######################################
scrape_category() {
  local dir=$1 category_param=$2
  local listing_url
  local status=0

  mkdir -p -- "$dir" || return 1

  if [[ ! -s "$dir/url.json" ]]; then
    listing_url="${URL_BASE_LISTING}${URL_SEARCHTERM_NONE}${category_param}"
    listing_url+="&_from=R40&_fsrp=1${URL_LOCATION_NORTHAMERICA}&imm=1${URL_PER_PAGE_240}"
    printf '{"url": "%s"}\n' "$listing_url" > "$dir/url.json"
  fi

  # Read the URL once. --exit-status turns a missing/null ".url" into a
  # failure instead of the literal string "null" being used as the URL.
  if ! listing_url="$(jq --raw-output --exit-status '.url' "$dir/url.json")"; then
    printf 'Could not read .url from %s, skipping this category\n' \
      "$dir/url.json" >&2
    return 1
  fi

  fetch_listing "$listing_url$URL_SORTBY_NEWLY_LISTED" "$dir" || status=1
  # Pause between the two requests; this also keeps the two epoch-second
  # file names distinct.
  sleep 2
  fetch_listing "$listing_url$URL_SORTBY_ENDING_SOONEST" "$dir" || status=1

  return "$status"
}

# Failures are reported on stderr by the helpers; the next category is still
# attempted so one bad fetch does not cost the whole run.
DIR_SSDS="$XDG_DATA_HOME/scraper/raw_scraped/ssd"
URL_CATEGORY_SSD="&_sacat=175669"
scrape_category "$DIR_SSDS" "$URL_CATEGORY_SSD"

DIR_MINIPC="$XDG_DATA_HOME/scraper/raw_scraped/minipc"
URL_CATEGORY_MINIPC_ALLINONE="&_sacat=179"
scrape_category "$DIR_MINIPC" "$URL_CATEGORY_MINIPC_ALLINONE"
# Print a final marker line so that every run leaves an entry in journald.
printf '%s\n' 'Done'
# To mass-compress scrapes that are still stored as plain .html files:
# fd '\.html$' -x zstd -z --ultra -19 -o {}.zst {}
# Compression level analysis;
#
# A single scraped result;
# for lvl in $(seq 3 22); zstd --compress --ultra -o 1755012328.html.zst$lvl -$lvl 1755012328.html; end
# 1755012328.html : 9.04% ( 2.60 MiB => 240 KiB, 1755012328.html.zst3)
# 1755012328.html : 9.04% ( 2.60 MiB => 240 KiB, 1755012328.html.zst4)
# 1755012328.html : 8.80% ( 2.60 MiB => 234 KiB, 1755012328.html.zst5)
# 1755012328.html : 8.58% ( 2.60 MiB => 228 KiB, 1755012328.html.zst6)
# 1755012328.html : 8.54% ( 2.60 MiB => 227 KiB, 1755012328.html.zst7)
# 1755012328.html : 8.45% ( 2.60 MiB => 225 KiB, 1755012328.html.zst8)
# 1755012328.html : 8.34% ( 2.60 MiB => 222 KiB, 1755012328.html.zst9)
# 1755012328.html : 8.30% ( 2.60 MiB => 221 KiB, 1755012328.html.zst10)
# 1755012328.html : 8.28% ( 2.60 MiB => 220 KiB, 1755012328.html.zst11)
# 1755012328.html : 8.28% ( 2.60 MiB => 220 KiB, 1755012328.html.zst12)
# 1755012328.html : 8.32% ( 2.60 MiB => 221 KiB, 1755012328.html.zst13)
# 1755012328.html : 8.29% ( 2.60 MiB => 221 KiB, 1755012328.html.zst14)
# 1755012328.html : 8.25% ( 2.60 MiB => 219 KiB, 1755012328.html.zst15)
# 1755012328.html : 8.28% ( 2.60 MiB => 220 KiB, 1755012328.html.zst16)
# 1755012328.html : 8.20% ( 2.60 MiB => 218 KiB, 1755012328.html.zst17)
# 1755012328.html : 8.23% ( 2.60 MiB => 219 KiB, 1755012328.html.zst18)
# 1755012328.html : 7.99% ( 2.60 MiB => 213 KiB, 1755012328.html.zst19)
# 1755012328.html : 7.99% ( 2.60 MiB => 213 KiB, 1755012328.html.zst20)
# 1755012328.html : 7.93% ( 2.60 MiB => 211 KiB, 1755012328.html.zst21)
# 1755012328.html : 7.91% ( 2.60 MiB => 211 KiB, 1755012328.html.zst22)
#
# Let's see if we get benefits from tar'ing and then compressing;
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755012328.html
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755012331.html
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755015932.html
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755015929.html
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755019567.html
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755019564.html
# -rw-r--r-- 1 hak8or users 16M Sep 1 12:23 175501.tar
# ➜ for lvl in $(seq 3 22); zstd --compress --ultra -o 175501.tar.$lvl -$lvl 175501.tar; end
# 175501.tar : 8.91% ( 15.6 MiB => 1.39 MiB, 175501.tar.3)
# 175501.tar : 8.92% ( 15.6 MiB => 1.39 MiB, 175501.tar.4)
# 175501.tar : 8.65% ( 15.6 MiB => 1.35 MiB, 175501.tar.5)
# 175501.tar : 8.42% ( 15.6 MiB => 1.31 MiB, 175501.tar.6)
# 175501.tar : 8.36% ( 15.6 MiB => 1.30 MiB, 175501.tar.7)
# 175501.tar : 8.25% ( 15.6 MiB => 1.28 MiB, 175501.tar.8)
# 175501.tar : 5.36% ( 15.6 MiB => 854 KiB, 175501.tar.9)
# 175501.tar : 5.32% ( 15.6 MiB => 847 KiB, 175501.tar.10)
# 175501.tar : 5.30% ( 15.6 MiB => 844 KiB, 175501.tar.11)
# 175501.tar : 5.30% ( 15.6 MiB => 844 KiB, 175501.tar.12)
# 175501.tar : 5.48% ( 15.6 MiB => 872 KiB, 175501.tar.13)
# 175501.tar : 5.42% ( 15.6 MiB => 864 KiB, 175501.tar.14)
# 175501.tar : 5.19% ( 15.6 MiB => 828 KiB, 175501.tar.15)
# 175501.tar : 5.31% ( 15.6 MiB => 845 KiB, 175501.tar.16)
# 175501.tar : 5.01% ( 15.6 MiB => 798 KiB, 175501.tar.17)
# 175501.tar : 5.04% ( 15.6 MiB => 803 KiB, 175501.tar.18)
# 175501.tar : 4.84% ( 15.6 MiB => 771 KiB, 175501.tar.19)
# 175501.tar : 4.79% ( 15.6 MiB => 764 KiB, 175501.tar.20)
# 175501.tar : 4.74% ( 15.6 MiB => 755 KiB, 175501.tar.21)
# 175501.tar : 4.73% ( 15.6 MiB => 753 KiB, 175501.tar.22)