Compare commits
4 Commits
parallel_s
...
parser_llm
Author | SHA1 | Date | |
---|---|---|---|
cb8025becd | |||
0039078f41 | |||
4ae1622f02 | |||
b538dd8012 |
700
Cargo.lock
generated
700
Cargo.lock
generated
@@ -39,8 +39,8 @@ dependencies = [
|
|||||||
"flate2",
|
"flate2",
|
||||||
"foldhash",
|
"foldhash",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"h2",
|
"h2 0.3.26",
|
||||||
"http",
|
"http 0.2.12",
|
||||||
"httparse",
|
"httparse",
|
||||||
"httpdate",
|
"httpdate",
|
||||||
"itoa",
|
"itoa",
|
||||||
@@ -76,7 +76,7 @@ checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"bytestring",
|
"bytestring",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"http",
|
"http 0.2.12",
|
||||||
"regex",
|
"regex",
|
||||||
"regex-lite",
|
"regex-lite",
|
||||||
"serde",
|
"serde",
|
||||||
@@ -289,6 +289,12 @@ dependencies = [
|
|||||||
"windows-sys 0.59.0",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "atomic-waker"
|
||||||
|
version = "1.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "autocfg"
|
||||||
version = "1.5.0"
|
version = "1.5.0"
|
||||||
@@ -491,6 +497,16 @@ dependencies = [
|
|||||||
"version_check",
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "core-foundation"
|
||||||
|
version = "0.9.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
|
||||||
|
dependencies = [
|
||||||
|
"core-foundation-sys",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "core-foundation-sys"
|
name = "core-foundation-sys"
|
||||||
version = "0.8.7"
|
version = "0.8.7"
|
||||||
@@ -679,9 +695,12 @@ dependencies = [
|
|||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
"dirs",
|
"dirs",
|
||||||
|
"futures",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
|
"num_enum",
|
||||||
"rayon",
|
"rayon",
|
||||||
"regex",
|
"regex",
|
||||||
|
"reqwest",
|
||||||
"rusqlite",
|
"rusqlite",
|
||||||
"scraper",
|
"scraper",
|
||||||
"serde",
|
"serde",
|
||||||
@@ -690,6 +709,7 @@ dependencies = [
|
|||||||
"test-log",
|
"test-log",
|
||||||
"tracing",
|
"tracing",
|
||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
|
"zstd",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -746,6 +766,16 @@ version = "1.0.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "errno"
|
||||||
|
version = "0.3.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fallible-iterator"
|
name = "fallible-iterator"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
@@ -758,6 +788,12 @@ version = "0.1.9"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fastrand"
|
||||||
|
version = "2.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flate2"
|
name = "flate2"
|
||||||
version = "1.1.2"
|
version = "1.1.2"
|
||||||
@@ -780,6 +816,21 @@ version = "0.1.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "foreign-types"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
|
||||||
|
dependencies = [
|
||||||
|
"foreign-types-shared",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "foreign-types-shared"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "form_urlencoded"
|
name = "form_urlencoded"
|
||||||
version = "1.2.1"
|
version = "1.2.1"
|
||||||
@@ -799,12 +850,65 @@ dependencies = [
|
|||||||
"new_debug_unreachable",
|
"new_debug_unreachable",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures"
|
||||||
|
version = "0.3.31"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
|
||||||
|
dependencies = [
|
||||||
|
"futures-channel",
|
||||||
|
"futures-core",
|
||||||
|
"futures-executor",
|
||||||
|
"futures-io",
|
||||||
|
"futures-sink",
|
||||||
|
"futures-task",
|
||||||
|
"futures-util",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-channel"
|
||||||
|
version = "0.3.31"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"futures-sink",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-core"
|
name = "futures-core"
|
||||||
version = "0.3.31"
|
version = "0.3.31"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
|
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-executor"
|
||||||
|
version = "0.3.31"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"futures-task",
|
||||||
|
"futures-util",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-io"
|
||||||
|
version = "0.3.31"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-macro"
|
||||||
|
version = "0.3.31"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-sink"
|
name = "futures-sink"
|
||||||
version = "0.3.31"
|
version = "0.3.31"
|
||||||
@@ -823,10 +927,16 @@ version = "0.3.31"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
|
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
|
"futures-io",
|
||||||
|
"futures-macro",
|
||||||
|
"futures-sink",
|
||||||
"futures-task",
|
"futures-task",
|
||||||
|
"memchr",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"pin-utils",
|
"pin-utils",
|
||||||
|
"slab",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -897,7 +1007,26 @@ dependencies = [
|
|||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-sink",
|
"futures-sink",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"http",
|
"http 0.2.12",
|
||||||
|
"indexmap",
|
||||||
|
"slab",
|
||||||
|
"tokio",
|
||||||
|
"tokio-util",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "h2"
|
||||||
|
version = "0.4.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386"
|
||||||
|
dependencies = [
|
||||||
|
"atomic-waker",
|
||||||
|
"bytes",
|
||||||
|
"fnv",
|
||||||
|
"futures-core",
|
||||||
|
"futures-sink",
|
||||||
|
"http 1.3.1",
|
||||||
"indexmap",
|
"indexmap",
|
||||||
"slab",
|
"slab",
|
||||||
"tokio",
|
"tokio",
|
||||||
@@ -952,6 +1081,40 @@ dependencies = [
|
|||||||
"itoa",
|
"itoa",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "http"
|
||||||
|
version = "1.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
|
||||||
|
dependencies = [
|
||||||
|
"bytes",
|
||||||
|
"fnv",
|
||||||
|
"itoa",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "http-body"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
|
||||||
|
dependencies = [
|
||||||
|
"bytes",
|
||||||
|
"http 1.3.1",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "http-body-util"
|
||||||
|
version = "0.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
|
||||||
|
dependencies = [
|
||||||
|
"bytes",
|
||||||
|
"futures-core",
|
||||||
|
"http 1.3.1",
|
||||||
|
"http-body",
|
||||||
|
"pin-project-lite",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "httparse"
|
name = "httparse"
|
||||||
version = "1.10.1"
|
version = "1.10.1"
|
||||||
@@ -964,6 +1127,86 @@ version = "1.0.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hyper"
|
||||||
|
version = "1.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e"
|
||||||
|
dependencies = [
|
||||||
|
"atomic-waker",
|
||||||
|
"bytes",
|
||||||
|
"futures-channel",
|
||||||
|
"futures-core",
|
||||||
|
"h2 0.4.12",
|
||||||
|
"http 1.3.1",
|
||||||
|
"http-body",
|
||||||
|
"httparse",
|
||||||
|
"itoa",
|
||||||
|
"pin-project-lite",
|
||||||
|
"pin-utils",
|
||||||
|
"smallvec",
|
||||||
|
"tokio",
|
||||||
|
"want",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hyper-rustls"
|
||||||
|
version = "0.27.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
|
||||||
|
dependencies = [
|
||||||
|
"http 1.3.1",
|
||||||
|
"hyper",
|
||||||
|
"hyper-util",
|
||||||
|
"rustls",
|
||||||
|
"rustls-pki-types",
|
||||||
|
"tokio",
|
||||||
|
"tokio-rustls",
|
||||||
|
"tower-service",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hyper-tls"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
|
||||||
|
dependencies = [
|
||||||
|
"bytes",
|
||||||
|
"http-body-util",
|
||||||
|
"hyper",
|
||||||
|
"hyper-util",
|
||||||
|
"native-tls",
|
||||||
|
"tokio",
|
||||||
|
"tokio-native-tls",
|
||||||
|
"tower-service",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hyper-util"
|
||||||
|
version = "0.1.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
|
||||||
|
dependencies = [
|
||||||
|
"base64",
|
||||||
|
"bytes",
|
||||||
|
"futures-channel",
|
||||||
|
"futures-core",
|
||||||
|
"futures-util",
|
||||||
|
"http 1.3.1",
|
||||||
|
"http-body",
|
||||||
|
"hyper",
|
||||||
|
"ipnet",
|
||||||
|
"libc",
|
||||||
|
"percent-encoding",
|
||||||
|
"pin-project-lite",
|
||||||
|
"socket2",
|
||||||
|
"system-configuration",
|
||||||
|
"tokio",
|
||||||
|
"tower-service",
|
||||||
|
"tracing",
|
||||||
|
"windows-registry",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "iana-time-zone"
|
name = "iana-time-zone"
|
||||||
version = "0.1.63"
|
version = "0.1.63"
|
||||||
@@ -1111,6 +1354,22 @@ dependencies = [
|
|||||||
"hashbrown",
|
"hashbrown",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ipnet"
|
||||||
|
version = "2.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iri-string"
|
||||||
|
version = "0.7.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "is_terminal_polyfill"
|
name = "is_terminal_polyfill"
|
||||||
version = "1.70.1"
|
version = "1.70.1"
|
||||||
@@ -1182,6 +1441,12 @@ dependencies = [
|
|||||||
"vcpkg",
|
"vcpkg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "linux-raw-sys"
|
||||||
|
version = "0.9.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "litemap"
|
name = "litemap"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
@@ -1294,6 +1559,23 @@ dependencies = [
|
|||||||
"windows-sys 0.59.0",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "native-tls"
|
||||||
|
version = "0.2.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"log",
|
||||||
|
"openssl",
|
||||||
|
"openssl-probe",
|
||||||
|
"openssl-sys",
|
||||||
|
"schannel",
|
||||||
|
"security-framework",
|
||||||
|
"security-framework-sys",
|
||||||
|
"tempfile",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "new_debug_unreachable"
|
name = "new_debug_unreachable"
|
||||||
version = "1.0.6"
|
version = "1.0.6"
|
||||||
@@ -1325,6 +1607,28 @@ dependencies = [
|
|||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_enum"
|
||||||
|
version = "0.7.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a"
|
||||||
|
dependencies = [
|
||||||
|
"num_enum_derive",
|
||||||
|
"rustversion",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_enum_derive"
|
||||||
|
version = "0.7.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro-crate",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "object"
|
name = "object"
|
||||||
version = "0.36.7"
|
version = "0.36.7"
|
||||||
@@ -1346,6 +1650,50 @@ version = "1.70.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "openssl"
|
||||||
|
version = "0.10.73"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cfg-if",
|
||||||
|
"foreign-types",
|
||||||
|
"libc",
|
||||||
|
"once_cell",
|
||||||
|
"openssl-macros",
|
||||||
|
"openssl-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "openssl-macros"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "openssl-probe"
|
||||||
|
version = "0.1.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "openssl-sys"
|
||||||
|
version = "0.9.109"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
"pkg-config",
|
||||||
|
"vcpkg",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "option-ext"
|
name = "option-ext"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
@@ -1487,6 +1835,15 @@ version = "0.1.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro-crate"
|
||||||
|
version = "3.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
|
||||||
|
dependencies = [
|
||||||
|
"toml_edit",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.95"
|
version = "1.0.95"
|
||||||
@@ -1645,6 +2002,62 @@ version = "0.8.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "reqwest"
|
||||||
|
version = "0.12.23"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb"
|
||||||
|
dependencies = [
|
||||||
|
"base64",
|
||||||
|
"bytes",
|
||||||
|
"encoding_rs",
|
||||||
|
"futures-channel",
|
||||||
|
"futures-core",
|
||||||
|
"futures-util",
|
||||||
|
"h2 0.4.12",
|
||||||
|
"http 1.3.1",
|
||||||
|
"http-body",
|
||||||
|
"http-body-util",
|
||||||
|
"hyper",
|
||||||
|
"hyper-rustls",
|
||||||
|
"hyper-tls",
|
||||||
|
"hyper-util",
|
||||||
|
"js-sys",
|
||||||
|
"log",
|
||||||
|
"mime",
|
||||||
|
"native-tls",
|
||||||
|
"percent-encoding",
|
||||||
|
"pin-project-lite",
|
||||||
|
"rustls-pki-types",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"serde_urlencoded",
|
||||||
|
"sync_wrapper",
|
||||||
|
"tokio",
|
||||||
|
"tokio-native-tls",
|
||||||
|
"tower",
|
||||||
|
"tower-http",
|
||||||
|
"tower-service",
|
||||||
|
"url",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"wasm-bindgen-futures",
|
||||||
|
"web-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ring"
|
||||||
|
version = "0.17.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"cfg-if",
|
||||||
|
"getrandom 0.2.16",
|
||||||
|
"libc",
|
||||||
|
"untrusted",
|
||||||
|
"windows-sys 0.52.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rusqlite"
|
name = "rusqlite"
|
||||||
version = "0.36.0"
|
version = "0.36.0"
|
||||||
@@ -1666,6 +2079,52 @@ version = "0.1.25"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
|
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustix"
|
||||||
|
version = "1.0.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"errno",
|
||||||
|
"libc",
|
||||||
|
"linux-raw-sys",
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustls"
|
||||||
|
version = "0.23.31"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc"
|
||||||
|
dependencies = [
|
||||||
|
"once_cell",
|
||||||
|
"rustls-pki-types",
|
||||||
|
"rustls-webpki",
|
||||||
|
"subtle",
|
||||||
|
"zeroize",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustls-pki-types"
|
||||||
|
version = "1.12.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
|
||||||
|
dependencies = [
|
||||||
|
"zeroize",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustls-webpki"
|
||||||
|
version = "0.103.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc"
|
||||||
|
dependencies = [
|
||||||
|
"ring",
|
||||||
|
"rustls-pki-types",
|
||||||
|
"untrusted",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustversion"
|
name = "rustversion"
|
||||||
version = "1.0.21"
|
version = "1.0.21"
|
||||||
@@ -1678,6 +2137,15 @@ version = "1.0.20"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
|
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "schannel"
|
||||||
|
version = "0.1.27"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "scopeguard"
|
name = "scopeguard"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
@@ -1699,6 +2167,29 @@ dependencies = [
|
|||||||
"tendril",
|
"tendril",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "security-framework"
|
||||||
|
version = "2.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"core-foundation",
|
||||||
|
"core-foundation-sys",
|
||||||
|
"libc",
|
||||||
|
"security-framework-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "security-framework-sys"
|
||||||
|
version = "2.14.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32"
|
||||||
|
dependencies = [
|
||||||
|
"core-foundation-sys",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "selectors"
|
name = "selectors"
|
||||||
version = "0.26.0"
|
version = "0.26.0"
|
||||||
@@ -1891,6 +2382,12 @@ version = "0.11.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "subtle"
|
||||||
|
version = "2.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "2.0.103"
|
version = "2.0.103"
|
||||||
@@ -1902,6 +2399,15 @@ dependencies = [
|
|||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sync_wrapper"
|
||||||
|
version = "1.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "synstructure"
|
name = "synstructure"
|
||||||
version = "0.13.2"
|
version = "0.13.2"
|
||||||
@@ -1913,6 +2419,40 @@ dependencies = [
|
|||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "system-configuration"
|
||||||
|
version = "0.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"core-foundation",
|
||||||
|
"system-configuration-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "system-configuration-sys"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
|
||||||
|
dependencies = [
|
||||||
|
"core-foundation-sys",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tempfile"
|
||||||
|
version = "3.21.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e"
|
||||||
|
dependencies = [
|
||||||
|
"fastrand",
|
||||||
|
"getrandom 0.3.3",
|
||||||
|
"once_cell",
|
||||||
|
"rustix",
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tendril"
|
name = "tendril"
|
||||||
version = "0.4.3"
|
version = "0.4.3"
|
||||||
@@ -2033,6 +2573,26 @@ dependencies = [
|
|||||||
"windows-sys 0.52.0",
|
"windows-sys 0.52.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tokio-native-tls"
|
||||||
|
version = "0.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
|
||||||
|
dependencies = [
|
||||||
|
"native-tls",
|
||||||
|
"tokio",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tokio-rustls"
|
||||||
|
version = "0.26.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
|
||||||
|
dependencies = [
|
||||||
|
"rustls",
|
||||||
|
"tokio",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio-util"
|
name = "tokio-util"
|
||||||
version = "0.7.15"
|
version = "0.7.15"
|
||||||
@@ -2046,6 +2606,68 @@ dependencies = [
|
|||||||
"tokio",
|
"tokio",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml_datetime"
|
||||||
|
version = "0.6.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml_edit"
|
||||||
|
version = "0.22.27"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
|
||||||
|
dependencies = [
|
||||||
|
"indexmap",
|
||||||
|
"toml_datetime",
|
||||||
|
"winnow",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tower"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"futures-util",
|
||||||
|
"pin-project-lite",
|
||||||
|
"sync_wrapper",
|
||||||
|
"tokio",
|
||||||
|
"tower-layer",
|
||||||
|
"tower-service",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tower-http"
|
||||||
|
version = "0.6.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"bytes",
|
||||||
|
"futures-util",
|
||||||
|
"http 1.3.1",
|
||||||
|
"http-body",
|
||||||
|
"iri-string",
|
||||||
|
"pin-project-lite",
|
||||||
|
"tower",
|
||||||
|
"tower-layer",
|
||||||
|
"tower-service",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tower-layer"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tower-service"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tracing"
|
name = "tracing"
|
||||||
version = "0.1.41"
|
version = "0.1.41"
|
||||||
@@ -2108,6 +2730,12 @@ dependencies = [
|
|||||||
"tracing-log",
|
"tracing-log",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "try-lock"
|
||||||
|
version = "0.2.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "typenum"
|
name = "typenum"
|
||||||
version = "1.18.0"
|
version = "1.18.0"
|
||||||
@@ -2138,6 +2766,12 @@ version = "0.2.6"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "untrusted"
|
||||||
|
version = "0.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "url"
|
name = "url"
|
||||||
version = "2.5.4"
|
version = "2.5.4"
|
||||||
@@ -2185,6 +2819,15 @@ version = "0.9.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "want"
|
||||||
|
version = "0.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
|
||||||
|
dependencies = [
|
||||||
|
"try-lock",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasi"
|
name = "wasi"
|
||||||
version = "0.11.1+wasi-snapshot-preview1"
|
version = "0.11.1+wasi-snapshot-preview1"
|
||||||
@@ -2226,6 +2869,19 @@ dependencies = [
|
|||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen-futures"
|
||||||
|
version = "0.4.50"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"js-sys",
|
||||||
|
"once_cell",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"web-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-bindgen-macro"
|
name = "wasm-bindgen-macro"
|
||||||
version = "0.2.100"
|
version = "0.2.100"
|
||||||
@@ -2258,6 +2914,16 @@ dependencies = [
|
|||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "web-sys"
|
||||||
|
version = "0.3.77"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
|
||||||
|
dependencies = [
|
||||||
|
"js-sys",
|
||||||
|
"wasm-bindgen",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi"
|
name = "winapi"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
@@ -2321,6 +2987,17 @@ version = "0.1.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
|
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-registry"
|
||||||
|
version = "0.5.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
|
||||||
|
dependencies = [
|
||||||
|
"windows-link",
|
||||||
|
"windows-result",
|
||||||
|
"windows-strings",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-result"
|
name = "windows-result"
|
||||||
version = "0.3.4"
|
version = "0.3.4"
|
||||||
@@ -2421,6 +3098,15 @@ version = "0.52.6"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winnow"
|
||||||
|
version = "0.7.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wit-bindgen-rt"
|
name = "wit-bindgen-rt"
|
||||||
version = "0.39.0"
|
version = "0.39.0"
|
||||||
@@ -2501,6 +3187,12 @@ dependencies = [
|
|||||||
"synstructure",
|
"synstructure",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zeroize"
|
||||||
|
version = "1.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zerotrie"
|
name = "zerotrie"
|
||||||
version = "0.2.2"
|
version = "0.2.2"
|
||||||
|
@@ -8,9 +8,12 @@ actix-web = "4.11.0"
|
|||||||
chrono = { version = "0.4.41", features = ["serde"] }
|
chrono = { version = "0.4.41", features = ["serde"] }
|
||||||
clap = { version = "4.5.40", features = ["derive"] }
|
clap = { version = "4.5.40", features = ["derive"] }
|
||||||
dirs = "6.0.0"
|
dirs = "6.0.0"
|
||||||
|
futures = "0.3.31"
|
||||||
lazy_static = "1.5.0"
|
lazy_static = "1.5.0"
|
||||||
|
num_enum = "0.7.4"
|
||||||
rayon = "1.10.0"
|
rayon = "1.10.0"
|
||||||
regex = "1.11.1"
|
regex = "1.11.1"
|
||||||
|
reqwest = { version = "0.12.23", features = ["blocking"] }
|
||||||
rusqlite = { version = "0.36.0", features = ["bundled", "chrono"] }
|
rusqlite = { version = "0.36.0", features = ["bundled", "chrono"] }
|
||||||
scraper = "0.23.1"
|
scraper = "0.23.1"
|
||||||
serde = { version = "1.0.219", features = ["derive"] }
|
serde = { version = "1.0.219", features = ["derive"] }
|
||||||
@@ -18,6 +21,7 @@ serde_json = "1.0.140"
|
|||||||
test-log = { version = "0.2.17", features = ["trace"] }
|
test-log = { version = "0.2.17", features = ["trace"] }
|
||||||
tracing = { version = "0.1.41", features = ["attributes"] }
|
tracing = { version = "0.1.41", features = ["attributes"] }
|
||||||
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
|
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
|
||||||
|
zstd = "0.13.3"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
similar-asserts = "1.7.0"
|
similar-asserts = "1.7.0"
|
||||||
|
110
readme.md
110
readme.md
@@ -10,3 +10,113 @@ echo run2 && http GET "$URL_BASE/listing/since/12345678/2" && \
|
|||||||
echo run3 && http GET "$URL_BASE/listing/388484391867" && \
|
echo run3 && http GET "$URL_BASE/listing/388484391867" && \
|
||||||
echo run4 && http GET "$URL_BASE/listing/286605201240/history"
|
echo run4 && http GET "$URL_BASE/listing/286605201240/history"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
And some jq usage for interacting with the raw data:
|
||||||
|
```bash
|
||||||
|
# Download a bunch of listings.
|
||||||
|
http https://scraper.hak8or.com/api/listings since==0 limit==20 > listings.json
|
||||||
|
|
||||||
|
# Show what a single listing looks like.
|
||||||
|
cat listings.json | jq '.[0]'
|
||||||
|
{
|
||||||
|
"listing": {
|
||||||
|
"id": 22563,
|
||||||
|
"item_id": 286707621236,
|
||||||
|
"title": "WD_BLACK SN770M 2TB M.2 NVMe Internal SSD (WDBDNH0020BBK-WRSN)",
|
||||||
|
"buy_it_now_price_cents": null,
|
||||||
|
"has_best_offer": false,
|
||||||
|
"image_url": "https://i.ebayimg.com/images/g/It4AAeSwzz5oddoa/s-l140.jpg"
|
||||||
|
},
|
||||||
|
"history": [
|
||||||
|
{
|
||||||
|
"item": 286707621236,
|
||||||
|
"timestamp": "2025-07-15T04:46:54Z",
|
||||||
|
"category": "ssd",
|
||||||
|
"current_bid_usd_cents": 12900
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"parsed": [
|
||||||
|
{
|
||||||
|
"id": 6,
|
||||||
|
"item": 286707621236,
|
||||||
|
"total_gigabytes": 2048,
|
||||||
|
"quantity": 1,
|
||||||
|
"individual_size_gigabytes": 2048,
|
||||||
|
"parse_engine": 0,
|
||||||
|
"needed_description_check": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Show the 1st and 2nd items, but only grab a few specific entries.
|
||||||
|
cat listings_small.json | jq '[.[1:3][] | {
|
||||||
|
item_id: .listing.item_id,
|
||||||
|
title: .listing.title,
|
||||||
|
parsed: .parsed[] | {
|
||||||
|
total_gigabytes,
|
||||||
|
quantity,
|
||||||
|
individual_size_gigabytes
|
||||||
|
}
|
||||||
|
}]'
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"item_id": 297545995095,
|
||||||
|
"title": "Crucial P3 Plus 1TB NVMe Internal M.2 SSD (CT1000P3PSSD8) - Barely used!",
|
||||||
|
"parsed": {
|
||||||
|
"total_gigabytes": 1024,
|
||||||
|
"quantity": 1,
|
||||||
|
"individual_size_gigabytes": 1024
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"item_id": 127220979797,
|
||||||
|
"title": "Kingston NV2 2TB M.2 3500MG/S NVMe Internal SSD PCIe 4.0 Gen SNV2S/2000G C-/#qWT",
|
||||||
|
"parsed": {
|
||||||
|
"total_gigabytes": 2048,
|
||||||
|
"quantity": 1,
|
||||||
|
"individual_size_gigabytes": 2048
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
And now an LLM-based parse, using the following prompt (189 tokens for Gemini 2.5 Flash Lite):
|
||||||
|
```
|
||||||
|
I will provide you with a listing title I want you to analyse. Then you will tell me the total gigabytes of all drives listed in the listing, how many drives are specified in the title, and the gigabytes of each drive in the listing. Here is an example for a title of "Crucial P3 Plus 1TB NVMe Internal M.2 SSD (CT1000P3PSSD8) - Barely used!";
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"total_gigabytes": 1024,
|
||||||
|
"quantity": 1,
|
||||||
|
"individual_size_gigabytes": 1024
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Reply with "OK" (and _only_ "OK") if you understand this. After you reply with that, I will provide you with a title, and then you will reply with solely the requested json (and ONLY said json).
|
||||||
|
```
|
||||||
|
|
||||||
|
And passing the following title (30 tokens):
|
||||||
|
```
|
||||||
|
Lot Of 3 Western Digital PC SN740 512GB M.2 2230 NVMe Internal SSD
|
||||||
|
```
|
||||||
|
returns the following JSON (41 tokens):
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"total_gigabytes": 1536,
|
||||||
|
"quantity": 3,
|
||||||
|
"individual_size_gigabytes": 512
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
And another example, sending the following title (49 tokens):
|
||||||
|
```
|
||||||
|
(Lot of 6) Samsung MZ-VLB2560 256GB M.2 NVMe Internal SSD (MZVLB256HBHQ-000H1)
|
||||||
|
```
|
||||||
|
returns the following JSON (42 tokens):
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"total_gigabytes": 1536,
|
||||||
|
"quantity": 6,
|
||||||
|
"individual_size_gigabytes": 256
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
So for one listing we have a 189-token "system prompt", a ~45-token title prompt, and a ~42-token parsed reply. Given 30,000 listings, that's 5,670,000 tokens of system prompt (input), 1,350,000 tokens of title prompts (input), and 1,260,000 tokens of parsed replies (output). Assuming Gemini 2.5 Flash Lite, which costs $0.10/M input tokens and $0.40/M output tokens, we would pay $0.702 for input ((5.67M + 1.35M) × $0.10/M) and $0.504 for output (1.26M × $0.40/M), or $1.206 total.
|
||||||
|
235
src/db.rs
235
src/db.rs
@@ -1,5 +1,8 @@
|
|||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
|
use num_enum::TryFromPrimitive;
|
||||||
use rusqlite::Connection;
|
use rusqlite::Connection;
|
||||||
|
use rusqlite::ToSql;
|
||||||
|
use rusqlite::types::FromSql;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
@@ -205,15 +208,34 @@ impl ParsedPage {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Debug, PartialEq, Copy, Clone)]
|
#[repr(i64)]
|
||||||
|
#[derive(Serialize, Debug, PartialEq, Copy, Clone, PartialOrd, Ord, Eq, TryFromPrimitive)]
|
||||||
|
pub enum StorageParsingEngineVersion {
|
||||||
|
Testing = 0,
|
||||||
|
Regex = 1,
|
||||||
|
LLM = 2,
|
||||||
|
}
|
||||||
|
impl ToSql for StorageParsingEngineVersion {
|
||||||
|
fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> {
|
||||||
|
Ok((*self as i64).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl FromSql for StorageParsingEngineVersion {
|
||||||
|
fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult<Self> {
|
||||||
|
let v = value.as_i64()?;
|
||||||
|
Self::try_from(v).map_err(|_| rusqlite::types::FromSqlError::OutOfRange(v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug, PartialEq, Clone)]
|
||||||
pub struct ParsedStorage {
|
pub struct ParsedStorage {
|
||||||
pub id: i64,
|
pub id: i64,
|
||||||
pub item: i64,
|
pub item: i64,
|
||||||
pub total_gigabytes: i64,
|
pub total_gigabytes: i64,
|
||||||
pub quantity: i64,
|
pub quantity: i64,
|
||||||
pub individual_size_gigabytes: i64,
|
pub individual_size_gigabytes: i64,
|
||||||
pub parse_engine: i64,
|
pub parse_engine: StorageParsingEngineVersion,
|
||||||
pub needed_description_check: bool,
|
pub failed_reason: String,
|
||||||
}
|
}
|
||||||
impl DBTable for ParsedStorage {
|
impl DBTable for ParsedStorage {
|
||||||
const TABLE_NAME: &'static str = "Storage_Parsed";
|
const TABLE_NAME: &'static str = "Storage_Parsed";
|
||||||
@@ -224,13 +246,13 @@ impl DBTable for ParsedStorage {
|
|||||||
quantity INTEGER,
|
quantity INTEGER,
|
||||||
sizes_gigabytes TEXT,
|
sizes_gigabytes TEXT,
|
||||||
parse_engine INTEGER,
|
parse_engine INTEGER,
|
||||||
need_description_check INTEGER,
|
failed_reason TEXT,
|
||||||
UNIQUE(item, parse_engine)
|
UNIQUE(item, parse_engine)
|
||||||
FOREIGN KEY(item) REFERENCES Listings(item_id)
|
FOREIGN KEY(item) REFERENCES Listings(item_id)
|
||||||
";
|
";
|
||||||
|
|
||||||
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
|
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
|
||||||
let mut stmt = conn.prepare(&format!("SELECT id, item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, need_description_check FROM {}", Self::TABLE_NAME))?;
|
let mut stmt = conn.prepare(&format!("SELECT id, item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, failed_reason FROM {}", Self::TABLE_NAME))?;
|
||||||
let iter = stmt.query_map([], |row| {
|
let iter = stmt.query_map([], |row| {
|
||||||
Ok(ParsedStorage {
|
Ok(ParsedStorage {
|
||||||
id: row.get(0)?,
|
id: row.get(0)?,
|
||||||
@@ -242,7 +264,7 @@ impl DBTable for ParsedStorage {
|
|||||||
r.parse().unwrap_or(0)
|
r.parse().unwrap_or(0)
|
||||||
},
|
},
|
||||||
parse_engine: row.get(5)?,
|
parse_engine: row.get(5)?,
|
||||||
needed_description_check: row.get(6)?,
|
failed_reason: row.get(6)?,
|
||||||
})
|
})
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
@@ -273,7 +295,7 @@ impl ParsedStorage {
|
|||||||
r.parse().unwrap()
|
r.parse().unwrap()
|
||||||
},
|
},
|
||||||
parse_engine: row.get(5)?,
|
parse_engine: row.get(5)?,
|
||||||
needed_description_check: row.get(6)?,
|
failed_reason: row.get(6)?,
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.ok()
|
.ok()
|
||||||
@@ -283,21 +305,26 @@ impl ParsedStorage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_or_update(&self, conn: &Connection) {
|
pub fn add_or_update(&self, conn: &Connection) {
|
||||||
let _ = conn.execute(&format!("
|
let _ = conn
|
||||||
|
.execute(
|
||||||
|
&format!(
|
||||||
|
"
|
||||||
INSERT OR REPLACE INTO {}
|
INSERT OR REPLACE INTO {}
|
||||||
(item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, need_description_check)
|
(item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, failed_reason)
|
||||||
VALUES
|
VALUES
|
||||||
(?1, ?2, ?3, ?4, ?5, ?6)",
|
(?1, ?2, ?3, ?4, ?5, ?6)",
|
||||||
Self::TABLE_NAME),
|
Self::TABLE_NAME
|
||||||
|
),
|
||||||
(
|
(
|
||||||
&self.item,
|
&self.item,
|
||||||
self.total_gigabytes,
|
self.total_gigabytes,
|
||||||
self.quantity,
|
self.quantity,
|
||||||
self.individual_size_gigabytes.to_string(),
|
self.individual_size_gigabytes.to_string(),
|
||||||
self.parse_engine,
|
self.parse_engine,
|
||||||
self.needed_description_check
|
&self.failed_reason,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
).unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -494,19 +521,40 @@ impl Listing {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn lookup_non_parsed(conn: &Connection) -> Vec<(i64, String)> {
|
pub fn lookup_pending_parse(
|
||||||
let mut stmt = conn
|
conn: &Connection,
|
||||||
.prepare(&format!(
|
allowed_engines: &[i64],
|
||||||
|
count_limit: u64,
|
||||||
|
) -> Vec<(i64, String)> {
|
||||||
|
let engines_filter = if !allowed_engines.is_empty() {
|
||||||
|
format!(
|
||||||
|
"AND ({})",
|
||||||
|
allowed_engines
|
||||||
|
.iter()
|
||||||
|
.map(|e| "ps.parse_engine = ".to_owned() + &e.to_string())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(" OR ")
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
let query = format!(
|
||||||
"
|
"
|
||||||
SELECT ei.item_id, ei.title FROM {} AS ei
|
SELECT listing.item_id, listing.title FROM {0} AS listing
|
||||||
LEFT JOIN {} AS sp ON ei.item_id = sp.item
|
WHERE NOT EXISTS (
|
||||||
WHERE sp.item IS NULL",
|
SELECT 1 FROM {1} AS ps
|
||||||
|
WHERE listing.item_id = ps.item {engines_filter}
|
||||||
|
)
|
||||||
|
LIMIT {count_limit}
|
||||||
|
",
|
||||||
Self::TABLE_NAME,
|
Self::TABLE_NAME,
|
||||||
ParsedStorage::TABLE_NAME
|
ParsedStorage::TABLE_NAME
|
||||||
))
|
);
|
||||||
|
conn.prepare(&query)
|
||||||
.ok()
|
.ok()
|
||||||
.unwrap();
|
.unwrap()
|
||||||
stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
|
.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
|
||||||
.ok()
|
.ok()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|e| e.unwrap())
|
.map(|e| e.unwrap())
|
||||||
@@ -565,7 +613,7 @@ pub fn listings_get_filtered(
|
|||||||
history: ItemAppearances::lookup(conn, l.item_id),
|
history: ItemAppearances::lookup(conn, l.item_id),
|
||||||
parsed: ParsedStorage::lookup(conn, l.item_id),
|
parsed: ParsedStorage::lookup(conn, l.item_id),
|
||||||
})
|
})
|
||||||
.filter(|lr| lr.parsed.iter().any(|p| !p.needed_description_check))
|
.filter(|lr| lr.parsed.iter().any(|p| p.failed_reason.is_empty()))
|
||||||
.collect::<Vec<ListingsFilterResult>>();
|
.collect::<Vec<ListingsFilterResult>>();
|
||||||
info!(
|
info!(
|
||||||
"Found total {} listings since (str:{} epoch:{})",
|
"Found total {} listings since (str:{} epoch:{})",
|
||||||
@@ -614,6 +662,125 @@ pub fn listings_get_filtered(
|
|||||||
listings
|
listings
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[repr(i64)]
|
||||||
|
#[derive(Serialize, Debug, PartialEq, Copy, Clone, TryFromPrimitive)]
|
||||||
|
pub enum StorageLLMVersion {
|
||||||
|
Testing = 0,
|
||||||
|
Gemini2d5Prompt0 = 1,
|
||||||
|
}
|
||||||
|
impl ToSql for StorageLLMVersion {
|
||||||
|
fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> {
|
||||||
|
Ok((*self as i64).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl FromSql for StorageLLMVersion {
|
||||||
|
fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult<Self> {
|
||||||
|
let v = value.as_i64()?;
|
||||||
|
Self::try_from(v).map_err(|_| rusqlite::types::FromSqlError::OutOfRange(v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is mostly meant as a way to cache all of these.
|
||||||
|
#[derive(Serialize, Debug, PartialEq, Clone)]
|
||||||
|
pub struct ParsedLLMStorageResult {
|
||||||
|
pub id: i64,
|
||||||
|
pub item_id: i64,
|
||||||
|
pub title: String,
|
||||||
|
pub quantity: i64,
|
||||||
|
pub gigabytes: i64,
|
||||||
|
pub fail_reason: String,
|
||||||
|
pub llm_id: StorageLLMVersion,
|
||||||
|
}
|
||||||
|
impl DBTable for ParsedLLMStorageResult {
|
||||||
|
const TABLE_NAME: &'static str = "ParsedLLMStorageResult";
|
||||||
|
const TABLE_SCHEMA: &'static str = "
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
item_id INTEGER NOT NULL UNIQUE,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
quantity INTEGER NOT NULL,
|
||||||
|
gigabytes INTEGER NOT NULL,
|
||||||
|
fail_reason TEXT NOT NULL,
|
||||||
|
llm_id INTEGER NOT NULL
|
||||||
|
";
|
||||||
|
|
||||||
|
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
|
||||||
|
let mut stmt = conn.prepare(&format!(
|
||||||
|
"SELECT id, item_id, title, quantity, gigabytes, fail_reason, llm_id FROM {}",
|
||||||
|
Self::TABLE_NAME
|
||||||
|
))?;
|
||||||
|
let iter = stmt.query_map([], |row| {
|
||||||
|
Ok(ParsedLLMStorageResult {
|
||||||
|
id: row.get(0)?,
|
||||||
|
item_id: row.get(1)?,
|
||||||
|
title: row.get(2)?,
|
||||||
|
quantity: row.get(3)?,
|
||||||
|
gigabytes: row.get(4)?,
|
||||||
|
fail_reason: row.get(5)?,
|
||||||
|
llm_id: row.get(6)?,
|
||||||
|
})
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let mut result = Vec::new();
|
||||||
|
for item in iter {
|
||||||
|
result.push(item?);
|
||||||
|
}
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl ParsedLLMStorageResult {
|
||||||
|
pub fn lookup(conn: &Connection, item_id: i64) -> Option<ParsedLLMStorageResult> {
|
||||||
|
let mut stmt = conn
|
||||||
|
.prepare(&format!(
|
||||||
|
"SELECT * FROM {} WHERE item_id = ?",
|
||||||
|
Self::TABLE_NAME
|
||||||
|
))
|
||||||
|
.ok()?;
|
||||||
|
stmt.query_one([item_id], |row| {
|
||||||
|
Ok(ParsedLLMStorageResult {
|
||||||
|
id: row.get(0)?,
|
||||||
|
item_id: row.get(1)?,
|
||||||
|
title: row.get(2)?,
|
||||||
|
quantity: row.get(3)?,
|
||||||
|
gigabytes: row.get(4)?,
|
||||||
|
fail_reason: row.get(5)?,
|
||||||
|
llm_id: row.get(6)?,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_or_update(&self, conn: &Connection) {
|
||||||
|
let count = conn
|
||||||
|
.execute(
|
||||||
|
&format!(
|
||||||
|
"INSERT OR REPLACE INTO {}
|
||||||
|
(
|
||||||
|
item_id,
|
||||||
|
title,
|
||||||
|
quantity,
|
||||||
|
gigabytes,
|
||||||
|
fail_reason,
|
||||||
|
llm_id
|
||||||
|
)
|
||||||
|
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
|
||||||
|
Self::TABLE_NAME
|
||||||
|
),
|
||||||
|
(
|
||||||
|
self.item_id,
|
||||||
|
&self.title,
|
||||||
|
self.quantity,
|
||||||
|
self.gigabytes,
|
||||||
|
self.fail_reason.clone(),
|
||||||
|
self.llm_id,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
if count != 1 {
|
||||||
|
panic!("Expected count to be 1 but got {}", count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn get_initialized(path: Option<&Path>) -> Connection {
|
pub fn get_initialized(path: Option<&Path>) -> Connection {
|
||||||
let conn = match path {
|
let conn = match path {
|
||||||
Some(p) => Connection::open(&p),
|
Some(p) => Connection::open(&p),
|
||||||
@@ -626,6 +793,7 @@ pub fn get_initialized(path: Option<&Path>) -> Connection {
|
|||||||
ParsedStorage::initialize(&conn);
|
ParsedStorage::initialize(&conn);
|
||||||
ParsedPage::initialize(&conn);
|
ParsedPage::initialize(&conn);
|
||||||
ItemAppearances::initialize(&conn);
|
ItemAppearances::initialize(&conn);
|
||||||
|
ParsedLLMStorageResult::initialize(&conn);
|
||||||
|
|
||||||
conn
|
conn
|
||||||
}
|
}
|
||||||
@@ -637,6 +805,7 @@ pub struct Stats {
|
|||||||
rows_parsed_storage: i64,
|
rows_parsed_storage: i64,
|
||||||
rows_parsed_page: i64,
|
rows_parsed_page: i64,
|
||||||
rows_item_appearances: i64,
|
rows_item_appearances: i64,
|
||||||
|
pub rows_parsed_storage_llm: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_stats(conn: &Connection) -> Stats {
|
pub fn get_stats(conn: &Connection) -> Stats {
|
||||||
@@ -646,6 +815,7 @@ pub fn get_stats(conn: &Connection) -> Stats {
|
|||||||
rows_parsed_storage: ParsedStorage::get_count(conn),
|
rows_parsed_storage: ParsedStorage::get_count(conn),
|
||||||
rows_parsed_page: ParsedPage::get_count(conn),
|
rows_parsed_page: ParsedPage::get_count(conn),
|
||||||
rows_item_appearances: ItemAppearances::get_count(conn),
|
rows_item_appearances: ItemAppearances::get_count(conn),
|
||||||
|
rows_parsed_storage_llm: ParsedLLMStorageResult::get_count(conn),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -667,7 +837,7 @@ mod tests {
|
|||||||
let listing = Listing {
|
let listing = Listing {
|
||||||
id: 1,
|
id: 1,
|
||||||
item_id: 1234,
|
item_id: 1234,
|
||||||
title: "Some Title".to_string(),
|
title: "Lot of 2 512GB SSD 6gb/s working with 5% wear".to_string(),
|
||||||
buy_it_now_price_cents: Some(123),
|
buy_it_now_price_cents: Some(123),
|
||||||
has_best_offer: false,
|
has_best_offer: false,
|
||||||
image_url: "google.com".to_string(),
|
image_url: "google.com".to_string(),
|
||||||
@@ -681,8 +851,8 @@ mod tests {
|
|||||||
total_gigabytes: 13,
|
total_gigabytes: 13,
|
||||||
quantity: 3,
|
quantity: 3,
|
||||||
individual_size_gigabytes: 13,
|
individual_size_gigabytes: 13,
|
||||||
parse_engine: 9,
|
parse_engine: StorageParsingEngineVersion::Testing,
|
||||||
needed_description_check: true,
|
failed_reason: "".to_owned(),
|
||||||
};
|
};
|
||||||
parsed.add_or_update(&db);
|
parsed.add_or_update(&db);
|
||||||
assert_eq!(ParsedStorage::lookup(&db, listing.item_id), vec![parsed]);
|
assert_eq!(ParsedStorage::lookup(&db, listing.item_id), vec![parsed]);
|
||||||
@@ -706,6 +876,21 @@ mod tests {
|
|||||||
vec![apperance]
|
vec![apperance]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let parsedllmstorage = ParsedLLMStorageResult {
|
||||||
|
fail_reason: "Some reason".to_owned(),
|
||||||
|
gigabytes: 12,
|
||||||
|
id: 1,
|
||||||
|
item_id: 12345,
|
||||||
|
quantity: 32,
|
||||||
|
title: "Some Title".to_owned(),
|
||||||
|
llm_id: StorageLLMVersion::Testing,
|
||||||
|
};
|
||||||
|
parsedllmstorage.add_or_update(&db);
|
||||||
|
assert_eq!(
|
||||||
|
ParsedLLMStorageResult::lookup(&db, parsedllmstorage.item_id),
|
||||||
|
Some(parsedllmstorage)
|
||||||
|
);
|
||||||
|
|
||||||
assert_eq!(Listing::lookup_since(&db, page.timestamp, 3), vec![listing]);
|
assert_eq!(Listing::lookup_since(&db, page.timestamp, 3), vec![listing]);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Listing::lookup_since(&db, page.timestamp + chrono::Duration::seconds(1), 3),
|
Listing::lookup_since(&db, page.timestamp + chrono::Duration::seconds(1), 3),
|
||||||
|
@@ -1,4 +1,5 @@
|
|||||||
pub mod db;
|
pub mod db;
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
pub mod parser_ebay;
|
pub mod parser_ebay;
|
||||||
pub mod parser_storage;
|
pub mod parser_storage_e0;
|
||||||
|
pub mod parser_storage_e1;
|
||||||
|
188
src/main.rs
188
src/main.rs
@@ -1,17 +1,16 @@
|
|||||||
use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
|
use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use ebay_scraper_rust::db::{
|
use ebay_scraper_rust::db;
|
||||||
DBTable, ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
|
use ebay_scraper_rust::db::DBTable;
|
||||||
get_stats, listings_get_filtered,
|
|
||||||
};
|
|
||||||
use ebay_scraper_rust::parser::parse_dir;
|
use ebay_scraper_rust::parser::parse_dir;
|
||||||
use ebay_scraper_rust::parser_storage;
|
use ebay_scraper_rust::{parser_storage_e0, parser_storage_e1};
|
||||||
|
use futures::future::join_all;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use tracing::{info, instrument};
|
use tracing::{error, info, instrument};
|
||||||
|
|
||||||
use tracing_subscriber::filter::EnvFilter;
|
use tracing_subscriber::filter::EnvFilter;
|
||||||
use tracing_subscriber::fmt;
|
use tracing_subscriber::fmt;
|
||||||
@@ -28,6 +27,13 @@ mod xdg_dirs;
|
|||||||
)]
|
)]
|
||||||
struct Args {}
|
struct Args {}
|
||||||
|
|
||||||
|
struct AppCtx {
|
||||||
|
db: rusqlite::Connection,
|
||||||
|
db_llm: rusqlite::Connection,
|
||||||
|
download_dir: PathBuf,
|
||||||
|
llm_parser: Option<actix_web::rt::task::JoinHandle<()>>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
#[derive(Deserialize, Debug)]
|
||||||
struct ListingsFilter {
|
struct ListingsFilter {
|
||||||
since: Option<i64>,
|
since: Option<i64>,
|
||||||
@@ -37,12 +43,12 @@ struct ListingsFilter {
|
|||||||
|
|
||||||
#[get("/listings")]
|
#[get("/listings")]
|
||||||
async fn listings_filtered_get(
|
async fn listings_filtered_get(
|
||||||
db: Data<Mutex<rusqlite::Connection>>,
|
ctx: Data<Mutex<AppCtx>>,
|
||||||
filter: web::Query<ListingsFilter>,
|
filter: web::Query<ListingsFilter>,
|
||||||
) -> Result<impl Responder> {
|
) -> Result<impl Responder> {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
let res = listings_get_filtered(
|
let res = db::listings_get_filtered(
|
||||||
&db.lock().unwrap(),
|
&ctx.lock().unwrap().db,
|
||||||
&DateTime::<Utc>::from_timestamp(filter.since.unwrap_or(0), 0).unwrap(),
|
&DateTime::<Utc>::from_timestamp(filter.since.unwrap_or(0), 0).unwrap(),
|
||||||
filter.limit.unwrap_or(1_000),
|
filter.limit.unwrap_or(1_000),
|
||||||
filter.cents_per_tbytes_max.unwrap_or(100_00),
|
filter.cents_per_tbytes_max.unwrap_or(100_00),
|
||||||
@@ -57,19 +63,16 @@ async fn listings_filtered_get(
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[get("/listing/{id}")]
|
#[get("/listing/{id}")]
|
||||||
async fn listing_get(
|
async fn listing_get(ctx: Data<Mutex<AppCtx>>, id: web::Path<i64>) -> Result<impl Responder> {
|
||||||
db: Data<Mutex<rusqlite::Connection>>,
|
Ok(web::Json(db::Listing::lookup(&ctx.lock().unwrap().db, *id)))
|
||||||
id: web::Path<i64>,
|
|
||||||
) -> Result<impl Responder> {
|
|
||||||
Ok(web::Json(Listing::lookup(&db.lock().unwrap(), *id)))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[get("/listing/{id}/parsed")]
|
#[get("/listing/{id}/parsed")]
|
||||||
async fn listing_parse_get(
|
async fn listing_parse_get(ctx: Data<Mutex<AppCtx>>, id: web::Path<i64>) -> Result<impl Responder> {
|
||||||
db: Data<Mutex<rusqlite::Connection>>,
|
Ok(web::Json(db::ParsedStorage::lookup(
|
||||||
id: web::Path<i64>,
|
&ctx.lock().unwrap().db,
|
||||||
) -> Result<impl Responder> {
|
*id,
|
||||||
Ok(web::Json(ParsedStorage::lookup(&db.lock().unwrap(), *id)))
|
)))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
@@ -80,10 +83,10 @@ struct APIHistory {
|
|||||||
|
|
||||||
#[get("/listing/{id}/history")]
|
#[get("/listing/{id}/history")]
|
||||||
async fn listing_history_get(
|
async fn listing_history_get(
|
||||||
db: Data<Mutex<rusqlite::Connection>>,
|
ctx: Data<Mutex<AppCtx>>,
|
||||||
id: web::Path<i64>,
|
id: web::Path<i64>,
|
||||||
) -> Result<impl Responder> {
|
) -> Result<impl Responder> {
|
||||||
let history: Vec<_> = ItemAppearances::lookup(&db.lock().unwrap(), *id)
|
let history: Vec<_> = db::ItemAppearances::lookup(&ctx.lock().unwrap().db, *id)
|
||||||
.iter()
|
.iter()
|
||||||
// .inspect(|e| info!("got: {:?}", e))
|
// .inspect(|e| info!("got: {:?}", e))
|
||||||
.filter_map(|e| {
|
.filter_map(|e| {
|
||||||
@@ -96,36 +99,109 @@ async fn listing_history_get(
|
|||||||
Ok(web::Json(history))
|
Ok(web::Json(history))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[post("/listing/parse")]
|
async fn storage_parse_work(entries: &[(i64, String)]) -> Vec<db::ParsedStorage> {
|
||||||
async fn parse_listings(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
let llm_futures: Vec<_> = entries
|
||||||
let mut cnt = 0;
|
|
||||||
let db_unlocked = db.lock().unwrap();
|
|
||||||
Listing::lookup_non_parsed(&db_unlocked)
|
|
||||||
.iter()
|
.iter()
|
||||||
.map(|l| parser_storage::parse_size_and_quantity(l.0, &l.1))
|
.map(|(id, title)| parser_storage_e1::parse_size_and_quantity_llm(*id, title))
|
||||||
.inspect(|_| cnt = cnt + 1)
|
.collect();
|
||||||
.for_each(|ps| ps.add_or_update(&db_unlocked));
|
let llm_future_results = join_all(llm_futures).await;
|
||||||
|
let llm_results = llm_future_results
|
||||||
|
.iter()
|
||||||
|
.flatten()
|
||||||
|
.map(|e| db::ParsedStorage {
|
||||||
|
id: 0,
|
||||||
|
item: e.item_id,
|
||||||
|
total_gigabytes: e.quantity * e.gigabytes,
|
||||||
|
quantity: e.quantity,
|
||||||
|
individual_size_gigabytes: e.gigabytes,
|
||||||
|
failed_reason: e.fail_reason.clone(),
|
||||||
|
parse_engine: db::StorageParsingEngineVersion::LLM,
|
||||||
|
});
|
||||||
|
// .inspect(|e| e.add_or_update(&unlocked.db))
|
||||||
|
// .map(|e| db::ParsedStorage {
|
||||||
|
// id: 0,
|
||||||
|
// item: e.item_id,
|
||||||
|
// total_gigabytes: e.quantity * e.gigabytes,
|
||||||
|
// quantity: e.quantity,
|
||||||
|
// individual_size_gigabytes: e.gigabytes,
|
||||||
|
// needed_description_check: !e.fail_reason.is_empty(),
|
||||||
|
// parse_engine: db::StorageParsingEngineVersion::LLM,
|
||||||
|
// })
|
||||||
|
// .for_each(|e| e.add_or_update(&unlocked.db));
|
||||||
|
|
||||||
Ok(web::Json(cnt))
|
// And a regex based parse.
|
||||||
|
let regex_results = entries
|
||||||
|
.iter()
|
||||||
|
.map(|(id, title)| parser_storage_e0::parse_size_and_quantity(*id, &title));
|
||||||
|
// .for_each(|e| e.add_or_update(&unlocked.db));
|
||||||
|
|
||||||
|
regex_results.chain(llm_results).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn storage_parse_worker(ctx: Data<Mutex<AppCtx>>) -> actix_web::rt::task::JoinHandle<()> {
|
||||||
|
actix_web::rt::spawn(async move {
|
||||||
|
loop {
|
||||||
|
actix_web::rt::time::sleep(std::time::Duration::from_millis(1000)).await;
|
||||||
|
let ctx_unlocked = ctx.lock().unwrap();
|
||||||
|
let entries = db::Listing::lookup_pending_parse(&ctx_unlocked.db, &[], 10);
|
||||||
|
let parsed = storage_parse_work(entries.as_slice()).await;
|
||||||
|
for p in parsed {
|
||||||
|
p.add_or_update(&ctx_unlocked.db);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[post("/listing/parse")]
|
||||||
|
async fn parse_listings(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
|
||||||
|
// Prepare a background parser to go through and use an LLM to parse the
|
||||||
|
// storage info.
|
||||||
|
if ctx.lock().unwrap().llm_parser.is_none() {
|
||||||
|
ctx.clone().lock().unwrap().llm_parser = Some(storage_parse_worker(ctx.clone()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lets grab a few entries and then try parsing them with two engines.
|
||||||
|
let ctx_locked = ctx.lock().unwrap();
|
||||||
|
let entries: Vec<_> = db::Listing::lookup_pending_parse(&ctx_locked.db, &[], 100)
|
||||||
|
.iter()
|
||||||
|
.take(10)
|
||||||
|
.map(|e| e.clone())
|
||||||
|
.collect();
|
||||||
|
for (item_id, title) in &entries {
|
||||||
|
let ps1 =
|
||||||
|
parser_storage_e1::parse_size_and_quantity(&ctx_locked.db_llm, *item_id, &title).await;
|
||||||
|
if ps1.is_some() {
|
||||||
|
// info!(
|
||||||
|
// "Parsed using an LLM title:{} and results:{:?}",
|
||||||
|
// title,
|
||||||
|
// ps1.unwrap()
|
||||||
|
// );
|
||||||
|
ps1.unwrap().add_or_update(&ctx_locked.db);
|
||||||
|
// ps1.unwrap().add_or_update(&ctx_locked.db_llm); No need
|
||||||
|
} else {
|
||||||
|
error!("Failed to parse {item_id} with title {title}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(web::Json(entries.len()))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[get("/category")]
|
#[get("/category")]
|
||||||
async fn category_getnames(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
async fn category_getnames(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
|
||||||
Ok(web::Json(SearchURL::names(&db.lock().unwrap())))
|
Ok(web::Json(db::SearchURL::names(&ctx.lock().unwrap().db)))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[post("/category/{category}/parse")]
|
#[post("/category/{category}/parse")]
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
async fn category_parse(
|
async fn category_parse(
|
||||||
db: Data<Mutex<rusqlite::Connection>>,
|
ctx: Data<Mutex<AppCtx>>,
|
||||||
downloaddir: Data<PathBuf>,
|
|
||||||
category: web::Path<String>,
|
category: web::Path<String>,
|
||||||
) -> Result<impl Responder> {
|
) -> Result<impl Responder> {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
let ctx_unlocked = ctx.lock().unwrap();
|
||||||
let count = parse_dir(
|
let count = parse_dir(
|
||||||
&downloaddir.join(category.clone()),
|
&ctx_unlocked.download_dir.join(category.clone()),
|
||||||
&category,
|
&category,
|
||||||
&db.lock().unwrap(),
|
&ctx_unlocked.db,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let elapsed = start.elapsed().as_micros() as f64 / 1000.0;
|
let elapsed = start.elapsed().as_micros() as f64 / 1000.0;
|
||||||
@@ -135,19 +211,22 @@ async fn category_parse(
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[get("/stats")]
|
#[get("/stats")]
|
||||||
async fn stats_get(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
async fn stats_get(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
|
||||||
Ok(web::Json(get_stats(&db.lock().unwrap())))
|
let mut stats_db = db::get_stats(&ctx.lock().unwrap().db);
|
||||||
|
let stats_db_llm = db::get_stats(&ctx.lock().unwrap().db_llm);
|
||||||
|
stats_db.rows_parsed_storage_llm = stats_db_llm.rows_parsed_storage_llm;
|
||||||
|
Ok(web::Json(stats_db))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[get("/admin")]
|
#[get("/admin")]
|
||||||
async fn admin_get(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
async fn admin_get(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
|
||||||
let db = db.lock().unwrap();
|
let ctx_locked = ctx.lock().unwrap();
|
||||||
let query_start_time = Instant::now();
|
let query_start_time = Instant::now();
|
||||||
let search_urls = SearchURL::get_all(&db).unwrap_or_default();
|
let search_urls = db::SearchURL::get_all(&ctx_locked.db).unwrap_or_default();
|
||||||
let parsed_pages = ParsedPage::get_all(&db).unwrap_or_default();
|
let parsed_pages = db::ParsedPage::get_all(&ctx_locked.db).unwrap_or_default();
|
||||||
let parsed_storages = ParsedStorage::get_all(&db).unwrap_or_default();
|
let parsed_storages = db::ParsedStorage::get_all(&ctx_locked.db).unwrap_or_default();
|
||||||
let item_appearances = ItemAppearances::get_all(&db).unwrap_or_default();
|
let item_appearances = db::ItemAppearances::get_all(&ctx_locked.db).unwrap_or_default();
|
||||||
let listings = Listing::get_all(&db).unwrap_or_default();
|
let listings = db::Listing::get_all(&ctx_locked.db).unwrap_or_default();
|
||||||
let total_query_time = query_start_time.elapsed().as_micros() as f64 / 1000.0;
|
let total_query_time = query_start_time.elapsed().as_micros() as f64 / 1000.0;
|
||||||
|
|
||||||
let html_gen_start_time = Instant::now();
|
let html_gen_start_time = Instant::now();
|
||||||
@@ -287,10 +366,22 @@ async fn main() -> std::io::Result<()> {
|
|||||||
"Starting with scraped data dir of \"{}\".",
|
"Starting with scraped data dir of \"{}\".",
|
||||||
scrapedatadir.to_str().unwrap()
|
scrapedatadir.to_str().unwrap()
|
||||||
);
|
);
|
||||||
let db_mutex = Data::new(Mutex::new(get_initialized(None)));
|
|
||||||
|
let app_data = Data::new(Mutex::new(AppCtx {
|
||||||
|
download_dir: scrapedatadir.clone(),
|
||||||
|
db: db::get_initialized(None),
|
||||||
|
db_llm: {
|
||||||
|
let db_path = scrapedatadir.with_file_name("llm.sqlite");
|
||||||
|
let db = rusqlite::Connection::open(&db_path).unwrap();
|
||||||
|
db::ParsedLLMStorageResult::initialize(&db);
|
||||||
|
info!("Created {:?} for caching LLM parsed title.", db_path);
|
||||||
|
db
|
||||||
|
},
|
||||||
|
llm_parser: None,
|
||||||
|
}));
|
||||||
|
|
||||||
// Prepare our backend via pulling in what catagories we are preconfigured with.
|
// Prepare our backend via pulling in what catagories we are preconfigured with.
|
||||||
SearchURL::scan(&db_mutex.lock().unwrap(), &scrapedatadir, "url.json");
|
db::SearchURL::scan(&app_data.lock().unwrap().db, &scrapedatadir, "url.json");
|
||||||
|
|
||||||
HttpServer::new(move || {
|
HttpServer::new(move || {
|
||||||
App::new()
|
App::new()
|
||||||
@@ -306,8 +397,7 @@ async fn main() -> std::io::Result<()> {
|
|||||||
.service(admin_get)
|
.service(admin_get)
|
||||||
.service(stats_get)
|
.service(stats_get)
|
||||||
// Stuff which is passed into every request.
|
// Stuff which is passed into every request.
|
||||||
.app_data(db_mutex.clone())
|
.app_data(app_data.clone())
|
||||||
.app_data(Data::new(scrapedatadir.clone()))
|
|
||||||
})
|
})
|
||||||
.bind(("0.0.0.0", 9876))?
|
.bind(("0.0.0.0", 9876))?
|
||||||
.run()
|
.run()
|
||||||
|
@@ -5,9 +5,10 @@ use crate::{
|
|||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use serde_json;
|
use serde_json;
|
||||||
use std::path::Path;
|
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
use std::{io::Read, path::Path};
|
||||||
use tracing::{debug, error, info};
|
use tracing::{debug, error, info};
|
||||||
|
use zstd;
|
||||||
|
|
||||||
fn timestamps_from_dir(path: &Path) -> Vec<i64> {
|
fn timestamps_from_dir(path: &Path) -> Vec<i64> {
|
||||||
if !std::fs::exists(path).expect("Directory must exist") {
|
if !std::fs::exists(path).expect("Directory must exist") {
|
||||||
@@ -20,17 +21,46 @@ fn timestamps_from_dir(path: &Path) -> Vec<i64> {
|
|||||||
std::fs::read_dir(path)
|
std::fs::read_dir(path)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|fpath| fpath.unwrap().path())
|
.map(|fpath| fpath.unwrap().path())
|
||||||
.filter_map(|fstem| {
|
.filter_map(|fname| {
|
||||||
fstem
|
// Turns out file_stem() doesn't handle multiple extensions and
|
||||||
.file_stem()
|
// file_prefix() is still in not stable.
|
||||||
.and_then(|s| s.to_str())
|
Some(fname.file_stem()?.to_str()?.split_once('.')?.0.to_owned())
|
||||||
.expect("Invalid file name")
|
|
||||||
.parse()
|
|
||||||
.ok()
|
|
||||||
})
|
})
|
||||||
|
.filter_map(|fname| fname.parse().ok())
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn read_timestamp_from_dir(
|
||||||
|
dir: &Path,
|
||||||
|
timestamp: &chrono::DateTime<chrono::Utc>,
|
||||||
|
) -> Option<String> {
|
||||||
|
// First check for the normal html version, which we can just read straight.
|
||||||
|
let page_path = dir.join(format!("{}.{}", timestamp.timestamp(), "html"));
|
||||||
|
if page_path.exists() {
|
||||||
|
return std::fs::read_to_string(&page_path)
|
||||||
|
.inspect_err(|e| error!("Failed reading {}, error:{e}", page_path.display()))
|
||||||
|
.ok();
|
||||||
|
}
|
||||||
|
|
||||||
|
// And now if it's compresed but with zstd.
|
||||||
|
let page_path = dir.join(format!("{}.{}.{}", timestamp.timestamp(), "html", "zst"));
|
||||||
|
if page_path.exists() {
|
||||||
|
let f = std::fs::File::open(&page_path)
|
||||||
|
.inspect_err(|e| error!("Failed reading {}, error:{e}", page_path.display()))
|
||||||
|
.ok()?;
|
||||||
|
let mut s = String::new();
|
||||||
|
zstd::Decoder::new(f).ok()?.read_to_string(&mut s).ok()?;
|
||||||
|
return Some(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
error!(
|
||||||
|
"Failed to lookup file for timestamp {} in {}, bailing ...",
|
||||||
|
timestamp,
|
||||||
|
dir.display()
|
||||||
|
);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
pub fn parse_dir(dir: &Path, category: &str, db: &rusqlite::Connection) -> Option<usize> {
|
pub fn parse_dir(dir: &Path, category: &str, db: &rusqlite::Connection) -> Option<usize> {
|
||||||
// Ensure the category is created.
|
// Ensure the category is created.
|
||||||
let url_fpath = dir.join("url.json");
|
let url_fpath = dir.join("url.json");
|
||||||
@@ -93,10 +123,7 @@ pub fn parse_dir(dir: &Path, category: &str, db: &rusqlite::Connection) -> Optio
|
|||||||
category: category.to_string(),
|
category: category.to_string(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let page_path = dir.join(format!("{}.html", ts.timestamp()));
|
let page_contents = read_timestamp_from_dir(dir, &ts)?;
|
||||||
let page_contents = std::fs::read_to_string(&page_path)
|
|
||||||
.inspect_err(|e| error!("Failed reading {}, error:{e}", page_path.display()))
|
|
||||||
.ok()?;
|
|
||||||
let elements =
|
let elements =
|
||||||
parser_ebay::parse_from_ebay_page(&page_contents, &ts, &category).unwrap();
|
parser_ebay::parse_from_ebay_page(&page_contents, &ts, &category).unwrap();
|
||||||
info!(
|
info!(
|
||||||
|
@@ -172,12 +172,22 @@ pub fn parse_from_ebay_page(
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use similar_asserts::assert_eq;
|
use similar_asserts::assert_eq;
|
||||||
|
use std::io::Read;
|
||||||
|
use zstd;
|
||||||
|
|
||||||
#[test_log::test]
|
#[test_log::test]
|
||||||
fn parse() {
|
fn parse() {
|
||||||
let timestamp = chrono::DateTime::from_timestamp(1750369463, 0).unwrap();
|
let timestamp = chrono::DateTime::from_timestamp(1750369463, 0).unwrap();
|
||||||
let html = include_str!("../test_data/scraper/raw_scraped/ssd/1750369463.html");
|
let zstd = include_bytes!("../test_data/scraper/raw_scraped/ssd/1750369463.html.zst");
|
||||||
let parsed = parse_from_ebay_page(html, ×tamp, "ssd").unwrap();
|
let cursor = std::io::Cursor::new(zstd);
|
||||||
|
|
||||||
|
let mut html = String::new();
|
||||||
|
zstd::Decoder::new(cursor)
|
||||||
|
.unwrap()
|
||||||
|
.read_to_string(&mut html)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let parsed = parse_from_ebay_page(&html, ×tamp, "ssd").unwrap();
|
||||||
// assert_eq!(parsed.len(), 62);
|
// assert_eq!(parsed.len(), 62);
|
||||||
|
|
||||||
let parsed = parsed.first_chunk::<10>().unwrap();
|
let parsed = parsed.first_chunk::<10>().unwrap();
|
||||||
|
@@ -32,7 +32,7 @@ pub fn parse_size_and_quantity(item_id: i64, title: &str) -> ParsedStorage {
|
|||||||
let upper_title = title.to_uppercase();
|
let upper_title = title.to_uppercase();
|
||||||
let mut total_gb = 0i64;
|
let mut total_gb = 0i64;
|
||||||
let mut quantity = 1i64;
|
let mut quantity = 1i64;
|
||||||
let mut needed_description_check = false;
|
let mut failed_reason = String::new();
|
||||||
let mut individual_size_gb = 0i64;
|
let mut individual_size_gb = 0i64;
|
||||||
|
|
||||||
for pattern in EXPLICIT_QTY_PATTERNS.iter() {
|
for pattern in EXPLICIT_QTY_PATTERNS.iter() {
|
||||||
@@ -68,36 +68,35 @@ pub fn parse_size_and_quantity(item_id: i64, title: &str) -> ParsedStorage {
|
|||||||
if !unique_sizes_gb.is_empty() {
|
if !unique_sizes_gb.is_empty() {
|
||||||
individual_size_gb = unique_sizes_gb[0];
|
individual_size_gb = unique_sizes_gb[0];
|
||||||
if unique_sizes_gb.len() > 1 {
|
if unique_sizes_gb.len() > 1 {
|
||||||
needed_description_check = true;
|
failed_reason = "Mixed Sizes".to_owned();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if SIZE_RANGE_REGEX.is_match(&upper_title) {
|
if SIZE_RANGE_REGEX.is_match(&upper_title) {
|
||||||
needed_description_check = true;
|
failed_reason = "No Size Given".to_owned();
|
||||||
}
|
}
|
||||||
if quantity > 1 && upper_title.contains("MIXED") {
|
if quantity > 1 && upper_title.contains("MIXED") {
|
||||||
needed_description_check = true;
|
failed_reason = "Mixed Sizes".to_owned();
|
||||||
}
|
}
|
||||||
if upper_title.contains("CHECK THE DESCRIPTION")
|
if upper_title.contains("CHECK THE DESCRIPTION")
|
||||||
|| upper_title.contains("CHECK DESCRIPTION")
|
|| upper_title.contains("CHECK DESCRIPTION")
|
||||||
|| upper_title.contains("SEE DESCRIPTION")
|
|| upper_title.contains("SEE DESCRIPTION")
|
||||||
{
|
{
|
||||||
if quantity > 1 || size_matches.is_empty() || size_matches.len() > 1 {
|
if quantity > 1 || size_matches.is_empty() || size_matches.len() > 1 {
|
||||||
needed_description_check = true;
|
failed_reason = "Mixed Sizes".to_owned();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if upper_title.contains("READ") {
|
||||||
|
failed_reason = "Mixed Sizes".to_owned();
|
||||||
|
}
|
||||||
|
|
||||||
if individual_size_gb > 0 {
|
if individual_size_gb > 0 {
|
||||||
total_gb = individual_size_gb * quantity;
|
total_gb = individual_size_gb * quantity;
|
||||||
}
|
}
|
||||||
|
|
||||||
if quantity > 1 && total_gb == 0 && !size_matches.is_empty() {
|
if quantity > 1 && total_gb == 0 && !size_matches.is_empty() {
|
||||||
needed_description_check = true;
|
failed_reason = "No size given".to_owned();
|
||||||
}
|
|
||||||
|
|
||||||
if quantity == 1 && size_matches.len() == 1 && !needed_description_check {
|
|
||||||
// This condition is implicitly handled
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ParsedStorage {
|
ParsedStorage {
|
||||||
@@ -106,8 +105,8 @@ pub fn parse_size_and_quantity(item_id: i64, title: &str) -> ParsedStorage {
|
|||||||
total_gigabytes: total_gb,
|
total_gigabytes: total_gb,
|
||||||
quantity,
|
quantity,
|
||||||
individual_size_gigabytes: individual_size_gb,
|
individual_size_gigabytes: individual_size_gb,
|
||||||
needed_description_check,
|
failed_reason: failed_reason,
|
||||||
parse_engine: 0,
|
parse_engine: crate::db::StorageParsingEngineVersion::Regex,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -125,8 +124,8 @@ mod tests {
|
|||||||
total_gigabytes: 512 * 3,
|
total_gigabytes: 512 * 3,
|
||||||
quantity: 3,
|
quantity: 3,
|
||||||
individual_size_gigabytes: 512,
|
individual_size_gigabytes: 512,
|
||||||
parse_engine: 0,
|
parse_engine: crate::db::StorageParsingEngineVersion::Regex,
|
||||||
needed_description_check: false,
|
failed_reason: String::new(),
|
||||||
},
|
},
|
||||||
true,
|
true,
|
||||||
),
|
),
|
||||||
@@ -138,8 +137,8 @@ mod tests {
|
|||||||
total_gigabytes: 240,
|
total_gigabytes: 240,
|
||||||
quantity: 1,
|
quantity: 1,
|
||||||
individual_size_gigabytes: 240,
|
individual_size_gigabytes: 240,
|
||||||
parse_engine: 0,
|
parse_engine: crate::db::StorageParsingEngineVersion::Regex,
|
||||||
needed_description_check: false,
|
failed_reason: String::new(),
|
||||||
},
|
},
|
||||||
true,
|
true,
|
||||||
),
|
),
|
||||||
@@ -151,8 +150,8 @@ mod tests {
|
|||||||
total_gigabytes: 1024,
|
total_gigabytes: 1024,
|
||||||
quantity: 1,
|
quantity: 1,
|
||||||
individual_size_gigabytes: 1024,
|
individual_size_gigabytes: 1024,
|
||||||
parse_engine: 0,
|
parse_engine: crate::db::StorageParsingEngineVersion::Regex,
|
||||||
needed_description_check: true,
|
failed_reason: String::new(),
|
||||||
},
|
},
|
||||||
false, // Sadly this one fails :/
|
false, // Sadly this one fails :/
|
||||||
),
|
),
|
||||||
@@ -164,8 +163,8 @@ mod tests {
|
|||||||
total_gigabytes: 7 * 1024,
|
total_gigabytes: 7 * 1024,
|
||||||
quantity: 1,
|
quantity: 1,
|
||||||
individual_size_gigabytes: 7 * 1024,
|
individual_size_gigabytes: 7 * 1024,
|
||||||
parse_engine: 0,
|
parse_engine: crate::db::StorageParsingEngineVersion::Regex,
|
||||||
needed_description_check: false,
|
failed_reason: String::new(),
|
||||||
},
|
},
|
||||||
true,
|
true,
|
||||||
),
|
),
|
||||||
@@ -177,8 +176,8 @@ mod tests {
|
|||||||
total_gigabytes: 6 * 256,
|
total_gigabytes: 6 * 256,
|
||||||
quantity: 6,
|
quantity: 6,
|
||||||
individual_size_gigabytes: 256,
|
individual_size_gigabytes: 256,
|
||||||
parse_engine: 0,
|
parse_engine: crate::db::StorageParsingEngineVersion::Regex,
|
||||||
needed_description_check: false,
|
failed_reason: String::new(),
|
||||||
},
|
},
|
||||||
true,
|
true,
|
||||||
),
|
),
|
||||||
@@ -190,8 +189,8 @@ mod tests {
|
|||||||
total_gigabytes: 1966,
|
total_gigabytes: 1966,
|
||||||
quantity: 1,
|
quantity: 1,
|
||||||
individual_size_gigabytes: 1966,
|
individual_size_gigabytes: 1966,
|
||||||
parse_engine: 0,
|
parse_engine: crate::db::StorageParsingEngineVersion::Regex,
|
||||||
needed_description_check: false,
|
failed_reason: String::new(),
|
||||||
},
|
},
|
||||||
true,
|
true,
|
||||||
),
|
),
|
160
src/parser_storage_e1.rs
Normal file
160
src/parser_storage_e1.rs
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
use crate::db::{
|
||||||
|
ParsedLLMStorageResult, ParsedStorage, StorageLLMVersion, StorageParsingEngineVersion,
|
||||||
|
};
|
||||||
|
use reqwest::header::{AUTHORIZATION, CONTENT_TYPE};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json::json;
|
||||||
|
use tracing::error;
|
||||||
|
|
||||||
|
// Given this prompt and a string of "(Lot of 6) Samsung MZ-VLB2560 256GB M.2NVMe Internal SSD
|
||||||
|
// (MZVLB256HBHQ-000H1)", we get 338 input tokens and 36 output tokens. Assuming no caching, then
|
||||||
|
// Gemini 2.5 Flash Lite at $0.10/M input and $0.40/M output, this would cost $0.0000338 Input,
|
||||||
|
// $0.0000144 Output, and $0.0000482 Total. Given 30,000 listings this would be $1.446.
|
||||||
|
const SYSTEM_PROMPT: &str = r#"
|
||||||
|
You will be given a product listing for one or more storage drives. You will return *ONLY* JSON strictly adhering to the same structure and key names as below. This means no backticks or markdown/markup. You will specify how many storage drives are included in the listing as a number (1, 2, 3, etc), the size in gigabytes of each drive as a number (rounding up if needed, so 1, 2, 3, etc), and lastly if the above cannot be provided due the the listing title being incomplete or confusing, a very short reason why.
|
||||||
|
|
||||||
|
Here is an example for a title of "Lot of 2, Crucial P3 Plus 1TB NVMe Internal M.2 SSD (CT1000P3PSSD8) - Barely used!";
|
||||||
|
{
|
||||||
|
"quantity": 2,
|
||||||
|
"gigabytes": 1024
|
||||||
|
"fail_reason": ""
|
||||||
|
}
|
||||||
|
|
||||||
|
And an example for an unclear title of "Pallet of assorted 128GB to 5TB drives";
|
||||||
|
{
|
||||||
|
"quantity": 0,
|
||||||
|
"gigabytes": 0,
|
||||||
|
"fail_reason": "multiple mixed sizes"
|
||||||
|
}
|
||||||
|
"#;
|
||||||
|
|
||||||
|
#[derive(Deserialize, Serialize, Debug, PartialEq, Clone)]
|
||||||
|
struct LLMParsedResponse {
|
||||||
|
pub quantity: i64,
|
||||||
|
pub gigabytes: i64,
|
||||||
|
pub fail_reason: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
struct OpenAIResponse {
|
||||||
|
choices: Vec<OpenAIChoice>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
struct OpenAIChoice {
|
||||||
|
message: OpenAIMessage,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
struct OpenAIMessage {
|
||||||
|
content: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
const OPENAI_LLM_URL: &str = "https://badurl.hak8or.com/litellm_api/chat/completions";
|
||||||
|
#[cfg(not(test))]
|
||||||
|
const OPENAI_LLM_URL: &str = "https://ai.hak8or.com/litellm_api/chat/completions";
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
const OPENAI_LLM_API_KEY: &str = "Bearer sk-YmVlcC1ib29wLWEtcm9ib3Q";
|
||||||
|
#[cfg(not(test))]
|
||||||
|
const OPENAI_LLM_API_KEY: &str = "Bearer sk-HMGML94x2ag6ggOoDghSGA";
|
||||||
|
|
||||||
|
pub async fn parse_size_and_quantity_llm(
|
||||||
|
item_id: i64,
|
||||||
|
title: &str,
|
||||||
|
) -> Option<ParsedLLMStorageResult> {
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let req = client
|
||||||
|
.post(OPENAI_LLM_URL)
|
||||||
|
.header(CONTENT_TYPE, actix_web::mime::APPLICATION_JSON.to_string())
|
||||||
|
.header(AUTHORIZATION, OPENAI_LLM_API_KEY)
|
||||||
|
.body(
|
||||||
|
json!({
|
||||||
|
"model": "gemini-2.5-flash-lite",
|
||||||
|
"reasoning_effort": "disable",
|
||||||
|
"thinking": {"type": "disabled", "budget_tokens": 0},
|
||||||
|
"messages": [
|
||||||
|
{ "role": "system", "content": SYSTEM_PROMPT },
|
||||||
|
{ "role": "user", "content": title }
|
||||||
|
]
|
||||||
|
})
|
||||||
|
.to_string(),
|
||||||
|
);
|
||||||
|
let reply_body = req.send().await.ok()?.text().await.ok()?;
|
||||||
|
let repl_json: OpenAIResponse = serde_json::from_str(&reply_body).ok()?;
|
||||||
|
match repl_json.choices.len() {
|
||||||
|
0 => {
|
||||||
|
error!("When parsing title, LLM returned ZERO choices");
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
1 => { /* Nothing to do */ }
|
||||||
|
a => error!("When parsing title, LLM returned {a}, >1 choices, using first!"),
|
||||||
|
}
|
||||||
|
let reply_parsed_storage_json: LLMParsedResponse =
|
||||||
|
serde_json::from_str(&repl_json.choices[0].message.content).ok()?;
|
||||||
|
|
||||||
|
if !reply_parsed_storage_json.fail_reason.is_empty() {
|
||||||
|
error!(
|
||||||
|
"Failed parsing item_id:{item_id}, title:{title}, due to reason:{}",
|
||||||
|
reply_parsed_storage_json.fail_reason
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(ParsedLLMStorageResult {
|
||||||
|
id: 0,
|
||||||
|
fail_reason: reply_parsed_storage_json.fail_reason.clone(),
|
||||||
|
gigabytes: reply_parsed_storage_json.gigabytes,
|
||||||
|
item_id,
|
||||||
|
quantity: reply_parsed_storage_json.quantity,
|
||||||
|
title: title.to_owned(),
|
||||||
|
llm_id: StorageLLMVersion::Gemini2d5Prompt0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since we can't have a hashmap in a const, and I don't want to play with
|
||||||
|
// making our parsed result struct contain a CoW string for fail_reason and
|
||||||
|
// title, we are stuck with this ...
|
||||||
|
pub fn parse_cached(item_id: i64, title: &str) -> Option<ParsedLLMStorageResult> {
|
||||||
|
match title {
|
||||||
|
"Lot of 2 512GB SSD 6gb/s working with 5% wear" => Some(ParsedLLMStorageResult {
|
||||||
|
id: 0,
|
||||||
|
item_id: item_id,
|
||||||
|
fail_reason: "".to_string(),
|
||||||
|
gigabytes: 512,
|
||||||
|
quantity: 2,
|
||||||
|
title: title.to_owned(),
|
||||||
|
llm_id: StorageLLMVersion::Testing,
|
||||||
|
}),
|
||||||
|
"Lot of 2 assorted SSD" => Some(ParsedLLMStorageResult {
|
||||||
|
id: 0,
|
||||||
|
fail_reason: "mixed sizes".to_owned(),
|
||||||
|
gigabytes: 0,
|
||||||
|
item_id,
|
||||||
|
quantity: 0,
|
||||||
|
title: title.to_owned(),
|
||||||
|
llm_id: StorageLLMVersion::Testing,
|
||||||
|
}),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses size and quantity information from an item title.
|
||||||
|
pub async fn parse_size_and_quantity(
|
||||||
|
db: &rusqlite::Connection,
|
||||||
|
item_id: i64,
|
||||||
|
title: &str,
|
||||||
|
) -> Option<ParsedStorage> {
|
||||||
|
let plsr = parse_size_and_quantity_llm(item_id, title).await?;
|
||||||
|
plsr.add_or_update(&db);
|
||||||
|
|
||||||
|
Some(ParsedStorage {
|
||||||
|
id: 0,
|
||||||
|
item: item_id,
|
||||||
|
total_gigabytes: plsr.quantity * plsr.gigabytes,
|
||||||
|
quantity: plsr.quantity,
|
||||||
|
individual_size_gigabytes: plsr.gigabytes,
|
||||||
|
failed_reason: plsr.fail_reason,
|
||||||
|
parse_engine: StorageParsingEngineVersion::LLM,
|
||||||
|
})
|
||||||
|
}
|
@@ -5,6 +5,10 @@ URL_PER_PAGE_240="&_ipg=240"
|
|||||||
URL_MIN_PRICE_USD_60="&_udlo=60.00"
|
URL_MIN_PRICE_USD_60="&_udlo=60.00"
|
||||||
URL_SEARCHTERM_NONE="&_nkw="
|
URL_SEARCHTERM_NONE="&_nkw="
|
||||||
URL_LOCATION_NORTHAMERICA="&LH_PrefLoc=3"
|
URL_LOCATION_NORTHAMERICA="&LH_PrefLoc=3"
|
||||||
|
URL_SORTBY_NEWLY_LISTED="&_sop=10"
|
||||||
|
URL_SORTBY_ENDING_SOONEST="&_sop=1"
|
||||||
|
URL_CATEGORY_MINIPC_ALLINONE="&_sacat=179"
|
||||||
|
URL_CATEGORY_SSD="&_sacat=175669"
|
||||||
URL_BASE_LISTING="https://www.ebay.com/sch/i.html?"
|
URL_BASE_LISTING="https://www.ebay.com/sch/i.html?"
|
||||||
|
|
||||||
if [ -z "${XDG_DATA_HOME}" ]; then
|
if [ -z "${XDG_DATA_HOME}" ]; then
|
||||||
@@ -12,20 +16,141 @@ if [ -z "${XDG_DATA_HOME}" ]; then
|
|||||||
exit
|
exit
|
||||||
fi
|
fi
|
||||||
|
|
||||||
DIR_SSDS="$XDG_DATA_HOME/ebay_scraper/raw_scraped/ssd"
|
# Heh, so Ebay started to block my scraping efforts after a while. I couldn't
|
||||||
mkdir -p "$DIR_SSDS"
|
# get this working with wget, so in the end I decided to go for this route which
|
||||||
if [ ! -s "$DIR_SSDS/url.json" ]; then
|
# is quite ugly, but should work in the end. We effectively run a non headless
|
||||||
URL_CATEGORY_SSD="&_sacat=175669"
|
# version of a browser with various realistic headers and screen dimensions.
|
||||||
URL_SSDS="$URL_BASE_LISTING$URL_SEARCHTERM_NONE$URL_CATEGORY_SSD&_from=R40&_fsrp=1$URL_LOCATION_NORTHAMERICA&imm=1&_sop=10$URL_PER_PAGE_240"
|
# Lastly, we give the page an extra 5 seconds to run any potential javascript
|
||||||
echo "{\"url\": \"$URL_SSDS\"}" > "$DIR_SSDS/url.json"
|
# often used to counter scraping or bots.
|
||||||
fi
|
fetch_compress_save_html() {
|
||||||
wget -o "$DIR_SSDS/$(date +%s).html" "$(jq '.url' $DIR_SSDS/url.json)"
|
local url="$1"
|
||||||
|
local output_file="$2"
|
||||||
|
|
||||||
DIR_MINIPC="$XDG_DATA_HOME/ebay_scraper/raw_scraped/minipc"
|
echo Fetching $url
|
||||||
mkdir -p "$DIR_MINIPC"
|
xvfb-run --server-args="-screen 0 1024x768x24" \
|
||||||
if [ ! -s "$DIR_MINIPC/url.json" ]; then
|
uv run --with playwright --with playwright-stealth - $url <<'EOF' | zstd -z --ultra -19 -o $output_file
|
||||||
URL_CATEGORY_MINIPC_ALLINONE="&_sacat=179"
|
import asyncio
|
||||||
URL_MINIPC="$URL_BASE_LISTING$URL_SEARCHTERM_NONE$URL_CATEGORY_MINIPC_ALLINONE&_from=R40&_fsrp=1$URL_LOCATION_NORTHAMERICA&imm=1&_sop=10$URL_PER_PAGE_240"
|
import sys
|
||||||
echo "{\"url\": \"$URL_MINIPC\"}" > "$DIR_MINIPC/url.json"
|
from playwright.async_api import async_playwright
|
||||||
fi
|
from playwright_stealth import Stealth
|
||||||
wget -o "$DIR_MINIPC/$(date +%s).html" "$(jq '.url' $DIR_MINIPC/url.json)"
|
|
||||||
|
async def main():
|
||||||
|
async with Stealth().use_async(async_playwright()) as p:
|
||||||
|
browser = await p.chromium.launch(
|
||||||
|
executable_path='/usr/bin/chromium',
|
||||||
|
args=[
|
||||||
|
'--no-sandbox',
|
||||||
|
'--disable-dev-shm-usage',
|
||||||
|
'--disable-blink-features=AutomationControlled',
|
||||||
|
"--window-size=1901,1018"
|
||||||
|
],
|
||||||
|
headless=False
|
||||||
|
)
|
||||||
|
# Create context with user agent
|
||||||
|
context = await browser.new_context(
|
||||||
|
color_scheme=r"light",
|
||||||
|
locale=r"en-US,en;q=0.9",
|
||||||
|
user_agent=r"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
|
||||||
|
timezone_id=r"America/New_York",
|
||||||
|
extra_http_headers={
|
||||||
|
"origin": "https://www.ebay.com",
|
||||||
|
"accept": "*/*",
|
||||||
|
"accept-encoding": "gzip, deflate, br, zstd",
|
||||||
|
"cache-control": "no-cache",
|
||||||
|
"accept-language": "en-US,en;q=0.9"
|
||||||
|
}
|
||||||
|
)
|
||||||
|
page = await context.new_page()
|
||||||
|
|
||||||
|
await page.goto(sys.argv[1], wait_until="domcontentloaded")
|
||||||
|
await page.wait_for_timeout(5000)
|
||||||
|
print(await page.content())
|
||||||
|
await browser.close()
|
||||||
|
|
||||||
|
asyncio.run(main())
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
# Fetch one listing category twice, once per sort-order suffix
# ($URL_SORTBY_NEWLY_LISTED, then $URL_SORTBY_ENDING_SOONEST), saving each
# result as "<unix timestamp>.html.zst" in the category's directory.
#
# Arguments:
#   $1 - directory name under "$XDG_DATA_HOME/scraper/raw_scraped/"
#   $2 - category query fragment spliced into the base listing URL
#
# The base URL is written to url.json only when that file is missing or empty;
# otherwise the URL already stored there is used, so a hand-edited url.json
# takes precedence over the URL_* variables.
#
# Returns 1 without fetching if url.json does not yield a usable URL.
fetch() {
    local name="$1"
    local url_param="$2"
    # Keep working variables local so repeated calls do not leak state.
    local dir="$XDG_DATA_HOME/scraper/raw_scraped/$name"
    local base_url

    mkdir -p "$dir"
    if [ ! -s "$dir/url.json" ]; then
        base_url="$URL_BASE_LISTING$URL_SEARCHTERM_NONE$url_param&_from=R40&_fsrp=1$URL_LOCATION_NORTHAMERICA&imm=1$URL_PER_PAGE_240"
        printf '{"url": "%s"}\n' "$base_url" > "$dir/url.json"
    fi

    # Read the stored URL once; both sort orders share it.
    base_url="$(jq --raw-output '.url' "$dir/url.json")"
    if [ -z "$base_url" ] || [ "$base_url" = "null" ]; then
        echo "fetch: no usable url in $dir/url.json" >&2
        return 1
    fi

    # Arguments are quoted: an unquoted expansion is subject to word splitting
    # and pathname expansion, which can mangle a URL or a path with spaces.
    # The timestamp is evaluated per call, so the two snapshots get distinct
    # names as long as the first fetch takes at least a second.
    fetch_compress_save_html "$base_url$URL_SORTBY_NEWLY_LISTED" "$dir/$(date +%s).html.zst"
    fetch_compress_save_html "$base_url$URL_SORTBY_ENDING_SOONEST" "$dir/$(date +%s).html.zst"
}
|
||||||
|
|
||||||
|
fetch "ssd" "$URL_CATEGORY_SSD"
|
||||||
|
fetch "minipc" "$URL_CATEGORY_MINIPC_ALLINONE"
|
||||||
|
|
||||||
|
|
||||||
|
# If needing to do a mass compression;
|
||||||
|
# fd '\.html$' -x zstd -z --ultra -19 -o {}.zst {}
|
||||||
|
|
||||||
|
# If needing to purge bogus downloads
|
||||||
|
# fd --size -100K .html.zst -x ls -lah {}
|
||||||
|
# fd --size -100K .html.zst -x rm {}
|
||||||
|
|
||||||
|
# Level compression analysis;
|
||||||
|
#
|
||||||
|
# A single scraped result;
|
||||||
|
# for lvl in $(seq 3 22); zstd --compress --ultra -o 1755012328.html.zst$lvl -$lvl 1755012328.html; end
|
||||||
|
# 1755012328.html : 9.04% ( 2.60 MiB => 240 KiB, 1755012328.html.zst3)
|
||||||
|
# 1755012328.html : 9.04% ( 2.60 MiB => 240 KiB, 1755012328.html.zst4)
|
||||||
|
# 1755012328.html : 8.80% ( 2.60 MiB => 234 KiB, 1755012328.html.zst5)
|
||||||
|
# 1755012328.html : 8.58% ( 2.60 MiB => 228 KiB, 1755012328.html.zst6)
|
||||||
|
# 1755012328.html : 8.54% ( 2.60 MiB => 227 KiB, 1755012328.html.zst7)
|
||||||
|
# 1755012328.html : 8.45% ( 2.60 MiB => 225 KiB, 1755012328.html.zst8)
|
||||||
|
# 1755012328.html : 8.34% ( 2.60 MiB => 222 KiB, 1755012328.html.zst9)
|
||||||
|
# 1755012328.html : 8.30% ( 2.60 MiB => 221 KiB, 1755012328.html.zst10)
|
||||||
|
# 1755012328.html : 8.28% ( 2.60 MiB => 220 KiB, 1755012328.html.zst11)
|
||||||
|
# 1755012328.html : 8.28% ( 2.60 MiB => 220 KiB, 1755012328.html.zst12)
|
||||||
|
# 1755012328.html : 8.32% ( 2.60 MiB => 221 KiB, 1755012328.html.zst13)
|
||||||
|
# 1755012328.html : 8.29% ( 2.60 MiB => 221 KiB, 1755012328.html.zst14)
|
||||||
|
# 1755012328.html : 8.25% ( 2.60 MiB => 219 KiB, 1755012328.html.zst15)
|
||||||
|
# 1755012328.html : 8.28% ( 2.60 MiB => 220 KiB, 1755012328.html.zst16)
|
||||||
|
# 1755012328.html : 8.20% ( 2.60 MiB => 218 KiB, 1755012328.html.zst17)
|
||||||
|
# 1755012328.html : 8.23% ( 2.60 MiB => 219 KiB, 1755012328.html.zst18)
|
||||||
|
# 1755012328.html : 7.99% ( 2.60 MiB => 213 KiB, 1755012328.html.zst19)
|
||||||
|
# 1755012328.html : 7.99% ( 2.60 MiB => 213 KiB, 1755012328.html.zst20)
|
||||||
|
# 1755012328.html : 7.93% ( 2.60 MiB => 211 KiB, 1755012328.html.zst21)
|
||||||
|
# 1755012328.html : 7.91% ( 2.60 MiB => 211 KiB, 1755012328.html.zst22)
|
||||||
|
#
|
||||||
|
# Let's see if we get any benefit from tar'ing and then compressing;
|
||||||
|
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755012328.html
|
||||||
|
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755012331.html
|
||||||
|
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755015932.html
|
||||||
|
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755015929.html
|
||||||
|
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755019567.html
|
||||||
|
# -rw-r--r-- 1 hak8or users 2.6M Sep 1 10:11 ./1755019564.html
|
||||||
|
# -rw-r--r-- 1 hak8or users 16M Sep 1 12:23 175501.tar
|
||||||
|
# ➜ for lvl in $(seq 3 22); zstd --compress --ultra -o 175501.tar.$lvl -$lvl 175501.tar; end
|
||||||
|
# 175501.tar : 8.91% ( 15.6 MiB => 1.39 MiB, 175501.tar.3)
|
||||||
|
# 175501.tar : 8.92% ( 15.6 MiB => 1.39 MiB, 175501.tar.4)
|
||||||
|
# 175501.tar : 8.65% ( 15.6 MiB => 1.35 MiB, 175501.tar.5)
|
||||||
|
# 175501.tar : 8.42% ( 15.6 MiB => 1.31 MiB, 175501.tar.6)
|
||||||
|
# 175501.tar : 8.36% ( 15.6 MiB => 1.30 MiB, 175501.tar.7)
|
||||||
|
# 175501.tar : 8.25% ( 15.6 MiB => 1.28 MiB, 175501.tar.8)
|
||||||
|
# 175501.tar : 5.36% ( 15.6 MiB => 854 KiB, 175501.tar.9)
|
||||||
|
# 175501.tar : 5.32% ( 15.6 MiB => 847 KiB, 175501.tar.10)
|
||||||
|
# 175501.tar : 5.30% ( 15.6 MiB => 844 KiB, 175501.tar.11)
|
||||||
|
# 175501.tar : 5.30% ( 15.6 MiB => 844 KiB, 175501.tar.12)
|
||||||
|
# 175501.tar : 5.48% ( 15.6 MiB => 872 KiB, 175501.tar.13)
|
||||||
|
# 175501.tar : 5.42% ( 15.6 MiB => 864 KiB, 175501.tar.14)
|
||||||
|
# 175501.tar : 5.19% ( 15.6 MiB => 828 KiB, 175501.tar.15)
|
||||||
|
# 175501.tar : 5.31% ( 15.6 MiB => 845 KiB, 175501.tar.16)
|
||||||
|
# 175501.tar : 5.01% ( 15.6 MiB => 798 KiB, 175501.tar.17)
|
||||||
|
# 175501.tar : 5.04% ( 15.6 MiB => 803 KiB, 175501.tar.18)
|
||||||
|
# 175501.tar : 4.84% ( 15.6 MiB => 771 KiB, 175501.tar.19)
|
||||||
|
# 175501.tar : 4.79% ( 15.6 MiB => 764 KiB, 175501.tar.20)
|
||||||
|
# 175501.tar : 4.74% ( 15.6 MiB => 755 KiB, 175501.tar.21)
|
||||||
|
# 175501.tar : 4.73% ( 15.6 MiB => 753 KiB, 175501.tar.22)
|
||||||
|
File diff suppressed because one or more lines are too long
BIN
test_data/scraper/raw_scraped/ssd/1750369463.html.zst
Normal file
BIN
test_data/scraper/raw_scraped/ssd/1750369463.html.zst
Normal file
Binary file not shown.
Reference in New Issue
Block a user