Add LLM based parsing
All checks were successful
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 4m11s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 4m34s
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 5m14s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 11m4s
All checks were successful
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 4m11s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 4m34s
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 5m14s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 11m4s
This commit is contained in:
601
Cargo.lock
generated
601
Cargo.lock
generated
@@ -39,8 +39,8 @@ dependencies = [
|
||||
"flate2",
|
||||
"foldhash",
|
||||
"futures-core",
|
||||
"h2",
|
||||
"http",
|
||||
"h2 0.3.26",
|
||||
"http 0.2.12",
|
||||
"httparse",
|
||||
"httpdate",
|
||||
"itoa",
|
||||
@@ -76,7 +76,7 @@ checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8"
|
||||
dependencies = [
|
||||
"bytestring",
|
||||
"cfg-if",
|
||||
"http",
|
||||
"http 0.2.12",
|
||||
"regex",
|
||||
"regex-lite",
|
||||
"serde",
|
||||
@@ -289,6 +289,12 @@ dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atomic-waker"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.5.0"
|
||||
@@ -491,6 +497,16 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
|
||||
dependencies = [
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation-sys"
|
||||
version = "0.8.7"
|
||||
@@ -682,6 +698,7 @@ dependencies = [
|
||||
"lazy_static",
|
||||
"rayon",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"rusqlite",
|
||||
"scraper",
|
||||
"serde",
|
||||
@@ -747,6 +764,16 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fallible-iterator"
|
||||
version = "0.3.0"
|
||||
@@ -759,6 +786,12 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.1.2"
|
||||
@@ -781,6 +814,21 @@ version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
|
||||
dependencies = [
|
||||
"foreign-types-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types-shared"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.2.1"
|
||||
@@ -800,12 +848,28 @@ dependencies = [
|
||||
"new_debug_unreachable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-core"
|
||||
version = "0.3.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
|
||||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
|
||||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.31"
|
||||
@@ -825,9 +889,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"futures-sink",
|
||||
"futures-task",
|
||||
"memchr",
|
||||
"pin-project-lite",
|
||||
"pin-utils",
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -898,7 +966,26 @@ dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http 0.2.12",
|
||||
"indexmap",
|
||||
"slab",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.4.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386"
|
||||
dependencies = [
|
||||
"atomic-waker",
|
||||
"bytes",
|
||||
"fnv",
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"http 1.3.1",
|
||||
"indexmap",
|
||||
"slab",
|
||||
"tokio",
|
||||
@@ -953,6 +1040,40 @@ dependencies = [
|
||||
"itoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "1.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fnv",
|
||||
"itoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http-body"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"http 1.3.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http-body-util"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"http 1.3.1",
|
||||
"http-body",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httparse"
|
||||
version = "1.10.1"
|
||||
@@ -965,6 +1086,86 @@ version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e"
|
||||
dependencies = [
|
||||
"atomic-waker",
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"h2 0.4.12",
|
||||
"http 1.3.1",
|
||||
"http-body",
|
||||
"httparse",
|
||||
"itoa",
|
||||
"pin-project-lite",
|
||||
"pin-utils",
|
||||
"smallvec",
|
||||
"tokio",
|
||||
"want",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-rustls"
|
||||
version = "0.27.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
|
||||
dependencies = [
|
||||
"http 1.3.1",
|
||||
"hyper",
|
||||
"hyper-util",
|
||||
"rustls",
|
||||
"rustls-pki-types",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-tls"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"http-body-util",
|
||||
"hyper",
|
||||
"hyper-util",
|
||||
"native-tls",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-util"
|
||||
version = "0.1.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"http 1.3.1",
|
||||
"http-body",
|
||||
"hyper",
|
||||
"ipnet",
|
||||
"libc",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"socket2",
|
||||
"system-configuration",
|
||||
"tokio",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
"windows-registry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.63"
|
||||
@@ -1112,6 +1313,22 @@ dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
|
||||
|
||||
[[package]]
|
||||
name = "iri-string"
|
||||
version = "0.7.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.1"
|
||||
@@ -1183,6 +1400,12 @@ dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
|
||||
|
||||
[[package]]
|
||||
name = "litemap"
|
||||
version = "0.8.0"
|
||||
@@ -1295,6 +1518,23 @@ dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "native-tls"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"openssl",
|
||||
"openssl-probe",
|
||||
"openssl-sys",
|
||||
"schannel",
|
||||
"security-framework",
|
||||
"security-framework-sys",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.6"
|
||||
@@ -1347,6 +1587,50 @@ version = "1.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
||||
|
||||
[[package]]
|
||||
name = "openssl"
|
||||
version = "0.10.73"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cfg-if",
|
||||
"foreign-types",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"openssl-macros",
|
||||
"openssl-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-macros"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-probe"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
|
||||
|
||||
[[package]]
|
||||
name = "openssl-sys"
|
||||
version = "0.9.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "option-ext"
|
||||
version = "0.2.0"
|
||||
@@ -1646,6 +1930,62 @@ version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.12.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"bytes",
|
||||
"encoding_rs",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2 0.4.12",
|
||||
"http 1.3.1",
|
||||
"http-body",
|
||||
"http-body-util",
|
||||
"hyper",
|
||||
"hyper-rustls",
|
||||
"hyper-tls",
|
||||
"hyper-util",
|
||||
"js-sys",
|
||||
"log",
|
||||
"mime",
|
||||
"native-tls",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustls-pki-types",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"sync_wrapper",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
"tower",
|
||||
"tower-http",
|
||||
"tower-service",
|
||||
"url",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-futures",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"getrandom 0.2.16",
|
||||
"libc",
|
||||
"untrusted",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rusqlite"
|
||||
version = "0.36.0"
|
||||
@@ -1667,6 +2007,52 @@ version = "0.1.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "1.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.23.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"rustls-pki-types",
|
||||
"rustls-webpki",
|
||||
"subtle",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls-pki-types"
|
||||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
|
||||
dependencies = [
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls-webpki"
|
||||
version = "0.103.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc"
|
||||
dependencies = [
|
||||
"ring",
|
||||
"rustls-pki-types",
|
||||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.21"
|
||||
@@ -1679,6 +2065,15 @@ version = "1.0.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
|
||||
|
||||
[[package]]
|
||||
name = "schannel"
|
||||
version = "0.1.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d"
|
||||
dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.2.0"
|
||||
@@ -1700,6 +2095,29 @@ dependencies = [
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "security-framework"
|
||||
version = "2.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"core-foundation",
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
"security-framework-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "security-framework-sys"
|
||||
version = "2.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32"
|
||||
dependencies = [
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "selectors"
|
||||
version = "0.26.0"
|
||||
@@ -1892,6 +2310,12 @@ version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.103"
|
||||
@@ -1903,6 +2327,15 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sync_wrapper"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "synstructure"
|
||||
version = "0.13.2"
|
||||
@@ -1914,6 +2347,40 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"core-foundation",
|
||||
"system-configuration-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration-sys"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4"
|
||||
dependencies = [
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e"
|
||||
dependencies = [
|
||||
"fastrand",
|
||||
"getrandom 0.3.3",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.4.3"
|
||||
@@ -2034,6 +2501,26 @@ dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-native-tls"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
|
||||
dependencies = [
|
||||
"native-tls",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-rustls"
|
||||
version = "0.26.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
|
||||
dependencies = [
|
||||
"rustls",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-util"
|
||||
version = "0.7.15"
|
||||
@@ -2047,6 +2534,51 @@ dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"pin-project-lite",
|
||||
"sync_wrapper",
|
||||
"tokio",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-http"
|
||||
version = "0.6.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http 1.3.1",
|
||||
"http-body",
|
||||
"iri-string",
|
||||
"pin-project-lite",
|
||||
"tower",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-layer"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
|
||||
|
||||
[[package]]
|
||||
name = "tower-service"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
|
||||
|
||||
[[package]]
|
||||
name = "tracing"
|
||||
version = "0.1.41"
|
||||
@@ -2109,6 +2641,12 @@ dependencies = [
|
||||
"tracing-log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "try-lock"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.18.0"
|
||||
@@ -2139,6 +2677,12 @@ version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "2.5.4"
|
||||
@@ -2186,6 +2730,15 @@ version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||
|
||||
[[package]]
|
||||
name = "want"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
|
||||
dependencies = [
|
||||
"try-lock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.1+wasi-snapshot-preview1"
|
||||
@@ -2227,6 +2780,19 @@ dependencies = [
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-futures"
|
||||
version = "0.4.50"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"js-sys",
|
||||
"once_cell",
|
||||
"wasm-bindgen",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.100"
|
||||
@@ -2259,6 +2825,16 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "web-sys"
|
||||
version = "0.3.77"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
@@ -2322,6 +2898,17 @@ version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
|
||||
|
||||
[[package]]
|
||||
name = "windows-registry"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
"windows-result",
|
||||
"windows-strings",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-result"
|
||||
version = "0.3.4"
|
||||
@@ -2502,6 +3089,12 @@ dependencies = [
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zeroize"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
|
||||
|
||||
[[package]]
|
||||
name = "zerotrie"
|
||||
version = "0.2.2"
|
||||
|
@@ -11,6 +11,7 @@ dirs = "6.0.0"
|
||||
lazy_static = "1.5.0"
|
||||
rayon = "1.10.0"
|
||||
regex = "1.11.1"
|
||||
reqwest = { version = "0.12.23", features = ["blocking"] }
|
||||
rusqlite = { version = "0.36.0", features = ["bundled", "chrono"] }
|
||||
scraper = "0.23.1"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
|
110
readme.md
110
readme.md
@@ -10,3 +10,113 @@ echo run2 && http GET "$URL_BASE/listing/since/12345678/2" && \
|
||||
echo run3 && http GET "$URL_BASE/listing/388484391867" && \
|
||||
echo run4 && http GET "$URL_BASE/listing/286605201240/history"
|
||||
```
|
||||
|
||||
And some jq usage for interacting with the raw data:
|
||||
```bash
|
||||
# Download a bunch of listings.
|
||||
http https://scraper.hak8or.com/api/listings since==0 limit==20 > listings.json
|
||||
|
||||
# Show what a single listing looks like.
|
||||
cat listings.json | jq '.[0]'
|
||||
{
|
||||
"listing": {
|
||||
"id": 22563,
|
||||
"item_id": 286707621236,
|
||||
"title": "WD_BLACK SN770M 2TB M.2 NVMe Internal SSD (WDBDNH0020BBK-WRSN)",
|
||||
"buy_it_now_price_cents": null,
|
||||
"has_best_offer": false,
|
||||
"image_url": "https://i.ebayimg.com/images/g/It4AAeSwzz5oddoa/s-l140.jpg"
|
||||
},
|
||||
"history": [
|
||||
{
|
||||
"item": 286707621236,
|
||||
"timestamp": "2025-07-15T04:46:54Z",
|
||||
"category": "ssd",
|
||||
"current_bid_usd_cents": 12900
|
||||
}
|
||||
],
|
||||
"parsed": [
|
||||
{
|
||||
"id": 6,
|
||||
"item": 286707621236,
|
||||
"total_gigabytes": 2048,
|
||||
"quantity": 1,
|
||||
"individual_size_gigabytes": 2048,
|
||||
"parse_engine": 0,
|
||||
"needed_description_check": false
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# Show the 1st and 2nd items, but only grab a few specific entries.
|
||||
cat listings_small.json | jq '[.[1:3][] | {
|
||||
item_id: .listing.item_id,
|
||||
title: .listing.title,
|
||||
parsed: .parsed[] | {
|
||||
total_gigabytes,
|
||||
quantity,
|
||||
individual_size_gigabytes
|
||||
}
|
||||
}]'
|
||||
[
|
||||
{
|
||||
"item_id": 297545995095,
|
||||
"title": "Crucial P3 Plus 1TB NVMe Internal M.2 SSD (CT1000P3PSSD8) - Barely used!",
|
||||
"parsed": {
|
||||
"total_gigabytes": 1024,
|
||||
"quantity": 1,
|
||||
"individual_size_gigabytes": 1024
|
||||
}
|
||||
},
|
||||
{
|
||||
"item_id": 127220979797,
|
||||
"title": "Kingston NV2 2TB M.2 3500MG/S NVMe Internal SSD PCIe 4.0 Gen SNV2S/2000G C-/#qWT",
|
||||
"parsed": {
|
||||
"total_gigabytes": 2048,
|
||||
"quantity": 1,
|
||||
"individual_size_gigabytes": 2048
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
And now an LLM-based parse, using the following prompt (189 tokens for Gemini 2.5 Flash Lite):
|
||||
```
|
||||
I will provide you with a listing title I want you to analyse. Then you will tell me the total gigabytes of all drives listed in the listing, how many drives are specified in the title, and the gigabytes of each drive in the listing. Here is an example for a title of "Crucial P3 Plus 1TB NVMe Internal M.2 SSD (CT1000P3PSSD8) - Barely used!";
|
||||
```
|
||||
{
|
||||
"total_gigabytes": 1024,
|
||||
"quantity": 1,
|
||||
"individual_size_gigabytes": 1024
|
||||
}
|
||||
```
|
||||
Reply with "OK" (and _only_ "OK") if you understand this. After you reply with that, I will provide you with a title, and then you will reply with solely the requested json (and ONLY said json).
|
||||
```
|
||||
|
||||
Passing the following title (30 tokens):
|
||||
```
|
||||
Lot Of 3 Western Digital PC SN740 512GB M.2 2230 NVMe Internal SSD
|
||||
```
|
||||
returns the following JSON (41 tokens):
|
||||
```json
|
||||
{
|
||||
"total_gigabytes": 1536,
|
||||
"quantity": 3,
|
||||
"individual_size_gigabytes": 512
|
||||
}
|
||||
```
|
||||
|
||||
As another example, sending this title (49 tokens):
|
||||
```
|
||||
(Lot of 6) Samsung MZ-VLB2560 256GB M.2 NVMe Internal SSD (MZVLB256HBHQ-000H1)
|
||||
```
|
||||
returns the following JSON (42 tokens):
|
||||
```json
|
||||
{
|
||||
"total_gigabytes": 1536,
|
||||
"quantity": 6,
|
||||
"individual_size_gigabytes": 256
|
||||
}
|
||||
```
|
||||
|
||||
So for one listing we have a 189-token "system prompt", a ~45-token title prompt, and a 42-token parsed reply. Given 30,000 listings, that's 5,670,000 system-prompt tokens plus 1,350,000 title tokens as input (7,020,000 input tokens in total), and 1,260,000 tokens of parsed information as output. Assuming Gemini 2.5 Flash Lite, which costs $0.10/M input tokens and $0.40/M output tokens, we would pay $0.702 for input and $0.504 for output, or $1.206 total.
|
||||
|
112
src/db.rs
112
src/db.rs
@@ -614,6 +614,101 @@ pub fn listings_get_filtered(
|
||||
listings
|
||||
}
|
||||
|
||||
// This is mostly meant as a way to cache all of these.
|
||||
#[derive(Serialize, Debug, PartialEq, Clone)]
|
||||
pub struct ParsedLLMStorageResult {
|
||||
pub id: i64,
|
||||
pub item_id: i64,
|
||||
pub title: String,
|
||||
pub quantity: i64,
|
||||
pub gigabytes: i64,
|
||||
pub fail_reason: String,
|
||||
}
|
||||
impl DBTable for ParsedLLMStorageResult {
|
||||
const TABLE_NAME: &'static str = "ParsedLLMStorageResult";
|
||||
const TABLE_SCHEMA: &'static str = "
|
||||
id INTEGER PRIMARY KEY,
|
||||
item_id INTEGER NOT NULL UNIQUE,
|
||||
title TEXT NOT NULL,
|
||||
quantity INTEGER NOT NULL,
|
||||
gigabytes INTEGER NOT NULL,
|
||||
fail_reason TEXT NOT NULL
|
||||
";
|
||||
|
||||
fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
|
||||
let mut stmt = conn.prepare(&format!(
|
||||
"SELECT id, item_id, title, quantity, gigabytes, fail_reason FROM {}",
|
||||
Self::TABLE_NAME
|
||||
))?;
|
||||
let iter = stmt.query_map([], |row| {
|
||||
Ok(ParsedLLMStorageResult {
|
||||
id: row.get(0)?,
|
||||
item_id: row.get(1)?,
|
||||
title: row.get(2)?,
|
||||
quantity: row.get(3)?,
|
||||
gigabytes: row.get(4)?,
|
||||
fail_reason: row.get(5)?,
|
||||
})
|
||||
})?;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for item in iter {
|
||||
result.push(item?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
impl ParsedLLMStorageResult {
|
||||
pub fn lookup(conn: &Connection, item_id: i64) -> Option<ParsedLLMStorageResult> {
|
||||
let mut stmt = conn
|
||||
.prepare(&format!(
|
||||
"SELECT * FROM {} WHERE item_id = ?",
|
||||
Self::TABLE_NAME
|
||||
))
|
||||
.ok()?;
|
||||
stmt.query_one([item_id], |row| {
|
||||
Ok(ParsedLLMStorageResult {
|
||||
id: row.get(0)?,
|
||||
item_id: row.get(1)?,
|
||||
title: row.get(2)?,
|
||||
quantity: row.get(3)?,
|
||||
gigabytes: row.get(4)?,
|
||||
fail_reason: row.get(5)?,
|
||||
})
|
||||
})
|
||||
.ok()
|
||||
}
|
||||
|
||||
pub fn add_or_update(&self, conn: &Connection) {
|
||||
let count = conn
|
||||
.execute(
|
||||
&format!(
|
||||
"INSERT OR REPLACE INTO {}
|
||||
(
|
||||
item_id,
|
||||
title,
|
||||
quantity,
|
||||
gigabytes,
|
||||
fail_reason
|
||||
)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5)",
|
||||
Self::TABLE_NAME
|
||||
),
|
||||
(
|
||||
self.item_id,
|
||||
&self.title,
|
||||
self.quantity,
|
||||
self.gigabytes,
|
||||
self.fail_reason.clone(),
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
if count != 1 {
|
||||
panic!("Expected count to be 1 but got {}", count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_initialized(path: Option<&Path>) -> Connection {
|
||||
let conn = match path {
|
||||
Some(p) => Connection::open(&p),
|
||||
@@ -626,6 +721,7 @@ pub fn get_initialized(path: Option<&Path>) -> Connection {
|
||||
ParsedStorage::initialize(&conn);
|
||||
ParsedPage::initialize(&conn);
|
||||
ItemAppearances::initialize(&conn);
|
||||
ParsedLLMStorageResult::initialize(&conn);
|
||||
|
||||
conn
|
||||
}
|
||||
@@ -637,6 +733,7 @@ pub struct Stats {
|
||||
rows_parsed_storage: i64,
|
||||
rows_parsed_page: i64,
|
||||
rows_item_appearances: i64,
|
||||
// pub rows_parsed_storage_llm: i64,
|
||||
}
|
||||
|
||||
pub fn get_stats(conn: &Connection) -> Stats {
|
||||
@@ -646,6 +743,7 @@ pub fn get_stats(conn: &Connection) -> Stats {
|
||||
rows_parsed_storage: ParsedStorage::get_count(conn),
|
||||
rows_parsed_page: ParsedPage::get_count(conn),
|
||||
rows_item_appearances: ItemAppearances::get_count(conn),
|
||||
// rows_parsed_storage_llm: ParsedLLMStorageResult::get_count(conn),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -706,6 +804,20 @@ mod tests {
|
||||
vec![apperance]
|
||||
);
|
||||
|
||||
let parsedllmstorage = ParsedLLMStorageResult {
|
||||
fail_reason: "Some reason".to_owned(),
|
||||
gigabytes: 12,
|
||||
id: 1,
|
||||
item_id: 12345,
|
||||
quantity: 32,
|
||||
title: "Some Title".to_owned(),
|
||||
};
|
||||
parsedllmstorage.add_or_update(&db);
|
||||
assert_eq!(
|
||||
ParsedLLMStorageResult::lookup(&db, parsedllmstorage.item_id),
|
||||
Some(parsedllmstorage)
|
||||
);
|
||||
|
||||
assert_eq!(Listing::lookup_since(&db, page.timestamp, 3), vec![listing]);
|
||||
assert_eq!(
|
||||
Listing::lookup_since(&db, page.timestamp + chrono::Duration::seconds(1), 3),
|
||||
|
@@ -1,4 +1,5 @@
|
||||
pub mod db;
|
||||
pub mod parser;
|
||||
pub mod parser_ebay;
|
||||
pub mod parser_storage;
|
||||
pub mod parser_storage_e0;
|
||||
pub mod parser_storage_e1;
|
||||
|
122
src/main.rs
122
src/main.rs
@@ -2,16 +2,16 @@ use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
|
||||
use chrono::{DateTime, Utc};
|
||||
use clap::Parser;
|
||||
use ebay_scraper_rust::db::{
|
||||
DBTable, ItemAppearances, Listing, ParsedPage, ParsedStorage, SearchURL, get_initialized,
|
||||
get_stats, listings_get_filtered,
|
||||
DBTable, ItemAppearances, Listing, ParsedLLMStorageResult, ParsedPage, ParsedStorage,
|
||||
SearchURL, get_initialized, get_stats, listings_get_filtered,
|
||||
};
|
||||
use ebay_scraper_rust::parser::parse_dir;
|
||||
use ebay_scraper_rust::parser_storage;
|
||||
use ebay_scraper_rust::{parser_storage_e0, parser_storage_e1};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Mutex;
|
||||
use std::time::Instant;
|
||||
use tracing::{info, instrument};
|
||||
use tracing::{error, info, instrument};
|
||||
|
||||
use tracing_subscriber::filter::EnvFilter;
|
||||
use tracing_subscriber::fmt;
|
||||
@@ -28,6 +28,12 @@ mod xdg_dirs;
|
||||
)]
|
||||
struct Args {}
|
||||
|
||||
struct AppCtx {
|
||||
db: rusqlite::Connection,
|
||||
db_llm: rusqlite::Connection,
|
||||
download_dir: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
struct ListingsFilter {
|
||||
since: Option<i64>,
|
||||
@@ -37,12 +43,12 @@ struct ListingsFilter {
|
||||
|
||||
#[get("/listings")]
|
||||
async fn listings_filtered_get(
|
||||
db: Data<Mutex<rusqlite::Connection>>,
|
||||
ctx: Data<Mutex<AppCtx>>,
|
||||
filter: web::Query<ListingsFilter>,
|
||||
) -> Result<impl Responder> {
|
||||
let start = Instant::now();
|
||||
let res = listings_get_filtered(
|
||||
&db.lock().unwrap(),
|
||||
&ctx.lock().unwrap().db,
|
||||
&DateTime::<Utc>::from_timestamp(filter.since.unwrap_or(0), 0).unwrap(),
|
||||
filter.limit.unwrap_or(1_000),
|
||||
filter.cents_per_tbytes_max.unwrap_or(100_00),
|
||||
@@ -57,19 +63,16 @@ async fn listings_filtered_get(
|
||||
}
|
||||
|
||||
#[get("/listing/{id}")]
|
||||
async fn listing_get(
|
||||
db: Data<Mutex<rusqlite::Connection>>,
|
||||
id: web::Path<i64>,
|
||||
) -> Result<impl Responder> {
|
||||
Ok(web::Json(Listing::lookup(&db.lock().unwrap(), *id)))
|
||||
async fn listing_get(ctx: Data<Mutex<AppCtx>>, id: web::Path<i64>) -> Result<impl Responder> {
|
||||
Ok(web::Json(Listing::lookup(&ctx.lock().unwrap().db, *id)))
|
||||
}
|
||||
|
||||
#[get("/listing/{id}/parsed")]
|
||||
async fn listing_parse_get(
|
||||
db: Data<Mutex<rusqlite::Connection>>,
|
||||
id: web::Path<i64>,
|
||||
) -> Result<impl Responder> {
|
||||
Ok(web::Json(ParsedStorage::lookup(&db.lock().unwrap(), *id)))
|
||||
async fn listing_parse_get(ctx: Data<Mutex<AppCtx>>, id: web::Path<i64>) -> Result<impl Responder> {
|
||||
Ok(web::Json(ParsedStorage::lookup(
|
||||
&ctx.lock().unwrap().db,
|
||||
*id,
|
||||
)))
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
@@ -80,10 +83,10 @@ struct APIHistory {
|
||||
|
||||
#[get("/listing/{id}/history")]
|
||||
async fn listing_history_get(
|
||||
db: Data<Mutex<rusqlite::Connection>>,
|
||||
ctx: Data<Mutex<AppCtx>>,
|
||||
id: web::Path<i64>,
|
||||
) -> Result<impl Responder> {
|
||||
let history: Vec<_> = ItemAppearances::lookup(&db.lock().unwrap(), *id)
|
||||
let history: Vec<_> = ItemAppearances::lookup(&ctx.lock().unwrap().db, *id)
|
||||
.iter()
|
||||
// .inspect(|e| info!("got: {:?}", e))
|
||||
.filter_map(|e| {
|
||||
@@ -97,35 +100,52 @@ async fn listing_history_get(
|
||||
}
|
||||
|
||||
#[post("/listing/parse")]
|
||||
async fn parse_listings(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
||||
let mut cnt = 0;
|
||||
let db_unlocked = db.lock().unwrap();
|
||||
Listing::lookup_non_parsed(&db_unlocked)
|
||||
async fn parse_listings(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
|
||||
// Lets grab a few entries and then try parsing them with two engines.
|
||||
let ctx_locked = ctx.lock().unwrap();
|
||||
let entries: Vec<_> = Listing::lookup_non_parsed(&ctx_locked.db)
|
||||
.iter()
|
||||
.map(|l| parser_storage::parse_size_and_quantity(l.0, &l.1))
|
||||
.inspect(|_| cnt = cnt + 1)
|
||||
.for_each(|ps| ps.add_or_update(&db_unlocked));
|
||||
.take(10)
|
||||
.map(|e| e.clone())
|
||||
.collect();
|
||||
for (item_id, title) in &entries {
|
||||
let ps0 = parser_storage_e0::parse_size_and_quantity(*item_id, &title);
|
||||
ps0.add_or_update(&ctx_locked.db);
|
||||
|
||||
Ok(web::Json(cnt))
|
||||
let ps1 =
|
||||
parser_storage_e1::parse_size_and_quantity(&ctx_locked.db_llm, *item_id, &title).await;
|
||||
if ps1.is_some() {
|
||||
info!(
|
||||
"Parsed using an LLM title:{} and results:{:?}",
|
||||
title,
|
||||
ps1.unwrap()
|
||||
);
|
||||
ps1.unwrap().add_or_update(&ctx_locked.db);
|
||||
ps1.unwrap().add_or_update(&ctx_locked.db_llm);
|
||||
} else {
|
||||
error!("Failed to parse {item_id} with title {title}");
|
||||
}
|
||||
}
|
||||
Ok(web::Json(entries.len()))
|
||||
}
|
||||
|
||||
#[get("/category")]
|
||||
async fn category_getnames(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
||||
Ok(web::Json(SearchURL::names(&db.lock().unwrap())))
|
||||
async fn category_getnames(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
|
||||
Ok(web::Json(SearchURL::names(&ctx.lock().unwrap().db)))
|
||||
}
|
||||
|
||||
#[post("/category/{category}/parse")]
|
||||
#[instrument(skip_all)]
|
||||
async fn category_parse(
|
||||
db: Data<Mutex<rusqlite::Connection>>,
|
||||
downloaddir: Data<PathBuf>,
|
||||
ctx: Data<Mutex<AppCtx>>,
|
||||
category: web::Path<String>,
|
||||
) -> Result<impl Responder> {
|
||||
let start = Instant::now();
|
||||
let ctx_unlocked = ctx.lock().unwrap();
|
||||
let count = parse_dir(
|
||||
&downloaddir.join(category.clone()),
|
||||
&ctx_unlocked.download_dir.join(category.clone()),
|
||||
&category,
|
||||
&db.lock().unwrap(),
|
||||
&ctx_unlocked.db,
|
||||
)
|
||||
.unwrap();
|
||||
let elapsed = start.elapsed().as_micros() as f64 / 1000.0;
|
||||
@@ -135,19 +155,22 @@ async fn category_parse(
|
||||
}
|
||||
|
||||
#[get("/stats")]
|
||||
async fn stats_get(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
||||
Ok(web::Json(get_stats(&db.lock().unwrap())))
|
||||
async fn stats_get(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
|
||||
let stats_db = get_stats(&ctx.lock().unwrap().db);
|
||||
// let stats_db_llm = get_stats(&ctx.lock().unwrap().db_llm);
|
||||
// stats_db.rows_parsed_storage_llm = stats_db_llm.rows_parsed_storage_llm;
|
||||
Ok(web::Json(stats_db))
|
||||
}
|
||||
|
||||
#[get("/admin")]
|
||||
async fn admin_get(db: Data<Mutex<rusqlite::Connection>>) -> Result<impl Responder> {
|
||||
let db = db.lock().unwrap();
|
||||
async fn admin_get(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
|
||||
let ctx_locked = ctx.lock().unwrap();
|
||||
let query_start_time = Instant::now();
|
||||
let search_urls = SearchURL::get_all(&db).unwrap_or_default();
|
||||
let parsed_pages = ParsedPage::get_all(&db).unwrap_or_default();
|
||||
let parsed_storages = ParsedStorage::get_all(&db).unwrap_or_default();
|
||||
let item_appearances = ItemAppearances::get_all(&db).unwrap_or_default();
|
||||
let listings = Listing::get_all(&db).unwrap_or_default();
|
||||
let search_urls = SearchURL::get_all(&ctx_locked.db).unwrap_or_default();
|
||||
let parsed_pages = ParsedPage::get_all(&ctx_locked.db).unwrap_or_default();
|
||||
let parsed_storages = ParsedStorage::get_all(&ctx_locked.db).unwrap_or_default();
|
||||
let item_appearances = ItemAppearances::get_all(&ctx_locked.db).unwrap_or_default();
|
||||
let listings = Listing::get_all(&ctx_locked.db).unwrap_or_default();
|
||||
let total_query_time = query_start_time.elapsed().as_micros() as f64 / 1000.0;
|
||||
|
||||
let html_gen_start_time = Instant::now();
|
||||
@@ -287,10 +310,20 @@ async fn main() -> std::io::Result<()> {
|
||||
"Starting with scraped data dir of \"{}\".",
|
||||
scrapedatadir.to_str().unwrap()
|
||||
);
|
||||
let db_mutex = Data::new(Mutex::new(get_initialized(None)));
|
||||
|
||||
let app_data = Data::new(Mutex::new(AppCtx {
|
||||
download_dir: scrapedatadir.clone(),
|
||||
db: get_initialized(None),
|
||||
db_llm: {
|
||||
let db_path = scrapedatadir.with_file_name("llm.sqlite");
|
||||
let db = rusqlite::Connection::open(db_path).unwrap();
|
||||
ParsedLLMStorageResult::initialize(&db);
|
||||
db
|
||||
},
|
||||
}));
|
||||
|
||||
// Prepare our backend via pulling in what catagories we are preconfigured with.
|
||||
SearchURL::scan(&db_mutex.lock().unwrap(), &scrapedatadir, "url.json");
|
||||
SearchURL::scan(&app_data.lock().unwrap().db, &scrapedatadir, "url.json");
|
||||
|
||||
HttpServer::new(move || {
|
||||
App::new()
|
||||
@@ -306,8 +339,7 @@ async fn main() -> std::io::Result<()> {
|
||||
.service(admin_get)
|
||||
.service(stats_get)
|
||||
// Stuff which is passed into every request.
|
||||
.app_data(db_mutex.clone())
|
||||
.app_data(Data::new(scrapedatadir.clone()))
|
||||
.app_data(app_data.clone())
|
||||
})
|
||||
.bind(("0.0.0.0", 9876))?
|
||||
.run()
|
||||
|
105
src/parser_storage_e1.rs
Normal file
105
src/parser_storage_e1.rs
Normal file
@@ -0,0 +1,105 @@
|
||||
use crate::db::ParsedLLMStorageResult;
|
||||
use crate::db::ParsedStorage;
|
||||
use actix_web::mime::APPLICATION_JSON;
|
||||
use reqwest::header::AUTHORIZATION;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
|
||||
// Given this prompt and a title of "(Lot of 6) Samsung MZ-VLB2560 256GB M.2NVMe Internal SSD
// (MZVLB256HBHQ-000H1)", we get roughly 338 input tokens and 36 output tokens. Assuming no caching
// and Gemini 2.5 Flash Lite at $0.10/M input and $0.40/M output, this costs $0.0000338 for input,
// $0.0000144 for output, and $0.0000482 in total per listing. For 30,000 listings that is $1.446.

/// System prompt sent ahead of every listing title.
///
/// It asks for a bare JSON object with `quantity`, `gigabytes` and
/// `fail_reason` keys. Both embedded examples must themselves be valid JSON,
/// because the prompt tells the model to follow their structure strictly.
const SYSTEM_PROMPT: &str = r#"
You will be given a product listing for one or more storage drives. You will return *ONLY* JSON strictly adhering to the same structure and key names as below. This means no backticks or markdown/markup. You will specify how many storage drives are included in the listing as a number (1, 2, 3, etc), the size in gigabytes of each drive as a number (rounding up if needed, so 1, 2, 3, etc), and lastly if the above cannot be provided due to the listing title being incomplete or confusing, a very short reason why.

Here is an example for a title of "Lot of 2, Crucial P3 Plus 1TB NVMe Internal M.2 SSD (CT1000P3PSSD8) - Barely used!";
{
"quantity": 2,
"gigabytes": 1024,
"fail_reason": ""
}

And an example for an unclear title of "Pallet of assorted 128GB to 5TB drives";
{
"quantity": 0,
"gigabytes": 0,
"fail_reason": "multiple mixed sizes"
}
"#;
|
||||
|
||||
fn create_request(title: &str) -> serde_json::Value {
|
||||
json!({
|
||||
"model": "gemini-2.5-flash-lite",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": SYSTEM_PROMPT
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": title
|
||||
}
|
||||
]
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize, Debug, PartialEq, Clone)]
|
||||
struct LLMParsedResponse {
|
||||
pub quantity: i64,
|
||||
pub gigabytes: i64,
|
||||
pub fail_reason: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
struct OpenAIResponse {
|
||||
choices: Vec<OpenAIChoice>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
struct OpenAIChoice {
|
||||
message: OpenAIMessage,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
struct OpenAIMessage {
|
||||
content: String,
|
||||
}
|
||||
|
||||
/// Parses size and quantity information from an item title.
|
||||
pub async fn parse_size_and_quantity(
|
||||
db: &rusqlite::Connection,
|
||||
item_id: i64,
|
||||
title: &str,
|
||||
) -> Option<ParsedStorage> {
|
||||
let client = reqwest::Client::new();
|
||||
let req = client
|
||||
.post("https://ai.hak8or.com/litellm_api/chat/completions")
|
||||
.header(CONTENT_TYPE, APPLICATION_JSON.to_string())
|
||||
.header(AUTHORIZATION, "Bearer sk-HMGML94x2ag6ggOoDghSGA")
|
||||
.body(create_request(title).to_string());
|
||||
let reply_body = req.send().await.ok()?.text().await.ok()?;
|
||||
let repl_json: OpenAIResponse = serde_json::from_str(&reply_body).ok()?;
|
||||
let reply_parsed_storage_json: LLMParsedResponse =
|
||||
serde_json::from_str(&repl_json.choices[0].message.content).ok()?;
|
||||
|
||||
let plsr = ParsedLLMStorageResult {
|
||||
id: 0,
|
||||
fail_reason: reply_parsed_storage_json.fail_reason.clone(),
|
||||
gigabytes: reply_parsed_storage_json.gigabytes,
|
||||
item_id,
|
||||
quantity: reply_parsed_storage_json.quantity,
|
||||
title: title.to_owned(),
|
||||
};
|
||||
plsr.add_or_update(&db);
|
||||
|
||||
Some(ParsedStorage {
|
||||
id: 0,
|
||||
item: item_id,
|
||||
total_gigabytes: reply_parsed_storage_json.quantity * reply_parsed_storage_json.gigabytes,
|
||||
quantity: reply_parsed_storage_json.quantity,
|
||||
individual_size_gigabytes: reply_parsed_storage_json.gigabytes,
|
||||
needed_description_check: !reply_parsed_storage_json.fail_reason.is_empty(),
|
||||
parse_engine: 1,
|
||||
})
|
||||
}
|
Reference in New Issue
Block a user