diff --git a/ebay_storage/rust/Cargo.lock b/ebay_storage/rust/Cargo.lock index 4482390..40773b2 100644 --- a/ebay_storage/rust/Cargo.lock +++ b/ebay_storage/rust/Cargo.lock @@ -17,19 +17,6 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" -[[package]] -name = "ahash" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" -dependencies = [ - "cfg-if", - "getrandom 0.3.3", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "aho-corasick" version = "1.1.3" @@ -104,6 +91,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.4.0" @@ -127,15 +120,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.7" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bitflags" @@ -222,7 +209,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] [[package]] @@ -255,14 +242,14 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cssparser" -version = "0.31.2" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be" +checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" dependencies = [ "cssparser-macros", "dtoa-short", "itoa", - "phf 0.11.3", + "phf", "smallvec", ] @@ -273,7 +260,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote", - "syn 2.0.101", + "syn", ] [[package]] @@ -284,7 +271,7 @@ checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] [[package]] @@ -295,7 +282,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] [[package]] @@ -328,14 +315,16 @@ dependencies = [ "serde", "serde_json", "tokio", + "tracing", + "tracing-subscriber", "url", ] [[package]] name = "ego-tree" -version = "0.6.3" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642" +checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" [[package]] name = "encoding_rs" @@ -464,7 +453,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] [[package]] @@ -546,15 +535,15 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "h2" -version = "0.3.26" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +checksum = "a9421a676d1b147b16b82c9225157dc629087ef8ec4d5e2960f9437a90dac0a5" dependencies = [ + "atomic-waker", "bytes", "fnv", "futures-core", "futures-sink", - "futures-util", "http", "indexmap", "slab", @@ -577,23 +566,21 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "html5ever" -version = "0.26.0" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" dependencies = [ "log", "mac", "markup5ever", - "proc-macro2", - "quote", - "syn 1.0.109", + "match_token", ] [[package]] name = "http" -version = "0.2.12" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ "bytes", "fnv", @@ -602,12 +589,24 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.6" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", "pin-project-lite", ] @@ -617,47 +616,82 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - [[package]] name = "hyper" -version = "0.14.32" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" dependencies = [ "bytes", "futures-channel", - "futures-core", "futures-util", "h2", "http", "http-body", "httparse", - "httpdate", "itoa", "pin-project-lite", - "socket2", + "smallvec", "tokio", - "tower-service", - "tracing", "want", ] [[package]] -name = "hyper-tls" -version = "0.5.0" +name = "hyper-rustls" +version = "0.27.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +checksum = "03a01595e11bdcec50946522c32dde3fc6914743000a68b93000965f2f02406d" +dependencies = [ + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", +] + +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", + "http-body-util", "hyper", + "hyper-util", "native-tls", "tokio", "tokio-native-tls", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c293b6b3d21eca78250dc7dbebd6b9210ec5530e038cbfe0661b5c47ab06e8" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "hyper", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2", + "system-configuration", + "tokio", + "tower-service", + "tracing", + "windows-registry", ] [[package]] @@ -807,6 +841,16 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +[[package]] +name = "iri-string" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -877,18 +921,29 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" [[package]] name = "markup5ever" -version = "0.11.0" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" dependencies = [ "log", - "phf 0.10.1", + "phf", "phf_codegen", "string_cache", "string_cache_codegen", "tendril", ] +[[package]] +name = "match_token" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "memchr" version = "2.7.4" @@ -944,6 +999,16 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -980,7 +1045,7 @@ version = "0.10.73" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" dependencies = [ - "bitflags 2.9.1", + "bitflags", "cfg-if", "foreign-types", "libc", @@ -997,7 +1062,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] [[package]] @@ -1018,6 +1083,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "parking_lot" version = "0.12.3" @@ -1047,15 +1118,6 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "phf" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" -dependencies = [ - "phf_shared 0.10.0", -] - [[package]] name = "phf" version = "0.11.3" @@ -1063,27 +1125,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ "phf_macros", - "phf_shared 0.11.3", + "phf_shared", ] [[package]] name = "phf_codegen" -version = "0.10.0" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ - "phf_generator 0.10.0", - "phf_shared 0.10.0", -] - -[[package]] -name = "phf_generator" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" -dependencies = [ - "phf_shared 0.10.0", - "rand", + "phf_generator", + "phf_shared", ] [[package]] @@ -1092,7 +1144,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ - "phf_shared 0.11.3", + "phf_shared", "rand", ] @@ -1102,20 +1154,11 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" dependencies = [ - "phf_generator 0.11.3", - "phf_shared 0.11.3", + "phf_generator", + "phf_shared", "proc-macro2", "quote", - "syn 2.0.101", -] - -[[package]] -name = "phf_shared" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" -dependencies = [ - "siphasher 0.3.11", + "syn", ] [[package]] @@ -1124,7 +1167,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ - "siphasher 1.0.1", + "siphasher", ] [[package]] @@ -1154,15 +1197,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - [[package]] name = "precomputed-hash" version = "0.1.1" @@ -1199,18 +1233,6 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", "rand_core", ] @@ -1219,9 +1241,6 @@ name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.16", -] [[package]] name = "redox_syscall" @@ -1229,7 +1248,7 @@ version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" dependencies = [ - "bitflags 2.9.1", + "bitflags", ] [[package]] @@ -1263,9 +1282,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.11.27" +version = "0.12.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +checksum = "e98ff6b0dbbe4d5a37318f433d4fc82babd21631f194d370409ceb2e40b2f0b5" dependencies = [ "base64", "bytes", @@ -1275,8 +1294,11 @@ dependencies = [ "h2", "http", "http-body", + "http-body-util", "hyper", + "hyper-rustls", "hyper-tls", + "hyper-util", "ipnet", "js-sys", "log", @@ -1285,22 +1307,36 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls-pemfile", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", - "system-configuration", "tokio", "tokio-native-tls", "tokio-util", + "tower", + "tower-http", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "wasm-streams", "web-sys", - "winreg", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.16", + "libc", + "untrusted", + "windows-sys 0.52.0", ] [[package]] @@ -1315,7 +1351,7 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.9.1", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -1323,12 +1359,36 @@ dependencies = [ ] [[package]] -name = "rustls-pemfile" -version = "1.0.4" +name = "rustls" +version = "0.23.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +checksum = "730944ca083c1c233a75c09f199e973ca499344a2b7ba9e755c457e86fb4a321" dependencies = [ - "base64", + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", ] [[package]] @@ -1360,16 +1420,15 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scraper" -version = "0.18.1" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585480e3719b311b78a573db1c9d9c4c1f8010c2dee4cc59c2efe58ea4dbc3e1" +checksum = "527e65d9d888567588db4c12da1087598d0f6f8b346cc2c5abc91f05fc2dffe2" dependencies = [ - "ahash", "cssparser", "ego-tree", "getopts", "html5ever", - "once_cell", + "precomputed-hash", "selectors", "tendril", ] @@ -1380,7 +1439,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.9.1", + "bitflags", "core-foundation", "core-foundation-sys", "libc", @@ -1399,17 +1458,17 @@ dependencies = [ [[package]] name = "selectors" -version = "0.25.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" +checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" dependencies = [ - "bitflags 2.9.1", + "bitflags", "cssparser", "derive_more", "fxhash", "log", "new_debug_unreachable", - "phf 0.10.1", + "phf", "phf_codegen", "precomputed-hash", "servo_arc", @@ -1433,7 +1492,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] [[package]] @@ -1462,13 +1521,22 @@ dependencies = [ [[package]] name = "servo_arc" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44" +checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a" dependencies = [ "stable_deref_trait", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -1484,12 +1552,6 @@ dependencies = [ "libc", ] -[[package]] -name = "siphasher" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" - [[package]] name = "siphasher" version = "1.0.1" @@ -1535,7 +1597,7 @@ checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" dependencies = [ "new_debug_unreachable", "parking_lot", - "phf_shared 0.11.3", + "phf_shared", "precomputed-hash", "serde", ] @@ -1546,8 +1608,8 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" dependencies = [ - "phf_generator 0.11.3", - "phf_shared 0.11.3", + "phf_generator", + "phf_shared", "proc-macro2", "quote", ] @@ -1559,15 +1621,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] -name = "syn" -version = "1.0.109" +name = "subtle" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" @@ -1582,9 +1639,12 @@ dependencies = [ [[package]] name = "sync_wrapper" -version = "0.1.2" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] [[package]] name = "synstructure" @@ -1594,25 +1654,25 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] [[package]] name = "system-configuration" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 1.3.2", + "bitflags", "core-foundation", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -1642,6 +1702,16 @@ dependencies = [ "utf-8", ] +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "tinystr" version = "0.8.1" @@ -1678,7 +1748,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] [[package]] @@ -1691,6 +1761,16 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" +dependencies = [ + "rustls", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.15" @@ -1704,6 +1784,45 @@ dependencies = [ "tokio", ] +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fdb0c213ca27a9f57ab69ddb290fd80d970922355b83ae380b395d3986b8a2e" +dependencies = [ + "bitflags", + "bytes", + "futures-util", + "http", + "http-body", + "iri-string", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + [[package]] name = "tower-service" version = "0.3.3" @@ -1717,9 +1836,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.33" @@ -1727,6 +1858,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", ] [[package]] @@ -1747,6 +1904,12 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.5.4" @@ -1776,18 +1939,18 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - [[package]] name = "want" version = "0.3.1" @@ -1834,7 +1997,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.101", + "syn", "wasm-bindgen-shared", ] @@ -1869,7 +2032,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1906,6 +2069,28 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.61.2" @@ -1916,7 +2101,7 @@ dependencies = [ "windows-interface", "windows-link", "windows-result", - "windows-strings", + "windows-strings 0.4.2", ] [[package]] @@ -1927,7 +2112,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] [[package]] @@ -1938,7 +2123,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] [[package]] @@ -1947,6 +2132,17 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +[[package]] +name = "windows-registry" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" +dependencies = [ + "windows-result", + "windows-strings 0.3.1", + "windows-targets 0.53.0", +] + [[package]] name = "windows-result" version = "0.3.4" @@ -1958,20 +2154,20 @@ dependencies = [ [[package]] name = "windows-strings" -version = "0.4.2" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" dependencies = [ "windows-link", ] [[package]] -name = "windows-sys" -version = "0.48.0" +name = "windows-strings" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "windows-targets 0.48.5", + "windows-link", ] [[package]] @@ -1992,21 +2188,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - [[package]] name = "windows-targets" version = "0.52.6" @@ -2016,7 +2197,7 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", @@ -2024,10 +2205,20 @@ dependencies = [ ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" +name = "windows-targets" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] [[package]] name = "windows_aarch64_gnullvm" @@ -2036,10 +2227,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" +name = "windows_aarch64_gnullvm" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" [[package]] name = "windows_aarch64_msvc" @@ -2048,10 +2239,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] -name = "windows_i686_gnu" -version = "0.48.5" +name = "windows_aarch64_msvc" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" [[package]] name = "windows_i686_gnu" @@ -2059,6 +2250,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" @@ -2066,10 +2263,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] -name = "windows_i686_msvc" -version = "0.48.5" +name = "windows_i686_gnullvm" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" [[package]] name = "windows_i686_msvc" @@ -2078,10 +2275,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" +name = "windows_i686_msvc" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" [[package]] name = "windows_x86_64_gnu" @@ -2090,10 +2287,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" +name = "windows_x86_64_gnu" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" [[package]] name = "windows_x86_64_gnullvm" @@ -2102,10 +2299,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" +name = "windows_x86_64_gnullvm" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" [[package]] name = "windows_x86_64_msvc" @@ -2114,14 +2311,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "winreg" -version = "0.50.0" +name = "windows_x86_64_msvc" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" [[package]] name = "wit-bindgen-rt" @@ -2129,7 +2322,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.9.1", + "bitflags", ] [[package]] @@ -2158,30 +2351,10 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", "synstructure", ] -[[package]] -name = "zerocopy" -version = "0.8.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - [[package]] name = "zerofrom" version = "0.1.6" @@ -2199,10 +2372,16 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", "synstructure", ] +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + [[package]] name = "zerotrie" version = "0.2.2" @@ -2233,5 +2412,5 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn", ] diff --git a/ebay_storage/rust/Cargo.toml b/ebay_storage/rust/Cargo.toml index d7a01a8..6ca9af1 100644 --- a/ebay_storage/rust/Cargo.toml +++ b/ebay_storage/rust/Cargo.toml @@ -1,19 +1,21 @@ [package] name = "ebay_scraper_rust" version = "0.1.0" -edition = "2021" +edition = "2024" [dependencies] -clap = { version = "4.4", features = ["derive"] } -reqwest = { version = "0.11", features = ["json", "stream"] } # Removed "blocking" as we use tokio -scraper = "0.18" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -regex = "1.10" -tokio = { version = "1", features = ["full"] } -url = "2.5" +clap = { version = "4.5.39", features = ["derive"] } +reqwest = { version = "0.12.18", features = ["json", "stream"] } # Removed "blocking" as we use tokio +scraper = "0.23.1" +serde = { version = "1.0.219", features = ["derive"] } +serde_json = "1.0.140" +regex = "1.11.1" +tokio = { version = "1.45.1", features = ["full"] } +url = "2.5.4" # path-slash is not strictly needed if using std::path::PathBuf correctly -bytes = "1.5" -chrono = { version = "0.4", features = ["serde"] } -lazy_static = "1.4.0" -futures = "0.3" # For join_all on async tasks \ No newline at end of file +bytes = "1.10.1" +chrono = { version = "0.4.41", features = ["serde"] } +lazy_static = "1.5.0" +futures = "0.3.31" # For join_all on async tasks +tracing = "0.1.41" +tracing-subscriber = { version = "0.3.19", features = ["fmt"] } diff --git a/ebay_storage/rust/src/cli.rs b/ebay_storage/rust/src/cli.rs new file mode 100644 index 0000000..15686d9 --- /dev/null +++ b/ebay_storage/rust/src/cli.rs @@ -0,0 +1,45 @@ +// src/cli.rs +use clap::Parser; + +#[derive(Parser, Debug)] +#[clap( + name = "ebay-scraper-rust", + version = "0.1.0", + about = "Scrapes eBay search results for SSD/HDD cost per TB." +)] +pub struct Cli { + #[clap(subcommand)] + pub command: Option, + + /// The full eBay search URL to scrape. + pub url: Option, + + /// Save scraped HTML to a file (and download images if fetching from URL). + #[clap(long)] + pub save: Option, + + /// Load HTML from a file (disables network). Image download will not occur with --load. + #[clap(long)] + pub load: Option, + + /// Suppress informational logs, output only final JSON. + #[clap(long)] + pub only_json: bool, +} + +#[derive(Parser, Debug)] +pub enum Commands { + /// Scrapes latest listings. + Latest(LatestArgs), +} + +#[derive(Parser, Debug)] +pub struct LatestArgs { + /// Items per page (60, 120, or 240) + #[clap(long, default_value = "60")] + pub per_page: String, + + /// Minimum cost (e.g., 50.00) + #[clap(long, default_value = "0.00")] + pub minimum_cost: f64, +} diff --git a/ebay_storage/rust/src/html_utils.rs b/ebay_storage/rust/src/html_utils.rs new file mode 100644 index 0000000..023e594 --- /dev/null +++ b/ebay_storage/rust/src/html_utils.rs @@ -0,0 +1,250 @@ +// src/html_utils.rs +use chrono::Utc; +use lazy_static::lazy_static; +use regex::Regex; +use scraper::{Html, Selector}; +use std::error::Error as StdError; // Use the same alias as main +use std::fs::{self, File}; +use std::io::Write; +use std::path::Path; +use tracing::{error, info, warn}; +use url::Url; + +use super::item::{EbayItem, ParsedItemData}; +use super::parser_utils; + +// Define or import AppError to match main.rs +type AppError = Box; + +const PARSER_ENGINE_VERSION: i32 = 1; +const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"; + +lazy_static! { + static ref ITEM_ID_REGEX: Regex = Regex::new(r"/itm/(\d+)").unwrap(); + static ref NEW_LISTING_REGEX: Regex = Regex::new(r"(?i)^\s*NEW LISTING\s*[:\-\s]*").unwrap(); +} + +/// Fetches HTML content from a URL. +pub async fn fetch_html(url: &str) -> Result { + info!(target_url = url, "Navigating to URL"); + let client = reqwest::Client::builder().user_agent(USER_AGENT).build()?; + let response = client.get(url).send().await?; + if !response.status().is_success() { + let err_msg = format!( + "Failed to fetch URL: {} - Status: {}", + url, + response.status() + ); + error!(error_message = %err_msg, "URL fetch failed"); + return Err(err_msg.into()); + } + let html_content = response.text().await?; + info!( + target_url = url, + "Navigation successful. Page content retrieved." + ); + Ok(html_content) +} + +/// Extracts item data from HTML content. +pub fn extract_data_from_html(html_content: &str) -> Result, AppError> { + let document = Html::parse_document(html_content); + let mut items = Vec::new(); + let today = Utc::now(); + + // MODIFIED: Using .unwrap() for Selector::parse() calls as requested + let item_selector = + Selector::parse("li.s-item, li.srp-results__item, div.s-item[role='listitem']").unwrap(); + let title_selector = Selector::parse(".s-item__title, .srp-results__title").unwrap(); + let price_selector = Selector::parse(".s-item__price").unwrap(); + let image_selector = + Selector::parse(".s-item__image-wrapper img.s-item__image-img, .s-item__image img") + .unwrap(); + let link_selector = + Selector::parse("a.s-item__link[href*='/itm/'], .s-item__info > a[href*='/itm/']").unwrap(); + let bid_count_selector = Selector::parse(".s-item__bid-count").unwrap(); + let best_offer_selector = + Selector::parse(".s-item__purchase-options--bo, .s-item__best-offer").unwrap(); + let secondary_info_selector = + Selector::parse(".s-item__subtitle, .s-item__secondary-text, .s-item__detail--secondary") + .unwrap(); + let auction_bin_price_selector = Selector::parse(".s-item__buy-it-now-price").unwrap(); + + for element in document.select(&item_selector) { + let raw_title_text = element + .select(&title_selector) + .next() + .map(|el| el.text().collect::().trim().to_string()); + let price_text = element + .select(&price_selector) + .next() + .map(|el| el.text().collect::().trim().to_string()); + + let item_id = element + .select(&link_selector) + .next() + .and_then(|link_el| link_el.value().attr("href")) + .and_then(|href| ITEM_ID_REGEX.captures(href)) + .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())); + + if raw_title_text.is_none() || price_text.is_none() || item_id.is_none() { + warn!("Skipping item due to missing title, price, or item ID."); + continue; + } + let raw_title = raw_title_text.unwrap(); + let price_text = price_text.unwrap(); + let item_id = item_id.unwrap(); + + let cleaned_title = NEW_LISTING_REGEX.replace(&raw_title, "").trim().to_string(); + + let primary_display_price = parser_utils::parse_price(&price_text); + + let mut current_bid_price: Option = None; + let mut final_buy_it_now_price: Option = None; + let mut has_best_offer = false; + let mut item_is_auction = false; + + if let Some(bid_el) = element.select(&bid_count_selector).next() { + if bid_el + .text() + .collect::() + .to_lowercase() + .contains("bid") + { + item_is_auction = true; + } + } + + if element.select(&best_offer_selector).next().is_some() { + has_best_offer = true; + } else { + for el in element.select(&secondary_info_selector) { + if el + .text() + .collect::() + .to_lowercase() + .contains("or best offer") + { + has_best_offer = true; + break; + } + } + } + + if item_is_auction { + current_bid_price = primary_display_price; + if let Some(bin_el) = element.select(&auction_bin_price_selector).next() { + final_buy_it_now_price = + parser_utils::parse_price(&bin_el.text().collect::()); + } + } else { + final_buy_it_now_price = primary_display_price; + } + + let image_url_val = element + .select(&image_selector) + .next() + .and_then(|img_el| { + img_el + .value() + .attr("data-src") + .or(img_el.value().attr("src")) + }) + .map(|s| s.to_string()); + + let parsed_size_info = parser_utils::parse_size_and_quantity(&cleaned_title); + + let cost_per_tb = if let Some(price) = primary_display_price { + if parsed_size_info.total_tb > 0.0 { + Some(((price / parsed_size_info.total_tb) * 100.0).round() / 100.0) + } else { + None + } + } else { + None + }; + + let parsed_data = ParsedItemData { + item_count: parsed_size_info.quantity, + size_per_item_tb: if parsed_size_info.individual_size_tb > 0.0 { + Some(parsed_size_info.individual_size_tb) + } else { + None + }, + total_tb: if parsed_size_info.total_tb > 0.0 { + Some(parsed_size_info.total_tb) + } else { + None + }, + cost_per_tb, + needed_description_check: parsed_size_info.needed_description_check, + parser_engine: PARSER_ENGINE_VERSION, + }; + + items.push(EbayItem { + title: cleaned_title, + item_id, + date_found: today, + current_bid_price, + buy_it_now_price: final_buy_it_now_price, + has_best_offer, + image_url: image_url_val, + parsed: parsed_data, + }); + } + Ok(items) +} + +/// Downloads an image from a URL and saves it, preserving path structure. +pub async fn download_image( + image_url_str: &str, + base_save_directory: &Path, +) -> Result<(), AppError> { + if image_url_str.is_empty() { + return Ok(()); + } + + let parsed_url = Url::parse(image_url_str).map_err(|e| Box::new(e) as AppError)?; + + let image_path_from_url = parsed_url.path().trim_start_matches('/'); + if image_path_from_url.is_empty() { + return Err(Box::from("Image URL has no path component") as AppError); + } + + let full_local_image_path = base_save_directory.join(image_path_from_url); + + if let Some(parent_dir) = full_local_image_path.parent() { + fs::create_dir_all(parent_dir).map_err(|e| Box::new(e) as AppError)?; + info!(path = %parent_dir.display(), "Ensured image directory exists"); + } + + let client = reqwest::Client::builder() + .user_agent(USER_AGENT) + .build() + .map_err(|e| Box::new(e) as AppError)?; + let response = client + .get(image_url_str) + .send() + .await + .map_err(|e| Box::new(e) as AppError)?; + + if !response.status().is_success() { + let err_msg = format!( + "Failed to download image {}. Status: {}", + image_url_str, + response.status() + ); + return Err(Box::from(err_msg) as AppError); + } + + let mut file = File::create(&full_local_image_path).map_err(|e| Box::new(e) as AppError)?; + let content = response + .bytes() + .await + .map_err(|e| Box::new(e) as AppError)?; + file.write_all(&content) + .map_err(|e| Box::new(e) as AppError)?; + + info!(path = %full_local_image_path.display(), "Downloaded image"); + Ok(()) +} diff --git a/ebay_storage/rust/src/item.rs b/ebay_storage/rust/src/item.rs new file mode 100644 index 0000000..4645ec2 --- /dev/null +++ b/ebay_storage/rust/src/item.rs @@ -0,0 +1,45 @@ +// src/item.rs +use chrono::{DateTime, Utc}; +use serde::Serialize; + +#[derive(Serialize, Debug)] +pub struct EbayItem { + pub title: String, + #[serde(rename = "itemId")] + pub item_id: String, + #[serde(rename = "dateFound")] + pub date_found: DateTime, + #[serde(rename = "currentBidPrice")] + pub current_bid_price: Option, + #[serde(rename = "buyItNowPrice", skip_serializing_if = "Option::is_none")] + pub buy_it_now_price: Option, + #[serde(rename = "hasBestOffer")] + pub has_best_offer: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub image_url: Option, + pub parsed: ParsedItemData, +} + +#[derive(Serialize, Debug)] +pub struct ParsedItemData { + #[serde(rename = "itemCount")] + pub item_count: i32, + #[serde(rename = "sizePerItemTB")] + pub size_per_item_tb: Option, + #[serde(rename = "totalTB")] + pub total_tb: Option, + #[serde(rename = "costPerTB")] + pub cost_per_tb: Option, + #[serde(rename = "needed_description_check")] + pub needed_description_check: bool, + #[serde(rename = "parser_engine")] + pub parser_engine: i32, +} + +#[derive(Debug)] +pub struct SizeQuantityInfo { + pub total_tb: f64, + pub quantity: i32, + pub individual_size_tb: f64, + pub needed_description_check: bool, +} diff --git a/ebay_storage/rust/src/main.rs b/ebay_storage/rust/src/main.rs index c971d26..db4e1a2 100644 --- a/ebay_storage/rust/src/main.rs +++ b/ebay_storage/rust/src/main.rs @@ -1,426 +1,66 @@ -// main.rs +// src/main.rs +mod cli; +mod item; +mod parser_utils; +mod html_utils; -// Import necessary crates use clap::Parser; -use regex::Regex; -use scraper::{Html, Selector}; -use serde::Serialize; -use std::fs::{self, File}; -use std::io::Write; -use std::path::{Path, PathBuf}; -use std::error::Error; -use chrono::{DateTime, Utc}; -use lazy_static::lazy_static; -use url::Url; +use std::fs; +use std::io::Write; +use std::path::PathBuf; +use std::error::Error as StdError; +use tracing::{info, error, warn, Level}; +use tracing_subscriber; -// Define constants -const PARSER_ENGINE_VERSION: i32 = 1; -const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"; +use cli::{Cli, Commands}; +use item::EbayItem; -// --- Lazy static Regex definitions --- -lazy_static! { - // Regex for parsing quantity from title (e.g., "LOT OF 10", "5-PACK") - static ref EXPLICIT_QTY_PATTERNS: Vec = vec![ - Regex::new(r"\b(?:LOT\s+OF|LOT)\s*\(?\s*(\d+)\s*\)?").unwrap(), - Regex::new(r"\b(?:LOT\s+OF|LOT)\s*\*\s*(\d+)").unwrap(), - Regex::new(r"\b(?:PACK\s+OF|PACK|BULK)\s*\(?\s*(\d+)\s*\)?").unwrap(), - Regex::new(r"\b(\d+)\s*-\s*PACK\b").unwrap(), - Regex::new(r"\b(\d+)\s*COUNT\b").unwrap(), - ]; - // Regex for parsing size from title (e.g., "500GB", "2TB") - static ref SIZE_REGEX: Regex = Regex::new(r"(\d+(?:\.\d+)?)\s*(TB|GB)\b").unwrap(); - // Regex for titles indicating a range of sizes or mixed items - static ref SIZE_RANGE_REGEX: Regex = Regex::new(r"\d+(?:\.\d+)?\s*(?:GB|TB)\s*(?:-|&|OR|TO)\s*\d+(?:\.\d+)?\s*(?:GB|TB)").unwrap(); - // Regex for extracting item ID from URL - static ref ITEM_ID_REGEX: Regex = Regex::new(r"/itm/(\d+)").unwrap(); - // Regex for parsing price, potentially a range - static ref PRICE_REGEX: Regex = Regex::new(r"\$?([\d,]+\.?\d*)").unwrap(); - // Regex for "NEW LISTING" prefix - case-insensitive to better match JS /i flag - static ref NEW_LISTING_REGEX: Regex = Regex::new(r"(?i)^\s*NEW LISTING\s*[:\-\s]*").unwrap(); -} +// Define a more specific error type for the application +// This type is now implicitly used by html_utils.rs as well due to function signatures. +type AppError = Box; -// --- Command Line Argument Parsing (using clap) --- -#[derive(Parser, Debug)] -#[clap(name = "ebay-scraper-rust", version = "0.1.0", about = "Scrapes eBay search results for SSD/HDD cost per TB.")] -struct Cli { - #[clap(subcommand)] - command: Option, - - /// The full eBay search URL to scrape. - url: Option, - - /// Save scraped HTML to a file (and download images if fetching from URL). - #[clap(long)] - save: Option, - - /// Load HTML from a file (disables network). Image download will not occur with --load. - #[clap(long)] - load: Option, - - /// Suppress informational logs, output only final JSON. - #[clap(long)] - only_json: bool, -} - -#[derive(Parser, Debug)] -enum Commands { - /// Scrapes latest listings. - Latest(LatestArgs), -} - -#[derive(Parser, Debug)] -struct LatestArgs { - /// Items per page (60, 120, or 240) - #[clap(long, default_value = "60")] - per_page: String, // Keep as string for validation - - /// Minimum cost (e.g., 50.00) - #[clap(long, default_value = "0.00")] - minimum_cost: f64, -} - -// --- Data Structures for Scraped Items (using serde) --- -#[derive(Serialize, Debug)] -struct EbayItem { - title: String, - #[serde(rename = "itemId")] - item_id: String, - #[serde(rename = "dateFound")] - date_found: DateTime, - #[serde(rename = "currentBidPrice")] - current_bid_price: Option, - #[serde(rename = "buyItNowPrice", skip_serializing_if = "Option::is_none")] // Keep skip for this one if JS does it - buy_it_now_price: Option, - #[serde(rename = "hasBestOffer")] - has_best_offer: bool, - #[serde(skip_serializing_if = "Option::is_none")] // Keep skip for this one if JS does it - image_url: Option, - parsed: ParsedItemData, -} - -#[derive(Serialize, Debug)] -struct ParsedItemData { - #[serde(rename = "itemCount")] - item_count: i32, - // MODIFIED: Removed skip_serializing_if to always include the field, even if null - #[serde(rename = "sizePerItemTB")] - size_per_item_tb: Option, - #[serde(rename = "totalTB")] - total_tb: Option, - #[serde(rename = "costPerTB")] - cost_per_tb: Option, - #[serde(rename = "needed_description_check")] - needed_description_check: bool, - #[serde(rename = "parser_engine")] - parser_engine: i32, -} - -#[derive(Debug)] -struct SizeQuantityInfo { - total_tb: f64, - quantity: i32, - individual_size_tb: f64, - needed_description_check: bool, -} - -// --- Logging --- -fn log_message(message: &str, quiet_mode: bool) { - if !quiet_mode { - eprintln!("{}", message); - } -} - -fn log_error(message: &str, quiet_mode: bool) { - if !quiet_mode { - eprintln!("ERROR: {}", message); - } -} - - -// --- Parsing Logic --- -mod parser { - use super::*; - - /// Parses size and quantity information from an item title. - pub fn parse_size_and_quantity(title: &str) -> SizeQuantityInfo { - let upper_title = title.to_uppercase(); - let mut total_tb = 0.0; - let mut quantity = 1; - let mut needed_description_check = false; - let mut individual_size_tb = 0.0; - - for pattern in EXPLICIT_QTY_PATTERNS.iter() { - if let Some(caps) = pattern.captures(&upper_title) { - if let Some(qty_match) = caps.get(1) { - if let Ok(parsed_qty) = qty_match.as_str().parse::() { - if parsed_qty > 0 && parsed_qty < 500 { - quantity = parsed_qty; - break; - } - } - } - } - } - - let mut size_matches: Vec<(f64, String)> = Vec::new(); - for caps in SIZE_REGEX.captures_iter(&upper_title) { - if let (Some(val_str), Some(unit_str)) = (caps.get(1), caps.get(2)) { - if let Ok(val) = val_str.as_str().parse::() { - size_matches.push((val, unit_str.as_str().to_string())); - } - } - } - - if !size_matches.is_empty() { - let mut unique_sizes_tb: Vec = size_matches.iter() - .map(|(val, unit)| if unit == "GB" { *val / 1000.0 } else { *val }) - .collect(); - unique_sizes_tb.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); - unique_sizes_tb.dedup(); - - if !unique_sizes_tb.is_empty() { - individual_size_tb = unique_sizes_tb[0]; - if unique_sizes_tb.len() > 1 { - needed_description_check = true; - } - } - } - - if SIZE_RANGE_REGEX.is_match(&upper_title) { - needed_description_check = true; - } - if quantity > 1 && upper_title.contains("MIXED") { - needed_description_check = true; - } - if upper_title.contains("CHECK THE DESCRIPTION") || upper_title.contains("CHECK DESCRIPTION") || upper_title.contains("SEE DESCRIPTION") { - if quantity > 1 || size_matches.is_empty() || size_matches.len() > 1 { - needed_description_check = true; - } - } - - if individual_size_tb > 0.0 { - total_tb = individual_size_tb * quantity as f64; - } - - if quantity > 1 && total_tb == 0.0 && !size_matches.is_empty() { - needed_description_check = true; - } - - if quantity == 1 && size_matches.len() == 1 && !needed_description_check { - // This condition is implicitly handled - } - - SizeQuantityInfo { - total_tb: (total_tb * 10000.0).round() / 10000.0, - quantity, - individual_size_tb: (individual_size_tb * 10000.0).round() / 10000.0, - needed_description_check, - } - } - - /// Parses price from a string, taking the first price if it's a range. - pub fn parse_price(price_text: &str) -> Option { - let lower_price_text = price_text.to_lowercase(); - if lower_price_text.contains(" to ") { - if let Some(first_part) = lower_price_text.split(" to ").next() { - if let Some(caps) = PRICE_REGEX.captures(first_part) { - if let Some(price_match) = caps.get(1) { - return price_match.as_str().replace(',', "").parse().ok(); - } - } - } - return None; - } - - if let Some(caps) = PRICE_REGEX.captures(price_text) { - if let Some(price_match) = caps.get(1) { - return price_match.as_str().replace(',', "").parse().ok(); - } - } - None - } -} - -// --- HTML Scraping Logic --- -mod html_scraper { - use super::*; - - /// Extracts item data from HTML content. - pub fn extract_data_from_html(html_content: &str, quiet_mode: bool) -> Result, Box> { - let document = Html::parse_document(html_content); - let mut items = Vec::new(); - let today = Utc::now(); - - let item_selector = Selector::parse("li.s-item, li.srp-results__item, div.s-item[role='listitem']").unwrap(); - let title_selector = Selector::parse(".s-item__title, .srp-results__title").unwrap(); - let price_selector = Selector::parse(".s-item__price").unwrap(); - let image_selector = Selector::parse(".s-item__image-wrapper img.s-item__image-img, .s-item__image img").unwrap(); - let link_selector = Selector::parse("a.s-item__link[href*='/itm/'], .s-item__info > a[href*='/itm/']").unwrap(); - let bid_count_selector = Selector::parse(".s-item__bid-count").unwrap(); - let best_offer_selector = Selector::parse(".s-item__purchase-options--bo, .s-item__best-offer").unwrap(); - let secondary_info_selector = Selector::parse(".s-item__subtitle, .s-item__secondary-text, .s-item__detail--secondary").unwrap(); - let auction_bin_price_selector = Selector::parse(".s-item__buy-it-now-price").unwrap(); - - for element in document.select(&item_selector) { - let raw_title_text = element.select(&title_selector).next().map(|el| el.text().collect::().trim().to_string()); - let price_text = element.select(&price_selector).next().map(|el| el.text().collect::().trim().to_string()); - - let item_id = element.select(&link_selector).next() - .and_then(|link_el| link_el.value().attr("href")) - .and_then(|href| ITEM_ID_REGEX.captures(href)) - .and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())); - - if raw_title_text.is_none() || price_text.is_none() || item_id.is_none() { - log_message("Skipping item due to missing title, price, or item ID.", quiet_mode); - continue; - } - let raw_title = raw_title_text.unwrap(); - let price_text = price_text.unwrap(); - let item_id = item_id.unwrap(); - - let cleaned_title = NEW_LISTING_REGEX.replace(&raw_title, "").trim().to_string(); - - let primary_display_price = parser::parse_price(&price_text); - - let mut current_bid_price: Option = None; - let mut final_buy_it_now_price: Option = None; - let mut has_best_offer = false; - let mut item_is_auction = false; - - if let Some(bid_el) = element.select(&bid_count_selector).next() { - if bid_el.text().collect::().to_lowercase().contains("bid") { - item_is_auction = true; - } - } - - if element.select(&best_offer_selector).next().is_some() { - has_best_offer = true; - } else { - for el in element.select(&secondary_info_selector) { - if el.text().collect::().to_lowercase().contains("or best offer") { - has_best_offer = true; - break; - } - } - } - - if item_is_auction { - current_bid_price = primary_display_price; - if let Some(bin_el) = element.select(&auction_bin_price_selector).next() { - final_buy_it_now_price = parser::parse_price(&bin_el.text().collect::()); - } - } else { - final_buy_it_now_price = primary_display_price; - } - - let image_url_val = element.select(&image_selector).next() - .and_then(|img_el| { - img_el.value().attr("data-src").or(img_el.value().attr("src")) - }) - .map(|s| s.to_string()); - - let parsed_size_info = parser::parse_size_and_quantity(&cleaned_title); - - let cost_per_tb = if let Some(price) = primary_display_price { - if parsed_size_info.total_tb > 0.0 { - Some(((price / parsed_size_info.total_tb) * 100.0).round() / 100.0) - } else { None } - } else { None }; - - let parsed_data = ParsedItemData { - item_count: parsed_size_info.quantity, - size_per_item_tb: if parsed_size_info.individual_size_tb > 0.0 { Some(parsed_size_info.individual_size_tb) } else { None }, - total_tb: if parsed_size_info.total_tb > 0.0 { Some(parsed_size_info.total_tb) } else { None }, - cost_per_tb, // This will be None if conditions aren't met, and serialized as null - needed_description_check: parsed_size_info.needed_description_check, - parser_engine: PARSER_ENGINE_VERSION, - }; - - items.push(EbayItem { - title: cleaned_title, - item_id, - date_found: today, - current_bid_price, - buy_it_now_price: final_buy_it_now_price, - has_best_offer, - image_url: image_url_val, - parsed: parsed_data, - }); - } - Ok(items) - } - - /// Downloads an image from a URL and saves it, preserving path structure. - pub async fn download_image(image_url_str: &str, base_save_directory: &Path, quiet_mode: bool) -> Result<(), Box> { - if image_url_str.is_empty() { - return Ok(()); - } - - let parsed_url = Url::parse(image_url_str)?; - - let image_path_from_url = parsed_url.path().trim_start_matches('/'); - if image_path_from_url.is_empty() { - return Err("Image URL has no path component".into()); - } - - let full_local_image_path = base_save_directory.join(image_path_from_url); - - if let Some(parent_dir) = full_local_image_path.parent() { - fs::create_dir_all(parent_dir)?; - log_message(&format!("Ensured image directory exists: {}", parent_dir.display()), quiet_mode); - } - - let client = reqwest::Client::builder().user_agent(USER_AGENT).build()?; - let response = client.get(image_url_str).send().await?; - - if !response.status().is_success() { - return Err(format!("Failed to download image {}. Status: {}", image_url_str, response.status()).into()); - } - - let mut file = File::create(&full_local_image_path)?; - let content = response.bytes().await?; - file.write_all(&content)?; - - log_message(&format!("Downloaded image: {}", full_local_image_path.display()), quiet_mode); - Ok(()) - } -} - - -// --- Main Application Logic --- #[tokio::main] -async fn main() -> Result<(), Box> { - let cli = Cli::parse(); - let quiet_mode = cli.only_json; +async fn main() -> Result<(), AppError> { + let cli_args = Cli::parse(); - log_message("Starting scraping process...", quiet_mode); + let subscriber_builder = tracing_subscriber::fmt().with_writer(std::io::stderr); + if cli_args.only_json { + subscriber_builder + .with_max_level(Level::ERROR) + .try_init()?; + } else { + subscriber_builder + .with_max_level(Level::INFO) + .try_init()?; + } + + info!("Starting scraping process..."); let html_content_to_parse: String; let mut should_download_images = false; let mut image_base_save_dir: Option = None; - - if let Some(html_file) = &cli.load { - log_message(&format!("Loading HTML from {}...", html_file), quiet_mode); - html_content_to_parse = fs::read_to_string(html_file)?; - log_message("HTML loaded. Network requests for page content disabled.", quiet_mode); + if let Some(html_file) = &cli_args.load { + info!(file_path = %html_file, "Loading HTML from file"); + html_content_to_parse = fs::read_to_string(html_file)?; // std::io::Error converts to AppError via ? + info!("HTML loaded. Network requests for page content disabled."); } else { - let url_to_fetch = match (&cli.command, &cli.url) { + let url_to_fetch = match (&cli_args.command, &cli_args.url) { (Some(Commands::Latest(latest_args)), _) => { let valid_per_page = ["60", "120", "240"]; if !valid_per_page.contains(&latest_args.per_page.as_str()) { let err_msg = format!("--per_page must be one of {}, got {}", valid_per_page.join(", "), latest_args.per_page); - log_error(&err_msg, quiet_mode); - return Err(err_msg.into()); + error!(error_message = %err_msg, "Invalid per_page argument"); + return Err(err_msg.into()); // String converts to AppError } if latest_args.minimum_cost < 0.0 { - let err_msg = "--minimum_cost must be a non-negative number."; - log_error(err_msg, quiet_mode); - return Err(err_msg.into()); + let err_msg = "--minimum_cost must be a non-negative number."; + error!(error_message = %err_msg, "Invalid minimum_cost argument"); + return Err(err_msg.into()); // String converts to AppError } let base_url = "https://www.ebay.com/sch/i.html?_nkw=&_sacat=175669&_from=R40&_fsrp=1&LH_PrefLoc=3&imm=1&_sop=10"; let url = format!("{}&_ipg={}&_udlo={:.2}", base_url, latest_args.per_page, latest_args.minimum_cost); - log_message(&format!("Constructed URL for 'latest': {}", url), quiet_mode); + info!(constructed_url = %url, "Constructed URL for 'latest' command"); url } (None, Some(url_arg)) => { @@ -428,28 +68,19 @@ async fn main() -> Result<(), Box> { } (None, None) => { let err_msg = "No URL provided and no command specified. Use --help for usage."; - log_error(err_msg, true); - return Err(err_msg.into()); + eprintln!("ERROR: {}", err_msg); + return Err(err_msg.into()); // String converts to AppError } }; - log_message(&format!("Navigating to {}...", url_to_fetch), quiet_mode); - let client = reqwest::Client::builder().user_agent(USER_AGENT).build()?; - let response = client.get(&url_to_fetch).send().await?; - if !response.status().is_success() { - let err_msg = format!("Failed to fetch URL: {} - Status: {}", url_to_fetch, response.status()); - log_error(&err_msg, quiet_mode); - return Err(err_msg.into()); - } - html_content_to_parse = response.text().await?; - log_message("Navigation successful. Page content retrieved.", quiet_mode); + html_content_to_parse = html_utils::fetch_html(&url_to_fetch).await?; // This now returns Result + + if let Some(save_path_str) = &cli_args.save { + info!(file_path = %save_path_str, "Saving HTML to file"); + let mut file = fs::File::create(save_path_str)?; // std::io::Error converts + file.write_all(html_content_to_parse.as_bytes())?; // std::io::Error converts + info!("HTML saved."); - if let Some(save_path_str) = &cli.save { - log_message(&format!("Saving HTML to {}...", save_path_str), quiet_mode); - let mut file = File::create(save_path_str)?; - file.write_all(html_content_to_parse.as_bytes())?; - log_message("HTML saved.", quiet_mode); - should_download_images = true; let save_file_path = PathBuf::from(save_path_str); let base_name = save_file_path.file_stem().unwrap_or_default().to_string_lossy().to_string(); @@ -461,47 +92,46 @@ async fn main() -> Result<(), Box> { } } - log_message("Extracting data...", quiet_mode); - let extracted_results = html_scraper::extract_data_from_html(&html_content_to_parse, quiet_mode)?; - log_message(&format!("Data extraction complete. Found {} items.", extracted_results.len()), quiet_mode); + info!("Extracting data..."); + let extracted_results: Vec = html_utils::extract_data_from_html(&html_content_to_parse)?; // This now returns Result, AppError> + info!(item_count = extracted_results.len(), "Data extraction complete."); if should_download_images && !extracted_results.is_empty() { if let Some(img_base_dir) = image_base_save_dir { - log_message(&format!("Downloading images into subdirectories of {}...", img_base_dir.display()), quiet_mode); + info!(directory = %img_base_dir.display(), "Downloading images"); let mut download_futures = Vec::new(); for item in &extracted_results { if let Some(img_url) = &item.image_url { let img_base_dir_clone = img_base_dir.clone(); - let img_url_clone = img_url.clone(); + let img_url_clone = img_url.clone(); let item_id_clone = item.item_id.clone(); download_futures.push(async move { - if let Err(e) = html_scraper::download_image(&img_url_clone, &img_base_dir_clone, quiet_mode).await { - log_error(&format!("Skipping image download for item ID {} (URL: {}) due to error: {}", item_id_clone, img_url_clone, e), quiet_mode); + if let Err(e) = html_utils::download_image(&img_url_clone, &img_base_dir_clone).await { // This now returns Result<(), AppError> + warn!(item_id = %item_id_clone, image_url = %img_url_clone, error = %e, "Skipping image download due to error"); } }); } } futures::future::join_all(download_futures).await; - log_message("Image download process finished.", quiet_mode); + info!("Image download process finished."); } } - if quiet_mode { - println!("{}", serde_json::to_string(&extracted_results)?); + if cli_args.only_json { + println!("{}", serde_json::to_string(&extracted_results)?); // serde_json::Error converts } else { - println!("{}", serde_json::to_string_pretty(&extracted_results)?); + println!("{}", serde_json::to_string_pretty(&extracted_results)?); // serde_json::Error converts } Ok(()) } -// --- Unit tests for parser functions (optional, but good practice) --- #[cfg(test)] mod tests { - use super::parser::*; - use super::SizeQuantityInfo; + use super::parser_utils::*; + use super::item::SizeQuantityInfo; fn assert_sq_info_eq(actual: SizeQuantityInfo, expected_total_tb: f64, expected_quantity: i32, expected_ind_size_tb: f64, expected_check: bool) { assert!((actual.total_tb - expected_total_tb).abs() < 0.0001, "TotalTB mismatch. Expected: {}, Got: {}", expected_total_tb, actual.total_tb); @@ -527,7 +157,7 @@ mod tests { ]; for (title, total_tb, quantity, ind_size_tb, check) in test_cases { - println!("Testing title: {}", title); + tracing::debug!(testing_title = %title, "Running test_parse_size_and_quantity"); let result = parse_size_and_quantity(title); assert_sq_info_eq(result, total_tb, quantity, ind_size_tb, check); } @@ -544,4 +174,3 @@ mod tests { assert_eq!(parse_price("25.50"), Some(25.50)); } } - diff --git a/ebay_storage/rust/src/parser_utils.rs b/ebay_storage/rust/src/parser_utils.rs new file mode 100644 index 0000000..25b0757 --- /dev/null +++ b/ebay_storage/rust/src/parser_utils.rs @@ -0,0 +1,122 @@ +// src/parser_utils.rs +use super::item::SizeQuantityInfo; +use lazy_static::lazy_static; +use regex::Regex; // Assuming item.rs is in the same directory (src) + +lazy_static! { + static ref EXPLICIT_QTY_PATTERNS: Vec = vec![ + Regex::new(r"\b(?:LOT\s+OF|LOT)\s*\(?\s*(\d+)\s*\)?").unwrap(), + Regex::new(r"\b(?:LOT\s+OF|LOT)\s*\*\s*(\d+)").unwrap(), + Regex::new(r"\b(?:PACK\s+OF|PACK|BULK)\s*\(?\s*(\d+)\s*\)?").unwrap(), + Regex::new(r"\b(\d+)\s*-\s*PACK\b").unwrap(), + Regex::new(r"\b(\d+)\s*COUNT\b").unwrap(), + ]; + static ref SIZE_REGEX: Regex = Regex::new(r"(\d+(?:\.\d+)?)\s*(TB|GB)\b").unwrap(); + static ref SIZE_RANGE_REGEX: Regex = + Regex::new(r"\d+(?:\.\d+)?\s*(?:GB|TB)\s*(?:-|&|OR|TO)\s*\d+(?:\.\d+)?\s*(?:GB|TB)") + .unwrap(); + static ref PRICE_REGEX: Regex = Regex::new(r"\$?([\d,]+\.?\d*)").unwrap(); +} + +/// Parses size and quantity information from an item title. +pub fn parse_size_and_quantity(title: &str) -> SizeQuantityInfo { + let upper_title = title.to_uppercase(); + let mut total_tb = 0.0; + let mut quantity = 1; + let mut needed_description_check = false; + let mut individual_size_tb = 0.0; + + for pattern in EXPLICIT_QTY_PATTERNS.iter() { + if let Some(caps) = pattern.captures(&upper_title) { + if let Some(qty_match) = caps.get(1) { + if let Ok(parsed_qty) = qty_match.as_str().parse::() { + if parsed_qty > 0 && parsed_qty < 500 { + quantity = parsed_qty; + break; + } + } + } + } + } + + let mut size_matches: Vec<(f64, String)> = Vec::new(); + for caps in SIZE_REGEX.captures_iter(&upper_title) { + if let (Some(val_str), Some(unit_str)) = (caps.get(1), caps.get(2)) { + if let Ok(val) = val_str.as_str().parse::() { + size_matches.push((val, unit_str.as_str().to_string())); + } + } + } + + if !size_matches.is_empty() { + let mut unique_sizes_tb: Vec = size_matches + .iter() + .map(|(val, unit)| if unit == "GB" { *val / 1000.0 } else { *val }) + .collect(); + unique_sizes_tb.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)); + unique_sizes_tb.dedup(); + + if !unique_sizes_tb.is_empty() { + individual_size_tb = unique_sizes_tb[0]; + if unique_sizes_tb.len() > 1 { + needed_description_check = true; + } + } + } + + if SIZE_RANGE_REGEX.is_match(&upper_title) { + needed_description_check = true; + } + if quantity > 1 && upper_title.contains("MIXED") { + needed_description_check = true; + } + if upper_title.contains("CHECK THE DESCRIPTION") + || upper_title.contains("CHECK DESCRIPTION") + || upper_title.contains("SEE DESCRIPTION") + { + if quantity > 1 || size_matches.is_empty() || size_matches.len() > 1 { + needed_description_check = true; + } + } + + if individual_size_tb > 0.0 { + total_tb = individual_size_tb * quantity as f64; + } + + if quantity > 1 && total_tb == 0.0 && !size_matches.is_empty() { + needed_description_check = true; + } + + if quantity == 1 && size_matches.len() == 1 && !needed_description_check { + // This condition is implicitly handled + } + + SizeQuantityInfo { + total_tb: (total_tb * 10000.0).round() / 10000.0, + quantity, + individual_size_tb: (individual_size_tb * 10000.0).round() / 10000.0, + needed_description_check, + } +} + +/// Parses price from a string, taking the first price if it's a range. +pub fn parse_price(price_text: &str) -> Option { + let lower_price_text = price_text.to_lowercase(); + if lower_price_text.contains(" to ") { + if let Some(first_part) = lower_price_text.split(" to ").next() { + if let Some(caps) = PRICE_REGEX.captures(first_part) { + if let Some(price_match) = caps.get(1) { + return price_match.as_str().replace(',', "").parse().ok(); + } + } + } + return None; + } + + if let Some(caps) = PRICE_REGEX.captures(price_text) { + if let Some(price_match) = caps.get(1) { + return price_match.as_str().replace(',', "").parse().ok(); + } + } + None +}