Moaaarrr
All checks were successful
Cargo Build & Test / Rust project - latest (1.86) (push) Successful in 4m26s
Cargo Build & Test / Rust project - latest (1.87) (push) Successful in 4m42s
Cargo Build & Test / Rust project - latest (1.85.1) (push) Successful in 5m15s
Cargo Build & Test / Rust project - latest (1.88) (push) Successful in 11m40s
Cargo.lock (generated) · 98 changed lines
@@ -695,7 +695,9 @@ dependencies = [
  "chrono",
  "clap",
  "dirs",
+ "futures",
  "lazy_static",
+ "num_enum",
  "rayon",
  "regex",
  "reqwest",
@@ -848,6 +850,21 @@ dependencies = [
  "new_debug_unreachable",
 ]
 
+[[package]]
+name = "futures"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "futures-util",
+]
+
 [[package]]
 name = "futures-channel"
 version = "0.3.31"
@@ -864,12 +881,34 @@ version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
 
+[[package]]
+name = "futures-executor"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
 [[package]]
 name = "futures-io"
 version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
 
+[[package]]
+name = "futures-macro"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "futures-sink"
 version = "0.3.31"
@@ -888,8 +927,10 @@ version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
 dependencies = [
+ "futures-channel",
  "futures-core",
  "futures-io",
+ "futures-macro",
  "futures-sink",
  "futures-task",
  "memchr",
@@ -1566,6 +1607,28 @@ dependencies = [
  "autocfg",
 ]
 
+[[package]]
+name = "num_enum"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a"
+dependencies = [
+ "num_enum_derive",
+ "rustversion",
+]
+
+[[package]]
+name = "num_enum_derive"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d"
+dependencies = [
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "object"
 version = "0.36.7"
@@ -1772,6 +1835,15 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
 
+[[package]]
+name = "proc-macro-crate"
+version = "3.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
+dependencies = [
+ "toml_edit",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.95"
@@ -2534,6 +2606,23 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "toml_datetime"
+version = "0.6.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
+
+[[package]]
+name = "toml_edit"
+version = "0.22.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
+dependencies = [
+ "indexmap",
+ "toml_datetime",
+ "winnow",
+]
+
 [[package]]
 name = "tower"
 version = "0.5.2"
@@ -3009,6 +3098,15 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "winnow"
+version = "0.7.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "wit-bindgen-rt"
 version = "0.39.0"
Cargo.toml
@@ -8,7 +8,9 @@ actix-web = "4.11.0"
 chrono = { version = "0.4.41", features = ["serde"] }
 clap = { version = "4.5.40", features = ["derive"] }
 dirs = "6.0.0"
+futures = "0.3.31"
 lazy_static = "1.5.0"
+num_enum = "0.7.4"
 rayon = "1.10.0"
 regex = "1.11.1"
 reqwest = { version = "0.12.23", features = ["blocking"] }
src/db.rs · 135 changed lines
@@ -1,5 +1,8 @@
 use chrono::{DateTime, Utc};
+use num_enum::TryFromPrimitive;
 use rusqlite::Connection;
+use rusqlite::ToSql;
+use rusqlite::types::FromSql;
 use serde::Deserialize;
 use serde::Serialize;
 use std::path::Path;
@@ -205,15 +208,34 @@ impl ParsedPage {
     }
 }
 
-#[derive(Serialize, Debug, PartialEq, Copy, Clone)]
+#[repr(i64)]
+#[derive(Serialize, Debug, PartialEq, Copy, Clone, PartialOrd, Ord, Eq, TryFromPrimitive)]
+pub enum StorageParsingEngineVersion {
+    Testing = 0,
+    Regex = 1,
+    LLM = 2,
+}
+impl ToSql for StorageParsingEngineVersion {
+    fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> {
+        Ok((*self as i64).into())
+    }
+}
+impl FromSql for StorageParsingEngineVersion {
+    fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult<Self> {
+        let v = value.as_i64()?;
+        Self::try_from(v).map_err(|_| rusqlite::types::FromSqlError::OutOfRange(v))
+    }
+}
+
+#[derive(Serialize, Debug, PartialEq, Clone)]
 pub struct ParsedStorage {
     pub id: i64,
     pub item: i64,
     pub total_gigabytes: i64,
     pub quantity: i64,
     pub individual_size_gigabytes: i64,
-    pub parse_engine: i64,
-    pub needed_description_check: bool,
+    pub parse_engine: StorageParsingEngineVersion,
+    pub failed_reason: String,
 }
 impl DBTable for ParsedStorage {
     const TABLE_NAME: &'static str = "Storage_Parsed";
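A minimal round-trip sketch (not part of this commit) of how the new StorageParsingEngineVersion behaves against rusqlite; the table name and values are made up for illustration:

// Illustration only; assumes it sits in db.rs so StorageParsingEngineVersion and
// its ToSql/FromSql impls are in scope.
fn engine_roundtrip_demo() -> rusqlite::Result<()> {
    let conn = rusqlite::Connection::open_in_memory()?;
    conn.execute("CREATE TABLE t (engine INTEGER)", [])?;
    // ToSql stores the enum as its i64 discriminant (Regex == 1).
    conn.execute(
        "INSERT INTO t (engine) VALUES (?1)",
        [StorageParsingEngineVersion::Regex],
    )?;
    // FromSql converts the i64 back via TryFromPrimitive and reports
    // FromSqlError::OutOfRange for values no variant covers.
    let engine: StorageParsingEngineVersion =
        conn.query_row("SELECT engine FROM t", [], |row| row.get(0))?;
    assert_eq!(engine, StorageParsingEngineVersion::Regex);
    Ok(())
}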
@@ -224,13 +246,13 @@ impl DBTable for ParsedStorage {
         quantity INTEGER,
         sizes_gigabytes TEXT,
         parse_engine INTEGER,
-        need_description_check INTEGER,
+        failed_reason TEXT,
         UNIQUE(item, parse_engine)
         FOREIGN KEY(item) REFERENCES Listings(item_id)
     ";
 
     fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
-        let mut stmt = conn.prepare(&format!("SELECT id, item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, need_description_check FROM {}", Self::TABLE_NAME))?;
+        let mut stmt = conn.prepare(&format!("SELECT id, item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, failed_reason FROM {}", Self::TABLE_NAME))?;
         let iter = stmt.query_map([], |row| {
             Ok(ParsedStorage {
                 id: row.get(0)?,
@@ -242,7 +264,7 @@ impl DBTable for ParsedStorage {
                     r.parse().unwrap_or(0)
                 },
                 parse_engine: row.get(5)?,
-                needed_description_check: row.get(6)?,
+                failed_reason: row.get(6)?,
             })
         })?;
 
@@ -273,7 +295,7 @@ impl ParsedStorage {
                     r.parse().unwrap()
                 },
                 parse_engine: row.get(5)?,
-                needed_description_check: row.get(6)?,
+                failed_reason: row.get(6)?,
             })
         })
         .ok()
@@ -283,21 +305,26 @@ impl ParsedStorage {
     }
 
     pub fn add_or_update(&self, conn: &Connection) {
-        let _ = conn.execute(&format!("
+        let _ = conn
+            .execute(
+                &format!(
+                    "
             INSERT OR REPLACE INTO {}
-            (item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, need_description_check)
+            (item, total_gigabytes, quantity, sizes_gigabytes, parse_engine, failed_reason)
             VALUES
             (?1, ?2, ?3, ?4, ?5, ?6)",
-            Self::TABLE_NAME),
+                    Self::TABLE_NAME
+                ),
                 (
                     &self.item,
                     self.total_gigabytes,
                     self.quantity,
                     self.individual_size_gigabytes.to_string(),
                     self.parse_engine,
-                    self.needed_description_check
+                    &self.failed_reason,
+                ),
             )
-        ).unwrap();
+            .unwrap();
     }
 }
 
@@ -494,19 +521,40 @@ impl Listing {
             .collect()
     }
 
-    pub fn lookup_non_parsed(conn: &Connection) -> Vec<(i64, String)> {
-        let mut stmt = conn
-            .prepare(&format!(
+    pub fn lookup_pending_parse(
+        conn: &Connection,
+        allowed_engines: &[i64],
+        count_limit: u64,
+    ) -> Vec<(i64, String)> {
+        let engines_filter = if !allowed_engines.is_empty() {
+            format!(
+                "AND ({})",
+                allowed_engines
+                    .iter()
+                    .map(|e| "ps.parse_engine = ".to_owned() + &e.to_string())
+                    .collect::<Vec<_>>()
+                    .join(" OR ")
+            )
+        } else {
+            String::new()
+        };
+
+        let query = format!(
             "
-            SELECT ei.item_id, ei.title FROM {} AS ei
-            LEFT JOIN {} AS sp ON ei.item_id = sp.item
-            WHERE sp.item IS NULL",
+            SELECT listing.item_id, listing.title FROM {0} AS listing
+            WHERE NOT EXISTS (
+                SELECT 1 FROM {1} AS ps
+                WHERE listing.item_id = ps.item {engines_filter}
+            )
+            LIMIT {count_limit}
+            ",
             Self::TABLE_NAME,
             ParsedStorage::TABLE_NAME
-            ))
+        );
+        conn.prepare(&query)
             .ok()
-            .unwrap();
-        stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
+            .unwrap()
+            .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
             .ok()
             .unwrap()
             .map(|e| e.unwrap())
@@ -565,7 +613,7 @@ pub fn listings_get_filtered(
             history: ItemAppearances::lookup(conn, l.item_id),
             parsed: ParsedStorage::lookup(conn, l.item_id),
         })
-        .filter(|lr| lr.parsed.iter().any(|p| !p.needed_description_check))
+        .filter(|lr| lr.parsed.iter().any(|p| p.failed_reason.is_empty()))
        .collect::<Vec<ListingsFilterResult>>();
     info!(
         "Found total {} listings since (str:{} epoch:{})",
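For reference, a standalone sketch of the engine filter that the new lookup_pending_parse builds; the engine ids and the limit below are illustrative, not values used in this commit:

// With allowed_engines = [0, 1] and count_limit = 10 the generated query is roughly:
//   SELECT listing.item_id, listing.title FROM Listings AS listing
//   WHERE NOT EXISTS (
//       SELECT 1 FROM Storage_Parsed AS ps
//       WHERE listing.item_id = ps.item AND (ps.parse_engine = 0 OR ps.parse_engine = 1)
//   )
//   LIMIT 10
// i.e. a listing stays "pending" until at least one allowed engine has a row for it.
fn engines_filter_demo() {
    let allowed_engines: &[i64] = &[0, 1];
    let engines_filter = if !allowed_engines.is_empty() {
        format!(
            "AND ({})",
            allowed_engines
                .iter()
                .map(|e| "ps.parse_engine = ".to_owned() + &e.to_string())
                .collect::<Vec<_>>()
                .join(" OR ")
        )
    } else {
        String::new()
    };
    assert_eq!(engines_filter, "AND (ps.parse_engine = 0 OR ps.parse_engine = 1)");
}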
@@ -614,6 +662,24 @@ pub fn listings_get_filtered(
     listings
 }
 
+#[repr(i64)]
+#[derive(Serialize, Debug, PartialEq, Copy, Clone, TryFromPrimitive)]
+pub enum StorageLLMVersion {
+    Testing = 0,
+    Gemini2d5Prompt0 = 1,
+}
+impl ToSql for StorageLLMVersion {
+    fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> {
+        Ok((*self as i64).into())
+    }
+}
+impl FromSql for StorageLLMVersion {
+    fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult<Self> {
+        let v = value.as_i64()?;
+        Self::try_from(v).map_err(|_| rusqlite::types::FromSqlError::OutOfRange(v))
+    }
+}
+
 // This is mostly meant as a way to cache all of these.
 #[derive(Serialize, Debug, PartialEq, Clone)]
 pub struct ParsedLLMStorageResult {
@@ -623,6 +689,7 @@ pub struct ParsedLLMStorageResult {
     pub quantity: i64,
     pub gigabytes: i64,
     pub fail_reason: String,
+    pub llm_id: StorageLLMVersion,
 }
 impl DBTable for ParsedLLMStorageResult {
     const TABLE_NAME: &'static str = "ParsedLLMStorageResult";
@@ -632,12 +699,13 @@ impl DBTable for ParsedLLMStorageResult {
         title TEXT NOT NULL,
         quantity INTEGER NOT NULL,
         gigabytes INTEGER NOT NULL,
-        fail_reason TEXT NOT NULL
+        fail_reason TEXT NOT NULL,
+        llm_id INTEGER NOT NULL
     ";
 
     fn get_all(conn: &Connection) -> rusqlite::Result<Vec<Self>> {
         let mut stmt = conn.prepare(&format!(
-            "SELECT id, item_id, title, quantity, gigabytes, fail_reason FROM {}",
+            "SELECT id, item_id, title, quantity, gigabytes, fail_reason, llm_id FROM {}",
             Self::TABLE_NAME
         ))?;
         let iter = stmt.query_map([], |row| {
@@ -648,6 +716,7 @@ impl DBTable for ParsedLLMStorageResult {
                 quantity: row.get(3)?,
                 gigabytes: row.get(4)?,
                 fail_reason: row.get(5)?,
+                llm_id: row.get(6)?,
             })
         })?;
 
@@ -674,6 +743,7 @@ impl ParsedLLMStorageResult {
                 quantity: row.get(3)?,
                 gigabytes: row.get(4)?,
                 fail_reason: row.get(5)?,
+                llm_id: row.get(6)?,
             })
         })
         .ok()
@@ -689,9 +759,10 @@ impl ParsedLLMStorageResult {
                 title,
                 quantity,
                 gigabytes,
-                fail_reason
+                fail_reason,
+                llm_id
             )
-            VALUES (?1, ?2, ?3, ?4, ?5)",
+            VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
             Self::TABLE_NAME
         ),
         (
@@ -700,6 +771,7 @@ impl ParsedLLMStorageResult {
             self.quantity,
             self.gigabytes,
             self.fail_reason.clone(),
+            self.llm_id,
         ),
     )
     .unwrap();
@@ -733,7 +805,7 @@ pub struct Stats {
     rows_parsed_storage: i64,
     rows_parsed_page: i64,
     rows_item_appearances: i64,
-    // pub rows_parsed_storage_llm: i64,
+    pub rows_parsed_storage_llm: i64,
 }
 
 pub fn get_stats(conn: &Connection) -> Stats {
@@ -743,7 +815,7 @@ pub fn get_stats(conn: &Connection) -> Stats {
         rows_parsed_storage: ParsedStorage::get_count(conn),
         rows_parsed_page: ParsedPage::get_count(conn),
         rows_item_appearances: ItemAppearances::get_count(conn),
-        // rows_parsed_storage_llm: ParsedLLMStorageResult::get_count(conn),
+        rows_parsed_storage_llm: ParsedLLMStorageResult::get_count(conn),
     }
 }
 
@@ -765,7 +837,7 @@ mod tests {
         let listing = Listing {
             id: 1,
             item_id: 1234,
-            title: "Some Title".to_string(),
+            title: "Lot of 2 512GB SSD 6gb/s working with 5% wear".to_string(),
             buy_it_now_price_cents: Some(123),
             has_best_offer: false,
             image_url: "google.com".to_string(),
@@ -779,8 +851,8 @@ mod tests {
             total_gigabytes: 13,
             quantity: 3,
             individual_size_gigabytes: 13,
-            parse_engine: 9,
-            needed_description_check: true,
+            parse_engine: StorageParsingEngineVersion::Testing,
+            failed_reason: "".to_owned(),
         };
         parsed.add_or_update(&db);
         assert_eq!(ParsedStorage::lookup(&db, listing.item_id), vec![parsed]);
@@ -811,6 +883,7 @@ mod tests {
             item_id: 12345,
             quantity: 32,
             title: "Some Title".to_owned(),
+            llm_id: StorageLLMVersion::Testing,
         };
         parsedllmstorage.add_or_update(&db);
         assert_eq!(
src/main.rs · 120 changed lines
@@ -1,12 +1,11 @@
 use actix_web::{App, HttpServer, Responder, Result, get, post, web, web::Data};
 use chrono::{DateTime, Utc};
 use clap::Parser;
-use ebay_scraper_rust::db::{
-    DBTable, ItemAppearances, Listing, ParsedLLMStorageResult, ParsedPage, ParsedStorage,
-    SearchURL, get_initialized, get_stats, listings_get_filtered,
-};
+use ebay_scraper_rust::db;
+use ebay_scraper_rust::db::DBTable;
 use ebay_scraper_rust::parser::parse_dir;
 use ebay_scraper_rust::{parser_storage_e0, parser_storage_e1};
+use futures::future::join_all;
 use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
 use std::sync::Mutex;
@@ -32,6 +31,7 @@ struct AppCtx {
     db: rusqlite::Connection,
     db_llm: rusqlite::Connection,
     download_dir: PathBuf,
+    llm_parser: Option<actix_web::rt::task::JoinHandle<()>>,
 }
 
 #[derive(Deserialize, Debug)]
@@ -47,7 +47,7 @@ async fn listings_filtered_get(
     filter: web::Query<ListingsFilter>,
 ) -> Result<impl Responder> {
     let start = Instant::now();
-    let res = listings_get_filtered(
+    let res = db::listings_get_filtered(
         &ctx.lock().unwrap().db,
         &DateTime::<Utc>::from_timestamp(filter.since.unwrap_or(0), 0).unwrap(),
         filter.limit.unwrap_or(1_000),
@@ -64,12 +64,12 @@ async fn listings_filtered_get(
 
 #[get("/listing/{id}")]
 async fn listing_get(ctx: Data<Mutex<AppCtx>>, id: web::Path<i64>) -> Result<impl Responder> {
-    Ok(web::Json(Listing::lookup(&ctx.lock().unwrap().db, *id)))
+    Ok(web::Json(db::Listing::lookup(&ctx.lock().unwrap().db, *id)))
 }
 
 #[get("/listing/{id}/parsed")]
 async fn listing_parse_get(ctx: Data<Mutex<AppCtx>>, id: web::Path<i64>) -> Result<impl Responder> {
-    Ok(web::Json(ParsedStorage::lookup(
+    Ok(web::Json(db::ParsedStorage::lookup(
         &ctx.lock().unwrap().db,
         *id,
     )))
@@ -86,7 +86,7 @@ async fn listing_history_get(
     ctx: Data<Mutex<AppCtx>>,
     id: web::Path<i64>,
 ) -> Result<impl Responder> {
-    let history: Vec<_> = ItemAppearances::lookup(&ctx.lock().unwrap().db, *id)
+    let history: Vec<_> = db::ItemAppearances::lookup(&ctx.lock().unwrap().db, *id)
         .iter()
         // .inspect(|e| info!("got: {:?}", e))
         .filter_map(|e| {
@@ -99,29 +99,85 @@ async fn listing_history_get(
     Ok(web::Json(history))
 }
 
+async fn storage_parse_work(entries: &[(i64, String)]) -> Vec<db::ParsedStorage> {
+    let llm_futures: Vec<_> = entries
+        .iter()
+        .map(|(id, title)| parser_storage_e1::parse_size_and_quantity_llm(*id, title))
+        .collect();
+    let llm_future_results = join_all(llm_futures).await;
+    let llm_results = llm_future_results
+        .iter()
+        .flatten()
+        .map(|e| db::ParsedStorage {
+            id: 0,
+            item: e.item_id,
+            total_gigabytes: e.quantity * e.gigabytes,
+            quantity: e.quantity,
+            individual_size_gigabytes: e.gigabytes,
+            failed_reason: e.fail_reason.clone(),
+            parse_engine: db::StorageParsingEngineVersion::LLM,
+        });
+    // .inspect(|e| e.add_or_update(&unlocked.db))
+    // .map(|e| db::ParsedStorage {
+    //     id: 0,
+    //     item: e.item_id,
+    //     total_gigabytes: e.quantity * e.gigabytes,
+    //     quantity: e.quantity,
+    //     individual_size_gigabytes: e.gigabytes,
+    //     needed_description_check: !e.fail_reason.is_empty(),
+    //     parse_engine: db::StorageParsingEngineVersion::LLM,
+    // })
+    // .for_each(|e| e.add_or_update(&unlocked.db));
+
+    // And a regex based parse.
+    let regex_results = entries
+        .iter()
+        .map(|(id, title)| parser_storage_e0::parse_size_and_quantity(*id, &title));
+    // .for_each(|e| e.add_or_update(&unlocked.db));
+
+    regex_results.chain(llm_results).collect()
+}
+
+fn storage_parse_worker(ctx: Data<Mutex<AppCtx>>) -> actix_web::rt::task::JoinHandle<()> {
+    actix_web::rt::spawn(async move {
+        loop {
+            actix_web::rt::time::sleep(std::time::Duration::from_millis(1000)).await;
+            let ctx_unlocked = ctx.lock().unwrap();
+            let entries = db::Listing::lookup_pending_parse(&ctx_unlocked.db, &[], 10);
+            let parsed = storage_parse_work(entries.as_slice()).await;
+            for p in parsed {
+                p.add_or_update(&ctx_unlocked.db);
+            }
+        }
+    })
+}
+
 #[post("/listing/parse")]
 async fn parse_listings(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
+    // Prepare a background parser to go through and use an LLM to parse the
+    // storage info.
+    if ctx.lock().unwrap().llm_parser.is_none() {
+        ctx.clone().lock().unwrap().llm_parser = Some(storage_parse_worker(ctx.clone()));
+    }
+
     // Lets grab a few entries and then try parsing them with two engines.
     let ctx_locked = ctx.lock().unwrap();
-    let entries: Vec<_> = Listing::lookup_non_parsed(&ctx_locked.db)
+    let entries: Vec<_> = db::Listing::lookup_pending_parse(&ctx_locked.db, &[], 100)
         .iter()
         .take(10)
         .map(|e| e.clone())
         .collect();
     for (item_id, title) in &entries {
-        let ps0 = parser_storage_e0::parse_size_and_quantity(*item_id, &title);
-        ps0.add_or_update(&ctx_locked.db);
-
         let ps1 =
             parser_storage_e1::parse_size_and_quantity(&ctx_locked.db_llm, *item_id, &title).await;
         if ps1.is_some() {
-            info!(
-                "Parsed using an LLM title:{} and results:{:?}",
-                title,
-                ps1.unwrap()
-            );
+            // info!(
+            //     "Parsed using an LLM title:{} and results:{:?}",
+            //     title,
+            //     ps1.unwrap()
+            // );
             ps1.unwrap().add_or_update(&ctx_locked.db);
-            ps1.unwrap().add_or_update(&ctx_locked.db_llm);
+            // ps1.unwrap().add_or_update(&ctx_locked.db_llm); No need
         } else {
            error!("Failed to parse {item_id} with title {title}");
         }
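A hedged variation on the new storage_parse_worker, not code from this commit: same shape, but the AppCtx guard is scoped so it is released before the awaited LLM work, which keeps other handlers that call ctx.lock() from blocking while requests are in flight:

fn storage_parse_worker_sketch(ctx: Data<Mutex<AppCtx>>) -> actix_web::rt::task::JoinHandle<()> {
    actix_web::rt::spawn(async move {
        loop {
            actix_web::rt::time::sleep(std::time::Duration::from_millis(1000)).await;
            // Copy the pending (item_id, title) pairs out while holding the lock ...
            let entries = {
                let unlocked = ctx.lock().unwrap();
                db::Listing::lookup_pending_parse(&unlocked.db, &[], 10)
            };
            // ... then await the slow work with the lock released ...
            let parsed = storage_parse_work(entries.as_slice()).await;
            // ... and re-take it only to write the results back.
            let unlocked = ctx.lock().unwrap();
            for p in parsed {
                p.add_or_update(&unlocked.db);
            }
        }
    })
}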
@@ -131,7 +187,7 @@ async fn parse_listings(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
 
 #[get("/category")]
 async fn category_getnames(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
-    Ok(web::Json(SearchURL::names(&ctx.lock().unwrap().db)))
+    Ok(web::Json(db::SearchURL::names(&ctx.lock().unwrap().db)))
 }
 
 #[post("/category/{category}/parse")]
@@ -156,9 +212,9 @@ async fn category_parse(
 
 #[get("/stats")]
 async fn stats_get(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
-    let stats_db = get_stats(&ctx.lock().unwrap().db);
-    // let stats_db_llm = get_stats(&ctx.lock().unwrap().db_llm);
-    // stats_db.rows_parsed_storage_llm = stats_db_llm.rows_parsed_storage_llm;
+    let mut stats_db = db::get_stats(&ctx.lock().unwrap().db);
+    let stats_db_llm = db::get_stats(&ctx.lock().unwrap().db_llm);
+    stats_db.rows_parsed_storage_llm = stats_db_llm.rows_parsed_storage_llm;
     Ok(web::Json(stats_db))
 }
 
@@ -166,11 +222,11 @@ async fn stats_get(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
 async fn admin_get(ctx: Data<Mutex<AppCtx>>) -> Result<impl Responder> {
     let ctx_locked = ctx.lock().unwrap();
     let query_start_time = Instant::now();
-    let search_urls = SearchURL::get_all(&ctx_locked.db).unwrap_or_default();
-    let parsed_pages = ParsedPage::get_all(&ctx_locked.db).unwrap_or_default();
-    let parsed_storages = ParsedStorage::get_all(&ctx_locked.db).unwrap_or_default();
-    let item_appearances = ItemAppearances::get_all(&ctx_locked.db).unwrap_or_default();
-    let listings = Listing::get_all(&ctx_locked.db).unwrap_or_default();
+    let search_urls = db::SearchURL::get_all(&ctx_locked.db).unwrap_or_default();
+    let parsed_pages = db::ParsedPage::get_all(&ctx_locked.db).unwrap_or_default();
+    let parsed_storages = db::ParsedStorage::get_all(&ctx_locked.db).unwrap_or_default();
+    let item_appearances = db::ItemAppearances::get_all(&ctx_locked.db).unwrap_or_default();
+    let listings = db::Listing::get_all(&ctx_locked.db).unwrap_or_default();
     let total_query_time = query_start_time.elapsed().as_micros() as f64 / 1000.0;
 
     let html_gen_start_time = Instant::now();
@@ -313,17 +369,19 @@ async fn main() -> std::io::Result<()> {
 
     let app_data = Data::new(Mutex::new(AppCtx {
         download_dir: scrapedatadir.clone(),
-        db: get_initialized(None),
+        db: db::get_initialized(None),
         db_llm: {
             let db_path = scrapedatadir.with_file_name("llm.sqlite");
-            let db = rusqlite::Connection::open(db_path).unwrap();
-            ParsedLLMStorageResult::initialize(&db);
+            let db = rusqlite::Connection::open(&db_path).unwrap();
+            db::ParsedLLMStorageResult::initialize(&db);
+            info!("Created {:?} for caching LLM parsed title.", db_path);
             db
         },
+        llm_parser: None,
     }));
 
     // Prepare our backend via pulling in what catagories we are preconfigured with.
-    SearchURL::scan(&app_data.lock().unwrap().db, &scrapedatadir, "url.json");
+    db::SearchURL::scan(&app_data.lock().unwrap().db, &scrapedatadir, "url.json");
 
     HttpServer::new(move || {
         App::new()
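Worth noting for the db_llm block above: PathBuf::with_file_name replaces the final path component, so llm.sqlite ends up next to the scrape directory rather than inside it. A small illustration with made-up paths:

fn with_file_name_demo() {
    let scrapedatadir = std::path::PathBuf::from("/srv/scraper/scrape_data");
    assert_eq!(
        scrapedatadir.with_file_name("llm.sqlite"),
        std::path::PathBuf::from("/srv/scraper/llm.sqlite")
    );
}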
src/parser_storage_e0.rs
@@ -32,7 +32,7 @@ pub fn parse_size_and_quantity(item_id: i64, title: &str) -> ParsedStorage {
     let upper_title = title.to_uppercase();
     let mut total_gb = 0i64;
     let mut quantity = 1i64;
-    let mut needed_description_check = false;
+    let mut failed_reason = String::new();
     let mut individual_size_gb = 0i64;
 
     for pattern in EXPLICIT_QTY_PATTERNS.iter() {
@@ -68,36 +68,35 @@ pub fn parse_size_and_quantity(item_id: i64, title: &str) -> ParsedStorage {
         if !unique_sizes_gb.is_empty() {
             individual_size_gb = unique_sizes_gb[0];
             if unique_sizes_gb.len() > 1 {
-                needed_description_check = true;
+                failed_reason = "Mixed Sizes".to_owned();
             }
         }
     }
 
     if SIZE_RANGE_REGEX.is_match(&upper_title) {
-        needed_description_check = true;
+        failed_reason = "No Size Given".to_owned();
     }
     if quantity > 1 && upper_title.contains("MIXED") {
-        needed_description_check = true;
+        failed_reason = "Mixed Sizes".to_owned();
     }
     if upper_title.contains("CHECK THE DESCRIPTION")
         || upper_title.contains("CHECK DESCRIPTION")
         || upper_title.contains("SEE DESCRIPTION")
     {
         if quantity > 1 || size_matches.is_empty() || size_matches.len() > 1 {
-            needed_description_check = true;
+            failed_reason = "Mixed Sizes".to_owned();
        }
     }
+    if upper_title.contains("READ") {
+        failed_reason = "Mixed Sizes".to_owned();
+    }
 
     if individual_size_gb > 0 {
         total_gb = individual_size_gb * quantity;
     }
 
     if quantity > 1 && total_gb == 0 && !size_matches.is_empty() {
-        needed_description_check = true;
-    }
-
-    if quantity == 1 && size_matches.len() == 1 && !needed_description_check {
-        // This condition is implicitly handled
+        failed_reason = "No size given".to_owned();
     }
 
     ParsedStorage {
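Under the new scheme an empty failed_reason means the title parsed cleanly, and any non-empty string both flags and names the failure; the filter change in listings_get_filtered relies on exactly this. A one-line helper expressing that convention (illustrative, not from the commit):

fn parsed_cleanly(p: &crate::db::ParsedStorage) -> bool {
    p.failed_reason.is_empty()
}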
@@ -106,8 +105,8 @@ pub fn parse_size_and_quantity(item_id: i64, title: &str) -> ParsedStorage {
         total_gigabytes: total_gb,
         quantity,
         individual_size_gigabytes: individual_size_gb,
-        needed_description_check,
-        parse_engine: 0,
+        failed_reason: failed_reason,
+        parse_engine: crate::db::StorageParsingEngineVersion::Regex,
     }
 }
 
@@ -125,8 +124,8 @@ mod tests {
                 total_gigabytes: 512 * 3,
                 quantity: 3,
                 individual_size_gigabytes: 512,
-                parse_engine: 0,
-                needed_description_check: false,
+                parse_engine: crate::db::StorageParsingEngineVersion::Regex,
+                failed_reason: String::new(),
             },
             true,
         ),
@@ -138,8 +137,8 @@ mod tests {
                 total_gigabytes: 240,
                 quantity: 1,
                 individual_size_gigabytes: 240,
-                parse_engine: 0,
-                needed_description_check: false,
+                parse_engine: crate::db::StorageParsingEngineVersion::Regex,
+                failed_reason: String::new(),
             },
             true,
         ),
@@ -151,8 +150,8 @@ mod tests {
                 total_gigabytes: 1024,
                 quantity: 1,
                 individual_size_gigabytes: 1024,
-                parse_engine: 0,
-                needed_description_check: true,
+                parse_engine: crate::db::StorageParsingEngineVersion::Regex,
+                failed_reason: String::new(),
             },
             false, // Sadly this one fails :/
         ),
@@ -164,8 +163,8 @@ mod tests {
                 total_gigabytes: 7 * 1024,
                 quantity: 1,
                 individual_size_gigabytes: 7 * 1024,
-                parse_engine: 0,
-                needed_description_check: false,
+                parse_engine: crate::db::StorageParsingEngineVersion::Regex,
+                failed_reason: String::new(),
             },
             true,
         ),
@@ -177,8 +176,8 @@ mod tests {
                 total_gigabytes: 6 * 256,
                 quantity: 6,
                 individual_size_gigabytes: 256,
-                parse_engine: 0,
-                needed_description_check: false,
+                parse_engine: crate::db::StorageParsingEngineVersion::Regex,
+                failed_reason: String::new(),
             },
             true,
         ),
@@ -190,8 +189,8 @@ mod tests {
                 total_gigabytes: 1966,
                 quantity: 1,
                 individual_size_gigabytes: 1966,
-                parse_engine: 0,
-                needed_description_check: false,
+                parse_engine: crate::db::StorageParsingEngineVersion::Regex,
+                failed_reason: String::new(),
             },
             true,
         ),
src/parser_storage_e1.rs
@@ -1,10 +1,10 @@
-use crate::db::ParsedLLMStorageResult;
-use crate::db::ParsedStorage;
-use actix_web::mime::APPLICATION_JSON;
-use reqwest::header::AUTHORIZATION;
-use reqwest::header::CONTENT_TYPE;
+use crate::db::{
+    ParsedLLMStorageResult, ParsedStorage, StorageLLMVersion, StorageParsingEngineVersion,
+};
+use reqwest::header::{AUTHORIZATION, CONTENT_TYPE};
 use serde::{Deserialize, Serialize};
 use serde_json::json;
+use tracing::error;
 
 // Given this prompt and a string of "(Lot of 6) Samsung MZ-VLB2560 256GB M.2NVMe Internal SSD
 // (MZVLB256HBHQ-000H1)", we get 338 input tokens and 36 output tokens. Assuming no caching, then
@@ -28,22 +28,6 @@ And an example for an unclear title of "Pallet of assorted 128GB to 5TB drives";
 }
 "#;
 
-fn create_request(title: &str) -> serde_json::Value {
-    json!({
-        "model": "gemini-2.5-flash-lite",
-        "messages": [
-            {
-                "role": "system",
-                "content": SYSTEM_PROMPT
-            },
-            {
-                "role": "user",
-                "content": title
-            }
-        ]
-    })
-}
-
 #[derive(Deserialize, Serialize, Debug, PartialEq, Clone)]
 struct LLMParsedResponse {
     pub quantity: i64,
@@ -66,40 +50,111 @@ struct OpenAIMessage {
     content: String,
 }
 
-/// Parses size and quantity information from an item title.
-pub async fn parse_size_and_quantity(
-    db: &rusqlite::Connection,
+#[cfg(test)]
+const OPENAI_LLM_URL: &str = "https://badurl.hak8or.com/litellm_api/chat/completions";
+#[cfg(not(test))]
+const OPENAI_LLM_URL: &str = "https://ai.hak8or.com/litellm_api/chat/completions";
+
+#[cfg(test)]
+const OPENAI_LLM_API_KEY: &str = "Bearer sk-YmVlcC1ib29wLWEtcm9ib3Q";
+#[cfg(not(test))]
+const OPENAI_LLM_API_KEY: &str = "Bearer sk-HMGML94x2ag6ggOoDghSGA";
+
+pub async fn parse_size_and_quantity_llm(
     item_id: i64,
     title: &str,
-) -> Option<ParsedStorage> {
+) -> Option<ParsedLLMStorageResult> {
     let client = reqwest::Client::new();
     let req = client
-        .post("https://ai.hak8or.com/litellm_api/chat/completions")
-        .header(CONTENT_TYPE, APPLICATION_JSON.to_string())
-        .header(AUTHORIZATION, "Bearer sk-HMGML94x2ag6ggOoDghSGA")
-        .body(create_request(title).to_string());
+        .post(OPENAI_LLM_URL)
+        .header(CONTENT_TYPE, actix_web::mime::APPLICATION_JSON.to_string())
+        .header(AUTHORIZATION, OPENAI_LLM_API_KEY)
+        .body(
+            json!({
+                "model": "gemini-2.5-flash-lite",
+                "reasoning_effort": "disable",
+                "thinking": {"type": "disabled", "budget_tokens": 0},
+                "messages": [
+                    { "role": "system", "content": SYSTEM_PROMPT },
+                    { "role": "user", "content": title }
+                ]
+            })
+            .to_string(),
+        );
     let reply_body = req.send().await.ok()?.text().await.ok()?;
     let repl_json: OpenAIResponse = serde_json::from_str(&reply_body).ok()?;
+    match repl_json.choices.len() {
+        0 => {
+            error!("When parsing title, LLM returned ZERO choices");
+            return None;
+        }
+        1 => { /* Nothing to do */ }
+        a => error!("When parsing title, LLM returned {a}, >1 choices, using first!"),
+    }
     let reply_parsed_storage_json: LLMParsedResponse =
         serde_json::from_str(&repl_json.choices[0].message.content).ok()?;
 
-    let plsr = ParsedLLMStorageResult {
+    if !reply_parsed_storage_json.fail_reason.is_empty() {
+        error!(
+            "Failed parsing item_id:{item_id}, title:{title}, due to reason:{}",
+            reply_parsed_storage_json.fail_reason
+        );
+    }
+
+    Some(ParsedLLMStorageResult {
         id: 0,
         fail_reason: reply_parsed_storage_json.fail_reason.clone(),
         gigabytes: reply_parsed_storage_json.gigabytes,
         item_id,
         quantity: reply_parsed_storage_json.quantity,
         title: title.to_owned(),
-    };
+        llm_id: StorageLLMVersion::Gemini2d5Prompt0,
+    })
+}
+
+// Since we can't have a hashmap in a const, and I don't want to play with
+// making our parsed result struct contain a CoW string for fail_reason and
+// title, we are stuck with this ...
+pub fn parse_cached(item_id: i64, title: &str) -> Option<ParsedLLMStorageResult> {
+    match title {
+        "Lot of 2 512GB SSD 6gb/s working with 5% wear" => Some(ParsedLLMStorageResult {
+            id: 0,
+            item_id: item_id,
+            fail_reason: "".to_string(),
+            gigabytes: 512,
+            quantity: 2,
+            title: title.to_owned(),
+            llm_id: StorageLLMVersion::Testing,
+        }),
+        "Lot of 2 assorted SSD" => Some(ParsedLLMStorageResult {
+            id: 0,
+            fail_reason: "mixed sizes".to_owned(),
+            gigabytes: 0,
+            item_id,
+            quantity: 0,
+            title: title.to_owned(),
+            llm_id: StorageLLMVersion::Testing,
+        }),
+        _ => None,
+    }
+}
+
+/// Parses size and quantity information from an item title.
+pub async fn parse_size_and_quantity(
+    db: &rusqlite::Connection,
+    item_id: i64,
+    title: &str,
+) -> Option<ParsedStorage> {
+    let plsr = parse_size_and_quantity_llm(item_id, title).await?;
     plsr.add_or_update(&db);
 
     Some(ParsedStorage {
         id: 0,
         item: item_id,
-        total_gigabytes: reply_parsed_storage_json.quantity * reply_parsed_storage_json.gigabytes,
-        quantity: reply_parsed_storage_json.quantity,
-        individual_size_gigabytes: reply_parsed_storage_json.gigabytes,
-        needed_description_check: !reply_parsed_storage_json.fail_reason.is_empty(),
-        parse_engine: 1,
+        total_gigabytes: plsr.quantity * plsr.gigabytes,
+        quantity: plsr.quantity,
+        individual_size_gigabytes: plsr.gigabytes,
+        failed_reason: plsr.fail_reason,
+        parse_engine: StorageParsingEngineVersion::LLM,
     })
 }
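A usage sketch for the new parse_cached helper (not from the commit): it only answers for the two hard-coded titles, presumably so tests can stay off the network, and returns None for anything else:

fn parse_cached_demo() {
    // The item id is arbitrary; parse_cached only matches on the title.
    let hit = parse_cached(42, "Lot of 2 512GB SSD 6gb/s working with 5% wear").unwrap();
    assert_eq!(hit.quantity, 2);
    assert_eq!(hit.gigabytes, 512);
    assert!(hit.fail_reason.is_empty());

    assert!(parse_cached(42, "Totally made up title").is_none());
}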