filter: reservoir sample the algo feed

so it's not the same static 15 pubkeys

Signed-off-by: William Casarin <jb55@jb55.com>
This commit is contained in:
William Casarin
2025-09-08 15:01:52 -07:00
parent c765b031e9
commit 0b4545d598
4 changed files with 33 additions and 35 deletions

15
Cargo.lock generated
View File

@@ -193,7 +193,7 @@ dependencies = [
"objc2-foundation 0.3.1", "objc2-foundation 0.3.1",
"parking_lot", "parking_lot",
"percent-encoding", "percent-encoding",
"windows-sys 0.52.0", "windows-sys 0.59.0",
"x11rb", "x11rb",
] ]
@@ -246,7 +246,7 @@ dependencies = [
"enumflags2", "enumflags2",
"futures-channel", "futures-channel",
"futures-util", "futures-util",
"rand 0.9.1", "rand 0.9.2",
"raw-window-handle", "raw-window-handle",
"serde", "serde",
"serde_repr", "serde_repr",
@@ -3542,6 +3542,7 @@ dependencies = [
"profiling", "profiling",
"puffin", "puffin",
"puffin_egui", "puffin_egui",
"rand 0.9.2",
"regex", "regex",
"secp256k1 0.30.0", "secp256k1 0.30.0",
"serde", "serde",
@@ -3683,7 +3684,7 @@ dependencies = [
"nostrdb", "nostrdb",
"notedeck", "notedeck",
"notedeck_ui", "notedeck_ui",
"rand 0.9.1", "rand 0.9.2",
"serde", "serde",
"serde_json", "serde_json",
"sha2", "sha2",
@@ -4603,7 +4604,7 @@ dependencies = [
"bytes", "bytes",
"getrandom 0.3.3", "getrandom 0.3.3",
"lru-slab", "lru-slab",
"rand 0.9.1", "rand 0.9.2",
"ring", "ring",
"rustc-hash 2.1.1", "rustc-hash 2.1.1",
"rustls", "rustls",
@@ -4657,9 +4658,9 @@ dependencies = [
[[package]] [[package]]
name = "rand" name = "rand"
version = "0.9.1" version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [ dependencies = [
"rand_chacha 0.9.0", "rand_chacha 0.9.0",
"rand_core 0.9.3", "rand_core 0.9.3",
@@ -6278,7 +6279,7 @@ dependencies = [
"http", "http",
"httparse", "httparse",
"log", "log",
"rand 0.9.1", "rand 0.9.2",
"rustls", "rustls",
"rustls-pki-types", "rustls-pki-types",
"sha1", "sha1",

View File

@@ -19,6 +19,7 @@ chrono = "0.4.40"
base32 = "0.4.0" base32 = "0.4.0"
base64 = "0.22.1" base64 = "0.22.1"
rmpv = "1.3.0" rmpv = "1.3.0"
rand = "0.9.2"
bech32 = { version = "0.11", default-features = false } bech32 = { version = "0.11", default-features = false }
bitflags = "2.5.0" bitflags = "2.5.0"
dirs = "5.0.1" dirs = "5.0.1"

View File

@@ -51,6 +51,7 @@ bitflags = { workspace = true }
regex = "1" regex = "1"
chrono = { workspace = true } chrono = { workspace = true }
indexmap = {workspace = true} indexmap = {workspace = true}
rand = {workspace = true}
crossbeam-channel = "0.5" crossbeam-channel = "0.5"
[dev-dependencies] [dev-dependencies]

View File

@@ -289,48 +289,43 @@ pub fn last_n_per_pubkey_from_tags(
kind: u64, kind: u64,
notes_per_pubkey: u64, notes_per_pubkey: u64,
) -> Result<Vec<Filter>, Error> { ) -> Result<Vec<Filter>, Error> {
use rand::Rng;
let mut filters: Vec<Filter> = vec![]; let mut filters: Vec<Filter> = vec![];
let mut rng = rand::rng();
for tag in note.tags() { // TODO: fix arbitrary MAX_FILTER limit in nostrdb
// TODO: fix arbitrary MAX_FILTER limit in nostrdb const LIMIT: usize = 15;
if filters.len() == 15 {
break;
}
for (i, tag) in note.tags().iter().enumerate() {
if tag.count() < 2 { if tag.count() < 2 {
continue; continue;
} }
let t = if let Some(t) = tag.get_unchecked(0).variant().str() { let Some("p") = tag.get_str(0) else {
t
} else {
continue; continue;
}; };
if t == "p" { let Some(author) = tag.get_id(1) else {
let author = if let Some(author) = tag.get_unchecked(1).variant().id() { continue;
author };
} else {
continue;
};
let mk_filter = || {
let mut filter = Filter::new(); let mut filter = Filter::new();
filter.start_authors_field()?; let _ = filter.start_authors_field();
filter.add_id_element(author)?; let _ = filter.add_id_element(author);
filter.end_field(); filter.end_field();
filters.push(filter.kinds([kind]).limit(notes_per_pubkey).build()); filter.kinds([kind]).limit(notes_per_pubkey).build()
} else if t == "t" { };
let hashtag = if let Some(hashtag) = tag.get_unchecked(1).variant().str() {
hashtag
} else {
continue;
};
let mut filter = Filter::new(); // since we're limited due to a nostrdb bug, we reservoir sample to keep things interesting
filter.start_tags_field('t')?; if filters.len() < LIMIT {
filter.add_str_element(hashtag)?; filters.push(mk_filter());
filter.end_field(); } else {
filters.push(filter.kinds([kind]).limit(notes_per_pubkey).build()); let j = rng.random_range(0..=i);
if j < LIMIT {
filters[j] = mk_filter();
}
} }
} }