nostrdb: search: sort search terms from largest to smallest
Add a helper for sorting search words from largest to smallest. This should help search performance. For example, let's say our search index is like so: "the pokemon is cool": the, the, the, ... (* 1000). Our root word search would have to start 1000 new recursive queries. By sorting by the largest word: pokemon, pokemon, pokemon, ... (* 10), we only have to do 10 recursive searches, assuming larger words are less common, which will likely be the case most of the time. Signed-off-by: William Casarin <jb55@jb55.com>
This commit is contained in:
committed by
Daniel D’Aquino
parent
04d4ff4e99
commit
46541694a0
@@ -3990,6 +3990,32 @@ void ndb_text_search_config_set_limit(struct ndb_text_search_config *cfg, int li
|
|||||||
cfg->limit = limit;
|
cfg->limit = limit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int compare_search_words(const void *pa, const void *pb)
|
||||||
|
{
|
||||||
|
struct ndb_word *a, *b;
|
||||||
|
|
||||||
|
a = (struct ndb_word *)pa;
|
||||||
|
b = (struct ndb_word *)pb;
|
||||||
|
|
||||||
|
if (a->word_len == b->word_len) {
|
||||||
|
return 0;
|
||||||
|
} else if (a->word_len > b->word_len) {
|
||||||
|
// biggest words should be at the front of the list,
|
||||||
|
// so we say it's "smaller" here
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort search words from largest to smallest. Larger words are less likely
|
||||||
|
// in the index, allowing our scan to walk fewer words at the root when
|
||||||
|
// recursively matching.
|
||||||
|
void sort_largest_to_smallest(struct ndb_search_words *words)
|
||||||
|
{
|
||||||
|
qsort(words->words, words->num_words, sizeof(words->words[0]), compare_search_words);
|
||||||
|
}
|
||||||
|
|
||||||
int ndb_text_search(struct ndb_txn *txn, const char *query,
|
int ndb_text_search(struct ndb_txn *txn, const char *query,
|
||||||
struct ndb_text_search_results *results,
|
struct ndb_text_search_results *results,
|
||||||
struct ndb_text_search_config *config)
|
struct ndb_text_search_config *config)
|
||||||
@@ -4038,6 +4064,11 @@ int ndb_text_search(struct ndb_txn *txn, const char *query,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sort words from largest to smallest. This should complete the
|
||||||
|
// query quicker because the larger words are likely to have fewer
|
||||||
|
// entries in the search index.
|
||||||
|
sort_largest_to_smallest(&search_words);
|
||||||
|
|
||||||
// for each word, we recursively find all of the submatches
|
// for each word, we recursively find all of the submatches
|
||||||
while (results->num_results < limit) {
|
while (results->num_results < limit) {
|
||||||
last_result = NULL;
|
last_result = NULL;
|
||||||
|
|||||||
Reference in New Issue
Block a user