@@ -230,6 +230,7 @@ enum ndb_query_plan {
230
230
NDB_PLAN_AUTHOR_KINDS ,
231
231
NDB_PLAN_CREATED ,
232
232
NDB_PLAN_TAGS ,
233
+ NDB_PLAN_SEARCH ,
233
234
};
234
235
235
236
// A id + u64 + timestamp
@@ -334,27 +335,27 @@ static int ndb_make_noted_text_search_key(unsigned char *buf, int bufsize,
334
335
335
336
/**
 * Build the text-search key used as the starting point of an ascending
 * scan for `word`.
 *
 * The key is produced by ndb_make_text_search_key() with `since` as its
 * timestamp component and 0 as its note id component.
 *
 * NOTE(review): the literal 0 passed as the third argument is presumably
 * the word index -- confirm against ndb_make_text_search_key().
 *
 * @param buf      output buffer that receives the key
 * @param bufsize  capacity of `buf` in bytes
 * @param wordlen  length of `word` in bytes
 * @param word     the search word
 * @param since    timestamp stored in the key
 * @param keysize  receives the key size (written by the callee)
 * @return the result of ndb_make_text_search_key(); callers treat 0 as
 *         "key could not be built"
 */
static int ndb_make_text_search_key_low(unsigned char *buf, int bufsize,
					int wordlen, const char *word,
					uint64_t since,
					int *keysize)
{
	const uint64_t lowest_note_id = 0;

	return ndb_make_text_search_key(buf, bufsize, 0, wordlen, word,
					since, lowest_note_id, keysize);
}
345
346
346
347
/**
 * Build the text-search key used as the starting point of a descending
 * scan for `word`.
 *
 * The key is produced by ndb_make_text_search_key() with `until` as its
 * timestamp component and INT32_MAX as its note id component.
 *
 * NOTE(review): the note id bound is INT32_MAX even though note ids are
 * carried as uint64_t here -- confirm this upper bound is intentional.
 * NOTE(review): the literal 0 passed as the third argument is presumably
 * the word index -- confirm against ndb_make_text_search_key().
 *
 * @param buf      output buffer that receives the key
 * @param bufsize  capacity of `buf` in bytes
 * @param wordlen  length of `word` in bytes
 * @param word     the search word
 * @param until    timestamp stored in the key
 * @param keysize  receives the key size (written by the callee)
 * @return the result of ndb_make_text_search_key(); callers treat 0 as
 *         "key could not be built"
 */
static int ndb_make_text_search_key_high(unsigned char *buf, int bufsize,
					 int wordlen, const char *word,
					 uint64_t until,
					 int *keysize)
{
	const uint64_t highest_note_id = INT32_MAX;

	return ndb_make_text_search_key(buf, bufsize, 0, wordlen, word,
					until, highest_note_id, keysize);
}
356
357
357
/**
 * Signature shared by the text-search key builders
 * (ndb_make_text_search_key_low / ndb_make_text_search_key_high), so the
 * search code can select one of them through a single function pointer.
 * `timestamp` is the time bound written into the generated key.
 */
typedef int (*ndb_text_search_key_order_fn)(unsigned char *buf,
					    int bufsize,
					    int wordlen,
					    const char *word,
					    uint64_t timestamp,
					    int *keysize);
358
359
359
360
/** From LMDB: Compare two items lexically */
360
361
static int mdb_cmp_memn (const MDB_val * a , const MDB_val * b ) {
@@ -3044,6 +3045,43 @@ static int query_is_full(struct ndb_query_results *results, int limit)
3044
3045
return cursor_count (& results -> cur , sizeof (struct ndb_query_result )) >= limit ;
3045
3046
}
3046
3047
3048
+ static int ndb_query_plan_execute_search (struct ndb_txn * txn ,
3049
+ struct ndb_filter * filter ,
3050
+ struct ndb_query_results * results ,
3051
+ int limit )
3052
+ {
3053
+ const char * search ;
3054
+ int i ;
3055
+ struct ndb_text_search_results text_results ;
3056
+ struct ndb_text_search_result * text_result ;
3057
+ struct ndb_text_search_config config ;
3058
+ struct ndb_query_result result ;
3059
+
3060
+ ndb_default_text_search_config (& config );
3061
+
3062
+ if (!(search = ndb_filter_find_search (filter )))
3063
+ return 0 ;
3064
+
3065
+ if (!ndb_text_search_with (txn , search , & text_results , & config , filter ))
3066
+ return 0 ;
3067
+
3068
+ for (i = 0 ; i < text_results .num_results ; i ++ ) {
3069
+ if (query_is_full (results , limit ))
3070
+ break ;
3071
+
3072
+ text_result = & text_results .results [i ];
3073
+
3074
+ result .note = text_result -> note ;
3075
+ result .note_size = text_result -> note_size ;
3076
+ result .note_id = text_result -> key .note_id ;
3077
+
3078
+ if (!push_query_result (results , & result ))
3079
+ break ;
3080
+ }
3081
+
3082
+ return 1 ;
3083
+ }
3084
+
3047
3085
static int ndb_query_plan_execute_ids (struct ndb_txn * txn ,
3048
3086
struct ndb_filter * filter ,
3049
3087
struct ndb_query_results * results ,
@@ -3456,15 +3494,18 @@ static int ndb_query_plan_execute_kinds(struct ndb_txn *txn,
3456
3494
3457
3495
static enum ndb_query_plan ndb_filter_plan (struct ndb_filter * filter )
3458
3496
{
3459
- struct ndb_filter_elements * ids , * kinds , * authors , * tags ;
3497
+ struct ndb_filter_elements * ids , * kinds , * authors , * tags , * search ;
3460
3498
3461
3499
ids = ndb_filter_find_elements (filter , NDB_FILTER_IDS );
3500
+ search = ndb_filter_find_elements (filter , NDB_FILTER_SEARCH );
3462
3501
kinds = ndb_filter_find_elements (filter , NDB_FILTER_KINDS );
3463
3502
authors = ndb_filter_find_elements (filter , NDB_FILTER_AUTHORS );
3464
3503
tags = ndb_filter_find_elements (filter , NDB_FILTER_TAGS );
3465
3504
3466
3505
// this is roughly similar to the heuristic in strfry's dbscan
3467
- if (ids ) {
3506
+ if (search ) {
3507
+ return NDB_PLAN_SEARCH ;
3508
+ } else if (ids ) {
3468
3509
return NDB_PLAN_IDS ;
3469
3510
} else if (kinds && authors && authors -> count <= 10 ) {
3470
3511
return NDB_PLAN_AUTHOR_KINDS ;
@@ -3483,6 +3524,7 @@ static const char *ndb_query_plan_name(int plan_id)
3483
3524
{
3484
3525
switch (plan_id ) {
3485
3526
case NDB_PLAN_IDS : return "ids" ;
3527
+ case NDB_PLAN_SEARCH : return "search" ;
3486
3528
case NDB_PLAN_KINDS : return "kinds" ;
3487
3529
case NDB_PLAN_TAGS : return "tags" ;
3488
3530
case NDB_PLAN_CREATED : return "created" ;
@@ -3518,6 +3560,11 @@ static int ndb_query_filter(struct ndb_txn *txn, struct ndb_filter *filter,
3518
3560
return 0 ;
3519
3561
break ;
3520
3562
3563
+ case NDB_PLAN_SEARCH :
3564
+ if (!ndb_query_plan_execute_search (txn , filter , & results , limit ))
3565
+ return 0 ;
3566
+ break ;
3567
+
3521
3568
// We have just kinds, just scan the kind index
3522
3569
case NDB_PLAN_KINDS :
3523
3570
if (!ndb_query_plan_execute_kinds (txn , filter , & results , limit ))
@@ -4031,24 +4078,44 @@ int ndb_text_search_with(struct ndb_txn *txn, const char *query,
4031
4078
struct ndb_word * search_word ;
4032
4079
struct ndb_note * note ;
4033
4080
struct cursor cur ;
4081
+ uint64_t since , until , timestamp_op , * pint , note_size ;
4034
4082
ndb_text_search_key_order_fn key_order_fn ;
4035
4083
MDB_dbi text_db ;
4036
4084
MDB_cursor * cursor ;
4037
4085
MDB_val k , v ;
4038
4086
int i , j , keysize , saved_size , limit ;
4039
4087
MDB_cursor_op op , order_op ;
4040
4088
4089
+ note_size = 0 ;
4090
+ note = 0 ;
4041
4091
saved = NULL ;
4042
4092
ndb_text_search_results_init (results );
4043
4093
ndb_search_words_init (& search_words );
4044
4094
4045
- // search config
4095
+ until = UINT64_MAX ;
4096
+ since = 0 ;
4046
4097
limit = MAX_TEXT_SEARCH_RESULTS ;
4098
+
4099
+ // until, since from filter
4100
+ if (filter != NULL ) {
4101
+ if ((pint = ndb_filter_get_int (filter , NDB_FILTER_UNTIL )))
4102
+ until = * pint ;
4103
+
4104
+ if ((pint = ndb_filter_get_int (filter , NDB_FILTER_SINCE )))
4105
+ since = * pint ;
4106
+
4107
+ if ((pint = ndb_filter_get_int (filter , NDB_FILTER_LIMIT )))
4108
+ limit = * pint ;
4109
+ }
4110
+
4047
4111
order_op = MDB_PREV ;
4048
4112
key_order_fn = ndb_make_text_search_key_high ;
4113
+ timestamp_op = until ;
4049
4114
if (config ) {
4050
4115
if (config -> order == NDB_ORDER_ASCENDING ) {
4051
4116
order_op = MDB_NEXT ;
4117
+ // set the min timestamp value to since when ascending
4118
+ timestamp_op = since ;
4052
4119
key_order_fn = ndb_make_text_search_key_low ;
4053
4120
}
4054
4121
limit = min (limit , config -> limit );
@@ -4067,9 +4134,11 @@ int ndb_text_search_with(struct ndb_txn *txn, const char *query,
4067
4134
return 0 ;
4068
4135
}
4069
4136
4070
- // TODO: sort words from largest to smallest. This should complete the
4071
- // query quicker because the larger words are likely to have fewer
4072
- // entries in the search index.
4137
+ // This should complete the query quicker because the larger words are
4138
+ // likely to have fewer entries in the search index. This is not always
4139
+ // true. Words with higher frequency (like bitcoin on nostr in 2024)
4140
+ // may be slower. TODO: Skip word recursion by leveraging a minimal
4141
+ // perfect hashmap of parsed words on a note
4073
4142
sort_largest_to_smallest (& search_words );
4074
4143
4075
4144
// for each word, we recursively find all of the submatches
@@ -4099,7 +4168,9 @@ int ndb_text_search_with(struct ndb_txn *txn, const char *query,
4099
4168
// match
4100
4169
if (!key_order_fn (buffer , sizeof (buffer ),
4101
4170
search_words .words [0 ].word_len ,
4102
- search_words .words [0 ].word , & keysize ))
4171
+ search_words .words [0 ].word ,
4172
+ timestamp_op ,
4173
+ & keysize ))
4103
4174
{
4104
4175
// word is too big to fit in 1024-sized key
4105
4176
continue ;
@@ -4172,10 +4243,12 @@ int ndb_text_search_with(struct ndb_txn *txn, const char *query,
4172
4243
4173
4244
// save the first key match, since we will continue from
4174
4245
// this on the next root word result
4175
- if (j == 0 && !saved ) {
4176
- memcpy (saved_buf , k .mv_data , k .mv_size );
4177
- saved = saved_buf ;
4178
- saved_size = k .mv_size ;
4246
+ if (j == 0 ) {
4247
+ if (!saved ) {
4248
+ memcpy (saved_buf , k .mv_data , k .mv_size );
4249
+ saved = saved_buf ;
4250
+ saved_size = k .mv_size ;
4251
+ }
4179
4252
4180
4253
// since we will be trying to match the same
4181
4254
// note_id on all subsequent word matches,
@@ -4185,15 +4258,20 @@ int ndb_text_search_with(struct ndb_txn *txn, const char *query,
4185
4258
// remaining word queries
4186
4259
if (filter ) {
4187
4260
if ((note = ndb_get_note_by_key (txn ,
4188
- result -> key .note_id , NULL )))
4261
+ result -> key .note_id ,
4262
+ & note_size )))
4189
4263
{
4190
4264
if (!ndb_filter_matches (filter , note )) {
4191
4265
break ;
4192
4266
}
4267
+ result -> note = note ;
4268
+ result -> note_size = note_size ;
4193
4269
}
4194
4270
}
4195
4271
}
4196
4272
4273
+ result -> note = note ;
4274
+ result -> note_size = note_size ;
4197
4275
last_candidate = * result ;
4198
4276
last_result = & last_candidate ;
4199
4277
}
0 commit comments