Skip to content

Commit f16e026

Browse files
committed
Fix Unicode case-insensitive search for keyword queries
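Wrap the searched columns in a custom lower_unicode() SQL function and lowercase the search term in Go, so that keyword matching is case-insensitive for Unicode text; SQLite's built-in LIKE operator is only case-insensitive for ASCII characters.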
1 parent 869cbd4 commit f16e026

File tree

6 files changed

+129
-7
lines changed

6 files changed

+129
-7
lines changed

pkg/sqlite/driver.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ func (d *CustomSQLiteDriver) Open(dsn string) (driver.Conn, error) {
3030
"durationToTinyInt": durationToTinyIntFn,
3131
"basename": basenameFn,
3232
"phash_distance": phashDistanceFn,
33+
"lower_unicode": lowerUnicodeFn,
3334
}
3435

3536
for name, fn := range funcs {

pkg/sqlite/functions.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package sqlite
22

33
import (
4+
"fmt"
45
"path/filepath"
56
"strconv"
67
"strings"
@@ -35,3 +36,27 @@ func durationToTinyIntFn(str string) (int64, error) {
3536
// basenameFn is a custom SQLite function that returns the last element of
// the given path, as computed by filepath.Base. The returned error is
// always nil.
func basenameFn(str string) (string, error) {
	base := filepath.Base(str)
	return base, nil
}
39+
40+
// lowerUnicodeFn is a custom SQLite function that lower-cases its argument
// with strings.ToLower, which is Unicode-aware, so that searches can compare
// text case-insensitively beyond the ASCII range.
//
// The argument is first converted to a string according to its dynamic type:
//   - nil (SQL NULL) yields the empty string
//   - string is used as is
//   - []byte (BLOB) is reinterpreted as a string
//   - int64 is rendered in base 10 (the original author notes this is for
//     phash fingerprints)
//   - anything else is formatted with fmt's %v verb
//
// The returned error is always nil.
func lowerUnicodeFn(str interface{}) (string, error) {
	var text string

	switch v := str.(type) {
	case nil:
		// NULL input: there is nothing to lower-case
		return "", nil
	case string:
		text = v
	case []byte:
		text = string(v)
	case int64:
		text = strconv.FormatInt(v, 10)
	default:
		text = fmt.Sprintf("%v", v)
	}

	return strings.ToLower(text), nil
}

pkg/sqlite/performer_test.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2432,6 +2432,86 @@ func TestPerformerStore_FindByStashIDStatus(t *testing.T) {
24322432
}
24332433
}
24342434

2435+
func TestPerformerQueryUnicodeSearchCaseInsensitive(t *testing.T) {
2436+
withTxn(func(ctx context.Context) error {
2437+
qb := db.Performer
2438+
2439+
// test cases with various Unicode characters
2440+
testCases := []struct {
2441+
name string
2442+
performerName string
2443+
searchTerm string
2444+
}{
2445+
{
2446+
"Cyrillic lowercase search",
2447+
"Анна",
2448+
"анна",
2449+
},
2450+
{
2451+
"Cyrillic uppercase search",
2452+
"мария",
2453+
"МАРИЯ",
2454+
},
2455+
{
2456+
"Accented Latin lowercase",
2457+
"Zoë",
2458+
"zoë",
2459+
},
2460+
{
2461+
"Accented Latin uppercase",
2462+
"chloé",
2463+
"CHLOÉ",
2464+
},
2465+
{
2466+
"Greek lowercase search",
2467+
"Έλενα",
2468+
"έλενα",
2469+
},
2470+
}
2471+
2472+
for _, tc := range testCases {
2473+
t.Run(tc.name, func(t *testing.T) {
2474+
// create performer with unicode name
2475+
performer := models.Performer{
2476+
Name: tc.performerName,
2477+
}
2478+
err := qb.Create(ctx, &models.CreatePerformerInput{Performer: &performer})
2479+
if err != nil {
2480+
t.Fatalf("Error creating performer: %s", err.Error())
2481+
}
2482+
2483+
// search using different case
2484+
findFilter := &models.FindFilterType{
2485+
Q: &tc.searchTerm,
2486+
}
2487+
2488+
performers, _, err := qb.Query(ctx, nil, findFilter)
2489+
if err != nil {
2490+
t.Fatalf("Error querying performers: %s", err.Error())
2491+
}
2492+
2493+
// should find the performer regardless of case
2494+
found := false
2495+
for _, p := range performers {
2496+
if p.ID == performer.ID {
2497+
found = true
2498+
break
2499+
}
2500+
}
2501+
2502+
assert.True(t, found)
2503+
2504+
// clean up
2505+
if err := qb.Destroy(ctx, performer.ID); err != nil {
2506+
t.Fatalf("Error cleaning up performer: %s", err.Error())
2507+
}
2508+
})
2509+
}
2510+
2511+
return nil
2512+
})
2513+
}
2514+
24352515
// TODO Update
24362516
// TODO Destroy
24372517
// TODO Find

pkg/sqlite/query.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -181,21 +181,31 @@ func (qb *queryBuilder) addFilter(f *filterBuilder) error {
181181
func (qb *queryBuilder) parseQueryString(columns []string, q string) {
182182
specs := models.ParseSearchString(q)
183183

184+
// helper to wrap column with coalesce if it doesn't already have it
185+
wrapColumn := func(column string) string {
186+
// if column already has COALESCE or CAST, don't wrap again
187+
if strings.HasPrefix(strings.ToUpper(strings.TrimSpace(column)), "COALESCE") ||
188+
strings.HasPrefix(strings.ToUpper(strings.TrimSpace(column)), "CAST") {
189+
return column
190+
}
191+
return coalesce(column)
192+
}
193+
184194
for _, t := range specs.MustHave {
185195
var clauses []string
186196

187197
for _, column := range columns {
188-
clauses = append(clauses, column+" LIKE ?")
189-
qb.addArg(like(t))
198+
clauses = append(clauses, "lower_unicode("+wrapColumn(column)+") LIKE ?")
199+
qb.addArg(likeLower(t))
190200
}
191201

192202
qb.addWhere("(" + strings.Join(clauses, " OR ") + ")")
193203
}
194204

195205
for _, t := range specs.MustNot {
196206
for _, column := range columns {
197-
qb.addWhere(coalesce(column) + " NOT LIKE ?")
198-
qb.addArg(like(t))
207+
qb.addWhere("lower_unicode(" + wrapColumn(column) + ") NOT LIKE ?")
208+
qb.addArg(likeLower(t))
199209
}
200210
}
201211

@@ -204,8 +214,8 @@ func (qb *queryBuilder) parseQueryString(columns []string, q string) {
204214

205215
for _, column := range columns {
206216
for _, v := range set {
207-
clauses = append(clauses, column+" LIKE ?")
208-
qb.addArg(like(v))
217+
clauses = append(clauses, "lower_unicode("+wrapColumn(column)+") LIKE ?")
218+
qb.addArg(likeLower(v))
209219
}
210220
}
211221

pkg/sqlite/scene.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -958,7 +958,7 @@ func (qb *SceneStore) makeQuery(ctx context.Context, sceneFilter *models.SceneFi
958958
},
959959
)
960960

961-
filepathColumn := "folders.path || '" + string(filepath.Separator) + "' || files.basename"
961+
filepathColumn := "COALESCE(folders.path, '') || '" + string(filepath.Separator) + "' || COALESCE(files.basename, '')"
962962
searchColumns := []string{"scenes.title", "scenes.details", filepathColumn, "files_fingerprints.fingerprint", "scene_markers.title"}
963963
query.parseQueryString(searchColumns, *q)
964964
}

pkg/sqlite/sql.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,3 +362,9 @@ func coalesce(column string) string {
362362
// like surrounds v with '%' wildcard characters so it can be used as the
// pattern argument of a SQL LIKE comparison. v itself is not modified.
func like(v string) string {
	const wildcard = "%"
	return wildcard + v + wildcard
}
365+
366+
// likeLower lower-cases v and surrounds it with '%' wildcard characters.
// The result is intended as the pattern argument of a case-insensitive LIKE
// comparison whose column side is passed through the lower_unicode() SQL
// function.
func likeLower(v string) string {
	lowered := strings.ToLower(v)
	return "%" + lowered + "%"
}

0 commit comments

Comments
 (0)