Skip to content

Commit f76b6ad

Browse files
fix(panic): fix panic when censoring utf-8 (#64)
* fix(panic): fix panic when censoring utf-8
  Signed-off-by: stephenduke-care <[email protected]>
* fix(panic): fix panic when censoring utf-8
  Signed-off-by: stephenduke-care <[email protected]>
---------
Signed-off-by: stephenduke-care <[email protected]>
1 parent 8607c2b commit f76b6ad

File tree

2 files changed

+54
-33
lines changed

2 files changed

+54
-33
lines changed

goaway.go

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -143,63 +143,63 @@ func (g *ProfanityDetector) indexToRune(s string, index int) int {
143143
count := 0
144144
for i := range s {
145145
if i == index {
146-
return count
146+
break
147+
}
148+
if i < index {
149+
count++
147150
}
148-
count++
149151
}
150-
return -1
152+
return count
151153
}
152154

153-
// Censor takes in a string (word or sentence) and tries to censor all profanities found.
154155
func (g *ProfanityDetector) Censor(s string) string {
155156
censored := []rune(s)
156157
var originalIndexes []int
157158
s, originalIndexes = g.sanitize(s, true)
158-
// Check for false negatives
159-
for _, word := range g.falseNegatives {
159+
runeWordLength := 0
160+
161+
g.checkProfanity(&s, &originalIndexes, &censored, g.falseNegatives, &runeWordLength)
162+
g.removeFalsePositives(&s, &originalIndexes, &runeWordLength)
163+
g.checkProfanity(&s, &originalIndexes, &censored, g.profanities, &runeWordLength)
164+
165+
return string(censored)
166+
}
167+
168+
func (g *ProfanityDetector) checkProfanity(s *string, originalIndexes *[]int, censored *[]rune, wordList []string, runeWordLength *int) {
169+
for _, word := range wordList {
160170
currentIndex := 0
171+
*runeWordLength = len([]rune(word))
161172
for currentIndex != -1 {
162-
if foundIndex := strings.Index(s[currentIndex:], word); foundIndex != -1 {
163-
for i := 0; i < len([]rune(word)); i++ {
164-
runeIndex := g.indexToRune(string(censored), currentIndex+foundIndex+i)
165-
censored[originalIndexes[runeIndex]] = '*'
173+
if foundIndex := strings.Index((*s)[currentIndex:], word); foundIndex != -1 {
174+
for i := 0; i < *runeWordLength; i++ {
175+
runeIndex := g.indexToRune(*s, currentIndex+foundIndex) + i
176+
if runeIndex < len(*originalIndexes) {
177+
(*censored)[(*originalIndexes)[runeIndex]] = '*'
178+
}
166179
}
167-
currentIndex += foundIndex + len([]rune(word))
180+
currentIndex += foundIndex + len([]byte(word))
168181
} else {
169182
break
170183
}
171184
}
172185
}
173-
// Remove false positives
186+
}
187+
188+
func (g *ProfanityDetector) removeFalsePositives(s *string, originalIndexes *[]int, runeWordLength *int) {
174189
for _, word := range g.falsePositives {
175190
currentIndex := 0
191+
*runeWordLength = len([]rune(word))
176192
for currentIndex != -1 {
177-
if foundIndex := strings.Index(s[currentIndex:], word); foundIndex != -1 {
178-
foundRuneIndex := g.indexToRune(s, foundIndex)
179-
originalIndexes = append(originalIndexes[:foundRuneIndex], originalIndexes[foundRuneIndex+len(word):]...)
180-
currentIndex += foundIndex + len([]rune(word))
181-
} else {
182-
break
183-
}
184-
}
185-
s = strings.Replace(s, word, "", -1)
186-
}
187-
// Check for profanities
188-
for _, word := range g.profanities {
189-
currentIndex := 0
190-
for currentIndex != -1 {
191-
if foundIndex := strings.Index(s[currentIndex:], word); foundIndex != -1 {
192-
for i := 0; i < len([]rune(word)); i++ {
193-
runeIndex := g.indexToRune(string(censored), currentIndex+foundIndex+i)
194-
censored[originalIndexes[runeIndex]] = '*'
195-
}
196-
currentIndex += foundIndex + len([]rune(word))
193+
if foundIndex := strings.Index((*s)[currentIndex:], word); foundIndex != -1 {
194+
foundRuneIndex := g.indexToRune(*s, foundIndex)
195+
*originalIndexes = append((*originalIndexes)[:foundRuneIndex], (*originalIndexes)[foundRuneIndex+*runeWordLength:]...)
196+
currentIndex += foundIndex + len([]byte(word))
197197
} else {
198198
break
199199
}
200200
}
201+
*s = strings.Replace(*s, word, "", -1)
201202
}
202-
return string(censored)
203203
}
204204

205205
func (g ProfanityDetector) sanitize(s string, rememberOriginalIndexes bool) (string, []int) {

goaway_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,3 +655,24 @@ func TestSanitizeWithoutSanitizingLeetSpeak(t *testing.T) {
655655
t.Errorf("Expected '%s', got '%s'", expectedString, sanitizedString)
656656
}
657657
}
658+
659+
func TestDefaultDriver_UTF8(t *testing.T) {
660+
detector := NewProfanityDetector().WithCustomDictionary(
661+
[]string{"anal", "あほ"}, // profanities
662+
[]string{"あほほ"}, // falsePositives
663+
[]string{"あほほし"}, // falseNegatives
664+
)
665+
666+
unsanitizedString := "いい加減にしろ あほほし あほほ あほ anal ほ"
667+
expectedString := "いい加減にしろ **** あほほ ** **** ほ"
668+
669+
isProfane := detector.IsProfane(unsanitizedString)
670+
if !isProfane {
671+
t.Error("Expected false, got false from sentence", unsanitizedString)
672+
}
673+
674+
sanitizedString := detector.Censor(unsanitizedString)
675+
if sanitizedString != expectedString {
676+
t.Errorf("Expected '%s', got '%s'", expectedString, sanitizedString)
677+
}
678+
}

0 commit comments

Comments
 (0)