Skip to content

Commit 2a984b1

Browse files
committed
Refactored the take_partial function and used String module functions to handle complex Unicode characters/emojis
1 parent 361f409 commit 2a984b1

File tree

4 files changed

+79
-111
lines changed

4 files changed

+79
-111
lines changed

lib/delta.ex

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ defmodule Delta do
234234
235235
iex> delta = [Op.insert("01🙋45")]
236236
iex> Delta.slice(delta, 1, 2)
237-
** (RuntimeError) Encoding failed in take_partial {"1🙋45", %{"insert" => "1🙋45"}, 2, {:incomplete, "1", <<216, 61>>}, {:error, "", <<222, 75, 0, 52, 0, 53>>}}
237+
[%{"insert" => "1🙋"}]
238238
"""
239239
@spec slice(t, non_neg_integer, non_neg_integer) :: t
240240
def slice(delta, index, len) do
@@ -254,43 +254,38 @@ defmodule Delta do
254254
255255
iex> delta = [Op.insert("01🙋45")]
256256
iex> Delta.slice_max(delta, 1, 2)
257-
[%{"insert" => "1"}]
257+
[%{"insert" => "1🙋"}]
258258
"""
259259
@doc since: "0.2.0"
260260
@spec slice_max(t, non_neg_integer, non_neg_integer) :: t
261261
def slice_max(delta, index, len) do
262-
{_left, right} = split(delta, index, align: true)
263-
{middle, _rest} = split(right, len, align: true)
262+
{_left, right} = split(delta, index)
263+
{middle, _rest} = split(right, len)
264264
middle
265265
end
266266

267267
@doc ~S"""
268268
Splits delta at the given index.
269269
270-
## Options
271-
272-
* `:align` - when `true`, allow moving index left if
273-
we're likely to split a grapheme otherwise.
274-
275270
## Examples
276271
iex> delta = [Op.insert("Hello World")]
277272
iex> Delta.split(delta, 5)
278273
{[%{"insert" => "Hello"}], [%{"insert" => " World"}]}
279274
280275
iex> delta = [Op.insert("01🙋45")]
281276
iex> Delta.split(delta, 3)
282-
{[%{"insert" => "01"}], [%{"insert" => "🙋45"}]}
277+
{[%{"insert" => "01🙋"}], [%{"insert" => "45"}]}
283278
284279
iex> delta = [Op.insert("a"), Op.insert("b", %{"bold" => true})]
285280
iex> Delta.split(delta, 3)
286281
{[%{"insert" => "a"}, %{"insert" => "b", "attributes" => %{"bold" => true}}], []}
287282
"""
288-
@spec split(t, non_neg_integer | fun, Keyword.t()) :: {t, t}
289-
def split(delta, index, opts \\ [])
283+
@spec split(t, non_neg_integer | fun) :: {t, t}
284+
def split(delta, index)
290285

291-
def split(delta, 0, _), do: {[], delta}
286+
def split(delta, 0), do: {[], delta}
292287

293-
def split(delta, index, opts) when is_integer(index) do
288+
def split(delta, index) when is_integer(index) do
294289
do_split(
295290
[],
296291
delta,
@@ -303,36 +298,35 @@ defmodule Delta do
303298
{:cont, index - op_size}
304299
end
305300
end,
306-
index,
307-
opts
301+
index
308302
)
309303
end
310304

311-
def split(delta, func, opts) when is_function(func) do
312-
do_split([], delta, func, nil, opts)
305+
def split(delta, func) when is_function(func) do
306+
do_split([], delta, func, nil)
313307
end
314308

315-
defp do_split(passed, [], _, _, _), do: {Enum.reverse(passed), []}
309+
defp do_split(passed, [], _, _), do: {Enum.reverse(passed), []}
316310

317-
defp do_split(passed, remaining, func, context, opts) when is_function(func, 1) do
318-
do_split(passed, remaining, fn op, _ -> func.(op) end, context, opts)
311+
defp do_split(passed, remaining, func, context) when is_function(func, 1) do
312+
do_split(passed, remaining, fn op, _ -> func.(op) end, context)
319313
end
320314

321-
defp do_split(passed, remaining, func, context, opts) when is_function(func, 2) do
315+
defp do_split(passed, remaining, func, context) when is_function(func, 2) do
322316
[first | remaining] = remaining
323317

324318
case func.(first, context) do
325319
:cont ->
326-
do_split([first | passed], remaining, func, context, opts)
320+
do_split([first | passed], remaining, func, context)
327321

328322
{:cont, context} ->
329-
do_split([first | passed], remaining, func, context, opts)
323+
do_split([first | passed], remaining, func, context)
330324

331325
0 ->
332326
{Enum.reverse(passed), [first | remaining]}
333327

334328
index ->
335-
case Op.take(first, index, opts) do
329+
case Op.take(first, index) do
336330
{left, false} ->
337331
{Enum.reverse([left | passed]), remaining}
338332

lib/delta/op.ex

Lines changed: 18 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
defmodule Delta.Op do
22
alias Delta.Attr
3-
alias Delta.Utils
43
alias Delta.EmbedHandler
54

65
@type t :: insert_op | retain_op | delete_op
@@ -171,14 +170,11 @@ defmodule Delta.Op do
171170
5
172171
173172
iex> Op.text_size("🏴󠁧󠁢󠁳󠁣󠁴󠁿")
174-
14
173+
1
175174
"""
176175
@spec text_size(text :: binary) :: non_neg_integer
177176
def text_size(text) do
178-
text
179-
|> :unicode.characters_to_binary(:utf8, :utf16)
180-
|> byte_size()
181-
|> div(2)
177+
String.length(text)
182178
end
183179

184180
@doc ~S"""
@@ -210,22 +206,20 @@ defmodule Delta.Op do
210206
iex> Op.insert("Hello") |> Op.take(3)
211207
{%{"insert" => "Hel"}, %{"insert" => "lo"}}
212208
213-
iex> assert_raise RuntimeError, fn -> Op.insert("🏴󠁧󠁢󠁳󠁣󠁴󠁿") |> Op.take(1) end
214-
215-
iex> Op.insert("🏴󠁧󠁢󠁳󠁣󠁴󠁿") |> Op.take(1, align: true)
216-
{%{"insert" => ""}, %{"insert" => "🏴󠁧󠁢󠁳󠁣󠁴󠁿"}}
209+
iex> Op.insert("🏴󠁧󠁢󠁳󠁣󠁴󠁿") |> Op.take(1)
210+
{%{"insert" => "🏴󠁧󠁢󠁳󠁣󠁴󠁿"}, false}
217211
"""
218-
@spec take(op :: t, length :: non_neg_integer, opts :: Keyword.t()) :: {t, t | boolean}
219-
def take(op, length, opts \\ [])
212+
@spec take(op :: t, length :: non_neg_integer) :: {t, t | boolean}
213+
def take(op, length)
220214

221-
def take(op = %{"insert" => embed}, _length, _opts) when not is_bitstring(embed) do
215+
def take(op = %{"insert" => embed}, _length) when not is_bitstring(embed) do
222216
{op, false}
223217
end
224218

225-
def take(op, length, opts) do
219+
def take(op, length) do
226220
case size(op) - length do
227221
0 -> {op, false}
228-
_ -> take_partial(op, length, opts)
222+
_ -> take_partial(op, length)
229223
end
230224
end
231225

@@ -350,47 +344,21 @@ defmodule Delta.Op do
350344
{op1, a, op2, b}
351345
end
352346

353-
@spec take_partial(t, non_neg_integer, Keyword.t()) :: {t, t}
354-
defp take_partial(op, 0, _opts), do: {insert("", op["attributes"]), op}
355-
356-
defp take_partial(%{"insert" => text} = op, len, opts) do
357-
binary = :unicode.characters_to_binary(text, :utf8, :utf16)
358-
binary_length = byte_size(binary)
359-
360-
left =
361-
binary
362-
|> Kernel.binary_part(0, len * 2)
363-
|> :unicode.characters_to_binary(:utf16, :utf8)
364-
365-
right =
366-
binary
367-
|> Kernel.binary_part(len * 2, binary_length - len * 2)
368-
|> :unicode.characters_to_binary(:utf16, :utf8)
347+
@spec take_partial(t, non_neg_integer) :: {t, t}
348+
defp take_partial(op, 0), do: {insert("", op["attributes"]), op}
369349

370-
case {is_binary(left), is_binary(right), Keyword.get(opts, :align, false)} do
371-
{true, true, false} ->
372-
{insert(left, op["attributes"]), insert(right, op["attributes"])}
373-
374-
{true, true, true} ->
375-
if Utils.slices_likely_cut_emoji?(left, right) do
376-
take_partial(op, len - 1, opts)
377-
else
378-
{insert(left, op["attributes"]), insert(right, op["attributes"])}
379-
end
380-
381-
{_, _, true} ->
382-
take_partial(op, len - 1, opts)
383-
384-
_ ->
385-
raise "Encoding failed in take_partial #{inspect({text, op, len, left, right})}"
386-
end
350+
defp take_partial(%{"insert" => text} = op, len) do
351+
length = String.length(text)
352+
left = String.slice(text, 0, len)
353+
right = String.slice(text, len, length - len)
354+
{insert(left, op["attributes"]), insert(right, op["attributes"])}
387355
end
388356

389-
defp take_partial(%{"delete" => full} = op, length, _opts) do
357+
defp take_partial(%{"delete" => full} = op, length) do
390358
{delete(length, op["attributes"]), delete(full - length, op["attributes"])}
391359
end
392360

393-
defp take_partial(%{"retain" => full} = op, length, _opts) do
361+
defp take_partial(%{"retain" => full} = op, length) do
394362
{retain(length, op["attributes"]), retain(full - length, op["attributes"])}
395363
end
396364

test/delta/delta/compose_test.exs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@ defmodule Tests.Delta.Compose do
22
use Delta.Support.Case, async: false
33
doctest Delta, only: [compose: 2, compose_all: 1]
44

5+
describe ".compose/2 (utf-16/32)" do
6+
test "take_partial with two 16-bit code units" do
7+
a = [Op.insert("Take the 💊💊")]
8+
b = [Op.retain(10)]
9+
10+
assert [%{"insert" => "Take the 💊💊"}] == Delta.compose(a, b)
11+
end
12+
end
13+
514
describe ".compose/2 (basic)" do
615
test "insert + insert" do
716
a = [Op.insert("A")]

test/delta/delta_test.exs

Lines changed: 33 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,17 @@ defmodule Tests.Delta do
5656

5757
test "slice normal emoji" do
5858
delta = [%{"insert" => "01🙋45"}]
59-
assert Delta.slice(delta, 1, 4) == [%{"insert" => "1🙋4"}]
59+
assert Delta.slice(delta, 1, 4) == [%{"insert" => "1🙋45"}]
6060
end
6161

6262
test "slice emoji with zero width joiner" do
6363
delta = [%{"insert" => "01🙋‍♂️78"}]
64-
assert Delta.slice(delta, 1, 7) == [%{"insert" => "1🙋‍♂️7"}]
64+
assert Delta.slice(delta, 1, 7) == [%{"insert" => "1🙋‍♂️78"}]
6565
end
6666

6767
test "slice emoji with joiner and modifier" do
6868
delta = [%{"insert" => "01🙋🏽‍♂️90"}]
69-
assert Delta.slice(delta, 1, 9) == [%{"insert" => "1🙋🏽‍♂️9"}]
69+
assert Delta.slice(delta, 1, 9) == [%{"insert" => "1🙋🏽‍♂️90"}]
7070
end
7171

7272
test "slice with 0 index" do
@@ -121,17 +121,17 @@ defmodule Tests.Delta do
121121

122122
test "slice normal emoji" do
123123
delta = [%{"insert" => "01🙋45"}]
124-
assert Delta.slice_max(delta, 1, 4) == [%{"insert" => "1🙋4"}]
124+
assert Delta.slice_max(delta, 1, 4) == [%{"insert" => "1🙋45"}]
125125
end
126126

127127
test "slice emoji with zero width joiner" do
128128
delta = [%{"insert" => "01🙋‍♂️78"}]
129-
assert Delta.slice_max(delta, 1, 7) == [%{"insert" => "1🙋‍♂️7"}]
129+
assert Delta.slice_max(delta, 1, 7) == [%{"insert" => "1🙋‍♂️78"}]
130130
end
131131

132132
test "slice emoji with joiner and modifier" do
133133
delta = [%{"insert" => "01🙋🏽‍♂️90"}]
134-
assert Delta.slice_max(delta, 1, 9) == [%{"insert" => "1🙋🏽‍♂️9"}]
134+
assert Delta.slice_max(delta, 1, 9) == [%{"insert" => "1🙋🏽‍♂️90"}]
135135
end
136136

137137
test "slice with 0 index" do
@@ -147,39 +147,36 @@ defmodule Tests.Delta do
147147
test "slice emoji: codepoint + variation selector" do
148148
# "01☹️345"
149149
delta = [%{"insert" => "01\u2639\uFE0F345"}]
150-
assert Delta.slice_max(delta, 1, 2) == [%{"insert" => "1"}]
151-
assert Delta.slice_max(delta, 1, 3) == [%{"insert" => "1\u2639\uFE0F"}]
150+
assert Delta.slice_max(delta, 1, 2) == [%{"insert" => "1☹️"}]
151+
assert Delta.slice_max(delta, 1, 3) == [%{"insert" => "1☹️3"}]
152152
end
153153

154154
test "slice emoji: codepoint + skin tone modifier" do
155155
# "01🤵🏽345"
156156
delta = [%{"insert" => "01\u{1F935}\u{1F3FD}345"}]
157-
assert Delta.slice_max(delta, 1, 2) == [%{"insert" => "1"}]
158-
assert Delta.slice_max(delta, 1, 3) == [%{"insert" => "1"}]
159-
assert Delta.slice_max(delta, 1, 4) == [%{"insert" => "1"}]
160-
assert Delta.slice_max(delta, 1, 5) == [%{"insert" => "1\u{1F935}\u{1F3FD}"}]
157+
assert Delta.slice_max(delta, 1, 2) == [%{"insert" => "1🤵🏽"}]
158+
assert Delta.slice_max(delta, 1, 3) == [%{"insert" => "1🤵🏽3"}]
159+
assert Delta.slice_max(delta, 1, 4) == [%{"insert" => "1🤵🏽34"}]
160+
assert Delta.slice_max(delta, 1, 5) == [%{"insert" => "1🤵🏽345"}]
161161
end
162162

163163
test "slice emoji: codepoint + ZWJ + codepoint" do
164164
# "01👨‍🏭345"
165165
delta = [%{"insert" => "01\u{1F468}\u200D\u{1F3ED}345"}]
166-
assert Delta.slice_max(delta, 1, 2) == [%{"insert" => "1"}]
167-
assert Delta.slice_max(delta, 1, 3) == [%{"insert" => "1"}]
168-
assert Delta.slice_max(delta, 1, 4) == [%{"insert" => "1"}]
169-
assert Delta.slice_max(delta, 1, 5) == [%{"insert" => "1"}]
170-
assert Delta.slice_max(delta, 1, 6) == [%{"insert" => "1\u{1F468}\u200D\u{1F3ED}"}]
166+
assert Delta.slice_max(delta, 1, 2) == [%{"insert" => "1👨‍🏭"}]
167+
assert Delta.slice_max(delta, 1, 3) == [%{"insert" => "1👨‍🏭3"}]
168+
assert Delta.slice_max(delta, 1, 4) == [%{"insert" => "1👨‍🏭34"}]
169+
assert Delta.slice_max(delta, 1, 5) == [%{"insert" => "1👨‍🏭345"}]
170+
assert Delta.slice_max(delta, 1, 6) == [%{"insert" => "1👨‍🏭345"}]
171171
end
172172

173173
test "slice emoji: flags" do
174174
# "01🇦🇺345"
175175
delta = [%{"insert" => "01\u{1F1E6}\u{1F1FA}345"}]
176-
assert Delta.slice_max(delta, 1, 2) == [%{"insert" => "1"}]
177-
# "1🇦"
178-
assert Delta.slice_max(delta, 1, 3) == [%{"insert" => "1\u{1F1E6}"}]
179-
# "1🇦"
180-
assert Delta.slice_max(delta, 1, 4) == [%{"insert" => "1\u{1F1E6}"}]
181-
# "1🇦🇺"
182-
assert Delta.slice_max(delta, 1, 5) == [%{"insert" => "1\u{1F1E6}\u{1F1FA}"}]
176+
assert Delta.slice_max(delta, 1, 2) == [%{"insert" => "1🇦🇺"}]
177+
assert Delta.slice_max(delta, 1, 3) == [%{"insert" => "1🇦🇺3"}]
178+
assert Delta.slice_max(delta, 1, 4) == [%{"insert" => "1🇦🇺34"}]
179+
assert Delta.slice_max(delta, 1, 5) == [%{"insert" => "1🇦🇺345"}]
183180
end
184181

185182
test "slice emoji: tag sequence" do
@@ -188,25 +185,25 @@ defmodule Tests.Delta do
188185
%{"insert" => "01\u{1F3F4}\u{E0067}\u{E0062}\u{E0073}\u{E0063}\u{E0074}\u{E007F}345"}
189186
]
190187

191-
for len <- 2..14 do
192-
assert Delta.slice_max(delta, 1, len) == [%{"insert" => "1"}]
193-
end
194-
195-
assert Delta.slice_max(delta, 1, 15) == [
196-
%{"insert" => "1\u{1F3F4}\u{E0067}\u{E0062}\u{E0073}\u{E0063}\u{E0074}\u{E007F}"}
197-
]
188+
assert Delta.slice_max(delta, 1, 2) == [%{"insert" => "1🏴󠁧󠁢󠁳󠁣󠁴󠁿"}]
189+
assert Delta.slice_max(delta, 1, 3) == [%{"insert" => "1🏴󠁧󠁢󠁳󠁣󠁴󠁿3"}]
190+
assert Delta.slice_max(delta, 1, 4) == [%{"insert" => "1🏴󠁧󠁢󠁳󠁣󠁴󠁿34"}]
191+
assert Delta.slice_max(delta, 1, 5) == [%{"insert" => "1🏴󠁧󠁢󠁳󠁣󠁴󠁿345"}]
192+
assert Delta.slice_max(delta, 1, 6) == [%{"insert" => "1🏴󠁧󠁢󠁳󠁣󠁴󠁿345"}]
198193
end
199194

200195
test "slice complex emoji" do
201196
# "01🚵🏻‍♀️345"
202197
delta = [%{"insert" => "01\u{1F6B5}\u{1F3FB}\u{200D}\u{2640}\u{FE0F}345"}]
203-
204-
for len <- 2..7 do
205-
assert Delta.slice_max(delta, 1, len) == [%{"insert" => "1"}]
206-
end
198+
assert Delta.slice_max(delta, 1, 2) == [%{"insert" => "1🚵🏻‍♀️"}]
199+
assert Delta.slice_max(delta, 1, 3) == [%{"insert" => "1🚵🏻‍♀️3"}]
200+
assert Delta.slice_max(delta, 1, 4) == [%{"insert" => "1🚵🏻‍♀️34"}]
201+
assert Delta.slice_max(delta, 1, 5) == [%{"insert" => "1🚵🏻‍♀️345"}]
202+
assert Delta.slice_max(delta, 1, 6) == [%{"insert" => "1🚵🏻‍♀️345"}]
203+
assert Delta.slice_max(delta, 1, 7) == [%{"insert" => "1🚵🏻‍♀️345"}]
207204

208205
assert Delta.slice_max(delta, 1, 8) == [
209-
%{"insert" => "1\u{1F6B5}\u{1F3FB}\u{200D}\u{2640}\u{FE0F}"}
206+
%{"insert" => "1\u{1F6B5}\u{1F3FB}\u{200D}\u{2640}\u{FE0F}345"}
210207
]
211208
end
212209
end

0 commit comments

Comments
 (0)