@@ -22,6 +22,7 @@ typedef struct JSON_Generator_StateStruct {
22
22
bool ascii_only ;
23
23
bool script_safe ;
24
24
bool strict ;
25
+ bool escape_html_entities ;
25
26
} JSON_Generator_State ;
26
27
27
28
#ifndef RB_UNLIKELY
@@ -32,7 +33,7 @@ static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNesting
32
33
33
34
static ID i_to_s , i_to_json , i_new , i_pack , i_unpack , i_create_id , i_extend , i_encode ;
34
35
static VALUE sym_indent , sym_space , sym_space_before , sym_object_nl , sym_array_nl , sym_max_nesting , sym_allow_nan ,
35
- sym_ascii_only , sym_depth , sym_buffer_initial_length , sym_script_safe , sym_escape_slash , sym_strict , sym_as_json ;
36
+ sym_ascii_only , sym_depth , sym_buffer_initial_length , sym_script_safe , sym_escape_slash , sym_strict , sym_as_json , sym_escape_html_entities ;
36
37
37
38
38
39
#define GET_STATE_TO (self , state ) \
@@ -251,11 +252,11 @@ static const unsigned char script_safe_escape_table[256] = {
251
252
4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 6 , 6 , 9 , 9 ,
252
253
};
253
254
254
- static inline unsigned char search_script_safe_escape (search_state * search )
255
+ static inline unsigned char search_with_escape_table (search_state * search , unsigned char * table )
255
256
{
256
257
while (search -> ptr < search -> end ) {
257
258
unsigned char ch = (unsigned char )* search -> ptr ;
258
- unsigned char ch_len = script_safe_escape_table [ch ];
259
+ unsigned char ch_len = table [ch ];
259
260
260
261
if (RB_UNLIKELY (ch_len )) {
261
262
if (ch_len & ESCAPE_MASK ) {
@@ -279,14 +280,39 @@ static inline unsigned char search_script_safe_escape(search_state *search)
279
280
return 0 ;
280
281
}
281
282
282
- static void convert_UTF8_to_script_safe_JSON (search_state * search )
283
+ static inline void convert_UTF8_to_JSON_with_table (search_state * search , const unsigned char table [ 256 ] )
283
284
{
284
285
unsigned char ch_len ;
285
- while ((ch_len = search_script_safe_escape (search ))) {
286
+ while ((ch_len = search_with_escape_table (search , ( unsigned char * ) table ))) {
286
287
escape_UTF8_char (search , ch_len );
287
288
}
288
289
}
289
290
291
/*
 * Byte classification table selected when the generator state's
 * escape_html_entities flag is set.  It is indexed by the raw byte value;
 * a zero entry means the byte needs no special handling by the search loop.
 *
 * Each row below is anchored with a designator giving the index of its
 * first element, so a row always starts at the byte value shown.  Entries
 * that are not listed (the rest of printable ASCII, 0x20-0x7F) are
 * zero-initialised.
 *
 * NOTE(review): the values look like "escape flag | byte length"
 * (9 = flagged, 1 byte; 11 = flagged, 3 bytes) -- confirm against the
 * definition of ESCAPE_MASK.
 */
static const unsigned char escape_html_entities_escape_table[256] = {
    /* ASCII control characters (0x00-0x1F) */
    [0x00] = 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    [0x10] = 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,

    /* Printable ASCII: only these characters are flagged */
    ['"']  = 9,
    ['&']  = 9,
    ['/']  = 9,
    ['<']  = 9,
    ['>']  = 9,
    ['\\'] = 9,

    /* Continuation bytes (0x80-0xBF) */
    [0x80] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0x90] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0xA0] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    [0xB0] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

    /* First byte of a 2-byte code point (0xC0-0xDF) */
    [0xC0] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    [0xD0] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

    /* First byte of a 3-byte code point (0xE0-0xEF);
     * 0xE2 is the start of \u2028 and \u2029 */
    [0xE0] = 3, 3, 11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

    /* First byte of a 4+ byte code point (0xF0-0xFF) */
    [0xF0] = 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
};
315
+
290
316
static const unsigned char ascii_only_escape_table [256 ] = {
291
317
// ASCII Control Characters
292
318
9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 ,
@@ -977,9 +1003,11 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
977
1003
case ENC_CODERANGE_7BIT :
978
1004
case ENC_CODERANGE_VALID :
979
1005
if (RB_UNLIKELY (state -> ascii_only )) {
980
- convert_UTF8_to_ASCII_only_JSON (& search , state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
1006
+ convert_UTF8_to_ASCII_only_JSON (& search , state -> escape_html_entities ? escape_html_entities_escape_table : (state -> script_safe ? script_safe_escape_table : ascii_only_escape_table ));
1007
+ } else if (RB_UNLIKELY (state -> escape_html_entities )) {
1008
+ convert_UTF8_to_JSON_with_table (& search , escape_html_entities_escape_table );
981
1009
} else if (RB_UNLIKELY (state -> script_safe )) {
982
- convert_UTF8_to_script_safe_JSON (& search );
1010
+ convert_UTF8_to_JSON_with_table (& search , script_safe_escape_table );
983
1011
} else {
984
1012
convert_UTF8_to_JSON (& search );
985
1013
}
@@ -1609,6 +1637,19 @@ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_l
1609
1637
return Qnil ;
1610
1638
}
1611
1639
1640
+ static VALUE cState_escape_html_entities (VALUE self )
1641
+ {
1642
+ GET_STATE (self );
1643
+ return state -> escape_html_entities ? Qtrue : Qfalse ;
1644
+ }
1645
+
1646
+ static VALUE cState_escape_html_entities_set (VALUE self , VALUE val )
1647
+ {
1648
+ GET_STATE (self );
1649
+ state -> escape_html_entities = RTEST (val );
1650
+ return val ;
1651
+ }
1652
+
1612
1653
static int configure_state_i (VALUE key , VALUE val , VALUE _arg )
1613
1654
{
1614
1655
JSON_Generator_State * state = (JSON_Generator_State * )_arg ;
@@ -1627,6 +1668,8 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1627
1668
else if (key == sym_escape_slash ) { state -> script_safe = RTEST (val ); }
1628
1669
else if (key == sym_strict ) { state -> strict = RTEST (val ); }
1629
1670
else if (key == sym_as_json ) { state -> as_json = RTEST (val ) ? rb_convert_type (val , T_DATA , "Proc" , "to_proc" ) : Qfalse ; }
1671
+ else if (key == sym_escape_html_entities ) { state -> escape_html_entities = RTEST (val ); }
1672
+
1630
1673
return ST_CONTINUE ;
1631
1674
}
1632
1675
@@ -1740,6 +1783,9 @@ void Init_generator(void)
1740
1783
rb_define_method (cState , "depth=" , cState_depth_set , 1 );
1741
1784
rb_define_method (cState , "buffer_initial_length" , cState_buffer_initial_length , 0 );
1742
1785
rb_define_method (cState , "buffer_initial_length=" , cState_buffer_initial_length_set , 1 );
1786
+ rb_define_method (cState , "escape_html_entities" , cState_escape_html_entities , 0 );
1787
+ rb_define_method (cState , "escape_html_entities?" , cState_escape_html_entities , 0 );
1788
+ rb_define_method (cState , "escape_html_entities=" , cState_escape_html_entities_set , 1 );
1743
1789
rb_define_method (cState , "generate" , cState_generate , -1 );
1744
1790
rb_define_alias (cState , "generate_new" , "generate" ); // :nodoc:
1745
1791
@@ -1813,6 +1859,7 @@ void Init_generator(void)
1813
1859
sym_escape_slash = ID2SYM (rb_intern ("escape_slash" ));
1814
1860
sym_strict = ID2SYM (rb_intern ("strict" ));
1815
1861
sym_as_json = ID2SYM (rb_intern ("as_json" ));
1862
+ sym_escape_html_entities = ID2SYM (rb_intern ("escape_html_entities" ));
1816
1863
1817
1864
usascii_encindex = rb_usascii_encindex ();
1818
1865
utf8_encindex = rb_utf8_encindex ();
0 commit comments