@@ -18,50 +18,119 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
18
18
i_aref , i_send , i_respond_to_p , i_match , i_keys , i_depth ,
19
19
i_buffer_initial_length , i_dup , i_script_safe , i_escape_slash , i_strict ;
20
20
21
- /* Escapes the UTF16 character and stores the result in the buffer buf. */
22
- static void unicode_escape (char * buf , UTF16 character )
21
+ /* Converts in_string to a JSON string (without the wrapping '"'
22
+ * characters) in FBuffer out_buffer.
23
+ *
24
+ * Characters are JSON-escaped according to:
25
+ *
26
+ * - Always: ASCII control characters (0x00-0x1F), dquote, and
27
+ * backslash.
28
+ *
29
+ * - If out_ascii_only: non-ASCII characters (>0x7F)
30
+ *
31
+ * - If out_script_safe: forward slash, line separator (U+2028), and
32
+ * paragraph separator (U+2029)
33
+ *
34
+ * Everything else (should be UTF-8) is just passed through and
35
+ * appended to the result.
36
+ */
37
+ static void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE in_string , bool out_ascii_only , bool out_script_safe )
23
38
{
24
- const char * digits = "0123456789abcdef" ;
39
+ const char * hexdig = "0123456789abcdef" ;
40
+ char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
25
41
26
- buf [2 ] = digits [character >> 12 ];
27
- buf [3 ] = digits [(character >> 8 ) & 0xf ];
28
- buf [4 ] = digits [(character >> 4 ) & 0xf ];
29
- buf [5 ] = digits [character & 0xf ];
30
- }
42
+ const char * in_utf8_str = RSTRING_PTR (in_string );
43
+ unsigned long in_utf8_len = RSTRING_LEN (in_string );
44
+ bool in_is_ascii_only = rb_enc_str_asciionly_p (in_string );
31
45
32
- /* Escapes the UTF16 character and stores the result in the buffer buf, then
33
- * the buffer buf is appended to the FBuffer buffer. */
34
- static void unicode_escape_to_buffer (FBuffer * buffer , char buf [6 ], UTF16
35
- character )
36
- {
37
- unicode_escape (buf , character );
38
- fbuffer_append (buffer , buf , 6 );
39
- }
46
+ unsigned long pos ;
40
47
41
- /* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
42
- * and control characters are JSON escaped. */
43
- static void convert_UTF8_to_JSON_ASCII (FBuffer * buffer , VALUE string , char script_safe )
44
- {
45
- const UTF8 * source = (UTF8 * ) RSTRING_PTR (string );
46
- const UTF8 * sourceEnd = source + RSTRING_LEN (string );
47
- char buf [6 ] = { '\\' , 'u' };
48
+ for (pos = 0 ; pos < in_utf8_len ;) {
49
+ uint32_t ch ;
50
+ unsigned long ch_len ;
51
+ bool should_escape ;
48
52
49
- RB_GC_GUARD (string );
50
- }
53
+ /* UTF-8 decoding */
54
+ if (in_is_ascii_only ) {
55
+ ch = in_utf8_str [pos ];
56
+ ch_len = 1 ;
57
+ } else {
58
+ short i ;
59
+ if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
60
+ else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
61
+ else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
62
+ else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
63
+ else
64
+ rb_raise (rb_path2class ("JSON::GeneratorError" ),
65
+ "source sequence is illegal/malformed utf-8" );
66
+ if ((pos + ch_len ) > in_utf8_len )
67
+ rb_raise (rb_path2class ("JSON::GeneratorError" ),
68
+ "partial character in source, but hit end" );
69
+ for (i = 1 ; i < ch_len ; i ++ ) {
70
+ if ((in_utf8_str [pos + i ] & 0xC0 ) != 0x80 ) /* leading 2 bits should be 0b10 */
71
+ rb_raise (rb_path2class ("JSON::GeneratorError" ),
72
+ "source sequence is illegal/malformed utf-8" );
73
+ ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
74
+ }
75
+ if (ch > 0x10FFFF )
76
+ rb_raise (rb_path2class ("JSON::GeneratorError" ),
77
+ "source sequence is illegal/malformed utf-8" );
78
+ }
51
79
52
- /* Converts string to a JSON string in FBuffer buffer, where only the
53
- * characters required by the JSON standard are JSON escaped. The remaining
54
- * characters (should be UTF8) are just passed through and appended to the
55
- * result. */
56
- static void convert_UTF8_to_JSON (FBuffer * buffer , VALUE string , char script_safe )
57
- {
58
- const char * ptr = RSTRING_PTR (string ), * p ;
59
- unsigned long len = RSTRING_LEN (string ), start = 0 , end = 0 ;
60
- const char * escape = NULL ;
61
- int escape_len ;
62
- unsigned char c ;
63
- char buf [6 ] = { '\\' , 'u' };
64
- int ascii_only = rb_enc_str_asciionly_p (string );
80
+ /* JSON policy */
81
+ should_escape =
82
+ (ch < 0x20 ) ||
83
+ (ch == '"' ) ||
84
+ (ch == '\\' ) ||
85
+ (out_ascii_only && (ch > 0x7F )) ||
86
+ (out_script_safe && (ch == '/' )) ||
87
+ (out_script_safe && (ch == 0x2028 )) ||
88
+ (out_script_safe && (ch == 0x2029 ));
89
+
90
+ /* JSON encoding */
91
+ if (should_escape ) {
92
+ switch (ch ) {
93
+ case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
94
+ case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
95
+ case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
96
+ case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
97
+ case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
98
+ case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
99
+ case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
100
+ case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
101
+ default :
102
+ if (ch <= 0xFFFF ) {
103
+ scratch [2 ] = hexdig [ch >> 12 ];
104
+ scratch [3 ] = hexdig [(ch >> 8 ) & 0xf ];
105
+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
106
+ scratch [5 ] = hexdig [ch & 0xf ];
107
+ fbuffer_append (out_buffer , scratch , 6 );
108
+ } else {
109
+ uint16_t hi , lo ;
110
+ ch -= 0x10000 ;
111
+ hi = 0xD800 + (uint16_t )(ch >> 10 );
112
+ lo = 0xDC00 + (uint16_t )(ch & 0x3FF );
113
+
114
+ scratch [2 ] = hexdig [hi >> 12 ];
115
+ scratch [3 ] = hexdig [(hi >> 8 ) & 0xf ];
116
+ scratch [4 ] = hexdig [(hi >> 4 ) & 0xf ];
117
+ scratch [5 ] = hexdig [hi & 0xf ];
118
+
119
+ scratch [8 ] = hexdig [lo >> 12 ];
120
+ scratch [9 ] = hexdig [(lo >> 8 ) & 0xf ];
121
+ scratch [10 ] = hexdig [(lo >> 4 ) & 0xf ];
122
+ scratch [11 ] = hexdig [lo & 0xf ];
123
+
124
+ fbuffer_append (out_buffer , scratch , 12 );
125
+ }
126
+ }
127
+ } else {
128
+ fbuffer_append (out_buffer , & in_utf8_str [pos ], ch_len );
129
+ }
130
+
131
+ pos += ch_len ;
132
+ }
133
+ RB_GC_GUARD (in_string );
65
134
}
66
135
67
136
static char * fstrndup (const char * ptr , unsigned long len ) {
@@ -698,12 +767,7 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
698
767
if (!enc_utf8_compatible_p (rb_enc_get (obj ))) {
699
768
obj = rb_str_export_to_enc (obj , rb_utf8_encoding ());
700
769
}
701
-
702
- if (state -> ascii_only ) {
703
- convert_UTF8_to_JSON_ASCII (buffer , obj , state -> script_safe );
704
- } else {
705
- convert_UTF8_to_JSON (buffer , obj , state -> script_safe );
706
- }
770
+ convert_UTF8_to_JSON (buffer , obj , state -> ascii_only , state -> script_safe );
707
771
fbuffer_append_char (buffer , '"' );
708
772
}
709
773
0 commit comments