1
1
/*
2
- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
2
+ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
3
+ * Copyright (c) 2017, 2022 SAP SE. All rights reserved.
3
4
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
5
*
5
6
* This code is free software; you can redistribute it and/or modify it
46
47
// Note:
47
48
// cnt is signed int. Do not rely on high word!
48
49
// counts # characters, not bytes.
49
- // The result is the number of characters copied before the first incompatible character was found.
50
- // If precise is true, the processing stops exactly at this point. Otherwise, the result may be off
51
- // by a few bytes. The result always indicates the number of copied characters.
52
- // When used as a character index, the returned value points to the first incompatible character.
53
50
//
54
- // Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
55
- // - Different number of characters may have been written to dead array (if precise is false).
56
- // - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
51
+ // The result indicates success or failure of the operation.
52
+ // General compress operation (cut off high order byte which must be all zeroes).
53
+ // = len - all characters have been successfully compressed.
54
+ // = 0 - compress failed. At least one character was found with a non-zero high order byte.
55
+ // This is the failure return value which exactly corresponds to the Java implementation.
56
+ // 0 <= result < len - compress failed. That many characters were compressed successfully
57
+ // before the first non-compressible character was found. This is the
58
+ // current, but not fully compatible, implementation. See below.
59
+ // Encode to ISO or 7-bit ASCII array.
60
+ // = len - all characters have been encoded successfully.
61
+ // < len - encode failed. That many characters were encoded successfully.
62
+ // When used as an index into the character array, the return value addresses the
63
+ // first non-encodable character.
64
+ //
65
+ // If precise is true, the processing stops exactly at the point where a failure is detected.
66
+ // More characters than indicated by the return value may have been read from the src array.
67
+ // Exactly the number of characters indicated by the return value have been written to dst.
68
+ // If precise is false, a few characters more than indicated by the return value may have been
69
+ // written to the dst array. In any failure case, the result value indexes the first invalid character.
57
70
unsigned int C2_MacroAssembler::string_compress (Register result, Register src, Register dst, Register cnt,
58
- Register tmp, bool precise) {
71
+ Register tmp, bool precise, bool toASCII ) {
59
72
assert_different_registers (Z_R0, Z_R1, result, src, dst, cnt, tmp);
60
73
74
+ unsigned short char_mask = 0xff00 ; // all selected bits must be '0' for a char to be valid
75
+ unsigned int mask_ix_l = 0 ; // leftmost one bit pos in mask
76
+ unsigned int mask_ix_r = 7 ; // rightmost one bit pos in mask
61
77
if (precise) {
62
- BLOCK_COMMENT (" encode_iso_array {" );
78
+ if (toASCII) {
79
+ BLOCK_COMMENT (" encode_ascii_array {" );
80
+ char_mask = 0xff80 ;
81
+ mask_ix_r = 8 ; // rightmost one bit pos in mask. ASCII only uses codes 0..127
82
+ } else {
83
+ BLOCK_COMMENT (" encode_iso_array {" );
84
+ }
63
85
} else {
64
86
BLOCK_COMMENT (" string_compress {" );
87
+ assert (!toASCII, " Can't compress strings to 7-bit ASCII" );
65
88
}
66
89
int block_start = offset ();
67
90
@@ -72,13 +95,13 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
72
95
Register Rmask = result; // holds incompatibility check mask until result value is stored.
73
96
Label ScalarShortcut, AllDone;
74
97
75
- z_iilf (Rmask, 0xFF00FF00 );
76
- z_iihf (Rmask, 0xFF00FF00 );
98
+ z_iilf (Rmask, ( unsigned int )char_mask<< 16 | ( unsigned int )char_mask );
99
+ z_iihf (Rmask, ( unsigned int )char_mask<< 16 | ( unsigned int )char_mask );
77
100
78
101
#if 0 // Sacrifice shortcuts for code compactness
79
102
{
80
103
//---< shortcuts for short strings (very frequent) >---
81
- // Strings with 4 and 8 characters were fond to occur very frequently.
104
+ // Strings with 4 and 8 characters were found to occur very frequently.
82
105
// Therefore, we handle them right away with minimal overhead.
83
106
Label skipShortcut, skip4Shortcut, skip8Shortcut;
84
107
Register Rout = Z_R0;
@@ -133,7 +156,8 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
133
156
if (VM_Version::has_VectorFacility ()) {
134
157
const int min_vcnt = 32 ; // Minimum #characters required to use vector instructions.
135
158
// Otherwise just do nothing in vector mode.
136
- // Must be multiple of 2*(vector register length in chars (8 HW = 128 bits)).
159
+ // Must correspond to # vector registers used by implementation,
160
+ // and must be a power of 2.
137
161
const int log_min_vcnt = exact_log2 (min_vcnt);
138
162
Label VectorLoop, VectorDone, VectorBreak;
139
163
@@ -150,7 +174,7 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
150
174
z_brz (VectorDone); // not enough data for vector loop
151
175
152
176
z_vzero (Vzero); // all zeroes
153
- z_vgmh (Vmask, 0 , 7 ); // generate 0xff00 mask for all 2-byte elements
177
+ z_vgmh (Vmask, mask_ix_l, mask_ix_r ); // generate 0xff00/0xff80 mask for all 2-byte elements
154
178
z_sllg (Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop
155
179
156
180
bind (VectorLoop);
@@ -162,7 +186,7 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
162
186
z_vo (Vtmp2, Z_V22, Z_V23);
163
187
z_vo (Vtmp1, Vtmp1, Vtmp2);
164
188
z_vn (Vtmp1, Vtmp1, Vmask);
165
- z_vceqhs (Vtmp1, Vtmp1, Vzero); // high half of all chars must be zero for successful compress.
189
+ z_vceqhs (Vtmp1, Vtmp1, Vzero); // all bits selected by mask must be zero for successful compress.
166
190
z_bvnt (VectorBreak); // break vector loop if not all vector elements compare eq -> incompatible character found.
167
191
// re-process data from current iteration in break handler.
168
192
@@ -187,7 +211,8 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
187
211
{
188
212
const int min_cnt = 8 ; // Minimum #characters required to use unrolled loop.
189
213
// Otherwise just do nothing in unrolled loop.
190
- // Must be multiple of 8.
214
+ // Must correspond to # registers used by implementation,
215
+ // and must be a power of 2.
191
216
const int log_min_cnt = exact_log2 (min_cnt);
192
217
Label UnrolledLoop, UnrolledDone, UnrolledBreak;
193
218
@@ -197,7 +222,7 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
197
222
z_lr (Rix, Rcnt);
198
223
z_sr (Rix, Z_R0);
199
224
}
200
- z_sra (Rix, log_min_cnt); // unrolled loop count
225
+ z_sra (Rix, log_min_cnt); // unrolled loop count
201
226
z_brz (UnrolledDone);
202
227
203
228
bind (UnrolledLoop);
@@ -244,6 +269,8 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
244
269
z_sll (Rix, log_min_cnt); // # chars not yet processed in UnrolledLoop (due to break), broken iteration not included.
245
270
z_sr (Z_R0, Rix); // fix # chars processed OK so far.
246
271
if (!precise) {
272
+ // Because we don't need to be precise, we just return the # of characters which have been written.
273
+ // The first illegal character is in the index range [result-min_cnt/2, result+min_cnt/2).
247
274
z_lgfr (result, Z_R0);
248
275
z_sllg (Z_R1, Z_R0, 1 ); // # src bytes already processed. Only lower 32 bits are valid!
249
276
// Z_R1 contents must be treated as unsigned operand! For huge strings,
@@ -274,7 +301,7 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
274
301
z_brh(ScalarDoit);
275
302
z_llh(Z_R1, 0, Z_R0, Rsrc);
276
303
z_bre(Scalar2Char);
277
- z_tmll(Z_R1, 0xff00 );
304
+ z_tmll(Z_R1, char_mask );
278
305
z_lghi(result, 0); // cnt == 1, first char invalid, no chars successfully processed
279
306
z_brnaz(AllDone);
280
307
z_stc(Z_R1, 0, Z_R0, Rdst);
@@ -283,11 +310,11 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
283
310
284
311
bind(Scalar2Char);
285
312
z_llh(Z_R0, 2, Z_R0, Rsrc);
286
- z_tmll(Z_R1, 0xff00 );
313
+ z_tmll(Z_R1, char_mask );
287
314
z_lghi(result, 0); // cnt == 2, first char invalid, no chars successfully processed
288
315
z_brnaz(AllDone);
289
316
z_stc(Z_R1, 0, Z_R0, Rdst);
290
- z_tmll(Z_R0, 0xff00 );
317
+ z_tmll(Z_R0, char_mask );
291
318
z_lghi(result, 1); // cnt == 2, second char invalid, one char successfully processed
292
319
z_brnaz(AllDone);
293
320
z_stc(Z_R0, 1, Z_R0, Rdst);
@@ -299,17 +326,17 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
299
326
#endif
300
327
301
328
if (VM_Version::has_DistinctOpnds ()) {
302
- z_srk (Rix, Rcnt, Z_R0); // remaining # chars to compress in unrolled loop
329
+ z_srk (Rix, Rcnt, Z_R0); // remaining # chars to compress in scalar loop
303
330
} else {
304
331
z_lr (Rix, Rcnt);
305
332
z_sr (Rix, Z_R0);
306
333
}
307
- z_lgfr (result, Rcnt); // # processed characters (if all runs ok).
308
- z_brz (ScalarDone); // uses CC from Rix calculation
334
+ z_lgfr (result, Rcnt); // # processed characters (if all encodes ok).
335
+ z_brz (ScalarDone); // anything left to do? ( uses CC from Rix calculation)
309
336
310
337
bind (ScalarLoop);
311
338
z_llh (Z_R1, 0 , Z_R0, Rsrc);
312
- z_tmll (Z_R1, 0xff00 );
339
+ z_tmll (Z_R1, char_mask );
313
340
z_brnaz (ScalarBreak);
314
341
z_stc (Z_R1, 0 , Z_R0, Rdst);
315
342
add2reg (Rsrc, 2 );
@@ -329,7 +356,11 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
329
356
bind (AllDone);
330
357
331
358
if (precise) {
332
- BLOCK_COMMENT (" } encode_iso_array" );
359
+ if (toASCII) {
360
+ BLOCK_COMMENT (" } encode_ascii_array" );
361
+ } else {
362
+ BLOCK_COMMENT (" } encode_iso_array" );
363
+ }
333
364
} else {
334
365
BLOCK_COMMENT (" } string_compress" );
335
366
}
0 commit comments