Skip to content

Commit c43ce85

Browse files
committed
8278302: [s390] Implement fast-path for ASCII-compatible CharsetEncoders
Reviewed-by: mdoerr
1 parent 1b14157 commit c43ce85

File tree

4 files changed

+82
-35
lines changed

4 files changed

+82
-35
lines changed

src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
2-
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2017, 2022 SAP SE. All rights reserved.
34
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
45
*
56
* This code is free software; you can redistribute it and/or modify it
@@ -46,22 +47,44 @@
4647
// Note:
4748
// cnt is signed int. Do not rely on high word!
4849
// counts # characters, not bytes.
49-
// The result is the number of characters copied before the first incompatible character was found.
50-
// If precise is true, the processing stops exactly at this point. Otherwise, the result may be off
51-
// by a few bytes. The result always indicates the number of copied characters.
52-
// When used as a character index, the returned value points to the first incompatible character.
5350
//
54-
// Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
55-
// - Different number of characters may have been written to dead array (if precise is false).
56-
// - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
51+
// The result indicates success or failure of the operation.
52+
// General compress operation (cut off high order byte which must be all zeroes).
53+
// = len - all characters have been successfully compressed.
54+
// = 0 - compress failed. At least one character was found with a non-zero high order byte.
55+
// This is the failure return value which exactly corresponds to the Java implementation.
56+
// 0 <= result < len - compress failed. That many characters were compressed successfully
57+
// before the first non-compressible character was found. This is the
58+
// current, but not fully compatible, implementation. See below.
59+
// Encode to ISO or 7-bit ASCII array.
60+
// = len - all characters have been encoded successfully.
61+
// < len - encode failed. That many characters were encoded successfully.
62+
// When used as an index into the character array, the return value addresses the
63+
// first non-encodable character.
64+
//
65+
// If precise is true, the processing stops exactly at the point where a failure is detected.
66+
// More characters than indicated by the return value may have been read from the src array.
67+
// Exactly the number of characters indicated by the return value have been written to dst.
68+
// If precise is false, a few characters more than indicated by the return value may have been
69+
// written to the dst array. In any failure case, the result value indexes the first invalid character.
5770
unsigned int C2_MacroAssembler::string_compress(Register result, Register src, Register dst, Register cnt,
58-
Register tmp, bool precise) {
71+
Register tmp, bool precise, bool toASCII) {
5972
assert_different_registers(Z_R0, Z_R1, result, src, dst, cnt, tmp);
6073

74+
unsigned short char_mask = 0xff00; // all selected bits must be '0' for a char to be valid
75+
unsigned int mask_ix_l = 0; // leftmost one bit pos in mask
76+
unsigned int mask_ix_r = 7; // rightmost one bit pos in mask
6177
if (precise) {
62-
BLOCK_COMMENT("encode_iso_array {");
78+
if (toASCII) {
79+
BLOCK_COMMENT("encode_ascii_array {");
80+
char_mask = 0xff80;
81+
mask_ix_r = 8; // rightmost one bit pos in mask. ASCII only uses codes 0..127
82+
} else {
83+
BLOCK_COMMENT("encode_iso_array {");
84+
}
6385
} else {
6486
BLOCK_COMMENT("string_compress {");
87+
assert(!toASCII, "Can't compress strings to 7-bit ASCII");
6588
}
6689
int block_start = offset();
6790

@@ -72,13 +95,13 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
7295
Register Rmask = result; // holds incompatibility check mask until result value is stored.
7396
Label ScalarShortcut, AllDone;
7497

75-
z_iilf(Rmask, 0xFF00FF00);
76-
z_iihf(Rmask, 0xFF00FF00);
98+
z_iilf(Rmask, (unsigned int)char_mask<<16 | (unsigned int)char_mask);
99+
z_iihf(Rmask, (unsigned int)char_mask<<16 | (unsigned int)char_mask);
77100

78101
#if 0 // Sacrifice shortcuts for code compactness
79102
{
80103
//---< shortcuts for short strings (very frequent) >---
81-
// Strings with 4 and 8 characters were fond to occur very frequently.
104+
// Strings with 4 and 8 characters were found to occur very frequently.
82105
// Therefore, we handle them right away with minimal overhead.
83106
Label skipShortcut, skip4Shortcut, skip8Shortcut;
84107
Register Rout = Z_R0;
@@ -133,7 +156,8 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
133156
if (VM_Version::has_VectorFacility()) {
134157
const int min_vcnt = 32; // Minimum #characters required to use vector instructions.
135158
// Otherwise just do nothing in vector mode.
136-
// Must be multiple of 2*(vector register length in chars (8 HW = 128 bits)).
159+
// Must correspond to # vector registers used by implementation,
160+
// and must be a power of 2.
137161
const int log_min_vcnt = exact_log2(min_vcnt);
138162
Label VectorLoop, VectorDone, VectorBreak;
139163

@@ -150,7 +174,7 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
150174
z_brz(VectorDone); // not enough data for vector loop
151175

152176
z_vzero(Vzero); // all zeroes
153-
z_vgmh(Vmask, 0, 7); // generate 0xff00 mask for all 2-byte elements
177+
z_vgmh(Vmask, mask_ix_l, mask_ix_r); // generate 0xff00/0xff80 mask for all 2-byte elements
154178
z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop
155179

156180
bind(VectorLoop);
@@ -162,7 +186,7 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
162186
z_vo(Vtmp2, Z_V22, Z_V23);
163187
z_vo(Vtmp1, Vtmp1, Vtmp2);
164188
z_vn(Vtmp1, Vtmp1, Vmask);
165-
z_vceqhs(Vtmp1, Vtmp1, Vzero); // high half of all chars must be zero for successful compress.
189+
z_vceqhs(Vtmp1, Vtmp1, Vzero); // all bits selected by mask must be zero for successful compress.
166190
z_bvnt(VectorBreak); // break vector loop if not all vector elements compare eq -> incompatible character found.
167191
// re-process data from current iteration in break handler.
168192

@@ -187,7 +211,8 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
187211
{
188212
const int min_cnt = 8; // Minimum #characters required to use unrolled loop.
189213
// Otherwise just do nothing in unrolled loop.
190-
// Must be multiple of 8.
214+
// Must correspond to # registers used by implementation,
215+
// and must be a power of 2.
191216
const int log_min_cnt = exact_log2(min_cnt);
192217
Label UnrolledLoop, UnrolledDone, UnrolledBreak;
193218

@@ -197,7 +222,7 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
197222
z_lr(Rix, Rcnt);
198223
z_sr(Rix, Z_R0);
199224
}
200-
z_sra(Rix, log_min_cnt); // unrolled loop count
225+
z_sra(Rix, log_min_cnt); // unrolled loop count
201226
z_brz(UnrolledDone);
202227

203228
bind(UnrolledLoop);
@@ -244,6 +269,8 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
244269
z_sll(Rix, log_min_cnt); // # chars not yet processed in UnrolledLoop (due to break), broken iteration not included.
245270
z_sr(Z_R0, Rix); // fix # chars processed OK so far.
246271
if (!precise) {
272+
// Because we don't need to be precise, we just return the # of characters which have been written.
273+
// The first illegal character is in the index range [result-min_cnt/2, result+min_cnt/2).
247274
z_lgfr(result, Z_R0);
248275
z_sllg(Z_R1, Z_R0, 1); // # src bytes already processed. Only lower 32 bits are valid!
249276
// Z_R1 contents must be treated as unsigned operand! For huge strings,
@@ -274,7 +301,7 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
274301
z_brh(ScalarDoit);
275302
z_llh(Z_R1, 0, Z_R0, Rsrc);
276303
z_bre(Scalar2Char);
277-
z_tmll(Z_R1, 0xff00);
304+
z_tmll(Z_R1, char_mask);
278305
z_lghi(result, 0); // cnt == 1, first char invalid, no chars successfully processed
279306
z_brnaz(AllDone);
280307
z_stc(Z_R1, 0, Z_R0, Rdst);
@@ -283,11 +310,11 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
283310

284311
bind(Scalar2Char);
285312
z_llh(Z_R0, 2, Z_R0, Rsrc);
286-
z_tmll(Z_R1, 0xff00);
313+
z_tmll(Z_R1, char_mask);
287314
z_lghi(result, 0); // cnt == 2, first char invalid, no chars successfully processed
288315
z_brnaz(AllDone);
289316
z_stc(Z_R1, 0, Z_R0, Rdst);
290-
z_tmll(Z_R0, 0xff00);
317+
z_tmll(Z_R0, char_mask);
291318
z_lghi(result, 1); // cnt == 2, second char invalid, one char successfully processed
292319
z_brnaz(AllDone);
293320
z_stc(Z_R0, 1, Z_R0, Rdst);
@@ -299,17 +326,17 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
299326
#endif
300327

301328
if (VM_Version::has_DistinctOpnds()) {
302-
z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in unrolled loop
329+
z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in scalar loop
303330
} else {
304331
z_lr(Rix, Rcnt);
305332
z_sr(Rix, Z_R0);
306333
}
307-
z_lgfr(result, Rcnt); // # processed characters (if all runs ok).
308-
z_brz(ScalarDone); // uses CC from Rix calculation
334+
z_lgfr(result, Rcnt); // # processed characters (if all encodes ok).
335+
z_brz(ScalarDone); // anything left to do? (uses CC from Rix calculation)
309336

310337
bind(ScalarLoop);
311338
z_llh(Z_R1, 0, Z_R0, Rsrc);
312-
z_tmll(Z_R1, 0xff00);
339+
z_tmll(Z_R1, char_mask);
313340
z_brnaz(ScalarBreak);
314341
z_stc(Z_R1, 0, Z_R0, Rdst);
315342
add2reg(Rsrc, 2);
@@ -329,7 +356,11 @@ unsigned int C2_MacroAssembler::string_compress(Register result, Register src, R
329356
bind(AllDone);
330357

331358
if (precise) {
332-
BLOCK_COMMENT("} encode_iso_array");
359+
if (toASCII) {
360+
BLOCK_COMMENT("} encode_ascii_array");
361+
} else {
362+
BLOCK_COMMENT("} encode_iso_array");
363+
}
333364
} else {
334365
BLOCK_COMMENT("} string_compress");
335366
}

src/hotspot/cpu/s390/c2_MacroAssembler_s390.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
2-
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2017, 2022 SAP SE. All rights reserved.
34
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
45
*
56
* This code is free software; you can redistribute it and/or modify it
@@ -38,7 +39,7 @@
3839
// Early clobber: result.
3940
// Boolean precise controls accuracy of result value.
4041
unsigned int string_compress(Register result, Register src, Register dst, Register cnt,
41-
Register tmp, bool precise);
42+
Register tmp, bool precise, bool toASCII);
4243

4344
// Inflate byte[] to char[].
4445
unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp);

src/hotspot/cpu/s390/matcher_s390.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
2-
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2017, 2022 SAP SE. All rights reserved.
34
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
45
*
56
* This code is free software; you can redistribute it and/or modify it
@@ -150,6 +151,6 @@
150151
}
151152

152153
// Implements a variant of EncodeISOArrayNode that encodes ASCII only
153-
static const bool supports_encode_ascii_array = false;
154+
static const bool supports_encode_ascii_array = true;
154155

155156
#endif // CPU_S390_MATCHER_S390_HPP

src/hotspot/cpu/s390/s390.ad

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//
2-
// Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
3-
// Copyright (c) 2017, 2020 SAP SE. All rights reserved.
2+
// Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved.
3+
// Copyright (c) 2017, 2022 SAP SE. All rights reserved.
44
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
//
66
// This code is free software; you can redistribute it and/or modify it
@@ -10230,7 +10230,7 @@ instruct string_compress(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI tm
1023010230
format %{ "String Compress $src->$dst($len) -> $result" %}
1023110231
ins_encode %{
1023210232
__ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
10233-
$tmp$$Register, false);
10233+
$tmp$$Register, false, false);
1023410234
%}
1023510235
ins_pipe(pipe_class_dummy);
1023610236
%}
@@ -10291,10 +10291,24 @@ instruct encode_iso_array(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI t
1029110291
match(Set result (EncodeISOArray src (Binary dst len)));
1029210292
effect(TEMP_DEF result, TEMP tmp, KILL cr); // R0, R1 are killed, too.
1029310293
ins_cost(300);
10294-
format %{ "Encode array $src->$dst($len) -> $result" %}
10294+
format %{ "Encode iso array $src->$dst($len) -> $result" %}
1029510295
ins_encode %{
1029610296
__ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
10297-
$tmp$$Register, true);
10297+
$tmp$$Register, true, false);
10298+
%}
10299+
ins_pipe(pipe_class_dummy);
10300+
%}
10301+
10302+
// encode char[] to byte[] in ASCII
10303+
instruct encode_ascii_array(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI tmp, flagsReg cr) %{
10304+
predicate(((EncodeISOArrayNode*)n)->is_ascii());
10305+
match(Set result (EncodeISOArray src (Binary dst len)));
10306+
effect(TEMP_DEF result, TEMP tmp, KILL cr); // R0, R1 are killed, too.
10307+
ins_cost(300);
10308+
format %{ "Encode ascii array $src->$dst($len) -> $result" %}
10309+
ins_encode %{
10310+
__ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
10311+
$tmp$$Register, true, true);
1029810312
%}
1029910313
ins_pipe(pipe_class_dummy);
1030010314
%}

0 commit comments

Comments
 (0)