Skip to content

Commit 53f775b

Browse files
committed
[flang][runtime] Support B/O/Z editing of CHARACTER
This is a common extension, though semantics differ across compilers. I've chosen to interpret the CHARACTER data as if it were an arbitrary-precision integer value and format or read it as such. This matches Intel's compilers and nvfortran. (GNU Fortran can't handle lengths > 1 and XLF seems to get the endianness wrong.) This patch generalizes the previous implementations of B/O/Z input and output so that they'll work for arbitrary data in memory, and then uses them for all B/O/Z input/output, including (now) CHARACTER. Differential Revision: https://reviews.llvm.org/D124547
1 parent 75881d8 commit 53f775b

File tree

2 files changed

+185
-46
lines changed

2 files changed

+185
-46
lines changed

flang/runtime/edit-input.cpp

Lines changed: 73 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -16,37 +16,76 @@
1616

1717
namespace Fortran::runtime::io {
1818

19-
static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
20-
int base, int totalBitSize) {
19+
template <int LOG2_BASE>
20+
static bool EditBOZInput(
21+
IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
2122
std::optional<int> remaining;
2223
std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
23-
common::UnsignedInt128 value{0};
24+
if (*next == '0') {
25+
do {
26+
next = io.NextInField(remaining, edit);
27+
} while (next && *next == '0');
28+
}
29+
// Count significant digits after any leading white space & zeroes
30+
int digits{0};
2431
for (; next; next = io.NextInField(remaining, edit)) {
2532
char32_t ch{*next};
2633
if (ch == ' ' || ch == '\t') {
2734
continue;
2835
}
29-
int digit{0};
3036
if (ch >= '0' && ch <= '1') {
31-
digit = ch - '0';
32-
} else if (base >= 8 && ch >= '2' && ch <= '7') {
33-
digit = ch - '0';
34-
} else if (base >= 10 && ch >= '8' && ch <= '9') {
35-
digit = ch - '0';
36-
} else if (base == 16 && ch >= 'A' && ch <= 'Z') {
37-
digit = ch + 10 - 'A';
38-
} else if (base == 16 && ch >= 'a' && ch <= 'z') {
39-
digit = ch + 10 - 'a';
37+
} else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
38+
} else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
39+
} else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
40+
} else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
4041
} else {
4142
io.GetIoErrorHandler().SignalError(
4243
"Bad character '%lc' in B/O/Z input field", ch);
4344
return false;
4445
}
45-
value *= base;
46-
value += digit;
46+
++digits;
47+
}
48+
auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8};
49+
if (significantBytes > bytes) {
50+
io.GetIoErrorHandler().SignalError(
51+
"B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
52+
return false;
53+
}
54+
// Reset to start of significant digits
55+
io.HandleRelativePosition(-digits);
56+
remaining.reset();
57+
// Make a second pass now that the digit count is known
58+
std::memset(n, 0, bytes);
59+
int increment{isHostLittleEndian ? -1 : 1};
60+
auto *data{reinterpret_cast<unsigned char *>(n) +
61+
(isHostLittleEndian ? significantBytes - 1 : 0)};
62+
int shift{((digits - 1) * LOG2_BASE) & 7};
63+
if (shift + LOG2_BASE > 8) {
64+
shift -= 8; // misaligned octal
65+
}
66+
while (digits > 0) {
67+
char32_t ch{*io.NextInField(remaining, edit)};
68+
int digit{0};
69+
if (ch >= '0' && ch <= '9') {
70+
digit = ch - '0';
71+
} else if (ch >= 'A' && ch <= 'F') {
72+
digit = ch + 10 - 'A';
73+
} else if (ch >= 'a' && ch <= 'f') {
74+
digit = ch + 10 - 'a';
75+
} else {
76+
continue;
77+
}
78+
--digits;
79+
if (shift < 0) {
80+
shift += 8;
81+
if (shift + LOG2_BASE > 8) { // misaligned octal
82+
*data |= digit >> (8 - shift);
83+
}
84+
data += increment;
85+
}
86+
*data |= digit << shift;
87+
shift -= LOG2_BASE;
4788
}
48-
// TODO: check for overflow
49-
std::memcpy(n, &value, totalBitSize >> 3);
5089
return true;
5190
}
5291

@@ -83,11 +122,11 @@ bool EditIntegerInput(
83122
case 'I':
84123
break;
85124
case 'B':
86-
return EditBOZInput(io, edit, n, 2, kind << 3);
125+
return EditBOZInput<1>(io, edit, n, kind);
87126
case 'O':
88-
return EditBOZInput(io, edit, n, 8, kind << 3);
127+
return EditBOZInput<3>(io, edit, n, kind);
89128
case 'Z':
90-
return EditBOZInput(io, edit, n, 16, kind << 3);
129+
return EditBOZInput<4>(io, edit, n, kind);
91130
case 'A': // legacy extension
92131
return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
93132
default:
@@ -457,7 +496,6 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
457496

458497
template <int KIND>
459498
bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
460-
constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
461499
switch (edit.descriptor) {
462500
case DataEdit::ListDirected:
463501
if (IsNamelistName(io)) {
@@ -472,14 +510,14 @@ bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
472510
case 'G':
473511
return EditCommonRealInput<KIND>(io, edit, n);
474512
case 'B':
475-
return EditBOZInput(
476-
io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
513+
return EditBOZInput<1>(io, edit, n,
514+
common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
477515
case 'O':
478-
return EditBOZInput(
479-
io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
516+
return EditBOZInput<3>(io, edit, n,
517+
common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
480518
case 'Z':
481-
return EditBOZInput(
482-
io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
519+
return EditBOZInput<4>(io, edit, n,
520+
common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
483521
case 'A': // legacy extension
484522
return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
485523
default:
@@ -590,7 +628,7 @@ static bool EditListDirectedCharacterInput(
590628
// or the end of the current record. Subtlety: the "remaining" count
591629
// here is a dummy that's used to avoid the interpretation of separators
592630
// in NextInField.
593-
std::optional<int> remaining{maxUTF8Bytes};
631+
std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
594632
while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
595633
switch (*next) {
596634
case ' ':
@@ -602,8 +640,7 @@ static bool EditListDirectedCharacterInput(
602640
break;
603641
default:
604642
*x++ = *next;
605-
--length;
606-
remaining = maxUTF8Bytes;
643+
remaining = --length > 0 ? maxUTF8Bytes : 0;
607644
}
608645
}
609646
std::fill_n(x, length, ' ');
@@ -619,6 +656,12 @@ bool EditCharacterInput(
619656
case 'A':
620657
case 'G':
621658
break;
659+
case 'B':
660+
return EditBOZInput<1>(io, edit, x, length * sizeof *x);
661+
case 'O':
662+
return EditBOZInput<3>(io, edit, x, length * sizeof *x);
663+
case 'Z':
664+
return EditBOZInput<4>(io, edit, x, length * sizeof *x);
622665
default:
623666
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
624667
"Data edit descriptor '%c' may not be used with a CHARACTER data item",

flang/runtime/edit-output.cpp

Lines changed: 112 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,85 @@
1313

1414
namespace Fortran::runtime::io {
1515

16+
// B/O/Z output of arbitrarily sized data emits a binary/octal/hexadecimal
17+
// representation of what is interpreted to be a single unsigned integer value.
18+
// When used with character data, endianness is exposed.
19+
template <int LOG2_BASE>
20+
static bool EditBOZOutput(IoStatementState &io, const DataEdit &edit,
21+
const unsigned char *data0, std::size_t bytes) {
22+
int digits{static_cast<int>((bytes * 8) / LOG2_BASE)};
23+
int get{static_cast<int>(bytes * 8) - digits * LOG2_BASE};
24+
get = get ? get : LOG2_BASE;
25+
int shift{7};
26+
int increment{isHostLittleEndian ? -1 : 1};
27+
const unsigned char *data{data0 + (isHostLittleEndian ? bytes - 1 : 0)};
28+
int skippedZeroes{0};
29+
int digit{0};
30+
// The same algorithm is used to generate digits for real (below)
31+
// as well as for generating them only to skip leading zeroes (here).
32+
// Bits are copied one at a time from the source data.
33+
// TODO: Multiple bit copies for hexadecimal, where misalignment
34+
// is not possible; or for octal when all 3 bits come from the
35+
// same byte.
36+
while (bytes > 0) {
37+
if (get == 0) {
38+
if (digit != 0) {
39+
break; // first nonzero leading digit
40+
}
41+
++skippedZeroes;
42+
get = LOG2_BASE;
43+
} else if (shift < 0) {
44+
data += increment;
45+
--bytes;
46+
shift = 7;
47+
} else {
48+
digit = 2 * digit + ((*data >> shift--) & 1);
49+
--get;
50+
}
51+
}
52+
// Emit leading spaces and zeroes; detect field overflow
53+
int leadingZeroes{0};
54+
int editWidth{edit.width.value_or(0)};
55+
int significant{digits - skippedZeroes};
56+
if (edit.digits && significant <= *edit.digits) { // Bw.m, Ow.m, Zw.m
57+
if (*edit.digits == 0 && bytes == 0) {
58+
editWidth = std::max(1, editWidth);
59+
} else {
60+
leadingZeroes = *edit.digits - significant;
61+
}
62+
} else if (bytes == 0) {
63+
leadingZeroes = 1;
64+
}
65+
int subTotal{leadingZeroes + significant};
66+
int leadingSpaces{std::max(0, editWidth - subTotal)};
67+
if (editWidth > 0 && leadingSpaces + subTotal > editWidth) {
68+
return io.EmitRepeated('*', editWidth);
69+
}
70+
if (!(io.EmitRepeated(' ', leadingSpaces) &&
71+
io.EmitRepeated('0', leadingZeroes))) {
72+
return false;
73+
}
74+
// Emit remaining digits
75+
while (bytes > 0) {
76+
if (get == 0) {
77+
char ch{static_cast<char>(digit >= 10 ? 'A' + digit - 10 : '0' + digit)};
78+
if (!io.Emit(&ch, 1)) {
79+
return false;
80+
}
81+
get = LOG2_BASE;
82+
digit = 0;
83+
} else if (shift < 0) {
84+
data += increment;
85+
--bytes;
86+
shift = 7;
87+
} else {
88+
digit = 2 * digit + ((*data >> shift--) & 1);
89+
--get;
90+
}
91+
}
92+
return true;
93+
}
94+
1695
template <int KIND>
1796
bool EditIntegerOutput(IoStatementState &io, const DataEdit &edit,
1897
common::HostSignedIntType<8 * KIND> n) {
@@ -38,21 +117,14 @@ bool EditIntegerOutput(IoStatementState &io, const DataEdit &edit,
38117
}
39118
break;
40119
case 'B':
41-
for (; un > 0; un >>= 1) {
42-
*--p = '0' + (static_cast<int>(un) & 1);
43-
}
44-
break;
120+
return EditBOZOutput<1>(
121+
io, edit, reinterpret_cast<const unsigned char *>(&n), KIND);
45122
case 'O':
46-
for (; un > 0; un >>= 3) {
47-
*--p = '0' + (static_cast<int>(un) & 7);
48-
}
49-
break;
123+
return EditBOZOutput<3>(
124+
io, edit, reinterpret_cast<const unsigned char *>(&n), KIND);
50125
case 'Z':
51-
for (; un > 0; un >>= 4) {
52-
int digit = static_cast<int>(un) & 0xf;
53-
*--p = digit >= 10 ? 'A' + (digit - 10) : '0' + digit;
54-
}
55-
break;
126+
return EditBOZOutput<4>(
127+
io, edit, reinterpret_cast<const unsigned char *>(&n), KIND);
56128
case 'A': // legacy extension
57129
return EditCharacterOutput(
58130
io, edit, reinterpret_cast<char *>(&n), sizeof n);
@@ -442,11 +514,17 @@ template <int KIND> bool RealOutputEditing<KIND>::Edit(const DataEdit &edit) {
442514
case 'F':
443515
return EditFOutput(edit);
444516
case 'B':
517+
return EditBOZOutput<1>(io_, edit,
518+
reinterpret_cast<const unsigned char *>(&x_),
519+
common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
445520
case 'O':
521+
return EditBOZOutput<3>(io_, edit,
522+
reinterpret_cast<const unsigned char *>(&x_),
523+
common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
446524
case 'Z':
447-
return EditIntegerOutput<KIND>(io_, edit,
448-
static_cast<common::HostSignedIntType<8 * KIND>>(
449-
decimal::BinaryFloatingPointNumber<binaryPrecision>{x_}.raw()));
525+
return EditBOZOutput<4>(io_, edit,
526+
reinterpret_cast<const unsigned char *>(&x_),
527+
common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
450528
case 'G':
451529
return Edit(EditForGOutput(edit));
452530
case 'A': // legacy extension
@@ -475,6 +553,15 @@ bool EditLogicalOutput(IoStatementState &io, const DataEdit &edit, bool truth) {
475553
case 'G':
476554
return io.EmitRepeated(' ', std::max(0, edit.width.value_or(1) - 1)) &&
477555
io.Emit(truth ? "T" : "F", 1);
556+
case 'B':
557+
return EditBOZOutput<1>(io, edit,
558+
reinterpret_cast<const unsigned char *>(&truth), sizeof truth);
559+
case 'O':
560+
return EditBOZOutput<3>(io, edit,
561+
reinterpret_cast<const unsigned char *>(&truth), sizeof truth);
562+
case 'Z':
563+
return EditBOZOutput<4>(io, edit,
564+
reinterpret_cast<const unsigned char *>(&truth), sizeof truth);
478565
default:
479566
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
480567
"Data edit descriptor '%c' may not be used with a LOGICAL data item",
@@ -544,6 +631,15 @@ bool EditCharacterOutput(IoStatementState &io, const DataEdit &edit,
544631
case 'A':
545632
case 'G':
546633
break;
634+
case 'B':
635+
return EditBOZOutput<1>(io, edit,
636+
reinterpret_cast<const unsigned char *>(x), sizeof(CHAR) * length);
637+
case 'O':
638+
return EditBOZOutput<3>(io, edit,
639+
reinterpret_cast<const unsigned char *>(x), sizeof(CHAR) * length);
640+
case 'Z':
641+
return EditBOZOutput<4>(io, edit,
642+
reinterpret_cast<const unsigned char *>(x), sizeof(CHAR) * length);
547643
default:
548644
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
549645
"Data edit descriptor '%c' may not be used with a CHARACTER data item",

0 commit comments

Comments
 (0)