Skip to content

Commit 5a451a4

Browse files
committed
[flang] Runtime: SCAN and VERIFY
Implement the related character intrinsic functions SCAN and VERIFY. Differential Revision: https://reviews.llvm.org/D97580
1 parent 4679676 commit 5a451a4

File tree

3 files changed

+254
-5
lines changed

3 files changed

+254
-5
lines changed

flang/runtime/character.cpp

Lines changed: 204 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ static void Compare(Descriptor &result, const Descriptor &x,
9494
elements *= ub[j];
9595
xAt[j] = yAt[j] = 1;
9696
}
97-
result.Establish(TypeCategory::Logical, 1, ub, rank);
97+
result.Establish(
98+
TypeCategory::Logical, 1, nullptr, rank, ub, CFI_attribute_allocatable);
9899
if (result.Allocate(lb, ub) != CFI_SUCCESS) {
99100
terminator.Crash("Compare: could not allocate storage for result");
100101
}
@@ -145,7 +146,8 @@ static void AdjustLRHelper(Descriptor &result, const Descriptor &string,
145146
stringAt[j] = 1;
146147
}
147148
std::size_t elementBytes{string.ElementBytes()};
148-
result.Establish(string.type(), elementBytes, ub, rank);
149+
result.Establish(string.type(), elementBytes, nullptr, rank, ub,
150+
CFI_attribute_allocatable);
149151
if (result.Allocate(lb, ub) != CFI_SUCCESS) {
150152
terminator.Crash("ADJUSTL/R: could not allocate storage for result");
151153
}
@@ -196,7 +198,8 @@ static void LenTrim(Descriptor &result, const Descriptor &string,
196198
elements *= ub[j];
197199
stringAt[j] = 1;
198200
}
199-
result.Establish(TypeCategory::Integer, sizeof(INT), ub, rank);
201+
result.Establish(TypeCategory::Integer, sizeof(INT), nullptr, rank, ub,
202+
CFI_attribute_allocatable);
200203
if (result.Allocate(lb, ub) != CFI_SUCCESS) {
201204
terminator.Crash("LEN_TRIM: could not allocate storage for result");
202205
}
@@ -232,6 +235,133 @@ static void LenTrimKind(Descriptor &result, const Descriptor &string, int kind,
232235
}
233236
}
234237

238+
// SCAN and VERIFY implementation help.  These intrinsic functions
// do pretty much the same thing, so they're templatized with a
// distinguishing flag.
//
// Searches the xLen characters at x for the first one (or the last one,
// when back is true) whose membership in the setLen characters at set
// differs from IS_VERIFY; i.e., SCAN looks for a character that is in the
// set and VERIFY looks for one that is not.  Returns the 1-based position
// of that character, or 0 when there is none (including when xLen is 0).
//
// One-byte character types are handled with a 256-bit membership table
// that is built once per call, so that the search is linear in
// xLen + setLen.  This choice is made here, in the template that callers
// actually name as ScanVerify<CHAR, IS_VERIFY>, because an overload
// whose only template parameter is a bool can never be selected by a call
// that passes a character type as its first explicit template argument.
template <typename CHAR, bool IS_VERIFY = false>
inline std::size_t ScanVerify(const CHAR *x, std::size_t xLen, const CHAR *set,
    std::size_t setLen, bool back) {
  if (xLen == 0) {
    return 0;
  }
  if constexpr (sizeof(CHAR) == 1) {
    // Bit (c % 64) of word (c / 64) is set iff byte value c is in the set.
    std::uint64_t bitSet[256 / 64]{0};
    const std::uint64_t one{1};
    for (std::size_t j{0}; j < setLen; ++j) {
      unsigned setCh{static_cast<unsigned char>(set[j])};
      bitSet[setCh / 64] |= one << (setCh % 64);
    }
    for (std::size_t n{0}; n < xLen; ++n) {
      // Visit positions xLen..1 when searching backward, else 1..xLen.
      std::size_t at{back ? xLen - n : n + 1};
      unsigned ch{static_cast<unsigned char>(x[at - 1])};
      bool inSet{((bitSet[ch / 64] >> (ch % 64)) & 1) != 0};
      if (inSet != IS_VERIFY) {
        return at;
      }
    }
  } else {
    for (std::size_t n{0}; n < xLen; ++n) {
      std::size_t at{back ? xLen - n : n + 1};
      CHAR ch{x[at - 1]};
      bool inSet{false};
      // TODO: If set is sorted, could use binary search
      for (std::size_t j{0}; j < setLen; ++j) {
        if (set[j] == ch) {
          inSet = true;
          break;
        }
      }
      if (inSet != IS_VERIFY) {
        return at;
      }
    }
  }
  return 0;
}

// Overload for one-byte characters, retained for callers that supply only
// the IS_VERIFY flag (or no template arguments at all).  It forwards to the
// general template above, which uses the bit-set search for one-byte
// characters.
template <bool IS_VERIFY = false>
inline std::size_t ScanVerify(const char *x, std::size_t xLen, const char *set,
    std::size_t setLen, bool back) {
  return ScanVerify<char, IS_VERIFY>(x, xLen, set, setLen, back);
}
287+
288+
static bool IsLogicalElementTrue(
289+
const Descriptor &logical, const SubscriptValue at[]) {
290+
// A LOGICAL value is false if and only if all of its bytes are zero.
291+
const char *p{logical.Element<char>(at)};
292+
for (std::size_t j{logical.ElementBytes()}; j-- > 0; ++p) {
293+
if (*p) {
294+
return true;
295+
}
296+
}
297+
return false;
298+
}
299+
300+
template <typename INT, typename CHAR, bool IS_VERIFY = false>
301+
static void ScanVerify(Descriptor &result, const Descriptor &string,
302+
const Descriptor &set, const Descriptor *back,
303+
const Terminator &terminator) {
304+
int rank{string.rank() ? string.rank()
305+
: set.rank() ? set.rank() : back ? back->rank() : 0};
306+
SubscriptValue lb[maxRank], ub[maxRank], stringAt[maxRank], setAt[maxRank],
307+
backAt[maxRank];
308+
SubscriptValue elements{1};
309+
for (int j{0}; j < rank; ++j) {
310+
lb[j] = 1;
311+
ub[j] = string.rank()
312+
? string.GetDimension(j).Extent()
313+
: set.rank() ? set.GetDimension(j).Extent()
314+
: back ? back->GetDimension(j).Extent() : 1;
315+
elements *= ub[j];
316+
stringAt[j] = setAt[j] = backAt[j] = 1;
317+
}
318+
result.Establish(TypeCategory::Integer, sizeof(INT), nullptr, rank, ub,
319+
CFI_attribute_allocatable);
320+
if (result.Allocate(lb, ub) != CFI_SUCCESS) {
321+
terminator.Crash("SCAN/VERIFY: could not allocate storage for result");
322+
}
323+
std::size_t stringElementChars{string.ElementBytes() >> shift<CHAR>};
324+
std::size_t setElementChars{set.ElementBytes() >> shift<CHAR>};
325+
for (SubscriptValue resultAt{0}; elements-- > 0; resultAt += sizeof(INT),
326+
string.IncrementSubscripts(stringAt), set.IncrementSubscripts(setAt),
327+
back && back->IncrementSubscripts(backAt)) {
328+
*result.OffsetElement<INT>(resultAt) =
329+
ScanVerify<CHAR, IS_VERIFY>(string.Element<CHAR>(stringAt),
330+
stringElementChars, set.Element<CHAR>(setAt), setElementChars,
331+
back && IsLogicalElementTrue(*back, backAt));
332+
}
333+
}
334+
335+
template <typename CHAR, bool IS_VERIFY = false>
336+
static void ScanVerifyKind(Descriptor &result, const Descriptor &string,
337+
const Descriptor &set, const Descriptor *back, int kind,
338+
const Terminator &terminator) {
339+
switch (kind) {
340+
case 1:
341+
ScanVerify<std::int8_t, CHAR, IS_VERIFY>(
342+
result, string, set, back, terminator);
343+
break;
344+
case 2:
345+
ScanVerify<std::int16_t, CHAR, IS_VERIFY>(
346+
result, string, set, back, terminator);
347+
break;
348+
case 4:
349+
ScanVerify<std::int32_t, CHAR, IS_VERIFY>(
350+
result, string, set, back, terminator);
351+
break;
352+
case 8:
353+
ScanVerify<std::int64_t, CHAR, IS_VERIFY>(
354+
result, string, set, back, terminator);
355+
break;
356+
case 16:
357+
ScanVerify<common::uint128_t, CHAR, IS_VERIFY>(
358+
result, string, set, back, terminator);
359+
break;
360+
default:
361+
terminator.Crash("SCAN/VERIFY: bad KIND=%d", kind);
362+
}
363+
}
364+
235365
template <typename TO, typename FROM>
236366
static void CopyAndPad(
237367
TO *to, const FROM *from, std::size_t toChars, std::size_t fromChars) {
@@ -608,7 +738,7 @@ void RTNAME(CharacterPad1)(char *lhs, std::size_t bytes, std::size_t offset) {
608738
}
609739
}
610740

611-
// Intrinsic functions
741+
// Intrinsic function entry points
612742

613743
void RTNAME(AdjustL)(Descriptor &result, const Descriptor &string,
614744
const char *sourceFile, int sourceLine) {
@@ -649,11 +779,47 @@ void RTNAME(LenTrim)(Descriptor &result, const Descriptor &string, int kind,
649779
}
650780
}
651781

782+
// SCAN for scalar strings of one-byte characters: returns the 1-based
// position of the first (or, when back is true, the last) character of x
// that appears in set, or 0 if there is no such character.
std::size_t RTNAME(Scan1)(const char *x, std::size_t xLen, const char *set,
    std::size_t setLen, bool back) {
  constexpr bool isVerify{false};
  return ScanVerify<char, isVerify>(x, xLen, set, setLen, back);
}
786+
// SCAN for scalar strings of char16_t characters: returns the 1-based
// position of the first (or, when back is true, the last) character of x
// that appears in set, or 0 if there is no such character.
std::size_t RTNAME(Scan2)(const char16_t *x, std::size_t xLen,
    const char16_t *set, std::size_t setLen, bool back) {
  constexpr bool isVerify{false};
  return ScanVerify<char16_t, isVerify>(x, xLen, set, setLen, back);
}
790+
// SCAN for scalar strings of char32_t characters: returns the 1-based
// position of the first (or, when back is true, the last) character of x
// that appears in set, or 0 if there is no such character.
std::size_t RTNAME(Scan4)(const char32_t *x, std::size_t xLen,
    const char32_t *set, std::size_t setLen, bool back) {
  constexpr bool isVerify{false};
  return ScanVerify<char32_t, isVerify>(x, xLen, set, setLen, back);
}
794+
795+
// Descriptor-based SCAN entry point.  Selects the character type from the
// type code of string and hands off to ScanVerifyKind, which in turn
// selects the integer kind of the result.  back may be null.  An
// unrecognized type code is reported through a Terminator that carries
// sourceFile and sourceLine.
void RTNAME(Scan)(Descriptor &result, const Descriptor &string,
    const Descriptor &set, const Descriptor *back, int kind,
    const char *sourceFile, int sourceLine) {
  Terminator terminator{sourceFile, sourceLine};
  const auto typeCode{string.raw().type};
  switch (typeCode) {
  case CFI_type_char:
    return ScanVerifyKind<char, false>(
        result, string, set, back, kind, terminator);
  case CFI_type_char16_t:
    return ScanVerifyKind<char16_t, false>(
        result, string, set, back, kind, terminator);
  case CFI_type_char32_t:
    return ScanVerifyKind<char32_t, false>(
        result, string, set, back, kind, terminator);
  default:
    terminator.Crash(
        "SCAN: bad string type code %d", static_cast<int>(typeCode));
  }
}
816+
652817
void RTNAME(Repeat)(Descriptor &result, const Descriptor &string,
653818
std::size_t ncopies, const char *sourceFile, int sourceLine) {
654819
Terminator terminator{sourceFile, sourceLine};
655820
std::size_t origBytes{string.ElementBytes()};
656-
result.Establish(string.type(), origBytes * ncopies, nullptr, 0);
821+
result.Establish(string.type(), origBytes * ncopies, nullptr, 0, nullptr,
822+
CFI_attribute_allocatable);
657823
if (result.Allocate(nullptr, nullptr) != CFI_SUCCESS) {
658824
terminator.Crash("REPEAT could not allocate storage for result");
659825
}
@@ -692,6 +858,39 @@ void RTNAME(Trim)(Descriptor &result, const Descriptor &string,
692858
std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes);
693859
}
694860

861+
// VERIFY for scalar strings of one-byte characters: returns the 1-based
// position of the first (or, when back is true, the last) character of x
// that does not appear in set, or 0 if every character of x is in set.
std::size_t RTNAME(Verify1)(const char *x, std::size_t xLen, const char *set,
    std::size_t setLen, bool back) {
  constexpr bool isVerify{true};
  return ScanVerify<char, isVerify>(x, xLen, set, setLen, back);
}
865+
// VERIFY for scalar strings of char16_t characters: returns the 1-based
// position of the first (or, when back is true, the last) character of x
// that does not appear in set, or 0 if every character of x is in set.
std::size_t RTNAME(Verify2)(const char16_t *x, std::size_t xLen,
    const char16_t *set, std::size_t setLen, bool back) {
  constexpr bool isVerify{true};
  return ScanVerify<char16_t, isVerify>(x, xLen, set, setLen, back);
}
869+
// VERIFY for scalar strings of char32_t characters: returns the 1-based
// position of the first (or, when back is true, the last) character of x
// that does not appear in set, or 0 if every character of x is in set.
std::size_t RTNAME(Verify4)(const char32_t *x, std::size_t xLen,
    const char32_t *set, std::size_t setLen, bool back) {
  constexpr bool isVerify{true};
  return ScanVerify<char32_t, isVerify>(x, xLen, set, setLen, back);
}
873+
874+
// Descriptor-based VERIFY entry point.  Selects the character type from
// the type code of string and hands off to ScanVerifyKind, which in turn
// selects the integer kind of the result.  back may be null.  An
// unrecognized type code is reported through a Terminator that carries
// sourceFile and sourceLine.
void RTNAME(Verify)(Descriptor &result, const Descriptor &string,
    const Descriptor &set, const Descriptor *back, int kind,
    const char *sourceFile, int sourceLine) {
  Terminator terminator{sourceFile, sourceLine};
  const auto typeCode{string.raw().type};
  switch (typeCode) {
  case CFI_type_char:
    return ScanVerifyKind<char, true>(
        result, string, set, back, kind, terminator);
  case CFI_type_char16_t:
    return ScanVerifyKind<char16_t, true>(
        result, string, set, back, kind, terminator);
  case CFI_type_char32_t:
    return ScanVerifyKind<char32_t, true>(
        result, string, set, back, kind, terminator);
  default:
    terminator.Crash(
        "VERIFY: bad string type code %d", static_cast<int>(typeCode));
  }
}
893+
695894
void RTNAME(CharacterMax)(Descriptor &accumulator, const Descriptor &x,
696895
const char *sourceFile, int sourceLine) {
697896
MaxMin<false>(accumulator, x, sourceFile, sourceLine);

flang/runtime/character.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,26 @@ void RTNAME(CharacterMaxLoc)(Descriptor &result, const Descriptor &x,
107107
void RTNAME(CharacterMinLoc)(Descriptor &result, const Descriptor &x,
108108
int dim = 0, const Descriptor *mask = nullptr, int kind = sizeof(int),
109109
bool back = false, const char *sourceFile = nullptr, int sourceLine = 0);
110+
111+
std::size_t RTNAME(Scan1)(
112+
const char *, std::size_t, const char *set, std::size_t, bool back = false);
113+
std::size_t RTNAME(Scan2)(const char16_t *, std::size_t, const char16_t *set,
114+
std::size_t, bool back = false);
115+
std::size_t RTNAME(Scan4)(const char32_t *, std::size_t, const char32_t *set,
116+
std::size_t, bool back = false);
117+
void RTNAME(Scan)(Descriptor &result, const Descriptor &string,
118+
const Descriptor &set, const Descriptor *back /*can be null*/, int kind,
119+
const char *sourceFile = nullptr, int sourceLine = 0);
120+
121+
std::size_t RTNAME(Verify1)(
122+
const char *, std::size_t, const char *set, std::size_t, bool back = false);
123+
std::size_t RTNAME(Verify2)(const char16_t *, std::size_t, const char16_t *set,
124+
std::size_t, bool back = false);
125+
std::size_t RTNAME(Verify4)(const char32_t *, std::size_t, const char32_t *set,
126+
std::size_t, bool back = false);
127+
void RTNAME(Verify)(Descriptor &result, const Descriptor &string,
128+
const Descriptor &set, const Descriptor *back /*can be null*/, int kind,
129+
const char *sourceFile = nullptr, int sourceLine = 0);
110130
}
111131
} // namespace Fortran::runtime
112132
#endif // FORTRAN_RUNTIME_CHARACTER_H_

flang/unittests/Runtime/character.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,24 @@ static void Compare(const char *x, const char *y, std::size_t xBytes,
4646
TestCharCompare(y, x, yBytes, xBytes, -expect);
4747
}
4848

49+
static void Scan(
50+
const char *str, const char *set, bool back, std::size_t expect) {
51+
auto res{RTNAME(Scan1)(str, std::strlen(str), set, std::strlen(set), back)};
52+
if (res != expect) {
53+
Fail() << "Scan(" << str << ',' << set << ",back=" << back << "): got "
54+
<< res << ", should be " << expect << '\n';
55+
}
56+
}
57+
58+
static void Verify(
59+
const char *str, const char *set, bool back, std::size_t expect) {
60+
auto res{RTNAME(Verify1)(str, std::strlen(str), set, std::strlen(set), back)};
61+
if (res != expect) {
62+
Fail() << "Verify(" << str << ',' << set << ",back=" << back << "): got "
63+
<< res << ", should be " << expect << '\n';
64+
}
65+
}
66+
4967
int main() {
5068
StartTests();
5169
for (std::size_t j{0}; j < 8; ++j) {
@@ -55,5 +73,17 @@ int main() {
5573
Compare("abc", "def", 3, 3, -1);
5674
Compare("ab ", "abc", 3, 2, 0);
5775
Compare("abc", "abc", 2, 3, -1);
76+
Scan("abc", "abc", false, 1);
77+
Scan("abc", "abc", true, 3);
78+
Scan("abc", "cde", false, 3);
79+
Scan("abc", "cde", true, 3);
80+
Scan("abc", "x", false, 0);
81+
Scan("", "x", false, 0);
82+
Verify("abc", "abc", false, 0);
83+
Verify("abc", "abc", true, 0);
84+
Verify("abc", "cde", false, 1);
85+
Verify("abc", "cde", true, 2);
86+
Verify("abc", "x", false, 1);
87+
Verify("", "x", false, 0);
5888
return EndTests();
5989
}

0 commit comments

Comments
 (0)