Skip to content

Commit 0a146a9

Browse files
[AIX] asm output: use character literals in byte lists for strings
This patch improves the assembly output produced for string literals by using character literals in byte lists. This provides two benefits: printable characters appear as such in the assembly output, and strings are kept as logical units on the same line. Reviewed By: daltenty. Differential Revision: https://reviews.llvm.org/D80953
1 parent c6b18cf commit 0a146a9

File tree

7 files changed

+108
-53
lines changed

7 files changed

+108
-53
lines changed

llvm/include/llvm/MC/MCAsmInfo.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,15 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment };
5454
/// This class is intended to be used as a base class for asm
5555
/// properties and features specific to the target.
5656
class MCAsmInfo {
57+
public:
58+
/// Assembly character literal syntax types.
59+
enum AsmCharLiteralSyntax {
60+
ACLS_Unknown, ///< Unknown; character literals not used by LLVM for this
61+
///< target.
62+
ACLS_SingleQuotePrefix, ///< The desired character is prefixed by a single
63+
///< quote, e.g., `'A`.
64+
};
65+
5766
protected:
5867
//===------------------------------------------------------------------===//
5968
// Properties to be set by the target writer, used to configure asm printer.
@@ -200,6 +209,16 @@ class MCAsmInfo {
200209
/// doesn't support this, it can be set to null. Defaults to "\t.asciz\t"
201210
const char *AscizDirective;
202211

212+
/// This directive accepts a comma-separated list of bytes for emission as a
213+
/// string of bytes. For targets that do not support this, it shall be set to
214+
/// null. Defaults to null.
215+
const char *ByteListDirective = nullptr;
216+
217+
/// Form used for character literals in the assembly syntax. Useful for
218+
/// producing strings as byte lists. If a target does not use or support
219+
/// this, it shall be set to ACLS_Unknown. Defaults to ACLS_Unknown.
220+
AsmCharLiteralSyntax CharacterLiteralSyntax = ACLS_Unknown;
221+
203222
/// These directives are used to output some unit of integer data to the
204223
/// current section. If a data directive is set to null, smaller data
205224
/// directives will be used to emit the large sizes. Defaults to "\t.byte\t",
@@ -562,6 +581,10 @@ class MCAsmInfo {
562581
}
563582
const char *getAsciiDirective() const { return AsciiDirective; }
564583
const char *getAscizDirective() const { return AscizDirective; }
584+
/// Returns the directive used to emit a comma-separated list of bytes, or
/// null when the target has not set one.
const char *getByteListDirective() const { return ByteListDirective; }
585+
/// Returns the character literal syntax configured for this target; this is
/// ACLS_Unknown unless the target assigns a different value.
AsmCharLiteralSyntax characterLiteralSyntax() const {
586+
return CharacterLiteralSyntax;
587+
}
565588
bool getAlignmentIsInBytes() const { return AlignmentIsInBytes; }
566589
unsigned getTextAlignFillValue() const { return TextAlignFillValue; }
567590
const char *getGlobalDirective() const { return GlobalDirective; }

llvm/lib/MC/MCAsmInfoXCOFF.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
2424
ZeroDirectiveSupportsNonZeroValue = false;
2525
AsciiDirective = nullptr; // not supported
2626
AscizDirective = nullptr; // not supported
27+
ByteListDirective = "\t.byte\t";
28+
CharacterLiteralSyntax = ACLS_SingleQuotePrefix;
2729

2830
// Use .vbyte for data definition to avoid directives that apply an implicit
2931
// alignment.

llvm/lib/MC/MCAsmStreamer.cpp

Lines changed: 72 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,47 @@ void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
971971

972972
/// Maps the low three bits of X to the matching ASCII octal digit ('0'-'7').
static inline char toOctal(int X) { return static_cast<char>('0' + (X & 7)); }
973973

974+
/// Writes the bytes of \p Data to \p OS as a comma-separated list with no
/// trailing comma.
///
/// \param Data  Bytes to print; must be non-empty (asserted below).
/// \param OS    Stream that receives the list text.
/// \param ACLS  Selects how each byte is spelled: with ACLS_Unknown every
///              byte is written as '0' followed by three octal digits; with
///              ACLS_SingleQuotePrefix bytes accepted by isPrint() are written
///              as a single quote followed by the character, and all other
///              bytes fall back to the octal form.
static void PrintByteList(StringRef Data, raw_ostream &OS,
975+
MCAsmInfo::AsmCharLiteralSyntax ACLS) {
976+
// The list printer below dereferences the last element unconditionally.
assert(!Data.empty() && "Cannot generate an empty list.");
977+
// Prints one byte as a leading '0' plus three octal digits (e.g. 0012).
const auto printCharacterInOctal = [&OS](unsigned char C) {
978+
OS << '0';
979+
OS << toOctal(C >> 6);
980+
OS << toOctal(C >> 3);
981+
OS << toOctal(C >> 0);
982+
};
983+
// Builds a per-byte printer: bytes accepted by isPrint() go to the supplied
// callback, everything else is printed in octal.
const auto printOneCharacterFor = [printCharacterInOctal](
984+
auto printOnePrintingCharacter) {
985+
return [printCharacterInOctal, printOnePrintingCharacter](unsigned char C) {
986+
if (isPrint(C)) {
987+
printOnePrintingCharacter(static_cast<char>(C));
988+
return;
989+
}
990+
printCharacterInOctal(C);
991+
};
992+
};
993+
// Prints every byte with the given per-byte printer, separated by commas.
const auto printCharacterList = [Data, &OS](const auto &printOneCharacter) {
994+
const auto BeginPtr = Data.begin(), EndPtr = Data.end();
995+
// All bytes but the last are followed by a comma.
for (const unsigned char C : make_range(BeginPtr, EndPtr - 1)) {
996+
printOneCharacter(C);
997+
OS << ',';
998+
}
999+
// The last byte is printed separately so the list has no trailing comma.
printOneCharacter(*(EndPtr - 1));
1000+
};
1001+
switch (ACLS) {
1002+
case MCAsmInfo::ACLS_Unknown:
1003+
// No character literal syntax: every byte is emitted in octal.
printCharacterList(printCharacterInOctal);
1004+
return;
1005+
case MCAsmInfo::ACLS_SingleQuotePrefix:
1006+
printCharacterList(printOneCharacterFor([&OS](char C) {
1007+
// A character literal is the two bytes: single quote, then the character.
const char AsmCharLitBuf[2] = {'\'', C};
1008+
OS << StringRef(AsmCharLitBuf, sizeof(AsmCharLitBuf));
1009+
}));
1010+
return;
1011+
}
1012+
llvm_unreachable("Invalid AsmCharLiteralSyntax value!");
1013+
}
1014+
9741015
static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
9751016
OS << '"';
9761017

@@ -1009,33 +1050,42 @@ void MCAsmStreamer::emitBytes(StringRef Data) {
10091050
"Cannot emit contents before setting section!");
10101051
if (Data.empty()) return;
10111052

1012-
// If only single byte is provided or no ascii or asciz directives is
1013-
// supported, emit as vector of 8bits data.
1014-
if (Data.size() == 1 ||
1015-
!(MAI->getAscizDirective() || MAI->getAsciiDirective())) {
1016-
if (MCTargetStreamer *TS = getTargetStreamer()) {
1017-
TS->emitRawBytes(Data);
1053+
const auto emitAsString = [this](StringRef Data) {
1054+
// If the data ends with 0 and the target supports .asciz, use it, otherwise
1055+
// use .ascii or a byte-list directive
1056+
if (MAI->getAscizDirective() && Data.back() == 0) {
1057+
OS << MAI->getAscizDirective();
1058+
Data = Data.substr(0, Data.size() - 1);
1059+
} else if (LLVM_LIKELY(MAI->getAsciiDirective())) {
1060+
OS << MAI->getAsciiDirective();
1061+
} else if (MAI->getByteListDirective()) {
1062+
OS << MAI->getByteListDirective();
1063+
PrintByteList(Data, OS, MAI->characterLiteralSyntax());
1064+
EmitEOL();
1065+
return true;
10181066
} else {
1019-
const char *Directive = MAI->getData8bitsDirective();
1020-
for (const unsigned char C : Data.bytes()) {
1021-
OS << Directive << (unsigned)C;
1022-
EmitEOL();
1023-
}
1067+
return false;
10241068
}
1069+
1070+
PrintQuotedString(Data, OS);
1071+
EmitEOL();
1072+
return true;
1073+
};
1074+
1075+
if (Data.size() != 1 && emitAsString(Data))
10251076
return;
1026-
}
10271077

1028-
// If the data ends with 0 and the target supports .asciz, use it, otherwise
1029-
// use .ascii
1030-
if (MAI->getAscizDirective() && Data.back() == 0) {
1031-
OS << MAI->getAscizDirective();
1032-
Data = Data.substr(0, Data.size()-1);
1033-
} else {
1034-
OS << MAI->getAsciiDirective();
1078+
// Only single byte is provided or no ascii, asciz, or byte-list directives
1079+
// are applicable. Emit as vector of individual 8bits data elements.
1080+
if (MCTargetStreamer *TS = getTargetStreamer()) {
1081+
TS->emitRawBytes(Data);
1082+
return;
1083+
}
1084+
const char *Directive = MAI->getData8bitsDirective();
1085+
for (const unsigned char C : Data.bytes()) {
1086+
OS << Directive << (unsigned)C;
1087+
EmitEOL();
10351088
}
1036-
1037-
PrintQuotedString(Data, OS);
1038-
EmitEOL();
10391089
}
10401090

10411091
void MCAsmStreamer::emitBinaryData(StringRef Data) {
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s
2+
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s
3+
4+
@str = constant [256 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A\1B\1C\1D\1E\1F !\22#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\7F\80\81\82\83\84\85\86\87\88\89\8A\8B\8C\8D\8E\8F\90\91\92\93\94\95\96\97\98\99\9A\9B\9C\9D\9E\9F\A0\A1\A2\A3\A4\A5\A6\A7\A8\A9\AA\AB\AC\AD\AE\AF\B0\B1\B2\B3\B4\B5\B6\B7\B8\B9\BA\BB\BC\BD\BE\BF\C0\C1\C2\C3\C4\C5\C6\C7\C8\C9\CA\CB\CC\CD\CE\CF\D0\D1\D2\D3\D4\D5\D6\D7\D8\D9\DA\DB\DC\DD\DE\DF\E0\E1\E2\E3\E4\E5\E6\E7\E8\E9\EA\EB\EC\ED\EE\EF\F0\F1\F2\F3\F4\F5\F6\F7\F8\F9\FA\FB\FC\FD\FE\FF\00", align 1
5+
6+
; CHECK-LABEL:str:
7+
; CHECK-NEXT: .byte 0001,0002,0003,0004,0005,0006,0007,0010,0011,0012,0013,0014,0015,0016,0017,0020,0021,0022,0023,0024,0025,0026,0027,0030,0031,0032,0033,0034,0035,0036,0037,' ,'!,'",'#,'$,'%,'&,'','(,'),'*,'+,',,'-,'.,'/,'0,'1,'2,'3,'4,'5,'6,'7,'8,'9,':,';,'<,'=,'>,'?,'@,'A,'B,'C,'D,'E,'F,'G,'H,'I,'J,'K,'L,'M,'N,'O,'P,'Q,'R,'S,'T,'U,'V,'W,'X,'Y,'Z,'[,'\,'],'^,'_,'`,'a,'b,'c,'d,'e,'f,'g,'h,'i,'j,'k,'l,'m,'n,'o,'p,'q,'r,'s,'t,'u,'v,'w,'x,'y,'z,'{,'|,'},'~,0177,0200,0201,0202,0203,0204,0205,0206,0207,0210,0211,0212,0213,0214,0215,0216,0217,0220,0221,0222,0223,0224,0225,0226,0227,0230,0231,0232,0233,0234,0235,0236,0237,0240,0241,0242,0243,0244,0245,0246,0247,0250,0251,0252,0253,0254,0255,0256,0257,0260,0261,0262,0263,0264,0265,0266,0267,0270,0271,0272,0273,0274,0275,0276,0277,0300,0301,0302,0303,0304,0305,0306,0307,0310,0311,0312,0313,0314,0315,0316,0317,0320,0321,0322,0323,0324,0325,0326,0327,0330,0331,0332,0333,0334,0335,0336,0337,0340,0341,0342,0343,0344,0345,0346,0347,0350,0351,0352,0353,0354,0355,0356,0357,0360,0361,0362,0363,0364,0365,0366,0367,0370,0371,0372,0373,0374,0375,0376,0377,0000

llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,7 @@
8686

8787
; CHECK: .globl chrarray
8888
; CHECK-NEXT: chrarray:
89-
; CHECK-NEXT: .byte 97
90-
; CHECK-NEXT: .byte 98
91-
; CHECK-NEXT: .byte 99
92-
; CHECK-NEXT: .byte 100
89+
; CHECK-NEXT: .byte 'a,'b,'c,'d
9390

9491
; CHECK: .globl dblarr
9592
; CHECK-NEXT: .align 3

llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -41,30 +41,9 @@ entry:
4141
; CHECK-NEXT: .vbyte 4, 0 # 0x0
4242
; CHECK-NEXT: .csect .rodata.str1.1[RO],2
4343
; CHECK-NEXT: L..strA:
44-
; CHECK-NEXT: .byte 104
45-
; CHECK-NEXT: .byte 101
46-
; CHECK-NEXT: .byte 108
47-
; CHECK-NEXT: .byte 108
48-
; CHECK-NEXT: .byte 111
49-
; CHECK-NEXT: .byte 32
50-
; CHECK-NEXT: .byte 119
51-
; CHECK-NEXT: .byte 111
52-
; CHECK-NEXT: .byte 114
53-
; CHECK-NEXT: .byte 108
54-
; CHECK-NEXT: .byte 100
55-
; CHECK-NEXT: .byte 33
56-
; CHECK-NEXT: .byte 10
57-
; CHECK-NEXT: .byte 0
44+
; CHECK-NEXT: .byte 'h,'e,'l,'l,'o,' ,'w,'o,'r,'l,'d,'!,0012,0000
5845
; CHECK-NEXT: L...str:
59-
; CHECK-NEXT: .byte 97
60-
; CHECK-NEXT: .byte 98
61-
; CHECK-NEXT: .byte 99
62-
; CHECK-NEXT: .byte 100
63-
; CHECK-NEXT: .byte 101
64-
; CHECK-NEXT: .byte 102
65-
; CHECK-NEXT: .byte 103
66-
; CHECK-NEXT: .byte 104
67-
; CHECK-NEXT: .byte 0
46+
; CHECK-NEXT: .byte 'a,'b,'c,'d,'e,'f,'g,'h,0000
6847

6948
; CHECKOBJ: 00000010 <.rodata.str2.2>:
7049
; CHECKOBJ-NEXT: 10: 01 08 01 10

llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,7 @@
5353
; CHECK64-NEXT: .vbyte 8, 0x408c200000000000
5454
; CHECK-NEXT: .globl const_chrarray
5555
; CHECK-NEXT: const_chrarray:
56-
; CHECK-NEXT: .byte 97
57-
; CHECK-NEXT: .byte 98
58-
; CHECK-NEXT: .byte 99
59-
; CHECK-NEXT: .byte 100
56+
; CHECK-NEXT: .byte 'a,'b,'c,'d
6057
; CHECK-NEXT: .globl const_dblarr
6158
; CHECK-NEXT: .align 3
6259
; CHECK-NEXT: const_dblarr:

0 commit comments

Comments
 (0)