-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][debug] Handle allocatable strings. #95906
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The allocatable strings also use DIStringType but provide DWARF expressions to find the location and length of the string. With this change in place, the debugging of the allocatable strings looks like this:
character(len=:), allocatable :: first
character(len=:), allocatable :: second
character(len=:), allocatable :: third
first = 'Mount'
second = 'Everest'
third = first // " " // second
print *, third
(gdb) p third
$1 = ""
(gdb) n
18 print *, third
(gdb) p third
$2 = 'Mount Everest'
(gdb) ptype third
type = character (13)
@llvm/pr-subscribers-flang-fir-hlfir Author: Abid Qadeer (abidh) Changes: The allocatable strings also use DIStringType but provide dwarf expressions to find the location and length of the string. With this change in place, the debugging of the allocatable strings looks like this: character(len=:), allocatable :: first first = 'Mount' (gdb) p third Full diff: https://github.com/llvm/llvm-project/pull/95906.diff 4 Files Affected:
diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
index 407ecc8e327b4..996b2c43a1159 100644
--- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
@@ -61,9 +61,11 @@ DebugTypeGenerator::DebugTypeGenerator(mlir::ModuleOp m)
// descriptors like lower_bound and extent for each dimension.
mlir::Type llvmDimsType = getDescFieldTypeModel<kDimsPosInBox>()(context);
mlir::Type llvmPtrType = getDescFieldTypeModel<kAddrPosInBox>()(context);
+ mlir::Type llvmLenType = getDescFieldTypeModel<kElemLenPosInBox>()(context);
dimsOffset = getComponentOffset<kDimsPosInBox>(*dl, context, llvmDimsType);
dimsSize = dl->getTypeSize(llvmDimsType);
ptrSize = dl->getTypeSize(llvmPtrType);
+ lenOffset = getComponentOffset<kElemLenPosInBox>(*dl, context, llvmLenType);
}
static mlir::LLVM::DITypeAttr genBasicType(mlir::MLIRContext *context,
@@ -192,10 +194,8 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType(
mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType(
fir::CharacterType charTy, mlir::LLVM::DIFileAttr fileAttr,
- mlir::LLVM::DIScopeAttr scope, mlir::Location loc) {
+ mlir::LLVM::DIScopeAttr scope, mlir::Location loc, bool allocatable) {
mlir::MLIRContext *context = module.getContext();
- if (!charTy.hasConstantLen())
- return genPlaceholderType(context);
// DWARF 5 says the following about the character encoding in 5.1.1.2.
// "DW_ATE_ASCII and DW_ATE_UCS specify encodings for the Fortran 2003
@@ -205,16 +205,38 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType(
if (charTy.getFKind() != 1)
encoding = llvm::dwarf::DW_ATE_UCS;
+ uint64_t sizeInBits = 0;
+ mlir::LLVM::DIExpressionAttr lenExpr = nullptr;
+ mlir::LLVM::DIExpressionAttr locExpr = nullptr;
+
+ if (allocatable) {
+ llvm::SmallVector<mlir::LLVM::DIExpressionElemAttr> ops;
+ auto addOp = [&](unsigned opc, llvm::ArrayRef<uint64_t> vals) {
+ ops.push_back(mlir::LLVM::DIExpressionElemAttr::get(context, opc, vals));
+ };
+ addOp(llvm::dwarf::DW_OP_push_object_address, {});
+ addOp(llvm::dwarf::DW_OP_plus_uconst, {lenOffset});
+ lenExpr = mlir::LLVM::DIExpressionAttr::get(context, ops);
+ ops.clear();
+
+ addOp(llvm::dwarf::DW_OP_push_object_address, {});
+ addOp(llvm::dwarf::DW_OP_deref, {});
+ locExpr = mlir::LLVM::DIExpressionAttr::get(context, ops);
+ } else if (charTy.hasConstantLen()) {
+ sizeInBits =
+ charTy.getLen() * kindMapping.getCharacterBitsize(charTy.getFKind());
+ } else {
+ return genPlaceholderType(context);
+ }
+
// FIXME: Currently the DIStringType in llvm does not have the option to set
// type of the underlying character. This restricts out ability to represent
// string with non-default characters. Please see issue #95440 for more
// details.
return mlir::LLVM::DIStringTypeAttr::get(
context, llvm::dwarf::DW_TAG_string_type,
- mlir::StringAttr::get(context, ""),
- charTy.getLen() * kindMapping.getCharacterBitsize(charTy.getFKind()),
- /*alignInBits=*/0, /*stringLength=*/nullptr,
- /*stringLengthExp=*/nullptr, /*stringLocationExp=*/nullptr, encoding);
+ mlir::StringAttr::get(context, ""), sizeInBits, /*alignInBits=*/0,
+ /*stringLength=*/nullptr, lenExpr, locExpr, encoding);
}
mlir::LLVM::DITypeAttr DebugTypeGenerator::convertPointerLikeType(
@@ -229,6 +251,9 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertPointerLikeType(
if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(elTy))
return convertBoxedSequenceType(seqTy, fileAttr, scope, loc, genAllocated,
genAssociated);
+ if (auto charTy = mlir::dyn_cast_or_null<fir::CharacterType>(elTy))
+ return convertCharacterType(charTy, fileAttr, scope, loc,
+ /*allocatable=*/true);
mlir::LLVM::DITypeAttr elTyAttr = convertType(elTy, fileAttr, scope, loc);
@@ -274,7 +299,8 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr,
} else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(Ty)) {
return convertSequenceType(seqTy, fileAttr, scope, loc);
} else if (auto charTy = mlir::dyn_cast_or_null<fir::CharacterType>(Ty)) {
- return convertCharacterType(charTy, fileAttr, scope, loc);
+ return convertCharacterType(charTy, fileAttr, scope, loc,
+ /*allocatable=*/false);
} else if (auto boxTy = mlir::dyn_cast_or_null<fir::BoxType>(Ty)) {
auto elTy = boxTy.getElementType();
if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(elTy))
diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
index 7816363e98821..567f26aa41912 100644
--- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
@@ -48,7 +48,8 @@ class DebugTypeGenerator {
mlir::LLVM::DITypeAttr convertCharacterType(fir::CharacterType charTy,
mlir::LLVM::DIFileAttr fileAttr,
mlir::LLVM::DIScopeAttr scope,
- mlir::Location loc);
+ mlir::Location loc,
+ bool allocatable);
mlir::LLVM::DITypeAttr
convertPointerLikeType(mlir::Type elTy, mlir::LLVM::DIFileAttr fileAttr,
@@ -60,6 +61,7 @@ class DebugTypeGenerator {
std::uint64_t dimsSize;
std::uint64_t dimsOffset;
std::uint64_t ptrSize;
+ std::uint64_t lenOffset;
};
} // namespace fir
diff --git a/flang/test/Integration/debug-char-type-1.f90 b/flang/test/Integration/debug-char-type-1.f90
index a0aebd3125c6e..5068663aa9e28 100644
--- a/flang/test/Integration/debug-char-type-1.f90
+++ b/flang/test/Integration/debug-char-type-1.f90
@@ -2,6 +2,7 @@
module helper
character(len=40) :: str
+ character(len=:), allocatable :: str2
end module helper
program test
@@ -11,11 +12,14 @@ program test
first = '3.14 = π'
second = 'Fortran'
str = 'Hello World!'
+ str2 = 'A quick brown fox jumps over a lazy dog'
end program test
! CHECK-DAG: !DIGlobalVariable(name: "str"{{.*}}type: ![[TY40:[0-9]+]]{{.*}})
! CHECK-DAG: ![[TY40]] = !DIStringType(size: 320, encoding: DW_ATE_ASCII)
+! CHECK-DAG: !DIGlobalVariable(name: "str2"{{.*}}type: ![[TY:[0-9]+]]{{.*}})
+! CHECK-DAG: ![[TY]] = !DIStringType(stringLengthExpression: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 8), stringLocationExpression: !DIExpression(DW_OP_push_object_address, DW_OP_deref), encoding: DW_ATE_ASCII)
! CHECK-DAG: !DILocalVariable(name: "first"{{.*}}type: ![[TY8:[0-9]+]])
! CHECK-DAG: ![[TY8]] = !DIStringType(size: 256, encoding: DW_ATE_UCS)
! CHECK-DAG: !DILocalVariable(name: "second"{{.*}}type: ![[TY10:[0-9]+]])
-! CHECK-DAG: ![[TY10]] = !DIStringType(size: 80, encoding: DW_ATE_ASCII)
\ No newline at end of file
+! CHECK-DAG: ![[TY10]] = !DIStringType(size: 80, encoding: DW_ATE_ASCII)
diff --git a/flang/test/Transforms/debug-char-type-1.fir b/flang/test/Transforms/debug-char-type-1.fir
index cdce3b7b8b334..630b52d96cb85 100644
--- a/flang/test/Transforms/debug-char-type-1.fir
+++ b/flang/test/Transforms/debug-char-type-1.fir
@@ -9,6 +9,12 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<>} {
%0 = fir.zero_bits !fir.char<4,20>
fir.has_value %0 : !fir.char<4,20>
} loc(#loc1)
+ fir.global @_QMhelperEstr3 : !fir.box<!fir.heap<!fir.char<1,?>>> {
+ %c0 = arith.constant 0 : index
+ %0 = fir.zero_bits !fir.heap<!fir.char<1,?>>
+ %1 = fir.embox %0 typeparams %c0 : (!fir.heap<!fir.char<1,?>>, index) -> !fir.box<!fir.heap<!fir.char<1,?>>>
+ fir.has_value %1 : !fir.box<!fir.heap<!fir.char<1,?>>>
+ } loc(#loc1)
}
#loc1 = loc("string.f90":1:1)
@@ -16,4 +22,5 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<>} {
// CHECK-DAG: #llvm.di_global_variable<{{.*}}name = "str1"{{.*}}type = #[[TY1]]{{.*}}>
// CHECK-DAG: #[[TY2:.*]] = #llvm.di_string_type<tag = DW_TAG_string_type, name = "", sizeInBits = 640, encoding = DW_ATE_UCS>
// CHECK-DAG: #llvm.di_global_variable<{{.*}}name = "str2"{{.*}}type = #[[TY2]]{{.*}}>
-
+// CHECK-DAG: #[[TY3:.*]] = #llvm.di_string_type<tag = DW_TAG_string_type{{.*}}stringLengthExp = <[DW_OP_push_object_address, DW_OP_plus_uconst(8)]>, stringLocationExp = <[DW_OP_push_object_address, DW_OP_deref]>, encoding = DW_ATE_ASCII>
+// CHECK-DAG: #llvm.di_global_variable<{{.*}}name = "str3"{{.*}}type = #[[TY3]]{{.*}}>
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, minor comment/question inlined.
@@ -192,10 +194,8 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType( | |||
|
|||
mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType( | |||
fir::CharacterType charTy, mlir::LLVM::DIFileAttr fileAttr, | |||
mlir::LLVM::DIScopeAttr scope, mlir::Location loc) { | |||
mlir::LLVM::DIScopeAttr scope, mlir::Location loc, bool allocatable) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suspect you will have to make this name a bit more generic (something like 'hasDescriptor') for a later patch with debug info for CHARACTER POINTER (and assumed shape character arrays).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is a good observation. I have fixed the name.
A question unrelated to this PR. You mentioned assumed shape character arrays. Do you mean something like `str` in the `ff` below? I have been working on it and I had to follow `typeparams` on `fircg.ext_declare` to get the length. Essentially, I create an artificial variable in debug info which will point to the 2nd result of `unboxchar` and tell DWARF that this variable contains the length of the string. The length of the string in `DIStringType` can be described by an integer, an expression or a variable.
I was wondering if my approach to get length of the string in this case seems right to you?
program test
character(len=20) :: first;
first="Hello World!"
call ff(first)
contains
subroutine ff(str)
character(len=*) :: str
str="Bye"
print *, str
end subroutine ff
end program test
func.func private @_QFPff(%arg0: !fir.ref<!fir.char<1,?>> {fir.bindc_name = "str"}, %arg1: i64 ) {
%0 = fir.emboxchar %arg0, %arg1 : (!fir.ref<!fir.char<1,?>>, i64) -> !fir.boxchar<1>
%1 = fir.undefined !fir.dscope
%2:2 = fir.unboxchar %0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
%3 = fircg.ext_declare %2#0 typeparams %2#1 dummy_scope %1 {uniq_name = "_QFFffEstr"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,?>> loc(#loc8)
...
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I meant more the assumed shape array cases where the length is inside the descriptor:
subroutine test(c)
character(*) :: c(:)
c = "hello"
print *, c(1)
end subroutine
I was wondering if my approach to get length of the string in this case seems right to you?
When you say "create an artificial variable", do you mean to create an alloca for it, or to use something like llvm.dbg.value? I would favor the dbg.value if that works. In any case, yes, the `typeparams` argument is the one to use for the length here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is indeed a tricky case. The issue here is that the same descriptor is needed to generate the length expression of the array type and also of its members, which are character types. I am not sure at the moment what the best way to handle it is. I tried this case with gfortran and classic flang and I could not get the correct value of `c` in GDB with either of them. I will open a ticket to track this issue.
Rename allocatable parameter to hasDescriptor.
Any further comment on this? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM once Jean is happy
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, LGTM
The handling of `PointerType` is similar to `HeapType`. The only difference is that allocated flag is generated for `HeapType` and associated flag for `PointerType`. The tests for pointer to allocatable strings are disabled for now. I will enable them once #95906 is merged. The debugging in GDB looks like this: integer, pointer :: par2(:) integer, target, allocatable :: ar2(:) integer, target :: sc integer, pointer :: psc allocate(ar2(4)) par2 => ar2 psc => sc 19 par2 => ar2 (gdb) p par2 $3 = <not associated> (gdb) n 20 do i=1,5 (gdb) p par2 $4 = (0, 0, 0, 0) (gdb) ptype par2 type = integer (4) (gdb) p sc $5 = 3 (gdb) p psc $6 = (PTR TO -> ( integer )) 0x7fffffffda24 (gdb) p *psc $7 = 3
The allocatable strings also use DIStringType but provide dwarf expressions to find the location and length of the string. With this change in place, the debugging of the allocatable strings looks like this: character(len=:), allocatable :: first character(len=:), allocatable :: second character(len=:), allocatable :: third first = 'Mount' second = 'Everest' third = first // " " // second print *, third (gdb) p third $1 = "" (gdb) n 18 print *, third (gdb) p third $2 = 'Mount Everest' (gdb) ptype third type = character (13)
The handling of `PointerType` is similar to `HeapType`. The only difference is that allocated flag is generated for `HeapType` and associated flag for `PointerType`. The tests for pointer to allocatable strings are disabled for now. I will enable them once llvm#95906 is merged. The debugging in GDB looks like this: integer, pointer :: par2(:) integer, target, allocatable :: ar2(:) integer, target :: sc integer, pointer :: psc allocate(ar2(4)) par2 => ar2 psc => sc 19 par2 => ar2 (gdb) p par2 $3 = <not associated> (gdb) n 20 do i=1,5 (gdb) p par2 $4 = (0, 0, 0, 0) (gdb) ptype par2 type = integer (4) (gdb) p sc $5 = 3 (gdb) p psc $6 = (PTR TO -> ( integer )) 0x7fffffffda24 (gdb) p *psc $7 = 3
The allocatable strings also use DIStringType but provide dwarf expressions to find the location and length of the string. With this change in place, the debugging of the allocatable strings looks like this:
character(len=:), allocatable :: first
character(len=:), allocatable :: second
character(len=:), allocatable :: third
first = 'Mount'
second = 'Everest'
third = first // " " // second
print *, third
(gdb) p third
$1 = ""
(gdb) n
18 print *, third
(gdb) p third
$2 = 'Mount Everest'
(gdb) ptype third
type = character (13)