Skip to content

Commit df8dda6

Browse files
committed
Add methods to data extractor for extracting bytes and fixed length C strings.
Summary: These modifications will be used in D74883. Fixed length C strings can have trailing NULLs or sometimes spaces (BSD archive files), so the fixed length C string defaults to stripping trailing NULLs, but can have the arguments specify to remove one or more kinds of spaces if needed. This is used to extract fixed length C strings from ELF NOTEs in D74883. Reviewers: labath, dblaikie, aprantl Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D74991
1 parent 7f9f027 commit df8dda6

File tree

3 files changed

+118
-0
lines changed

3 files changed

+118
-0
lines changed

llvm/include/llvm/Support/DataExtractor.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,62 @@ class DataExtractor {
141141
/// a default-initialized StringRef will be returned.
142142
StringRef getCStrRef(uint64_t *offset_ptr) const;
143143

144+
/// Extract a fixed length string from \a *OffsetPtr and consume \a Length
145+
/// bytes.
146+
///
147+
/// Returns a StringRef for the string from the data at the offset
148+
/// pointed to by \a OffsetPtr. All \a Length bytes are consumed, so the
149+
/// returned (trimmed) string may be shorter than \a Length.
150+
///
151+
/// \param[in,out] OffsetPtr
152+
/// A pointer to an offset within the data that will be advanced
153+
/// by \a Length bytes if the value is extracted correctly. If the
154+
/// offset is out of bounds or there are not enough bytes to
155+
/// extract this value, the offset will be left
156+
/// unmodified.
157+
///
158+
/// \param[in] Length
159+
/// The width in bytes of the fixed length string field. If there are
160+
/// not enough bytes in the data to extract the full field, the offset
161+
/// will be left unmodified.
162+
///
163+
/// \param[in] TrimChars
164+
/// A set of characters to trim from the end of the string. Fixed length
165+
/// strings are commonly padded at the end with one or more NUL (zero)
166+
/// bytes. Some formats pad with spaces instead, but the default is to
167+
/// trim only NUL characters.
168+
///
169+
/// \return
170+
/// A StringRef for the C string value in the data. If the offset
171+
/// pointed to by \a OffsetPtr is out of bounds, or if the
172+
/// offset plus the length of the C string is out of bounds,
173+
/// a default-initialized StringRef will be returned.
174+
StringRef getFixedLengthString(uint64_t *OffsetPtr,
175+
uint64_t Length, StringRef TrimChars = {"\0", 1}) const;
176+
177+
/// Extract a fixed number of bytes from the specified offset.
178+
///
179+
/// Returns a StringRef for the bytes from the data at the offset
180+
/// pointed to by \a OffsetPtr. Exactly \a Length bytes are returned as-is
181+
/// and the \a OffsetPtr will be advanced by \a Length bytes.
182+
///
183+
/// \param[in,out] OffsetPtr
184+
/// A pointer to an offset within the data that will be advanced
185+
/// by \a Length bytes if the bytes are extracted correctly. If
186+
/// the offset is out of bounds or there are not enough bytes
187+
/// to extract the request, the offset will be left
188+
/// unmodified.
189+
///
190+
/// \param[in] Length
191+
/// The number of bytes to extract. If there are not enough bytes in the
192+
/// data to extract all of the bytes, the offset will be left unmodified.
193+
///
194+
/// \return
195+
/// A StringRef for the extracted bytes. If the offset pointed to by
196+
/// \a OffsetPtr is out of bounds, or if the offset plus the length
197+
/// is out of bounds, a default-initialized StringRef will be returned.
198+
StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length) const;
199+
144200
/// Extract an unsigned integer of size \a byte_size from \a
145201
/// *offset_ptr.
146202
///

llvm/lib/Support/DataExtractor.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,21 @@ StringRef DataExtractor::getCStrRef(uint64_t *offset_ptr) const {
171171
return StringRef();
172172
}
173173

174+
/// Extract a \a Length byte wide string field and strip its trailing padding.
///
/// The whole field is read through getBytes(), so \a *OffsetPtr advances by
/// \a Length on success. When getBytes() rejects the request it returns an
/// empty StringRef and leaves the offset alone; trimming that empty result
/// is harmless, so no separate error path is needed here.
///
/// \param OffsetPtr  In/out offset into the data; advanced by \a Length on
///                   success.
/// \param Length     Width of the fixed length field in bytes.
/// \param TrimChars  Padding characters to strip from the end of the field.
/// \return The field contents without trailing \a TrimChars characters.
StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
                                              uint64_t Length,
                                              StringRef TrimChars) const {
  StringRef Bytes(getBytes(OffsetPtr, Length));
  // Strip padding from the end only, as the declaration documents. Using
  // trim() here would also drop matching characters at the start of the
  // field, silently altering data that is not padding.
  return Bytes.rtrim(TrimChars);
}
180+
181+
/// Return \a Length raw bytes starting at \a *OffsetPtr.
///
/// On success the offset is moved past the returned bytes. If the requested
/// range is not valid for the underlying data, an empty StringRef is
/// returned and \a *OffsetPtr keeps its original value.
StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length) const {
  const uint64_t Start = *OffsetPtr;
  if (isValidOffsetForDataOfSize(Start, Length)) {
    // Commit the new offset only once the range is known to be readable.
    *OffsetPtr = Start + Length;
    return Data.substr(Start, Length);
  }
  return StringRef();
}
188+
174189
uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr,
175190
llvm::Error *Err) const {
176191
assert(*offset_ptr <= Data.size());

llvm/unittests/Support/DataExtractorTest.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,4 +278,51 @@ TEST(DataExtractorTest, size) {
278278
DataExtractor DE2(ArrayRef<uint8_t>(Data), false, 8);
279279
EXPECT_EQ(DE2.size(), sizeof(Data));
280280
}
281+
282+
// Exercises getFixedLengthString() on two 8-byte fixed width fields followed
// by an ordinary NUL terminated C string.
TEST(DataExtractorTest, FixedLengthString) {
  // Layout (the literal is split so that each field width is easy to audit):
  //   [0, 8)   "hello" padded with three NUL bytes
  //   [8, 16)  "world" padded with two spaces and one tab
  //   [16, 21) "hola" plus its NUL terminator
  // The second field must be exactly 8 bytes wide; with a single space it
  // would be 7 bytes and the 8-byte read would swallow the 'h' of "hola".
  const char Data[] = "hello\x00\x00\x00"
                      "world  \t"
                      "hola\x00";
  DataExtractor DE(StringRef(Data, sizeof(Data)-1), false, 8);
  uint64_t Offset = 0;
  StringRef Str;
  // Asking for more bytes than are available must leave Offset untouched and
  // return an empty StringRef. sizeof(Data) counts the implicit terminator,
  // which is one byte more than the extractor was given.
  Str = DE.getFixedLengthString(&Offset, sizeof(Data));
  EXPECT_TRUE(Str.empty());
  EXPECT_EQ(Offset, 0u);

  // Extract a fixed width C string with trailing NUL characters, using the
  // default trim set.
  Str = DE.getFixedLengthString(&Offset, 8);
  EXPECT_EQ(Offset, 8u);
  EXPECT_EQ(Str.size(), 5u);
  EXPECT_EQ(Str, "hello");
  // Extract a fixed width C string with trailing space and tab characters,
  // passing an explicit trim set.
  Str = DE.getFixedLengthString(&Offset, 8, " \t");
  EXPECT_EQ(Offset, 16u);
  EXPECT_EQ(Str.size(), 5u);
  EXPECT_EQ(Str, "world");
  // The offset now sits on a normal C string; make sure the two extraction
  // styles can be mixed.
  Str = DE.getCStrRef(&Offset);
  EXPECT_EQ(Str.size(), 4u);
  EXPECT_EQ(Str, "hola");
}
308+
309+
310+
// Checks that getBytes() returns raw bytes verbatim and rejects oversized
// requests without moving the offset.
TEST(DataExtractorTest, GetBytes) {
  // The payload contains an embedded NUL so that any accidental C string
  // handling would truncate the result and fail the comparison below.
  const char Payload[] = "\x01\x02\x00\x04";
  const StringRef Expected(Payload, sizeof(Payload)-1);
  DataExtractor Extractor(Expected, false, 8);
  uint64_t Cursor = 0;

  // sizeof(Payload) includes the implicit terminator, i.e. one byte more
  // than the extractor holds: the call must return an empty StringRef and
  // leave the cursor where it was.
  StringRef Overrun = Extractor.getBytes(&Cursor, sizeof(Payload));
  EXPECT_TRUE(Overrun.empty());
  EXPECT_EQ(Cursor, 0u);

  // Reading all 4 bytes succeeds, advances the cursor and preserves the
  // embedded NUL.
  StringRef Extracted = Extractor.getBytes(&Cursor, 4);
  EXPECT_EQ(Cursor, 4u);
  EXPECT_EQ(Extracted.size(), 4u);
  EXPECT_EQ(Extracted, Expected);
}
327+
281328
}

0 commit comments

Comments
 (0)