Skip to content

Commit 9c6eaa5

Browse files
authored
Merge pull request RustPython#3164 from moreal/correct-pystr-startswith
Fix `PyStr::startswith`
2 parents 9ee566b + 319914d commit 9c6eaa5

File tree

5 files changed

+79
-26
lines changed

5 files changed

+79
-26
lines changed

Lib/test/test_htmlparser.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,6 @@ def test_get_starttag_text(self):
266266
("starttag", "foo:bar", [("one", "1"), ("two", "2")]),
267267
("starttag_text", s)])
268268

269-
# TODO: RUSTPYTHON
270-
@unittest.expectedFailure
271269
def test_cdata_content(self):
272270
contents = [
273271
'<!-- not a comment --> &not-an-entity-ref;',

vm/src/anystr.rs

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,32 @@ pub struct StartsEndsWithArgs {
6161
}
6262

6363
impl StartsEndsWithArgs {
64-
fn get_value(self, len: usize) -> (PyObjectRef, std::ops::Range<usize>) {
65-
let range = adjust_indices(self.start, self.end, len);
64+
pub fn get_value(self, len: usize) -> (PyObjectRef, Option<std::ops::Range<usize>>) {
65+
let range = if self.start.is_some() || self.end.is_some() {
66+
Some(adjust_indices(self.start, self.end, len))
67+
} else {
68+
None
69+
};
6670
(self.affix, range)
6771
}
72+
73+
#[inline]
74+
pub fn prepare<'s, S, F>(self, s: &'s S, len: usize, substr: F) -> Option<(PyObjectRef, &'s S)>
75+
where
76+
S: ?Sized + AnyStr<'s>,
77+
F: Fn(&'s S, std::ops::Range<usize>) -> &'s S,
78+
{
79+
let (affix, range) = self.get_value(len);
80+
let substr = if let Some(range) = range {
81+
if !range.is_normal() {
82+
return None;
83+
}
84+
substr(s, range)
85+
} else {
86+
s
87+
};
88+
Some((affix, substr))
89+
}
6890
}
6991

7092
fn saturate_to_isize(py_int: PyIntRef) -> isize {
@@ -145,7 +167,9 @@ pub trait AnyStr<'s>: 's {
145167
// FIXME: get_chars is expensive for str
146168
fn get_chars(&self, range: std::ops::Range<usize>) -> &Self;
147169
fn bytes_len(&self) -> usize;
148-
// fn chars_len(&self) -> usize; // cannot access to cache here
170+
// NOTE: str::chars().count() consumes the O(n) time. But pystr::char_len does cache.
171+
// So using chars_len directly is too expensive and the below method shouldn't be implemented.
172+
// fn chars_len(&self) -> usize;
149173
fn is_empty(&self) -> bool;
150174

151175
fn py_add(&self, other: &Self) -> Self::Container {
@@ -191,7 +215,7 @@ pub trait AnyStr<'s>: 's {
191215
#[inline]
192216
fn py_startsendswith<T, F>(
193217
&self,
194-
args: StartsEndsWithArgs,
218+
affix: PyObjectRef,
195219
func_name: &str,
196220
py_type_name: &str,
197221
func: F,
@@ -201,14 +225,9 @@ pub trait AnyStr<'s>: 's {
201225
T: TryFromObject,
202226
F: Fn(&Self, &T) -> bool,
203227
{
204-
let (affix, range) = args.get_value(self.bytes_len());
205-
if !range.is_normal() {
206-
return Ok(false);
207-
}
208-
let value = self.get_bytes(range);
209228
single_or_tuple_any(
210229
affix,
211-
&|s: &T| Ok(func(value, s)),
230+
&|s: &T| Ok(func(self, s)),
212231
&|o| {
213232
format!(
214233
"{} first arg must be {} or a tuple of {}, not {}",

vm/src/builtins/bytearray.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,14 @@ impl PyByteArray {
435435

436436
#[pymethod]
437437
fn endswith(&self, options: anystr::StartsEndsWithArgs, vm: &VirtualMachine) -> PyResult<bool> {
438-
self.borrow_buf().py_startsendswith(
439-
options,
438+
let borrowed = self.borrow_buf();
439+
let (affix, substr) =
440+
match options.prepare(&*borrowed, borrowed.len(), |s, r| s.get_bytes(r)) {
441+
Some(x) => x,
442+
None => return Ok(false),
443+
};
444+
substr.py_startsendswith(
445+
affix,
440446
"endswith",
441447
"bytes",
442448
|s, x: &PyBytesInner| s.ends_with(&x.elements[..]),
@@ -450,8 +456,14 @@ impl PyByteArray {
450456
options: anystr::StartsEndsWithArgs,
451457
vm: &VirtualMachine,
452458
) -> PyResult<bool> {
453-
self.borrow_buf().py_startsendswith(
454-
options,
459+
let borrowed = self.borrow_buf();
460+
let (affix, substr) =
461+
match options.prepare(&*borrowed, borrowed.len(), |s, r| s.get_bytes(r)) {
462+
Some(x) => x,
463+
None => return Ok(false),
464+
};
465+
substr.py_startsendswith(
466+
affix,
455467
"startswith",
456468
"bytes",
457469
|s, x: &PyBytesInner| s.starts_with(&x.elements[..]),

vm/src/builtins/bytes.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -275,8 +275,13 @@ impl PyBytes {
275275

276276
#[pymethod]
277277
fn endswith(&self, options: anystr::StartsEndsWithArgs, vm: &VirtualMachine) -> PyResult<bool> {
278-
self.inner.elements[..].py_startsendswith(
279-
options,
278+
let (affix, substr) =
279+
match options.prepare(&self.inner.elements[..], self.len(), |s, r| s.get_bytes(r)) {
280+
Some(x) => x,
281+
None => return Ok(false),
282+
};
283+
substr.py_startsendswith(
284+
affix,
280285
"endswith",
281286
"bytes",
282287
|s, x: &PyBytesInner| s.ends_with(&x.elements[..]),
@@ -290,8 +295,13 @@ impl PyBytes {
290295
options: anystr::StartsEndsWithArgs,
291296
vm: &VirtualMachine,
292297
) -> PyResult<bool> {
293-
self.inner.elements[..].py_startsendswith(
294-
options,
298+
let (affix, substr) =
299+
match options.prepare(&self.inner.elements[..], self.len(), |s, r| s.get_bytes(r)) {
300+
Some(x) => x,
301+
None => return Ok(false),
302+
};
303+
substr.py_startsendswith(
304+
affix,
295305
"startswith",
296306
"bytes",
297307
|s, x: &PyBytesInner| s.starts_with(&x.elements[..]),

vm/src/builtins/pystr.rs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -705,9 +705,14 @@ impl PyStr {
705705
}
706706

707707
#[pymethod]
708-
fn endswith(&self, args: anystr::StartsEndsWithArgs, vm: &VirtualMachine) -> PyResult<bool> {
709-
self.as_str().py_startsendswith(
710-
args,
708+
fn endswith(&self, options: anystr::StartsEndsWithArgs, vm: &VirtualMachine) -> PyResult<bool> {
709+
let (affix, substr) =
710+
match options.prepare(self.as_str(), self.len(), |s, r| s.get_chars(r)) {
711+
Some(x) => x,
712+
None => return Ok(false),
713+
};
714+
substr.py_startsendswith(
715+
affix,
711716
"endswith",
712717
"str",
713718
|s, x: &PyStrRef| s.ends_with(x.as_str()),
@@ -716,9 +721,18 @@ impl PyStr {
716721
}
717722

718723
#[pymethod]
719-
fn startswith(&self, args: anystr::StartsEndsWithArgs, vm: &VirtualMachine) -> PyResult<bool> {
720-
self.as_str().py_startsendswith(
721-
args,
724+
fn startswith(
725+
&self,
726+
options: anystr::StartsEndsWithArgs,
727+
vm: &VirtualMachine,
728+
) -> PyResult<bool> {
729+
let (affix, substr) =
730+
match options.prepare(self.as_str(), self.len(), |s, r| s.get_chars(r)) {
731+
Some(x) => x,
732+
None => return Ok(false),
733+
};
734+
substr.py_startsendswith(
735+
affix,
722736
"startswith",
723737
"str",
724738
|s, x: &PyStrRef| s.starts_with(x.as_str()),

0 commit comments

Comments
 (0)