Skip to content

Commit b5abad5

Browse files
miss-islington authored and serhiy-storchaka committed
[2.7] bpo-32861: urllib.robotparser fix incomplete __str__ methods. (GH-5711) (GH-6795)
The robotparser's __str__ representation now includes wildcard entries. (cherry picked from commit c3fa1f2) Co-authored-by: Michael Lazar <[email protected]>.
1 parent 84fc6c5 commit b5abad5

File tree

4 files changed

+34
-1
lines changed

4 files changed

+34
-1
lines changed

Lib/robotparser.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,10 @@ def can_fetch(self, useragent, url):
160160

161161

162162
def __str__(self):
163-
return ''.join([str(entry) + "\n" for entry in self.entries])
163+
entries = self.entries
164+
if self.default_entry is not None:
165+
entries = entries + [self.default_entry]
166+
return '\n'.join(map(str, entries)) + '\n'
164167

165168

166169
class RuleLine:

Lib/test/test_robotparser.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,31 @@ class DefaultEntryTest(BaseRobotTest, unittest.TestCase):
136136
bad = ['/cyberworld/map/index.html']
137137

138138

139+
class StringFormattingTest(BaseRobotTest, unittest.TestCase):
140+
robots_txt = """\
141+
User-agent: *
142+
Crawl-delay: 1
143+
Request-rate: 3/15
144+
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
145+
146+
# Cybermapper knows where to go.
147+
User-agent: cybermapper
148+
Disallow: /some/path
149+
"""
150+
151+
expected_output = """\
152+
User-agent: cybermapper
153+
Disallow: /some/path
154+
155+
User-agent: *
156+
Disallow: /cyberworld/map/
157+
158+
"""
159+
160+
def test_string_formatting(self):
161+
self.assertEqual(str(self.parser), self.expected_output)
162+
163+
139164
class RobotHandler(BaseHTTPRequestHandler):
140165

141166
def do_GET(self):
@@ -226,6 +251,7 @@ def test_main():
226251
UseFirstUserAgentWildcardTest,
227252
EmptyQueryStringTest,
228253
DefaultEntryTest,
254+
StringFormattingTest,
229255
PasswordProtectedSiteTestCase,
230256
NetworkTestCase)
231257

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -807,6 +807,7 @@ Ben Laurie
807807
Simon Law
808808
Julia Lawall
809809
Chris Lawrence
810+
Michael Lazar
810811
Brian Leair
811812
Mathieu Leduc-Hamel
812813
Amandine Lee
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
The urllib.robotparser's ``__str__`` representation now includes wildcard
2+
entries and the "Crawl-delay" and "Request-rate" fields. Patch by
3+
Michael Lazar.

0 commit comments

Comments
 (0)