Skip to content

Commit dbb6e22

Browse files
gh-125926: Fix urllib.parse.urljoin() for base URI with undefined authority (GH-125989)
Although this goes beyond the application of RFC 3986, urljoin() should support relative base URIs for backward compatibility.
1 parent 223d3dc commit dbb6e22

File tree

3 files changed

+78
-2
lines changed

3 files changed

+78
-2
lines changed

Lib/test/test_urlparse.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,78 @@ def test_urljoins(self):
623623
self.checkJoin(RFC1808_BASE, 'https:;', 'https:;')
624624
self.checkJoin(RFC1808_BASE, 'https:;x', 'https:;x')
625625

626+
def test_urljoins_relative_base(self):
627+
# According to RFC 3986, Section 5.1, a base URI must conform to
628+
# the absolute-URI syntax rule (Section 4.3). But urljoin() lacks
629+
# a context to establish missing components of the relative base URI.
630+
# It still has to return a sensible result for backwards compatibility.
631+
# The following tests are figments of the imagination and artifacts
632+
# of the current implementation that are not based on any standard.
633+
self.checkJoin('', '', '')
634+
self.checkJoin('', '//', '//', relroundtrip=False)
635+
self.checkJoin('', '//v', '//v')
636+
self.checkJoin('', '//v/w', '//v/w')
637+
self.checkJoin('', '/w', '/w')
638+
self.checkJoin('', '///w', '///w', relroundtrip=False)
639+
self.checkJoin('', 'w', 'w')
640+
641+
self.checkJoin('//', '', '//')
642+
self.checkJoin('//', '//', '//')
643+
self.checkJoin('//', '//v', '//v')
644+
self.checkJoin('//', '//v/w', '//v/w')
645+
self.checkJoin('//', '/w', '///w')
646+
self.checkJoin('//', '///w', '///w')
647+
self.checkJoin('//', 'w', '///w')
648+
649+
self.checkJoin('//a', '', '//a')
650+
self.checkJoin('//a', '//', '//a')
651+
self.checkJoin('//a', '//v', '//v')
652+
self.checkJoin('//a', '//v/w', '//v/w')
653+
self.checkJoin('//a', '/w', '//a/w')
654+
self.checkJoin('//a', '///w', '//a/w')
655+
self.checkJoin('//a', 'w', '//a/w')
656+
657+
for scheme in '', 'http:':
658+
self.checkJoin('http:', scheme + '', 'http:')
659+
self.checkJoin('http:', scheme + '//', 'http:')
660+
self.checkJoin('http:', scheme + '//v', 'http://v')
661+
self.checkJoin('http:', scheme + '//v/w', 'http://v/w')
662+
self.checkJoin('http:', scheme + '/w', 'http:/w')
663+
self.checkJoin('http:', scheme + '///w', 'http:/w')
664+
self.checkJoin('http:', scheme + 'w', 'http:/w')
665+
666+
self.checkJoin('http://', scheme + '', 'http://')
667+
self.checkJoin('http://', scheme + '//', 'http://')
668+
self.checkJoin('http://', scheme + '//v', 'http://v')
669+
self.checkJoin('http://', scheme + '//v/w', 'http://v/w')
670+
self.checkJoin('http://', scheme + '/w', 'http:///w')
671+
self.checkJoin('http://', scheme + '///w', 'http:///w')
672+
self.checkJoin('http://', scheme + 'w', 'http:///w')
673+
674+
self.checkJoin('http://a', scheme + '', 'http://a')
675+
self.checkJoin('http://a', scheme + '//', 'http://a')
676+
self.checkJoin('http://a', scheme + '//v', 'http://v')
677+
self.checkJoin('http://a', scheme + '//v/w', 'http://v/w')
678+
self.checkJoin('http://a', scheme + '/w', 'http://a/w')
679+
self.checkJoin('http://a', scheme + '///w', 'http://a/w')
680+
self.checkJoin('http://a', scheme + 'w', 'http://a/w')
681+
682+
self.checkJoin('/b/c', '', '/b/c')
683+
self.checkJoin('/b/c', '//', '/b/c')
684+
self.checkJoin('/b/c', '//v', '//v')
685+
self.checkJoin('/b/c', '//v/w', '//v/w')
686+
self.checkJoin('/b/c', '/w', '/w')
687+
self.checkJoin('/b/c', '///w', '/w')
688+
self.checkJoin('/b/c', 'w', '/b/w')
689+
690+
self.checkJoin('///b/c', '', '///b/c')
691+
self.checkJoin('///b/c', '//', '///b/c')
692+
self.checkJoin('///b/c', '//v', '//v')
693+
self.checkJoin('///b/c', '//v/w', '//v/w')
694+
self.checkJoin('///b/c', '/w', '///w')
695+
self.checkJoin('///b/c', '///w', '///w')
696+
self.checkJoin('///b/c', 'w', '///b/w')
697+
626698
def test_RFC2732(self):
627699
str_cases = [
628700
('http://Test.python.org:5432/foo/', 'test.python.org', 5432),

Lib/urllib/parse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,9 +577,9 @@ def urljoin(base, url, allow_fragments=True):
577577

578578
if scheme is None:
579579
scheme = bscheme
580-
if scheme != bscheme or scheme not in uses_relative:
580+
if scheme != bscheme or (scheme and scheme not in uses_relative):
581581
return _coerce_result(url)
582-
if scheme in uses_netloc:
582+
if not scheme or scheme in uses_netloc:
583583
if netloc:
584584
return _coerce_result(_urlunsplit(scheme, netloc, path,
585585
query, fragment))
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix :func:`urllib.parse.urljoin` for base URI with undefined authority.
2+
Although :rfc:`3986` only specifies reference resolution for an absolute base
3+
URI, :func:`!urljoin` should continue to return a sensible result for a relative
4+
base URI.

0 commit comments

Comments
 (0)