Skip to content

Commit 8e83737

Browse files
miss-islingtonencukouvstinnerfrenzymadness
authored
[3.11] gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846) (GH-108209)
gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846) (cherry picked from commit acbd3f9) Co-authored-by: Petr Viktorin <[email protected]> Co-authored-by: Victor Stinner <[email protected]> Co-authored-by: Lumír 'Frenzy' Balhar <[email protected]>
1 parent dd0a1f9 commit 8e83737

File tree

4 files changed

+156
-9
lines changed

4 files changed

+156
-9
lines changed

Doc/library/tarfile.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,11 @@ A ``TarInfo`` object has the following public data attributes:
732732
Name of the target file name, which is only present in :class:`TarInfo` objects
733733
of type :const:`LNKTYPE` and :const:`SYMTYPE`.
734734

735+
For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory
736+
that contains the link.
737+
For hard links (``LNKTYPE``), the *linkname* is relative to the root of
738+
the archive.
739+
735740

736741
.. attribute:: TarInfo.uid
737742
:type: int

Lib/tarfile.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,7 @@ def __init__(self, tarinfo):
741741
class AbsoluteLinkError(FilterError):
742742
def __init__(self, tarinfo):
743743
self.tarinfo = tarinfo
744-
super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
744+
super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
745745

746746
class LinkOutsideDestinationError(FilterError):
747747
def __init__(self, tarinfo, path):
@@ -801,7 +801,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
801801
if member.islnk() or member.issym():
802802
if os.path.isabs(member.linkname):
803803
raise AbsoluteLinkError(member)
804-
target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
804+
if member.issym():
805+
target_path = os.path.join(dest_path,
806+
os.path.dirname(name),
807+
member.linkname)
808+
else:
809+
target_path = os.path.join(dest_path,
810+
member.linkname)
811+
target_path = os.path.realpath(target_path)
805812
if os.path.commonpath([target_path, dest_path]) != dest_path:
806813
raise LinkOutsideDestinationError(member, target_path)
807814
return new_attrs

Lib/test/test_tarfile.py

Lines changed: 139 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3256,10 +3256,12 @@ def __exit__(self, *exc):
32563256
self.bio = None
32573257

32583258
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
3259-
mode=None, **kwargs):
3259+
mode=None, size=None, **kwargs):
32603260
"""Add a member to the test archive. Call within `with`."""
32613261
name = str(name)
32623262
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
3263+
if size is not None:
3264+
tarinfo.size = size
32633265
if mode:
32643266
tarinfo.mode = _filemode_to_int(mode)
32653267
if symlink_to is not None:
@@ -3335,7 +3337,8 @@ def check_context(self, tar, filter):
33353337
raise self.raised_exception
33363338
self.assertEqual(self.expected_paths, set())
33373339

3338-
def expect_file(self, name, type=None, symlink_to=None, mode=None):
3340+
def expect_file(self, name, type=None, symlink_to=None, mode=None,
3341+
size=None):
33393342
"""Check a single file. See check_context."""
33403343
if self.raised_exception:
33413344
raise self.raised_exception
@@ -3364,6 +3367,8 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None):
33643367
self.assertTrue(path.is_fifo())
33653368
else:
33663369
raise NotImplementedError(type)
3370+
if size is not None:
3371+
self.assertEqual(path.stat().st_size, size)
33673372
for parent in path.parents:
33683373
self.expected_paths.discard(parent)
33693374

@@ -3410,8 +3415,15 @@ def test_parent_symlink(self):
34103415
# Test interplaying symlinks
34113416
# Inspired by 'dirsymlink2a' in jwilk/traversal-archives
34123417
with ArchiveMaker() as arc:
3418+
3419+
# `current` links to `.` which is both:
3420+
# - the destination directory
3421+
# - `current` itself
34133422
arc.add('current', symlink_to='.')
3423+
3424+
# effectively points to ./../
34143425
arc.add('parent', symlink_to='current/..')
3426+
34153427
arc.add('parent/evil')
34163428

34173429
if os_helper.can_symlink():
@@ -3453,9 +3465,46 @@ def test_parent_symlink(self):
34533465
def test_parent_symlink2(self):
34543466
# Test interplaying symlinks
34553467
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
3468+
3469+
# Posix and Windows have different pathname resolution:
3470+
# either symlink or a '..' component resolve first.
3471+
# Let's see which we are on.
3472+
if os_helper.can_symlink():
3473+
testpath = os.path.join(TEMPDIR, 'resolution_test')
3474+
os.mkdir(testpath)
3475+
3476+
# testpath/current links to `.` which is all of:
3477+
# - `testpath`
3478+
# - `testpath/current`
3479+
# - `testpath/current/current`
3480+
# - etc.
3481+
os.symlink('.', os.path.join(testpath, 'current'))
3482+
3483+
# we'll test where `testpath/current/../file` ends up
3484+
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3485+
pass
3486+
3487+
if os.path.exists(os.path.join(testpath, 'file')):
3488+
# Windows collapses 'current\..' to '.' first, leaving
3489+
# 'testpath\file'
3490+
dotdot_resolves_early = True
3491+
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3492+
# Posix resolves 'current' to '.' first, leaving
3493+
# 'testpath/../file'
3494+
dotdot_resolves_early = False
3495+
else:
3496+
raise AssertionError('Could not determine link resolution')
3497+
34563498
with ArchiveMaker() as arc:
3499+
3500+
# `current` links to `.` which is both the destination directory
3501+
# and `current` itself
34573502
arc.add('current', symlink_to='.')
3503+
3504+
# `current/parent` is also available as `./parent`,
3505+
# and effectively points to `./../`
34583506
arc.add('current/parent', symlink_to='..')
3507+
34593508
arc.add('parent/evil')
34603509

34613510
with self.check_context(arc.open(), 'fully_trusted'):
@@ -3469,6 +3518,7 @@ def test_parent_symlink2(self):
34693518

34703519
with self.check_context(arc.open(), 'tar'):
34713520
if os_helper.can_symlink():
3521+
# Fail when extracting a file outside destination
34723522
self.expect_exception(
34733523
tarfile.OutsideDestinationError,
34743524
"'parent/evil' would be extracted to "
@@ -3479,10 +3529,24 @@ def test_parent_symlink2(self):
34793529
self.expect_file('parent/evil')
34803530

34813531
with self.check_context(arc.open(), 'data'):
3482-
self.expect_exception(
3483-
tarfile.LinkOutsideDestinationError,
3484-
"""'current/parent' would link to ['"].*['"], """
3485-
+ "which is outside the destination")
3532+
if os_helper.can_symlink():
3533+
if dotdot_resolves_early:
3534+
# Fail when extracting a file outside destination
3535+
self.expect_exception(
3536+
tarfile.OutsideDestinationError,
3537+
"'parent/evil' would be extracted to "
3538+
+ """['"].*evil['"], which is outside """
3539+
+ "the destination")
3540+
else:
3541+
# Fail as soon as we have a symlink outside the destination
3542+
self.expect_exception(
3543+
tarfile.LinkOutsideDestinationError,
3544+
"'current/parent' would link to "
3545+
+ """['"].*outerdir['"], which is outside """
3546+
+ "the destination")
3547+
else:
3548+
self.expect_file('current/')
3549+
self.expect_file('parent/evil')
34863550

34873551
@symlink_test
34883552
def test_absolute_symlink(self):
@@ -3512,12 +3576,30 @@ def test_absolute_symlink(self):
35123576
with self.check_context(arc.open(), 'data'):
35133577
self.expect_exception(
35143578
tarfile.AbsoluteLinkError,
3515-
"'parent' is a symlink to an absolute path")
3579+
"'parent' is a link to an absolute path")
3580+
3581+
def test_absolute_hardlink(self):
3582+
# Test hardlink to an absolute path
3583+
# Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
3584+
with ArchiveMaker() as arc:
3585+
arc.add('parent', hardlink_to=self.outerdir / 'foo')
3586+
3587+
with self.check_context(arc.open(), 'fully_trusted'):
3588+
self.expect_exception(KeyError, ".*foo. not found")
3589+
3590+
with self.check_context(arc.open(), 'tar'):
3591+
self.expect_exception(KeyError, ".*foo. not found")
3592+
3593+
with self.check_context(arc.open(), 'data'):
3594+
self.expect_exception(
3595+
tarfile.AbsoluteLinkError,
3596+
"'parent' is a link to an absolute path")
35163597

35173598
@symlink_test
35183599
def test_sly_relative0(self):
35193600
# Inspired by 'relative0' in jwilk/traversal-archives
35203601
with ArchiveMaker() as arc:
3602+
# points to `../../tmp/moo`
35213603
arc.add('../moo', symlink_to='..//tmp/moo')
35223604

35233605
try:
@@ -3568,6 +3650,56 @@ def test_sly_relative2(self):
35683650
+ """['"].*moo['"], which is outside the """
35693651
+ "destination")
35703652

3653+
@symlink_test
3654+
def test_deep_symlink(self):
3655+
# Test that symlinks and hardlinks inside a directory
3656+
# point to the correct file (`target` of size 3).
3657+
# If links aren't supported we get a copy of the file.
3658+
with ArchiveMaker() as arc:
3659+
arc.add('targetdir/target', size=3)
3660+
# a hardlink's linkname is relative to the archive
3661+
arc.add('linkdir/hardlink', hardlink_to=os.path.join(
3662+
'targetdir', 'target'))
3663+
# a symlink's linkname is relative to the link's directory
3664+
arc.add('linkdir/symlink', symlink_to=os.path.join(
3665+
'..', 'targetdir', 'target'))
3666+
3667+
for filter in 'tar', 'data', 'fully_trusted':
3668+
with self.check_context(arc.open(), filter):
3669+
self.expect_file('targetdir/target', size=3)
3670+
self.expect_file('linkdir/hardlink', size=3)
3671+
if os_helper.can_symlink():
3672+
self.expect_file('linkdir/symlink', size=3,
3673+
symlink_to='../targetdir/target')
3674+
else:
3675+
self.expect_file('linkdir/symlink', size=3)
3676+
3677+
@symlink_test
3678+
def test_chains(self):
3679+
# Test chaining of symlinks/hardlinks.
3680+
# Symlinks are created before the files they point to.
3681+
with ArchiveMaker() as arc:
3682+
arc.add('linkdir/symlink', symlink_to='hardlink')
3683+
arc.add('symlink2', symlink_to=os.path.join(
3684+
'linkdir', 'hardlink2'))
3685+
arc.add('targetdir/target', size=3)
3686+
arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
3687+
arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
3688+
3689+
for filter in 'tar', 'data', 'fully_trusted':
3690+
with self.check_context(arc.open(), filter):
3691+
self.expect_file('targetdir/target', size=3)
3692+
self.expect_file('linkdir/hardlink', size=3)
3693+
self.expect_file('linkdir/hardlink2', size=3)
3694+
if os_helper.can_symlink():
3695+
self.expect_file('linkdir/symlink', size=3,
3696+
symlink_to='hardlink')
3697+
self.expect_file('symlink2', size=3,
3698+
symlink_to='linkdir/hardlink2')
3699+
else:
3700+
self.expect_file('linkdir/symlink', size=3)
3701+
self.expect_file('symlink2', size=3)
3702+
35713703
def test_modes(self):
35723704
# Test how file modes are extracted
35733705
# (Note that the modes are ignored on platforms without working chmod)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`tarfile.data_filter` now takes the location of symlinks into account
2+
when determining their target, so it will no longer reject some valid
3+
tarballs with ``LinkOutsideDestinationError``.

0 commit comments

Comments
 (0)