Skip to content

Commit 42deeab

Browse files
encukouvstinnerfrenzymadness
authored
[3.9] gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846) (#108274)
(cherry picked from commit acbd3f9) Co-authored-by: Petr Viktorin <[email protected]> Co-authored-by: Victor Stinner <[email protected]> Co-authored-by: Lumír 'Frenzy' Balhar <[email protected]>
1 parent 4a79328 commit 42deeab

File tree

4 files changed

+154
-9
lines changed

4 files changed

+154
-9
lines changed

Doc/library/tarfile.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,11 @@ A ``TarInfo`` object has the following public data attributes:
732732
Name of the target file name, which is only present in :class:`TarInfo` objects
733733
of type :const:`LNKTYPE` and :const:`SYMTYPE`.
734734

735+
For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory
736+
that contains the link.
737+
For hard links (``LNKTYPE``), the *linkname* is relative to the root of
738+
the archive.
739+
735740

736741
.. attribute:: TarInfo.uid
737742
:type: int

Lib/tarfile.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -740,7 +740,7 @@ def __init__(self, tarinfo):
740740
class AbsoluteLinkError(FilterError):
741741
def __init__(self, tarinfo):
742742
self.tarinfo = tarinfo
743-
super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
743+
super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
744744

745745
class LinkOutsideDestinationError(FilterError):
746746
def __init__(self, tarinfo, path):
@@ -800,7 +800,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
800800
if member.islnk() or member.issym():
801801
if os.path.isabs(member.linkname):
802802
raise AbsoluteLinkError(member)
803-
target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
803+
if member.issym():
804+
target_path = os.path.join(dest_path,
805+
os.path.dirname(name),
806+
member.linkname)
807+
else:
808+
target_path = os.path.join(dest_path,
809+
member.linkname)
810+
target_path = os.path.realpath(target_path)
804811
if os.path.commonpath([target_path, dest_path]) != dest_path:
805812
raise LinkOutsideDestinationError(member, target_path)
806813
return new_attrs

Lib/test/test_tarfile.py

Lines changed: 137 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3169,10 +3169,12 @@ def __exit__(self, *exc):
31693169
self.bio = None
31703170

31713171
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
3172-
mode=None, **kwargs):
3172+
mode=None, size=None, **kwargs):
31733173
"""Add a member to the test archive. Call within `with`."""
31743174
name = str(name)
31753175
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
3176+
if size is not None:
3177+
tarinfo.size = size
31763178
if mode:
31773179
tarinfo.mode = _filemode_to_int(mode)
31783180
if symlink_to is not None:
@@ -3236,7 +3238,8 @@ def check_context(self, tar, filter):
32363238
raise self.raised_exception
32373239
self.assertEqual(self.expected_paths, set())
32383240

3239-
def expect_file(self, name, type=None, symlink_to=None, mode=None):
3241+
def expect_file(self, name, type=None, symlink_to=None, mode=None,
3242+
size=None):
32403243
"""Check a single file. See check_context."""
32413244
if self.raised_exception:
32423245
raise self.raised_exception
@@ -3270,6 +3273,8 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None):
32703273
self.assertTrue(path.is_fifo())
32713274
else:
32723275
raise NotImplementedError(type)
3276+
if size is not None:
3277+
self.assertEqual(path.stat().st_size, size)
32733278
for parent in path.parents:
32743279
self.expected_paths.discard(parent)
32753280

@@ -3315,8 +3320,15 @@ def test_parent_symlink(self):
33153320
# Test interplaying symlinks
33163321
# Inspired by 'dirsymlink2a' in jwilk/traversal-archives
33173322
with ArchiveMaker() as arc:
3323+
3324+
# `current` links to `.` which is both:
3325+
# - the destination directory
3326+
# - `current` itself
33183327
arc.add('current', symlink_to='.')
3328+
3329+
# effectively points to ./../
33193330
arc.add('parent', symlink_to='current/..')
3331+
33203332
arc.add('parent/evil')
33213333

33223334
if support.can_symlink():
@@ -3357,9 +3369,46 @@ def test_parent_symlink(self):
33573369
def test_parent_symlink2(self):
33583370
# Test interplaying symlinks
33593371
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
3372+
3373+
# Posix and Windows have different pathname resolution:
3374+
# either symlink or a '..' component resolve first.
3375+
# Let's see which we are on.
3376+
if support.can_symlink():
3377+
testpath = os.path.join(TEMPDIR, 'resolution_test')
3378+
os.mkdir(testpath)
3379+
3380+
# testpath/current links to `.` which is all of:
3381+
# - `testpath`
3382+
# - `testpath/current`
3383+
# - `testpath/current/current`
3384+
# - etc.
3385+
os.symlink('.', os.path.join(testpath, 'current'))
3386+
3387+
# we'll test where `testpath/current/../file` ends up
3388+
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3389+
pass
3390+
3391+
if os.path.exists(os.path.join(testpath, 'file')):
3392+
# Windows collapses 'current\..' to '.' first, leaving
3393+
# 'testpath\file'
3394+
dotdot_resolves_early = True
3395+
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3396+
# Posix resolves 'current' to '.' first, leaving
3397+
# 'testpath/../file'
3398+
dotdot_resolves_early = False
3399+
else:
3400+
raise AssertionError('Could not determine link resolution')
3401+
33603402
with ArchiveMaker() as arc:
3403+
3404+
# `current` links to `.` which is both the destination directory
3405+
# and `current` itself
33613406
arc.add('current', symlink_to='.')
3407+
3408+
# `current/parent` is also available as `./parent`,
3409+
# and effectively points to `./../`
33623410
arc.add('current/parent', symlink_to='..')
3411+
33633412
arc.add('parent/evil')
33643413

33653414
with self.check_context(arc.open(), 'fully_trusted'):
@@ -3373,6 +3422,7 @@ def test_parent_symlink2(self):
33733422

33743423
with self.check_context(arc.open(), 'tar'):
33753424
if support.can_symlink():
3425+
# Fail when extracting a file outside destination
33763426
self.expect_exception(
33773427
tarfile.OutsideDestinationError,
33783428
"'parent/evil' would be extracted to "
@@ -3383,10 +3433,24 @@ def test_parent_symlink2(self):
33833433
self.expect_file('parent/evil')
33843434

33853435
with self.check_context(arc.open(), 'data'):
3386-
self.expect_exception(
3387-
tarfile.LinkOutsideDestinationError,
3388-
"""'current/parent' would link to ['"].*['"], """
3389-
+ "which is outside the destination")
3436+
if support.can_symlink():
3437+
if dotdot_resolves_early:
3438+
# Fail when extracting a file outside destination
3439+
self.expect_exception(
3440+
tarfile.OutsideDestinationError,
3441+
"'parent/evil' would be extracted to "
3442+
+ """['"].*evil['"], which is outside """
3443+
+ "the destination")
3444+
else:
3445+
# Fail as soon as we have a symlink outside the destination
3446+
self.expect_exception(
3447+
tarfile.LinkOutsideDestinationError,
3448+
"'current/parent' would link to "
3449+
+ """['"].*outerdir['"], which is outside """
3450+
+ "the destination")
3451+
else:
3452+
self.expect_file('current/')
3453+
self.expect_file('parent/evil')
33903454

33913455
def test_absolute_symlink(self):
33923456
# Test symlink to an absolute path
@@ -3415,11 +3479,29 @@ def test_absolute_symlink(self):
34153479
with self.check_context(arc.open(), 'data'):
34163480
self.expect_exception(
34173481
tarfile.AbsoluteLinkError,
3418-
"'parent' is a symlink to an absolute path")
3482+
"'parent' is a link to an absolute path")
3483+
3484+
def test_absolute_hardlink(self):
3485+
# Test hardlink to an absolute path
3486+
# Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
3487+
with ArchiveMaker() as arc:
3488+
arc.add('parent', hardlink_to=self.outerdir / 'foo')
3489+
3490+
with self.check_context(arc.open(), 'fully_trusted'):
3491+
self.expect_exception(KeyError, ".*foo. not found")
3492+
3493+
with self.check_context(arc.open(), 'tar'):
3494+
self.expect_exception(KeyError, ".*foo. not found")
3495+
3496+
with self.check_context(arc.open(), 'data'):
3497+
self.expect_exception(
3498+
tarfile.AbsoluteLinkError,
3499+
"'parent' is a link to an absolute path")
34193500

34203501
def test_sly_relative0(self):
34213502
# Inspired by 'relative0' in jwilk/traversal-archives
34223503
with ArchiveMaker() as arc:
3504+
# points to `../../tmp/moo`
34233505
arc.add('../moo', symlink_to='..//tmp/moo')
34243506

34253507
try:
@@ -3469,6 +3551,54 @@ def test_sly_relative2(self):
34693551
+ """['"].*moo['"], which is outside the """
34703552
+ "destination")
34713553

3554+
def test_deep_symlink(self):
3555+
# Test that symlinks and hardlinks inside a directory
3556+
# point to the correct file (`target` of size 3).
3557+
# If links aren't supported we get a copy of the file.
3558+
with ArchiveMaker() as arc:
3559+
arc.add('targetdir/target', size=3)
3560+
# a hardlink's linkname is relative to the archive
3561+
arc.add('linkdir/hardlink', hardlink_to=os.path.join(
3562+
'targetdir', 'target'))
3563+
# a symlink's linkname is relative to the link's directory
3564+
arc.add('linkdir/symlink', symlink_to=os.path.join(
3565+
'..', 'targetdir', 'target'))
3566+
3567+
for filter in 'tar', 'data', 'fully_trusted':
3568+
with self.check_context(arc.open(), filter):
3569+
self.expect_file('targetdir/target', size=3)
3570+
self.expect_file('linkdir/hardlink', size=3)
3571+
if support.can_symlink():
3572+
self.expect_file('linkdir/symlink', size=3,
3573+
symlink_to='../targetdir/target')
3574+
else:
3575+
self.expect_file('linkdir/symlink', size=3)
3576+
3577+
def test_chains(self):
3578+
# Test chaining of symlinks/hardlinks.
3579+
# Symlinks are created before the files they point to.
3580+
with ArchiveMaker() as arc:
3581+
arc.add('linkdir/symlink', symlink_to='hardlink')
3582+
arc.add('symlink2', symlink_to=os.path.join(
3583+
'linkdir', 'hardlink2'))
3584+
arc.add('targetdir/target', size=3)
3585+
arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
3586+
arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
3587+
3588+
for filter in 'tar', 'data', 'fully_trusted':
3589+
with self.check_context(arc.open(), filter):
3590+
self.expect_file('targetdir/target', size=3)
3591+
self.expect_file('linkdir/hardlink', size=3)
3592+
self.expect_file('linkdir/hardlink2', size=3)
3593+
if support.can_symlink():
3594+
self.expect_file('linkdir/symlink', size=3,
3595+
symlink_to='hardlink')
3596+
self.expect_file('symlink2', size=3,
3597+
symlink_to='linkdir/hardlink2')
3598+
else:
3599+
self.expect_file('linkdir/symlink', size=3)
3600+
self.expect_file('symlink2', size=3)
3601+
34723602
def test_modes(self):
34733603
# Test how file modes are extracted
34743604
# (Note that the modes are ignored on platforms without working chmod)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`tarfile.data_filter` now takes the location of symlinks into account
2+
when determining their target, so it will no longer reject some valid
3+
tarballs with ``LinkOutsideDestinationError``.

0 commit comments

Comments
 (0)