@@ -338,6 +338,22 @@ def _EndRecData(fpin):
338
338
# Unable to find a valid end of central directory structure
339
339
return None
340
340
341
+ def _sanitize_filename (filename ):
342
+ """Terminate the file name at the first null byte and
343
+ ensure paths always use forward slashes as the directory separator."""
344
+
345
+ # Terminate the file name at the first null byte. Null bytes in file
346
+ # names are used as tricks by viruses in archives.
347
+ null_byte = filename .find (chr (0 ))
348
+ if null_byte >= 0 :
349
+ filename = filename [0 :null_byte ]
350
+ # This is used to ensure paths in generated ZIP files always use
351
+ # forward slashes as the directory separator, as required by the
352
+ # ZIP format specification.
353
+ if os .sep != "/" and os .sep in filename :
354
+ filename = filename .replace (os .sep , "/" )
355
+ return filename
356
+
341
357
342
358
class ZipInfo (object ):
343
359
"""Class with attributes describing each file in the ZIP archive."""
@@ -368,16 +384,9 @@ class ZipInfo (object):
368
384
def __init__ (self , filename = "NoName" , date_time = (1980 ,1 ,1 ,0 ,0 ,0 )):
369
385
self .orig_filename = filename # Original file name in archive
370
386
371
- # Terminate the file name at the first null byte. Null bytes in file
372
- # names are used as tricks by viruses in archives.
373
- null_byte = filename .find (chr (0 ))
374
- if null_byte >= 0 :
375
- filename = filename [0 :null_byte ]
376
- # This is used to ensure paths in generated ZIP files always use
377
- # forward slashes as the directory separator, as required by the
378
- # ZIP format specification.
379
- if os .sep != "/" and os .sep in filename :
380
- filename = filename .replace (os .sep , "/" )
387
+ # Terminate the file name at the first null byte and
388
+ # ensure paths always use forward slashes as the directory separator.
389
+ filename = _sanitize_filename (filename )
381
390
382
391
self .filename = filename # Normalized file name
383
392
self .date_time = date_time # year, month, day, hour, min, sec
@@ -482,7 +491,7 @@ def _encodeFilenameFlags(self):
482
491
except UnicodeEncodeError :
483
492
return self .filename .encode ('utf-8' ), self .flag_bits | _MASK_UTF_FILENAME
484
493
485
- def _decodeExtra (self ):
494
+ def _decodeExtra (self , filename_crc ):
486
495
# Try to decode the extra field.
487
496
extra = self .extra
488
497
unpack = struct .unpack
@@ -508,6 +517,21 @@ def _decodeExtra(self):
508
517
except struct .error :
509
518
raise BadZipFile (f"Corrupt zip64 extra field. "
510
519
f"{ field } not found." ) from None
520
+ elif tp == 0x7075 :
521
+ data = extra [4 :ln + 4 ]
522
+ # Unicode Path Extra Field
523
+ try :
524
+ up_version , up_name_crc = unpack ('<BL' , data [:5 ])
525
+ if up_version == 1 and up_name_crc == filename_crc :
526
+ up_unicode_name = data [5 :].decode ('utf-8' )
527
+ if up_unicode_name :
528
+ self .filename = _sanitize_filename (up_unicode_name )
529
+ else :
530
+ warnings .warn ("Empty unicode path extra field (0x7075)" , stacklevel = 2 )
531
+ except struct .error as e :
532
+ raise BadZipFile ("Corrupt unicode path extra field (0x7075)" ) from e
533
+ except UnicodeDecodeError as e :
534
+ raise BadZipFile ('Corrupt unicode path extra field (0x7075): invalid utf-8 bytes' ) from e
511
535
512
536
extra = extra [ln + 4 :]
513
537
@@ -1409,6 +1433,7 @@ def _RealGetContents(self):
1409
1433
if self .debug > 2 :
1410
1434
print (centdir )
1411
1435
filename = fp .read (centdir [_CD_FILENAME_LENGTH ])
1436
+ orig_filename_crc = crc32 (filename )
1412
1437
flags = centdir [_CD_FLAG_BITS ]
1413
1438
if flags & _MASK_UTF_FILENAME :
1414
1439
# UTF-8 file names extension
@@ -1432,8 +1457,7 @@ def _RealGetContents(self):
1432
1457
x ._raw_time = t
1433
1458
x .date_time = ( (d >> 9 )+ 1980 , (d >> 5 )& 0xF , d & 0x1F ,
1434
1459
t >> 11 , (t >> 5 )& 0x3F , (t & 0x1F ) * 2 )
1435
-
1436
- x ._decodeExtra ()
1460
+ x ._decodeExtra (orig_filename_crc )
1437
1461
x .header_offset = x .header_offset + concat
1438
1462
self .filelist .append (x )
1439
1463
self .NameToInfo [x .filename ] = x
0 commit comments