@@ -550,6 +550,53 @@ def test_extractfile_attrs(self):
550550 self .assertIs (fobj .seekable (), True )
551551
552552
class ReadSizeRecorder(io.BytesIO):
    # In-memory file that remembers the biggest explicit size any caller
    # handed to read().  Tests inspect max_read_size afterwards to confirm
    # that tarfile never asks for much more data than the archive really
    # contains (on a real file such a request would pre-allocate it).
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Largest non-negative size seen so far; 0 until read() is called
        # with an explicit size.
        self.max_read_size = 0

    def read(self, size=-1):
        # None and negative sizes mean "read until EOF" and are not
        # recorded; only explicit, bounded requests are tracked.
        bounded_request = size is not None and size >= 0
        if bounded_request:
            self.max_read_size = max(self.max_read_size, size)
        return super().read(size)
565+
566+
@support.cpython_only
class ExtendedHeaderMemoryTest(unittest.TestCase):
    # gh-151497: GNU long name/link headers and pax extended headers carry a
    # size field that comes straight from the archive, so it cannot be
    # trusted.  A malicious header may advertise far more data than the file
    # holds; opening such an archive must not issue a single read() for the
    # advertised size (which would pre-allocate it).

    def crafted_archive(self, hdrtype):
        # Build the raw bytes of one GNU-format header of type *hdrtype*
        # whose size field is bogus: no payload follows the header at all.
        header = tarfile.TarInfo("A")
        header.size = 0xFFFFFFFF  # ~4 GiB claimed in a 512-byte header
        header.type = hdrtype
        return header.tobuf(format=tarfile.GNU_FORMAT)

    def check(self, hdrtype):
        # Open the crafted archive through a recording file object and
        # verify the largest read() request stayed within the chunk limit.
        recorder = ReadSizeRecorder(self.crafted_archive(hdrtype))
        try:
            with tarfile.open(fileobj=recorder, mode="r:") as archive:
                archive.getmembers()
        except tarfile.ReadError:
            # Rejecting the truncated archive is acceptable; only the size
            # of the read requests is under test here.
            pass
        # The bogus ~4 GiB size must never reach a single read() call.
        self.assertLessEqual(recorder.max_read_size,
                             tarfile._EXTHEADER_READ_CHUNK)

    def test_gnu_longname_oversized_size(self):
        self.check(tarfile.GNUTYPE_LONGNAME)

    def test_gnu_longlink_oversized_size(self):
        self.check(tarfile.GNUTYPE_LONGLINK)

    def test_pax_header_oversized_size(self):
        self.check(tarfile.XHDTYPE)
598+
599+
553600class MiscReadTestBase (CommonReadTest ):
554601 is_stream = False
555602
0 commit comments