@@ -254,68 +254,93 @@ def read(self, size=-1):
254254 # copied once, and another copy of a part of it when it creates the unconsumed
255255 # tail. We have to use it to hand in the appropriate amount of bytes during
256256 # the next read.
257- tail = self ._zip .unconsumed_tail
258- if tail :
259- # move the window, make it as large as size demands. For code-clarity,
260- # we just take the chunk from our map again instead of reusing the unconsumed
261- # tail. The latter one would safe some memory copying, but we could end up
262- # with not getting enough data uncompressed, so we had to sort that out as well.
263- # Now we just assume the worst case, hence the data is uncompressed and the window
264- # needs to be as large as the uncompressed bytes we want to read.
265- self ._cws = self ._cwe - len (tail )
266- self ._cwe = self ._cws + size
267- else :
268- cws = self ._cws
269- self ._cws = self ._cwe
270- self ._cwe = cws + size
271- # END handle tail
272-
273- # if window is too small, make it larger so zip can decompress something
274- if self ._cwe - self ._cws < 8 :
275- self ._cwe = self ._cws + 8
276- # END adjust winsize
277-
278- # takes a slice, but doesn't copy the data, it says ...
279- indata = self ._m [self ._cws :self ._cwe ]
280-
281- # get the actual window end to be sure we don't use it for computations
282- self ._cwe = self ._cws + len (indata )
283- dcompdat = self ._zip .decompress (indata , size )
284- # update the amount of compressed bytes read
285- # We feed possibly overlapping chunks, which is why the unconsumed tail
286- # has to be taken into consideration, as well as the unused data
287- # if we hit the end of the stream
288- # NOTE: Behavior changed in PY2.7 onward, which requires special handling to make the tests work properly.
289- # They are thorough, and I assume it is truly working.
290- # Why is this logic as convoluted as it is ? Please look at the table in
291- # https://github.com/gitpython-developers/gitdb/issues/19 to learn about the test-results.
292- # Basically, on py2.6, you want to use branch 1, whereas on all other python version, the second branch
293- # will be the one that works.
294- # However, the zlib VERSIONs as well as the platform check is used to further match the entries in the
295- # table in the github issue. This is it ... it was the only way I could make this work everywhere.
296- # IT's CERTAINLY GOING TO BITE US IN THE FUTURE ... .
297- if getattr (zlib , 'ZLIB_RUNTIME_VERSION' , zlib .ZLIB_VERSION ) in ('1.2.7' , '1.2.5' ) and not sys .platform == 'darwin' :
298- unused_datalen = len (self ._zip .unconsumed_tail )
299- else :
300- unused_datalen = len (self ._zip .unconsumed_tail ) + len (self ._zip .unused_data )
301- # # end handle very special case ...
302-
303- self ._cbr += len (indata ) - unused_datalen
304- self ._br += len (dcompdat )
257+ #
258+ # Decompress in a loop until we have produced `size` bytes or run out
259+ # of progress. Iteration (instead of recursion) keeps the call bounded
260+ # for streams that consume many input bytes per produced output byte
261+ # (e.g. zlib stored blocks of length zero); the previous recursive
262+ # form blew the stack on inputs > ~1500 empty blocks (issue #120
263+ # follow-up).
264+ dcompdat = b''
265+ while True :
266+ tail = self ._zip .unconsumed_tail
267+ remaining = size - len (dcompdat )
268+ if tail :
269+ # move the window, make it as large as size demands. For code-clarity,
270+ # we just take the chunk from our map again instead of reusing the unconsumed
271+ # tail. The latter one would save some memory copying, but we could end up
272+ # with not getting enough data uncompressed, so we had to sort that out as well.
273+ # Now we just assume the worst case, hence the data is uncompressed and the window
274+ # needs to be as large as the uncompressed bytes we want to read.
275+ self ._cws = self ._cwe - len (tail )
276+ self ._cwe = self ._cws + remaining
277+ else :
278+ cws = self ._cws
279+ self ._cws = self ._cwe
280+ self ._cwe = cws + remaining
281+ # END handle tail
282+
283+ # if window is too small, make it larger so zip can decompress something
284+ if self ._cwe - self ._cws < 8 :
285+ self ._cwe = self ._cws + 8
286+ # END adjust winsize
287+
288+ # takes a slice, but doesn't copy the data, it says ...
289+ indata = self ._m [self ._cws :self ._cwe ]
290+
291+ # get the actual window end to be sure we don't use it for computations
292+ self ._cwe = self ._cws + len (indata )
293+ chunk = self ._zip .decompress (indata , remaining )
294+ # update the amount of compressed bytes read
295+ # We feed possibly overlapping chunks, which is why the unconsumed tail
296+ # has to be taken into consideration, as well as the unused data
297+ # if we hit the end of the stream
298+ # NOTE: Behavior changed in PY2.7 onward, which requires special handling to make the tests work properly.
299+ # They are thorough, and I assume it is truly working.
300+ # Why is this logic as convoluted as it is ? Please look at the table in
301+ # https://github.com/gitpython-developers/gitdb/issues/19 to learn about the test-results.
302+ # Basically, on py2.6, you want to use branch 1, whereas on all other python versions, the second branch
303+ # will be the one that works.
304+ # However, the zlib VERSIONs as well as the platform check is used to further match the entries in the
305+ # table in the github issue. This is it ... it was the only way I could make this work everywhere.
306+ # IT's CERTAINLY GOING TO BITE US IN THE FUTURE ... .
307+ if getattr (zlib , 'ZLIB_RUNTIME_VERSION' , zlib .ZLIB_VERSION ) in ('1.2.7' , '1.2.5' ) and not sys .platform == 'darwin' :
308+ unused_datalen = len (self ._zip .unconsumed_tail )
309+ else :
310+ unused_datalen = len (self ._zip .unconsumed_tail ) + len (self ._zip .unused_data )
311+ # END handle very special case ...
312+
313+ consumed = len (indata ) - unused_datalen
314+ self ._cbr += consumed
315+ self ._br += len (chunk )
316+ if chunk :
317+ if not isinstance (dcompdat , bytearray ):
318+ dcompdat = bytearray (dcompdat )
319+ dcompdat .extend (chunk )
320+
321+ # Stop when we have enough or there is no path to more output.
322+ # `chunk` may legitimately be empty mid-stream when zlib is
323+ # consuming header / dictionary frames; in that case we keep
324+ # iterating as long as we are still feeding zlib new bytes
325+ # (consumed > 0) and zlib has not flagged end-of-stream. The
326+ # compressed_bytes_read() scrub loop drives this same code with
327+ # _br manipulated to 0 past zip EOF; it terminates here because
328+ # `getattr(_zip, 'eof', False)` is True or no compressed bytes
329+ # are consumed. The empty-block recursion attack from issue #120
330+ # follow-up is bounded by the iteration; each empty block does
331+ # consume input, so the loop walks the stream forward a constant
332+ # amount per iteration without growing the call stack.
333+ if len (dcompdat ) >= size or self ._br >= self ._s :
334+ break
335+ zip_eof = getattr (self ._zip , 'eof' , False )
336+ if not chunk and (zip_eof or len (indata ) == 0 or consumed == 0 ):
337+ break
338+ # END iterative decompress
305339
306340 if dat :
307341 dcompdat = dat + dcompdat
308342 # END prepend our cached data
309343
310- # it can happen, depending on the compression, that we get less bytes
311- # than ordered as it needs the final portion of the data as well.
312- # Recursively resolve that.
313- # Note: dcompdat can be empty even though we still appear to have bytes
314- # to read, if we are called by compressed_bytes_read - it manipulates
315- # us to empty the stream
316- if dcompdat and (len (dcompdat ) - len (dat )) < size and self ._br < self ._s :
317- dcompdat += self .read (size - len (dcompdat ))
318- # END handle special case
319344 return dcompdat
320345
321346
0 commit comments