Now using the async zlib module if it is available to allow performance gains through multi-threading.

Byron · Byron · commit 0ef86550179b · 2010-06-13T13:44:10.000+02:00
diff --git a/ext/async b/ext/async
@@ -1 +1 @@
-Subproject commit 8cfa2542ed623627b5e2e91072368209710e9370
+Subproject commit 77bf7bef748b019a3a59693cef6d955f74b358ad
diff --git a/fun.py b/fun.py
@@ -6,7 +6,7 @@
 	BadObjectType
 	)
 
-import zlib
+from util import zlib
 decompressobj = zlib.decompressobj
 
 
diff --git a/stream.py b/stream.py
@@ -1,4 +1,4 @@
-import zlib
+
 from cStringIO import StringIO
 import errno
 
@@ -7,7 +7,8 @@
 		to_bin_sha,
 		make_sha,
 		write, 
-		close
+		close,
+		zlib
 	)
 
 __all__ = ('OInfo', 'OStream', 'IStream', 'InvalidOInfo', 'InvalidOStream', 
diff --git a/test/lib.py b/test/lib.py
@@ -3,9 +3,9 @@
 	OStream, 
 	)
 from gitdb.stream import Sha1Writer
+from gitdb.util import zlib
 
 import sys
-import zlib
 import random
 from array import array
 from cStringIO import StringIO
diff --git a/test/performance/test_stream.py b/test/performance/test_stream.py
@@ -1,5 +1,4 @@
 """Performance data streaming performance"""
-
 from lib import TestBigRepoR
 from gitdb.db import *
 from gitdb.stream import *
@@ -53,7 +52,7 @@ def __init__(self, *args):
 
 class TestObjDBPerformance(TestBigRepoR):
 	
-	large_data_size_bytes = 1000*1000*10		# some MiB should do it
+	large_data_size_bytes = 1000*1000*50		# some MiB should do it
 	moderate_data_size_bytes = 1000*1000*1		# just 1 MiB
 	
 	@with_rw_directory
@@ -147,33 +146,39 @@ def istream_iter():
 		
 		print >> sys.stderr, "Threads(%i): Compressed %i KiB of data in loose odb in %f s ( %f Write KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
 		
-		
 		# decompress multiple at once, by reading them
+		# chunk size is not important as the stream will not really be decompressed
+		
+		# until it is read
 		istream_reader = IteratorReader(iter([ i.sha for i in istreams ]))
 		ostream_reader = ldb.stream_async(istream_reader)
 		
 		chunk_task = TestStreamReader(ostream_reader, "chunker", None)
 		output_reader = pool.add_task(chunk_task)
+		output_reader.task().max_chunksize = 1
 		
 		st = time()
 		assert len(output_reader.read(nsios)) == nsios
 		elapsed = time() - st
 		
-		print >> sys.stderr, "Threads(%i): Decompressed %i KiB of data in loose odb in %f s ( %f Write KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
+		print >> sys.stderr, "Threads(%i): Decompressed %i KiB of data in loose odb in %f s ( %f Read KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
 		
 		# store the files, and read them back. For the reading, we use a task 
 		# as well which is chunked into one item per task. Reading all will
 		# very quickly result in two threads handling two bytestreams of 
 		# chained compression/decompression streams
 		reader = IteratorReader(istream_iter())
 		istream_reader = ldb.store_async(reader)
+		istream_reader.task().max_chunksize = 1
 		
 		istream_to_sha = lambda items: [ i.sha for i in items ]
 		istream_reader.set_post_cb(istream_to_sha)
 		
 		ostream_reader = ldb.stream_async(istream_reader)
+		
 		chunk_task = TestStreamReader(ostream_reader, "chunker", None)
 		output_reader = pool.add_task(chunk_task)
+		output_reader.max_chunksize = 1
 		
 		st = time()
 		assert len(output_reader.read(nsios)) == nsios
diff --git a/test/test_stream.py b/test/test_stream.py
@@ -13,14 +13,14 @@
 	NULL_HEX_SHA
 	)
 
+from gitdb.util import zlib
 from gitdb.typ import (
 	str_blob_type
 	)
 
 from cStringIO import StringIO
 import tempfile
 import os
-import zlib
 
 
 
diff --git a/util.py b/util.py
@@ -2,6 +2,12 @@
 import os
 import errno
 
+try:
+	import async.mod.zlib as zlib
+except ImportError:
+	import zlib
+# END try async zlib
+
 from async import ThreadPool
 
 try:

Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,7 @@`
`6`	`6`	`BadObjectType`
`7`	`7`	`)`
`8`	`8`
`9`		`-import zlib`
	`9`	`+from util import zlib`
`10`	`10`	`decompressobj = zlib.decompressobj`
`11`	`11`
`12`	`12`
Original file line number	Diff line number	Diff line change
`@@ -13,14 +13,14 @@`
`13`	`13`	`NULL_HEX_SHA`
`14`	`14`	`)`
`15`	`15`
	`16`	`+from gitdb.util import zlib`
`16`	`17`	`from gitdb.typ import (`
`17`	`18`	`str_blob_type`
`18`	`19`	`)`
`19`	`20`
`20`	`21`	`from cStringIO import StringIO`
`21`	`22`	`import tempfile`
`22`	`23`	`import os`
`23`		`-import zlib`
`24`	`24`
`25`	`25`
`26`	`26`