Skip to content

Commit 0ef8655

Browse files
committed
Now using the async zlib module if it is available to allow performance gains through multi-threading.
1 parent 05cee2e commit 0ef8655

7 files changed

Lines changed: 22 additions & 10 deletions

File tree

ext/async

Submodule async updated from 8cfa254 to 77bf7be

fun.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
BadObjectType
77
)
88

9-
import zlib
9+
from util import zlib
1010
decompressobj = zlib.decompressobj
1111

1212

stream.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import zlib
1+
22
from cStringIO import StringIO
33
import errno
44

@@ -7,7 +7,8 @@
77
to_bin_sha,
88
make_sha,
99
write,
10-
close
10+
close,
11+
zlib
1112
)
1213

1314
__all__ = ('OInfo', 'OStream', 'IStream', 'InvalidOInfo', 'InvalidOStream',

test/lib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
OStream,
44
)
55
from gitdb.stream import Sha1Writer
6+
from gitdb.util import zlib
67

78
import sys
8-
import zlib
99
import random
1010
from array import array
1111
from cStringIO import StringIO

test/performance/test_stream.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
"""Performance data streaming performance"""
2-
32
from lib import TestBigRepoR
43
from gitdb.db import *
54
from gitdb.stream import *
@@ -53,7 +52,7 @@ def __init__(self, *args):
5352

5453
class TestObjDBPerformance(TestBigRepoR):
5554

56-
large_data_size_bytes = 1000*1000*10 # some MiB should do it
55+
large_data_size_bytes = 1000*1000*50 # some MiB should do it
5756
moderate_data_size_bytes = 1000*1000*1 # just 1 MiB
5857

5958
@with_rw_directory
@@ -147,33 +146,39 @@ def istream_iter():
147146

148147
print >> sys.stderr, "Threads(%i): Compressed %i KiB of data in loose odb in %f s ( %f Write KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
149148

150-
151149
# decompress multiple at once, by reading them
150+
# chunk size is not important as the stream will not really be decompressed
151+
152+
# until it's read
152153
istream_reader = IteratorReader(iter([ i.sha for i in istreams ]))
153154
ostream_reader = ldb.stream_async(istream_reader)
154155

155156
chunk_task = TestStreamReader(ostream_reader, "chunker", None)
156157
output_reader = pool.add_task(chunk_task)
158+
output_reader.task().max_chunksize = 1
157159

158160
st = time()
159161
assert len(output_reader.read(nsios)) == nsios
160162
elapsed = time() - st
161163

162-
print >> sys.stderr, "Threads(%i): Decompressed %i KiB of data in loose odb in %f s ( %f Write KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
164+
print >> sys.stderr, "Threads(%i): Decompressed %i KiB of data in loose odb in %f s ( %f Read KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
163165

164166
# store the files, and read them back. For the reading, we use a task
165167
# as well which is chunked into one item per task. Reading all will
166168
# very quickly result in two threads handling two bytestreams of
167169
# chained compression/decompression streams
168170
reader = IteratorReader(istream_iter())
169171
istream_reader = ldb.store_async(reader)
172+
istream_reader.task().max_chunksize = 1
170173

171174
istream_to_sha = lambda items: [ i.sha for i in items ]
172175
istream_reader.set_post_cb(istream_to_sha)
173176

174177
ostream_reader = ldb.stream_async(istream_reader)
178+
175179
chunk_task = TestStreamReader(ostream_reader, "chunker", None)
176180
output_reader = pool.add_task(chunk_task)
181+
output_reader.max_chunksize = 1
177182

178183
st = time()
179184
assert len(output_reader.read(nsios)) == nsios

test/test_stream.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@
1313
NULL_HEX_SHA
1414
)
1515

16+
from gitdb.util import zlib
1617
from gitdb.typ import (
1718
str_blob_type
1819
)
1920

2021
from cStringIO import StringIO
2122
import tempfile
2223
import os
23-
import zlib
2424

2525

2626

util.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
import os
33
import errno
44

5+
try:
6+
import async.mod.zlib as zlib
7+
except ImportError:
8+
import zlib
9+
# END try async zlib
10+
511
from async import ThreadPool
612

713
try:

0 commit comments

Comments
 (0)