Skip to content

Commit 92e6770

Browse files
committed
Added MemoryDB including an initial test; moved ZippedStoreShaWriter into the stream module, as it was previously just a test helper
1 parent 92ca2e4 commit 92e6770

8 files changed

Lines changed: 169 additions & 25 deletions

File tree

base.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@ def __init__(self, *args):
4141
def sha(self):
4242
return self[0]
4343

44+
@property
45+
def hexsha(self):
46+
""":return: our sha, hex encoded, 40 bytes"""
47+
return to_hex_sha(self[0])
48+
49+
@property
50+
def binsha(self):
51+
""":return: our sha as binary, 20 bytes"""
52+
return to_bin_sha(self[0])
53+
4454
@property
4555
def type(self):
4656
return self[1]

db/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

22
from base import *
33
from loose import *
4+
from mem import *
45
from pack import *
56
from git import *
67
from ref import *

db/mem.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""Contains the MemoryDatabase implementation"""
2+
from loose import LooseObjectDB
3+
from base import (
4+
ObjectDBR,
5+
ObjectDBW
6+
)
7+
8+
from gitdb.base import OStream
9+
from gitdb.util import to_bin_sha
10+
from gitdb.exc import (
11+
BadObject,
12+
UnsupportedOperation
13+
)
14+
from gitdb.stream import (
15+
ZippedStoreShaWriter,
16+
DecompressMemMapReader,
17+
)
18+
19+
__all__ = ("MemoryDB", )
20+
21+
class MemoryDB(ObjectDBR, ObjectDBW):
22+
"""A memory database stores everything to memory, providing fast IO and object
23+
retrieval. It should be used to buffer results and obtain SHAs before writing
24+
it to the actual physical storage, as it allows querying whether an object already
25+
exists in the target storage before introducing actual IO
26+
27+
:note: memory is currently not threadsafe, hence the async methods cannot be used
28+
for storing"""
29+
30+
def __init__(self):
31+
super(MemoryDB, self).__init__()
32+
self._db = LooseObjectDB("path/doesnt/matter")
33+
34+
# maps 20 byte shas to their OStream objects
35+
self._cache = dict()
36+
37+
def set_ostream(self, stream):
38+
raise UnsupportedOperation("MemoryDB's always stream into memory")
39+
40+
def store(self, istream):
41+
zstream = ZippedStoreShaWriter()
42+
self._db.set_ostream(zstream)
43+
44+
istream = self._db.store(istream)
45+
zstream.close() # close to flush
46+
zstream.seek(0)
47+
48+
# don't provide a size, the stream is written in object format, hence the
49+
# header needs decompression
50+
decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
51+
self._cache[istream.binsha] = OStream(istream.sha, istream.type, istream.size, decomp_stream)
52+
53+
return istream
54+
55+
def store_async(self, reader):
56+
raise UnsupportedOperation("MemoryDBs cannot currently be used for async write access")
57+
58+
def has_object(self, sha):
59+
return to_bin_sha(sha) in self._cache
60+
61+
def info(self, sha):
62+
# we always return streams, which are infos as well
63+
return self.stream(sha)
64+
65+
def stream(self, sha):
66+
sha = to_bin_sha(sha)
67+
try:
68+
ostream = self._cache[sha]
69+
# rewind stream for the next one to read
70+
ostream.stream.seek(0)
71+
return ostream
72+
except KeyError:
73+
raise BadObject(sha)
74+
# END exception handling
75+
76+
def size(self):
77+
return len(self._cache)
78+
79+
def sha_iter(self):
80+
return self._cache.iterkeys()

stream.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,39 @@ def sha(self, as_hex = False):
499499

500500
#} END interface
501501

502+
503+
class ZippedStoreShaWriter(Sha1Writer):
504+
"""Remembers everything someone writes to it and generates a sha"""
505+
__slots__ = ('buf', 'zip')
506+
def __init__(self):
507+
Sha1Writer.__init__(self)
508+
self.buf = StringIO()
509+
self.zip = zlib.compressobj(zlib.Z_BEST_SPEED)
510+
511+
def __getattr__(self, attr):
512+
return getattr(self.buf, attr)
513+
514+
def write(self, data):
515+
alen = Sha1Writer.write(self, data)
516+
self.buf.write(self.zip.compress(data))
517+
return alen
518+
519+
def close(self):
520+
self.buf.write(self.zip.flush())
521+
522+
def seek(self, offset, whence=os.SEEK_SET):
523+
"""Seeking currently only supports rewinding written data
524+
Multiple writes are not supported"""
525+
if offset != 0 or whence != os.SEEK_SET:
526+
raise ValueError("Can only seek to position 0")
527+
# END handle offset
528+
self.buf.seek(0)
529+
530+
def getvalue(self):
531+
""":return: the entire content of the underlying buffer (the compressed data), regardless of the current stream position"""
532+
return self.buf.getvalue()
533+
534+
502535
class FDCompressedSha1Writer(Sha1Writer):
503536
"""Digests data written to it, making the sha available, then compress the
504537
data and write it to the file descriptor

test/db/lib.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from async import IteratorReader
2222
from cStringIO import StringIO
23+
from struct import pack
2324

2425

2526
__all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path')
@@ -29,9 +30,35 @@ class TestDBBase(TestBase):
2930

3031
# data
3132
two_lines = "1234\nhello world"
32-
3333
all_data = (two_lines, )
3434

35+
def _assert_object_writing_simple(self, db):
36+
# write a bunch of objects and query their streams and info
37+
null_objs = db.size()
38+
ni = 250
39+
for i in xrange(ni):
40+
data = pack(">L", i)
41+
istream = IStream(str_blob_type, len(data), StringIO(data))
42+
new_istream = db.store(istream)
43+
assert new_istream is istream
44+
assert db.has_object(istream.sha)
45+
46+
info = db.info(istream.sha)
47+
assert isinstance(info, OInfo)
48+
assert info.type == istream.type and info.size == istream.size
49+
50+
stream = db.stream(istream.sha)
51+
assert isinstance(stream, OStream)
52+
assert stream.sha == info.sha and stream.type == info.type
53+
assert stream.read() == data
54+
# END for each item
55+
56+
assert db.size() == null_objs + ni
57+
shas = list(db.sha_iter())
58+
assert len(shas) == db.size()
59+
assert len(shas[0]) == 20
60+
61+
3562
def _assert_object_writing(self, db):
3663
"""General tests to verify object writing, compatible with ObjectDBW
3764
:note: requires write access to the database"""

test/db/test_mem.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from lib import *
2+
from gitdb.db import MemoryDB
3+
4+
class TestMemoryDB(TestDBBase):
5+
6+
def test_writing(self):
7+
mdb = MemoryDB()
8+
9+
# write data
10+
self._assert_object_writing_simple(mdb)

test/lib.py

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22
from gitdb import (
33
OStream,
44
)
5-
from gitdb.stream import Sha1Writer
5+
from gitdb.stream import (
6+
Sha1Writer,
7+
ZippedStoreShaWriter
8+
)
9+
610
from gitdb.util import zlib
711

812
import sys
@@ -140,26 +144,5 @@ def _assert(self):
140144
assert self.args
141145
assert self.myarg
142146

143-
144-
class ZippedStoreShaWriter(Sha1Writer):
145-
"""Remembers everything someone writes to it"""
146-
__slots__ = ('buf', 'zip')
147-
def __init__(self):
148-
Sha1Writer.__init__(self)
149-
self.buf = StringIO()
150-
self.zip = zlib.compressobj(1) # fastest
151-
152-
def __getattr__(self, attr):
153-
return getattr(self.buf, attr)
154-
155-
def write(self, data):
156-
alen = Sha1Writer.write(self, data)
157-
self.buf.write(self.zip.compress(data))
158-
return alen
159-
160-
def close(self):
161-
self.buf.write(self.zip.flush())
162-
163-
164147
#} END stream utilities
165148

test/test_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ def test_streams(self):
3131

3232
# test pack info
3333
# provides type_id
34-
pinfo = OPackInfo(0, 1, blob_id, s)
34+
pinfo = OPackInfo(0, blob_id, s)
3535
assert pinfo.type == str_blob_type
3636
assert pinfo.type_id == blob_id
3737
assert pinfo.pack_offset == 0
3838

39-
dpinfo = ODeltaPackInfo(0, 1, blob_id, s, sha)
39+
dpinfo = ODeltaPackInfo(0, blob_id, s, sha)
4040
assert dpinfo.type == str_blob_type
4141
assert dpinfo.type_id == blob_id
4242
assert dpinfo.delta_info == sha

0 commit comments

Comments
 (0)