gitpython-developers
diff --git a/‎db.py‎
Lines changed: 2 additions & 3 deletions b/‎db.py‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎exc.py‎
Lines changed: 14 additions & 0 deletions b/‎exc.py‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎fun.py‎
Lines changed: 1 addition & 1 deletion b/‎fun.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎stream.py‎
Lines changed: 3 additions & 3 deletions b/‎stream.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎test/lib.py‎
Lines changed: 65 additions & 2 deletions b/‎test/lib.py‎
Lines changed: 65 additions & 2 deletions
diff --git a/‎test/performance/lib.py‎
Lines changed: 49 additions & 0 deletions b/‎test/performance/lib.py‎
Lines changed: 49 additions & 0 deletions
diff --git a/‎test/performance/test_db.py‎
Lines changed: 15 additions & 0 deletions b/‎test/performance/test_db.py‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎test/performance/test_stream.py‎
Lines changed: 91 additions & 0 deletions b/‎test/performance/test_stream.py‎
Lines changed: 91 additions & 0 deletions
diff --git a/‎test/test_db.py‎
Lines changed: 15 additions & 13 deletions b/‎test/test_db.py‎
Lines changed: 15 additions & 13 deletions
@@ -1,6 +1,5 @@
 """Contains implementations of database retrieveing objects"""
-from git.utils import IndexFileSHA1Writer
-from git.errors import (
+from exc import (
 	InvalidDBRoot, 
 	BadObject, 
 	BadObjectType
@@ -14,7 +13,7 @@
 		OInfo
 	)
 
-from utils import (
+from util import (
 		ENOENT,
 		to_hex_sha,
 		exists,
 
@@ -0,0 +1,14 @@
+"""Module with common exceptions"""
+
+class ODBError(Exception):
+	"""Base class for all errors raised by the object database"""
+	
+class InvalidDBRoot(ODBError):
+	"""Raised if an object database cannot be initialized at the given path"""
+	
+class BadObject(ODBError):
+	"""Raised if the object with the given SHA does not exist"""
+	
+class BadObjectType(ODBError):
+	"""Raised if the object had an unsupported type"""
+
@@ -2,7 +2,7 @@
 Keeping this code separate from the beginning makes it easier to out-source
 it into c later, if required"""
 
-from git.errors import (
+from exc import (
 	BadObjectType
 	)
 
 
@@ -1,11 +1,11 @@
 import zlib
 from cStringIO import StringIO
-from git.utils import make_sha
 import errno
 
-from utils import (
+from util import (
 		to_hex_sha,
-		to_bin_sha, 
+		to_bin_sha,
+		make_sha,
 		write, 
 		close
 	)
 
@@ -1,12 +1,75 @@
 """Utilities used in ODB testing"""
-from git.odb import (
+from gitdb import (
 	OStream, 
 	)
-from git.odb.stream import Sha1Writer
+from gitdb.stream import Sha1Writer
 
+import sys
 import zlib
+import random
+from array import array
 from cStringIO import StringIO
 
+import unittest
+import tempfile
+import shutil
+import os
+
+
+#{ Bases
+
+class TestBase(unittest.TestCase):
+	"""Base class for all tests; currently adds nothing on top of unittest.TestCase"""
+	
+
+#} END bases
+
+#{ Decorators
+
+def with_rw_directory(func):
+	"""Decorator handing a fresh, writable temporary directory to a test method.
+	
+	The directory is removed if the test succeeds, but left in place otherwise
+	to aid additional debugging.
+	
+	:param func: test method taking ``(self, path)``
+	:return: wrapper taking only ``self`` which returns whatever ``func`` returns"""
+	def wrapper(self):
+		# mkdtemp creates the directory itself; mktemp followed by mkdir leaves
+		# a window in which someone else can claim the generated name
+		path = tempfile.mkdtemp(suffix=func.__name__)
+		try:
+			result = func(self, path)
+		except Exception:
+			print >> sys.stderr, "Test %s.%s failed, output is at %r" % (type(self).__name__, func.__name__, path)
+			raise
+		# END handle exception
+		
+		# Clean up only after success. This must not sit in an 'else' suite
+		# behind a 'return' inside the 'try': returning skips the 'else' suite,
+		# so the directory would never be removed.
+		shutil.rmtree(path)
+		return result
+	# END wrapper
+	
+	wrapper.__name__ = func.__name__
+	return wrapper
+
+
+#} END decorators
+
+#{ Routines
+
+def make_bytes(size_in_bytes, randomize=False):
+	""":return: string with given size in bytes
+	:param size_in_bytes: requested length of the result
+	:param randomize: try to produce a very random stream"""
+	# The payload is built from integers, so the item count is the byte count
+	# divided by 4.
+	# NOTE(review): this assumes array type code 'i' occupies 4 bytes per item;
+	# a size that is not a multiple of 4 presumably yields a shorter result - confirm
+	actual_size = size_in_bytes / 4
+	producer = xrange(actual_size)
+	if randomize:
+		# shuffle works in place on a mutable sequence, so materialize the range first
+		producer = list(producer)
+		random.shuffle(producer)
+	# END randomize
+	a = array('i', producer)
+	return a.tostring()
+
+
+def make_object(type, data):
+	""":return: bytes resembling an uncompressed object
+	:param type: object type name written into the header, e.g. 'blob'
+	:param data: string holding the raw object payload"""
+	# header layout is "<type> <size>\0"; honour the requested type instead of
+	# hard-coding 'blob'
+	odata = "%s %i\0" % (type, len(data))
+	return odata + data
+
+#} END routines
+
 #{ Stream Utilities
 
 class DummyStream(object):
 
@@ -0,0 +1,49 @@
+"""Contains library functions"""
+import os
+from gitdb.test.lib import *
+import shutil
+import tempfile
+
+
+#{ Invariants
+k_env_git_repo = "GITDB_TEST_GIT_REPO_BASE"
+#} END invariants
+
+
+#{ Utilities
+def resolve_or_fail(env_var):
+	""":return: resolved environment variable or raise EnvironmentError
+	:param env_var: name of the environment variable to look up
+	:raise EnvironmentError: if the variable is not set"""
+	try:
+		return os.environ[env_var]
+	except KeyError:
+		raise EnvironmentError("Please set the %r environment variable and retry" % env_var)
+	# END exception handling
+
+#} END utilities
+
+
+#{ Base Classes 
+
+class TestBigRepoR(TestBase):
+	"""TestCase providing access to readonly 'big' repositories using the following 
+	member variables:
+	
+	* gitrepopath
+	
+	 * read-only base path of the git source repository, i.e. .../git/.git"""
+	 
+	#{ Invariants
+	# NOTE(review): presumably commit SHAs inside the repository named by the
+	# environment variable; nothing in this module reads them - confirm
+	head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca'
+	head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5'
+	#} END invariants 
+	
+	# NOTE(review): setUpAll is not a unittest hook (unittest calls setUpClass);
+	# presumably the test runner in use invokes it - confirm
+	@classmethod
+	def setUpAll(cls):
+		"""Resolve the repository path from the environment and store it on the class.
+		
+		:raise EnvironmentError: if the environment variable is not set"""
+		try:
+			super(TestBigRepoR, cls).setUpAll()
+		except AttributeError:
+			# the base classes need not define setUpAll at all
+			pass
+		cls.gitrepopath = resolve_or_fail(k_env_git_repo)
+
+		
+#} END base classes
@@ -0,0 +1,15 @@
+"""Performance tests for object store"""
+
+import sys
+from time import time
+
+from lib import (
+	TestBigRepoR
+	)
+
+class TestGitDBPerformance(TestBigRepoR):
+	"""Placeholder for performance tests of the object database"""
+	
+	def test_random_access(self):
+		# intentionally empty until the test is implemented
+		pass
+		# TODO: use the actual db for this
+		
@@ -0,0 +1,91 @@
+"""Performance tests for data streaming"""
+
+from lib import TestBigRepoR
+from gitdb.db import *
+from gitdb.stream import *
+
+from cStringIO import StringIO
+from time import time
+import os
+import sys
+import stat
+import subprocess
+
+
+from lib import (
+	TestBigRepoR,
+	make_bytes,
+	with_rw_directory
+	)
+
+
+def make_memory_file(size_in_bytes, randomize=False):
+	""":return: tuple(size_of_stream, stream)
+	:param size_in_bytes: size handed on to make_bytes
+	:param randomize: try to produce a very random stream"""
+	d = make_bytes(size_in_bytes, randomize)
+	# report the length of the data actually generated, not the requested size
+	return len(d), StringIO(d)
+
+
+class TestObjDBPerformance(TestBigRepoR):
+	"""Measures write and read throughput of a LooseObjectDB using large in-memory data"""
+	
+	# sizes are decimal multiples of 1000, i.e. MB rather than MiB
+	large_data_size_bytes = 1000*1000*10		# 10 MB should do it
+	moderate_data_size_bytes = 1000*1000*1		# just 1 MB
+	
+	@with_rw_directory
+	def test_large_data_streaming(self, path):
+		"""Store a large blob in a loose object database rooted at ``path``, read it
+		back at once and in chunks, and print the timings to stderr.
+		
+		:param path: writable directory supplied by with_rw_directory"""
+		ldb = LooseObjectDB(path)
+		
+		# randomize is 0 on the first pass and 1 on the second
+		# NOTE(review): every throughput figure below divides by an elapsed time and
+		# would raise ZeroDivisionError if the timer reports 0 - confirm this cannot happen
+		for randomize in range(2):
+			desc = (randomize and 'random ') or ''
+			print >> sys.stderr, "Creating %s data ..." % desc
+			st = time()
+			size, stream = make_memory_file(self.large_data_size_bytes, randomize)
+			elapsed = time() - st
+			print >> sys.stderr, "Done (in %f s)" % elapsed
+			
+			# writing - due to the compression it will seem faster than it is
+			st = time()
+			sha = ldb.store(IStream('blob', size, stream)).sha
+			elapsed_add = time() - st
+			assert ldb.has_object(sha)
+			db_file = ldb.readable_db_object_path(sha)
+			# the values labelled KiB in the messages are units of 1000 bytes
+			fsize_kib = os.path.getsize(db_file) / 1000
+			
+			
+			size_kib = size / 1000
+			print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
+			
+			# reading all at once
+			st = time()
+			ostream = ldb.stream(sha)
+			shadata = ostream.read()
+			elapsed_readall = time() - st
+			
+			# what was read back must equal what was stored
+			stream.seek(0)
+			assert shadata == stream.getvalue()
+			print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
+			
+			
+			# reading in chunks of cs bytes (512 * 1000, not 1 MiB)
+			cs = 512*1000
+			chunks = list()
+			st = time()
+			ostream = ldb.stream(sha)
+			while True:
+				data = ostream.read(cs)
+				chunks.append(data)
+				# a short read marks the end of the stream
+				if len(data) < cs:
+					break
+			# END read in chunks
+			elapsed_readchunks = time() - st
+			
+			stream.seek(0)
+			assert ''.join(chunks) == stream.getvalue()
+			
+			cs_kib = cs / 1000
+			print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
+			
+			# del db file so git has something to do
+			os.remove(db_file)
+			
+		# END for each randomization factor
@@ -1,12 +1,14 @@
 """Test for object db"""
-from test.testlib import *
-from lib import ZippedStoreShaWriter
-
-from git.odb import *
-from git.odb.stream import Sha1Writer
-from git import Blob
-from git.errors import BadObject
+from lib import (
+	with_rw_directory,
+	ZippedStoreShaWriter,
+	TestBase
+	)
 
+from gitdb import *
+from gitdb.stream import Sha1Writer
+from gitdb.exc import BadObject
+from gitdb.typ import str_blob_type
 
 from cStringIO import StringIO
 import os
@@ -36,7 +38,7 @@ def _assert_object_writing(self, db):
 				prev_ostream = db.set_ostream(ostream)
 				assert type(prev_ostream) in ostreams or prev_ostream in ostreams 
 
-				istream = IStream(Blob.type, len(data), StringIO(data))
+				istream = IStream(str_blob_type, len(data), StringIO(data))
 
 				# store returns same istream instance, with new sha set
 				my_istream = db.store(istream)
@@ -48,12 +50,12 @@ def _assert_object_writing(self, db):
 				# verify data - the slow way, we want to run code
 				if not dry_run:
 					info = db.info(sha)
-					assert Blob.type == info.type
+					assert str_blob_type == info.type
 					assert info.size == len(data)
 
 					ostream = db.stream(sha)
 					assert ostream.read() == data
-					assert ostream.type == Blob.type
+					assert ostream.type == str_blob_type
 					assert ostream.size == len(data)
 				else:
 					self.failUnlessRaises(BadObject, db.info, sha)
@@ -81,9 +83,9 @@ def _assert_object_writing(self, db):
 			# END for each data set
 		# END for each dry_run mode
 
-	@with_bare_rw_repo
-	def test_writing(self, rwrepo):
-		ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))
+	@with_rw_directory
+	def test_writing(self, path):
+		ldb = LooseObjectDB(path)
 
 		# write data
 		self._assert_object_writing(ldb)
Original file line number	Diff line number	Diff line change
`@@ -2,7 +2,7 @@`
`2`	`2`	`Keeping this code separate from the beginning makes it easier to out-source`
`3`	`3`	`it into c later, if required"""`
`4`	`4`
`5`		`-from git.errors import (`
	`5`	`+from exc import (`
`6`	`6`	`BadObjectType`
`7`	`7`	`)`
`8`	`8`