Skip to content

Commit 1364759

Browse files
committed
Merge branch 'unicode'
2 parents 741dfaa + 0019d7d commit 1364759

File tree

5 files changed

+60
-5
lines changed

5 files changed

+60
-5
lines changed

doc/source/changes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
Changelog
33
=========
44

5+
0.3.0 Beta 3
6+
============
7+
* Added unicode support for author names. Commit.author.name is now a unicode object instead of a byte string.
8+
59
0.3.0 Beta 2
610
============
711
* Added python 2.4 support

lib/git/ext/gitdb

Submodule gitdb updated from 425ecf0 to 78665b1

lib/git/objects/commit.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -368,9 +368,14 @@ def _serialize(self, stream):
368368
write("parent %s\n" % p)
369369

370370
a = self.author
371+
aname = a.name
372+
if isinstance(aname, unicode):
373+
aname = aname.encode(self.encoding)
374+
# END handle unicode in name
375+
371376
c = self.committer
372377
fmt = "%s %s <%s> %s %s\n"
373-
write(fmt % ("author", a.name, a.email,
378+
write(fmt % ("author", aname, a.email,
374379
self.authored_date,
375380
altz_to_utctz_str(self.author_tz_offset)))
376381

@@ -425,12 +430,19 @@ def _deserialize(self, stream):
425430
readline()
426431
# END handle encoding
427432

433+
# decode the author's name
434+
try:
435+
self.author.name = self.author.name.decode(self.encoding)
436+
except UnicodeDecodeError:
437+
print >> sys.stderr, "Failed to decode author name: %s" % self.author.name
438+
# END handle author's encoding
439+
428440
# a stream from our data simply gives us the plain message
429441
# The end of our message stream is marked with a newline that we strip
430442
self.message = stream.read()
431443
try:
432444
self.message = self.message.decode(self.encoding)
433-
except Exception:
445+
except UnicodeDecodeError:
434446
print >> sys.stderr, "Failed to decode message: %s" % self.message
435447
# END exception handling
436448
return self

test/git/performance/test_odb.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,18 @@ def test_random_access(self):
4949
st = time()
5050
nb = 0
5151
too_many = 15000
52+
data_bytes = 0
5253
for blob_list in blobs_per_commit:
5354
for blob in blob_list:
54-
blob.data_stream.read()
55+
data_bytes += len(blob.data_stream.read())
5556
# END for each blobsha
5657
nb += len(blob_list)
5758
if nb > too_many:
5859
break
5960
# END for each bloblist
6061
elapsed = time() - st
6162

62-
print >> sys.stderr, "%s: Retrieved %i blob and their data in %g s ( %f blobs / s )" % (type(repo.odb), nb, elapsed, nb / elapsed)
63+
print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed)
6364
results[2].append(elapsed)
6465
# END for each repo type
6566

test/git/test_commit.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
# test_commit.py
23
# Copyright (C) 2008, 2009 Michael Trier ([email protected]) and contributors
34
#
@@ -108,6 +109,14 @@ def check_entries(d):
108109
assert commit.committer_tz_offset == 14400, commit.committer_tz_offset
109110
assert commit.message == "initial project\n"
110111

112+
def test_unicode_actor(self):
113+
# ensure we can parse unicode actors correctly
114+
name = "Üäöß ÄußÉ".decode("utf-8")
115+
assert len(name) == 9
116+
special = Actor._from_string(u"%s <[email protected]>" % name)
117+
assert special.name == name
118+
assert isinstance(special.name, unicode)
119+
111120
def test_traversal(self):
112121
start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff")
113122
first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781")
@@ -233,3 +242,32 @@ def test_serialization(self, rwrepo):
233242
# create all commits of our repo
234243
assert_commit_serialization(rwrepo, '0.1.6')
235244

245+
def test_serialization_unicode_support(self):
246+
assert Commit.default_encoding.lower() == 'utf-8'
247+
248+
# create a commit with unicode in the message, and the author's name
249+
# Verify its serialization and deserialization
250+
cmt = self.rorepo.commit('0.1.6')
251+
assert isinstance(cmt.message, unicode) # it automatically decodes it as such
252+
assert isinstance(cmt.author.name, unicode) # same here
253+
254+
cmt.message = "üäêèß".decode("utf-8")
255+
assert len(cmt.message) == 5
256+
257+
cmt.author.name = "äüß".decode("utf-8")
258+
assert len(cmt.author.name) == 3
259+
260+
cstream = StringIO()
261+
cmt._serialize(cstream)
262+
cstream.seek(0)
263+
assert len(cstream.getvalue())
264+
265+
ncmt = Commit(self.rorepo, cmt.binsha)
266+
ncmt._deserialize(cstream)
267+
268+
assert cmt.author.name == ncmt.author.name
269+
assert cmt.message == ncmt.message
270+
# actually, it can't be printed in a shell as repr wants to have ascii only
271+
# it appears
272+
cmt.author.__repr__()
273+

0 commit comments

Comments
 (0)