Skip to content

Commit 394ed70

Browse files
committed
Unicode handling in messages and trees was improved. Messages are now written according to the encoding of the commit object and are decoded using that information as well. Trees will encode and decode their names with UTF-8.
1 parent 192472f commit 394ed70

File tree

3 files changed

+21
-2
lines changed

3 files changed

+21
-2
lines changed

lib/git/ext/gitdb

Submodule gitdb updated from 18152fe to 425ecf0

lib/git/objects/commit.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
altzone
3232
)
3333
import os
34+
import sys
3435

3536
__all__ = ('Commit', )
3637

@@ -381,7 +382,13 @@ def _serialize(self, stream):
381382
write("encoding %s\n" % self.encoding)
382383

383384
write("\n")
384-
write(self.message)
385+
386+
# write plain bytes, be sure it's encoded according to our encoding
387+
if isinstance(self.message, unicode):
388+
write(self.message.encode(self.encoding))
389+
else:
390+
write(self.message)
391+
# END handle encoding
385392
return self
386393

387394
def _deserialize(self, stream):
@@ -421,6 +428,11 @@ def _deserialize(self, stream):
421428
# a stream from our data simply gives us the plain message
422429
# The end of our message stream is marked with a newline that we strip
423430
self.message = stream.read()
431+
try:
432+
self.message = self.message.decode(self.encoding)
433+
except Exception:
434+
print >> sys.stderr, "Failed to decode message: %s" % self.message
435+
# END exception handling
424436
return self
425437

426438
#} END serializable implementation

lib/git/objects/fun.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,14 @@ def tree_entries_from_data(data):
6666
while data[i] != '\0':
6767
i += 1
6868
# END while not reached NULL
69+
70+
# default encoding for strings in git is utf8
71+
# Only use the respective unicode object if the byte stream was encoded
6972
name = data[ns:i]
73+
name_enc = name.decode("utf-8")
74+
if len(name) > len(name_enc):
75+
name = name_enc
76+
# END handle encoding
7077

7178
# byte is NULL, get next 20
7279
i += 1

0 commit comments

Comments
 (0)