From c10635fad6da9c4ff0fa7905b5f0107cb3616bb6 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 26 Aug 2013 18:02:48 +0100
Subject: [PATCH] Add examples using urllib2 and urllib.request for HTTP
 Content-Type

I've seen many people use html5lib while completely ignoring any
character encoding given at the HTTP layer. It would be better to lead
them in the right direction.
---
 README.rst | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/README.rst b/README.rst
index ddcfa672..9e0a0f74 100644
--- a/README.rst
+++ b/README.rst
@@ -41,6 +41,29 @@ a treebuilder:
   with open("mydocument.html", "rb") as f:
       lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
 
+When using with ``urllib2`` (Python 2), the charset from HTTP should be
+passed into html5lib as follows:
+
+.. code-block:: python
+
+  from contextlib import closing
+  from urllib2 import urlopen
+  import html5lib
+
+  with closing(urlopen("http://example.com/")) as f:
+      document = html5lib.parse(f, encoding=f.info().getparam("charset"))
+
+When using with ``urllib.request`` (Python 3), the charset from HTTP
+should be passed into html5lib as follows:
+
+.. code-block:: python
+
+  from urllib.request import urlopen
+  import html5lib
+
+  with urlopen("http://example.com/") as f:
+      document = html5lib.parse(f, encoding=f.info().get_content_charset())
+
 To have more control over the parser, create a parser object explicitly.
 For instance, to make the parser raise exceptions on parse errors, use: