Further HTTP/1.1 work.

Lukasa · Lukasa · commit 01d412c83a4e · 2015-03-08T09:20:38.000Z
diff --git a/hyper/common/headers.py b/hyper/common/headers.py
@@ -7,7 +7,7 @@
 """
 import collections
 
-from hyper.compat import unicode, bytes, imap
+from hyper.common.util import to_bytestring, to_bytestring_tuple
 
 
 class HTTPHeaderMap(collections.MutableMapping):
@@ -64,18 +64,18 @@ def __init__(self, *args, **kwargs):
         self._items = []
 
         for arg in args:
-            self._items.extend(map(lambda x: _to_bytestring_tuple(*x), arg))
+            self._items.extend(map(lambda x: to_bytestring_tuple(*x), arg))
 
         for k, v in kwargs.items():
-            self._items.append(_to_bytestring_tuple(k, v))
+            self._items.append(to_bytestring_tuple(k, v))
 
     def __getitem__(self, key):
         """
         Unlike the dict __getitem__, this returns a list of items in the order
         they were added. These items are returned in 'canonical form', meaning
         that comma-separated values are split into multiple values.
         """
-        key = _to_bytestring(key)
+        key = to_bytestring(key)
         values = []
 
         for k, v in self._items:
@@ -91,15 +91,15 @@ def __setitem__(self, key, value):
         """
         Unlike the dict __setitem__, this appends to the list of items.
         """
-        self._items.append(_to_bytestring_tuple(key, value))
+        self._items.append(to_bytestring_tuple(key, value))
 
     def __delitem__(self, key):
         """
         Sadly, __delitem__ is kind of stupid here, but the best we can do is
         delete all headers with a given key. To correctly achieve the 'KeyError
         on missing key' logic from dictionaries, we need to do this slowly.
         """
-        key = _to_bytestring(key)
+        key = to_bytestring(key)
         indices = []
         for (i, (k, v)) in enumerate(self._items):
             if _keys_equal(k, key):
@@ -135,7 +135,7 @@ def __contains__(self, key):
         """
         If any header is present with this key, returns True.
         """
-        key = _to_bytestring(key)
+        key = to_bytestring(key)
         return any(_keys_equal(key, k) for k, _ in self._items)
 
     def keys(self):
@@ -205,26 +205,6 @@ def canonical_form(k, v):
             yield k, sub_val.strip()
 
 
-def _to_bytestring(element):
-    """
-    Converts a single string to a bytestring, encoding via UTF-8 if needed.
-    """
-    if isinstance(element, unicode):
-        return element.encode('utf-8')
-    elif isinstance(element, bytes):
-        return element
-    else:
-        raise ValueError("Non string type.")
-
-
-def _to_bytestring_tuple(*x):
-    """
-    Converts the given strings to a bytestring if necessary, returning a
-    tuple.
-    """
-    return tuple(imap(_to_bytestring, x))
-
-
 def _keys_equal(x, y):
     """
     Returns 'True' if the two keys are equal by the laws of HTTP headers.
diff --git a/hyper/common/util.py b/hyper/common/util.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+"""
+hyper/common/util
+~~~~~~~~~~~~~~~~~
+
+General utility functions for use with hyper.
+"""
+from hyper.compat import unicode, bytes, imap
+
+def to_bytestring(element):
+    """
+    Return ``element`` as a bytestring, UTF-8 encoding unicode input first.
+    """
+    if isinstance(element, unicode):
+        return element.encode('utf-8')
+    # Bytestrings pass through untouched; anything else is rejected.
+    if not isinstance(element, bytes):
+        raise ValueError("Non string type.")
+    return element
+
+
+def to_bytestring_tuple(*x):
+    """
+    Run every argument through ``to_bytestring`` and return them as a tuple.
+    """
+    converted = imap(to_bytestring, x)
+    return tuple(converted)
diff --git a/hyper/http11/connection.py b/hyper/http11/connection.py
@@ -7,13 +7,26 @@
 """
 import io
 import logging
+import re
 import socket
 
 from .response import HTTP11Response
 from ..http20.bufsocket import BufferedSocket
+from ..common.headers import HTTPHeaderMap
+from ..common.util import to_bytestring
 
 log = logging.getLogger(__name__)
 
+# Generous parser for an HTTP/1.1 status line: optional leading LWS, the
+# version (must be HTTP/1.1), LWS, a three-digit status code, LWS, then a
+# possibly-empty reason phrase (space separators only) and the line ending.
+# The 'br' prefix is used because Python 2 rejects 'rb'. This is 'good enough'
+# for now; a dedicated parser must beat this regex to be worthwhile.
+STATUS_LINE_REGEX = re.compile(
+    br'[ \t]*HTTP/1\.1[ \t]+(?P<code>\d{3})'
+    br'[ \t]+(?P<reason>[\S ]*?)[ \t]*\r?\n'
+)
+
 
 class HTTP11Connection(object):
     """
@@ -72,6 +85,9 @@ def request(self, method, url, body=None, headers={}):
         :param headers: (optional) The headers to send on the request.
         :returns: Nothing.
         """
+        method = to_bytestring(method)
+        url = to_bytestring(url)
+
         if self._sock is None:
             self.connect()
 
@@ -101,10 +117,18 @@ def get_response(self):
         This is an early beta, so the response object is pretty stupid. That's
         ok, we'll fix it later.
         """
-        headers = {}
-
-        # First read the header line and drop it on the floor.
-        self._sock.readline()
+        headers = HTTPHeaderMap()
+
+        # First read the header line and 'parse' it. This particular part of
+        # the response can safely be parsed by regular expression, so do that.
+        status_line = self._sock.readline()
+        match = STATUS_LINE_REGEX.match(status_line)
+        if match is None:
+            raise RuntimeError("Invalid status line")
+
+        code, reason = int(match.group('code')), match.group('reason')
+        print(code)
+        print(reason)
 
         while True:
             line = self._sock.readline().tobytes()
diff --git a/hyper/http11/response.py b/hyper/http11/response.py
@@ -64,9 +64,9 @@ def __init__(self, headers, sock):
         #: The status code returned by the server.
         self.status = 0
 
-        # The response headers. These are determined upon creation, assigned
-        # once, and never assigned again.
-        self._headers = headers
+        #: The response headers. These are determined upon creation, assigned
+        #: once, and never assigned again.
+        self.headers = headers
 
         # The response trailers. These are always intially ``None``.
         self._trailers = None
@@ -78,6 +78,19 @@ def __init__(self, headers, sock):
         # may need to buffer some for incomplete reads.
         self._data_buffer = b''
 
+        # This object is used for decompressing gzipped response bodies. Right
+        # now we only support gzip because that's all the RFC mandates of us.
+        # Later we'll add support for more encodings.
+        # This 16 + MAX_WBITS nonsense is to force gzip. See this
+        # Stack Overflow answer for more:
+        # http://stackoverflow.com/a/2695466/1401686
+        if b'gzip' in self.headers.get('content-encoding', []):
+            self._decompressobj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+        elif b'deflate' in self.headers.get('content-encoding', []):
+            self._decompressobj = DeflateDecoder()
+        else:
+            self._decompressobj = None
+
     def read(self, amt=None, decode_content=True):
         """
         Reads the response body, or up to the next ``amt`` bytes.
@@ -94,13 +107,13 @@ def read(self, amt=None, decode_content=True):
         # then, read content-length. This obviously doesn't work longer term,
         # we need to do some content-length processing there.
         if amt is None:
-            amt = self.headers.get(b'content-length', 0)
+            amt = int(self.headers.get(b'content-length', [0])[0])
 
         # Return early if we've lost our connection.
         if self._sock is None:
             return b''
 
-        data = self._sock.read(amt)
+        data = self._sock.recv(amt).tobytes()
 
         # We may need to decode the body.
         if decode_content and self._decompressobj and data: