77httplib/http.client.
88"""
99import logging
10+ import zlib
1011
1112log = logging .getLogger (__name__ )
1213
@@ -74,6 +75,28 @@ def __init__(self, code, reason, headers, sock):
7475 # The socket this response is being sent over.
7576 self ._sock = sock
7677
78+ # Whether we expect the connection to be closed. If we do, we don't
79+ # bother checking for content-length, we just keep reading until
80+ # we no longer can.
81+ self ._expect_close = False
82+ if b'close' in self .headers .get ('connection' , []):
83+ self ._expect_close = True
84+
85+ # The expected length of the body.
86+ try :
87+ self ._length = int (self .headers .get (b'content-length' , [0 ])[0 ])
88+ except KeyError :
89+ self ._length = None
90+
91+ # Whether we expect a chunked response.
92+ self ._chunked = b'chunked' in self .headers .get ('transfer-encoding' , [])
93+
94+ # One of the following must be true: we must expect that the connection
95+ # will be closed following the body, or that a content-length was sent,
96+ # or that we're getting a chunked response.
97+ # FIXME: Remove naked assert, replace with something better.
98+ assert self ._expect_close or self ._length is not None or self ._chunked
99+
77100 # This object is used for decompressing gzipped request bodies. Right
78101 # now we only support gzip because that's all the RFC mandates of us.
79102 # Later we'll add support for more encodings.
@@ -99,29 +122,70 @@ def read(self, amt=None, decode_content=True):
99122 ``True``, the actual amount of data returned may be different to
100123 the amount requested.
101124 """
102- # For now, just read what we're asked, unless we're not asked:
103- # then, read content-length. This obviously doesn't work longer term,
104- # we need to do some content-length processing there.
105- if amt is None :
106- amt = int (self .headers .get (b'content-length' , [0 ])[0 ])
107-
108125 # Return early if we've lost our connection.
109126 if self ._sock is None :
110127 return b''
111128
112- data = self ._sock .recv (amt ).tobytes ()
129+ # FIXME: Handle chunked transfer encoding
130+ assert not self ._chunked
131+
132+ # If we're asked to do a read without a length, we need to read
133+ # everything. That means either the entire content length, or until the
134+ # socket is closed, depending.
135+ if amt is None :
136+ if self ._length is not None :
137+ amt = self ._length
138+ elif self ._expect_close :
139+ return self ._read_expect_closed ()
140+ else :
141+ raise RuntimeError ("Unbounded read!" )
142+
143+ # Otherwise, we've been asked to do a bounded read. We should read no
144+ # more than the remaining length, obviously.
145+ # FIXME: Handle cases without _length
146+ assert self ._length is not None
147+ amt = min (amt , self ._length )
148+
149+ # If we are now going to read nothing, exit early.
150+ if not amt :
151+ return b''
152+
153+ # Now, issue reads until we read that length. This is to account for
154+ # the fact that it's possible that we'll be asked to read more than
155+ # 65kB in one shot.
156+ to_read = amt
157+ chunks = []
158+
159+ # Ideally I'd like this to read 'while to_read', but I want to be
160+ # defensive against the admittedly unlikely case that the socket
161+ # returns *more* data than I want.
162+ while to_read > 0 :
163+ chunk = self ._sock .recv (amt ).tobytes ()
164+
165+ # If we got an empty read, but were expecting more, the remote end
166+ # has hung up. Raise an exception.
167+ # FIXME: Real exception, not RuntimeError
168+ if not chunk :
169+ self .close ()
170+ raise RuntimeError ("Remote end hung up!" )
171+
172+ to_read -= len (chunk )
173+ chunks .append (chunk )
174+
175+ data = b'' .join (chunks )
176+ self ._length -= len (data )
113177
114178 # We may need to decode the body.
115179 if decode_content and self ._decompressobj and data :
116180 data = self ._decompressobj .decompress (data )
117181
118182 # If we're at the end of the request, we have some cleaning up to do.
119183 # Close the stream, and if necessary flush the buffer.
120- if decode_content and self ._decompressobj :
184+ if decode_content and self ._decompressobj and not self . _length :
121185 data += self ._decompressobj .flush ()
122186
123187 # We're at the end. Close the connection.
124- if not data :
188+ if not self . _length :
125189 self .close ()
126190
127191 return data
@@ -136,6 +200,31 @@ def close(self):
136200 self ._sock .close ()
137201 self ._sock = None
138202
203+ def _read_expect_closed (self ):
204+ """
205+ Implements the logic for an unbounded read on a socket that we expect
206+ to be closed by the remote end.
207+ """
208+ # In this case, just read until we cannot read anymore. Then, close the
209+ # socket, becuase we know we have to.
210+ chunks = []
211+ while True :
212+ chunk = self ._sock .recv (65535 ).tobytes ()
213+ if not chunk :
214+ break
215+
216+ chunks .append (chunk )
217+
218+ self .close ()
219+
220+ # We may need to decompress the data.
221+ data = b'' .join (chunks )
222+ if decode_content and self ._decompressobj :
223+ data = self ._decompressobj .decompress (data )
224+ data += self ._decompressobj .flush ()
225+
226+ return data
227+
139228 # The following methods implement the context manager protocol.
140229 def __enter__ (self ):
141230 return self
0 commit comments