11from . import support
22import unittest , codecs
33
4- from html5lib .inputstream import HTMLInputStream
4+ from html5lib .inputstream import HTMLInputStream , HTMLUnicodeInputStream , HTMLBinaryInputStream
55
6- class HTMLInputStreamShortChunk (HTMLInputStream ):
6+ class HTMLUnicodeInputStreamShortChunk (HTMLUnicodeInputStream ):
7+ _defaultChunkSize = 2
8+
9+ class HTMLBinaryInputStreamShortChunk (HTMLBinaryInputStream ):
710 _defaultChunkSize = 2
811
912class HTMLInputStreamTest (unittest .TestCase ):
1013
1114 def test_char_ascii (self ):
12- stream = HTMLInputStream ("'" , encoding = 'ascii' )
15+ stream = HTMLInputStream (b "'" , encoding = 'ascii' )
1316 self .assertEquals (stream .charEncoding [0 ], 'ascii' )
1417 self .assertEquals (stream .char (), "'" )
1518
16- def test_char_null (self ):
17- stream = HTMLInputStream ("\x00 " )
18- self .assertEquals (stream .char (), '\ufffd ' )
19-
2019 def test_char_utf8 (self ):
2120 stream = HTMLInputStream ('\u2018 ' .encode ('utf-8' ), encoding = 'utf-8' )
2221 self .assertEquals (stream .charEncoding [0 ], 'utf-8' )
@@ -30,7 +29,7 @@ def test_char_win1252(self):
3029 self .assertEquals (stream .char (), "\u2019 " )
3130
3231 def test_bom (self ):
33- stream = HTMLInputStream (codecs .BOM_UTF8 + "'" )
32+ stream = HTMLInputStream (codecs .BOM_UTF8 + b "'" )
3433 self .assertEquals (stream .charEncoding [0 ], 'utf-8' )
3534 self .assertEquals (stream .char (), "'" )
3635
@@ -40,7 +39,7 @@ def test_utf_16(self):
4039 self .assertEquals (len (stream .charsUntil (' ' , True )), 1025 )
4140
4241 def test_newlines (self ):
43- stream = HTMLInputStreamShortChunk (codecs .BOM_UTF8 + "a\n bb\r \n ccc\r ddddxe" )
42+ stream = HTMLBinaryInputStreamShortChunk (codecs .BOM_UTF8 + b "a\n bb\r \n ccc\r ddddxe" )
4443 self .assertEquals (stream .position (), (1 , 0 ))
4544 self .assertEquals (stream .charsUntil ('c' ), "a\n bb\n " )
4645 self .assertEquals (stream .position (), (3 , 0 ))
@@ -50,12 +49,12 @@ def test_newlines(self):
5049 self .assertEquals (stream .position (), (4 , 5 ))
5150
5251 def test_newlines2 (self ):
53- size = HTMLInputStream ._defaultChunkSize
52+ size = HTMLUnicodeInputStream ._defaultChunkSize
5453 stream = HTMLInputStream ("\r " * size + "\n " )
5554 self .assertEquals (stream .charsUntil ('x' ), "\n " * size )
5655
5756 def test_position (self ):
58- stream = HTMLInputStreamShortChunk (codecs .BOM_UTF8 + "a\n bb\n ccc\n ddde\n f\n gh" )
57+ stream = HTMLBinaryInputStreamShortChunk (codecs .BOM_UTF8 + b "a\n bb\n ccc\n ddde\n f\n gh" )
5958 self .assertEquals (stream .position (), (1 , 0 ))
6059 self .assertEquals (stream .charsUntil ('c' ), "a\n bb\n " )
6160 self .assertEquals (stream .position (), (3 , 0 ))
@@ -73,7 +72,7 @@ def test_position(self):
7372 self .assertEquals (stream .position (), (6 , 1 ))
7473
7574 def test_position2 (self ):
76- stream = HTMLInputStreamShortChunk ("abc\n d" )
75+ stream = HTMLUnicodeInputStreamShortChunk ("abc\n d" )
7776 self .assertEquals (stream .position (), (1 , 0 ))
7877 self .assertEquals (stream .char (), "a" )
7978 self .assertEquals (stream .position (), (1 , 1 ))
0 commit comments