diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2528606 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.pyc +__pycache__ +/lzstring.egg-info +/build/ +/dist/ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..44a0774 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,14 @@ +language: python +python: + - "2.6" + - "2.7" + - "3.3" + - "3.4" + - "3.5" + - "3.6" +# commands to install dependencies +install: + - python setup.py -q install +# commands to run tests +script: + - python test.py diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..5f9ee7d --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..64c63b3 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include LICENSE.md +include README.rst \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index a537054..0000000 --- a/README.md +++ /dev/null @@ -1,6 +0,0 @@ -lz-string-python -================ - -lz-string for python 3 - -Based on the LZ-String javascript found here: http://pieroxy.net/blog/pages/lz-string/index.html diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..4c586b8 --- /dev/null +++ b/README.rst @@ -0,0 +1,21 @@ +lz-string-python +================ + +lz-string for python 2/3 + +Based on the LZ-String javascript found here: http://pieroxy.net/blog/pages/lz-string/index.html + +Example +------- +:: + + >>> import lzstring + >>> x = lzstring.LZString() + >>> compressed = x.compressToBase64(u'你好') # 'gbyl9NI=' + >>> x.decompressFromBase64(compressed) # '你好' + +Installation +------------ +:: + + $ pip install lzstring diff --git a/lzstring.py b/lzstring.py deleted file mode 100644 index 18775de..0000000 --- a/lzstring.py +++ /dev/null @@ -1,672 +0,0 @@ -import math -import re - - -class LZString: - - def __init__(self): - self.keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" - - def compress(self, uncompressed): - - if uncompressed is None: - return '' - - value = 0 - context_dictionary = {} - context_dictionaryToCreate = {} - context_c = '' - context_wc = '' - context_w = '' - context_enlargeIn = 2 - - context_dictSize = 3 - context_numBits = 2 - context_data_string = '' - context_data_val = 0 - context_data_position = 0 - - uncompressed = uncompressed - - for ii in range(len(uncompressed)): - context_c = uncompressed[ii] - - if not context_c in context_dictionary: - context_dictionary[context_c] = context_dictSize - context_dictSize += 1 - context_dictionaryToCreate[context_c] = True - - context_wc = context_w + context_c - - if context_wc 
in context_dictionary: - context_w = context_wc - else: - if context_w in context_dictionaryToCreate: - if ord(context_w[0]) < 256: - for i in range(context_numBits): - context_data_val = (context_data_val << 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = ord(context_w[0]) - - for i in range(8): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - else: - value = 1 - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | value - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = 0 - - value = ord(context_w[0]) - - for i in range(16): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - context_enlargeIn -= 1 - - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) - context_numBits += 1 - - context_dictionaryToCreate.pop(context_w, None) - #del context_dictionaryToCreate[context_w] - else: - value = context_dictionary[context_w] - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - context_enlargeIn -= 1 - - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) - context_numBits += 1 - - 
context_dictionary[context_wc] = context_dictSize - context_dictSize += 1 - context_w = context_c - if context_w != '': - if context_w in context_dictionaryToCreate: - if ord(context_w[0]) < 256: - for i in range(context_numBits): - context_data_val = (context_data_val << 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = ord(context_w[0]) - - for i in range(8): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - else: - value = 1 - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | value - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = 0 - - value = ord(context_w[0]) - - for i in range(16): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - context_enlargeIn -= 1 - - if context_enlargeIn == 0: - context_enlargeIn = pow(2, context_numBits) - context_numBits += 1 - - context_dictionaryToCreate.pop(context_w, None) - #del context_dictionaryToCreate[context_w] - else: - value = context_dictionary[context_w] - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - context_enlargeIn -= 1 - - if context_enlargeIn == 0: - context_enlargeIn = 
pow(2, context_numBits) - context_numBits += 1 - - value = 2 - - for i in range(context_numBits): - context_data_val = (context_data_val << 1) | (value & 1) - - if context_data_position == 15: - context_data_position = 0 - context_data_string += chr(context_data_val) - context_data_val = 0 - else: - context_data_position += 1 - - value = value >> 1 - - while True: - context_data_val = (context_data_val << 1) - - if context_data_position == 15: - context_data_string += chr(context_data_val) - break - else: - context_data_position += 1 - - return context_data_string - - def compressToBase64(self, string): - if string is None: - return '' - - output = '' - - chr1 = float('NaN') - chr2 = float('NaN') - chr3 = float('NaN') - enc1 = 0 - enc2 = 0 - enc3 = 0 - enc4 = 0 - - i = 0 - - string = self.compress(string) - strlen = len(string) - - while i < (strlen * 2): - if (i % 2) == 0: - chr1 = ord(string[int(i / 2)]) >> 8 - chr2 = ord(string[int(i / 2)]) & 255 - - if (i / 2) + 1 < strlen: - chr3 = ord(string[int((i / 2) + 1)]) >> 8 - else: - chr3 = float('NaN') - else: - chr1 = ord(string[int((i - 1) / 2)]) & 255 - if (i + 1) / 2 < strlen: - chr2 = ord(string[int((i + 1) / 2)]) >> 8 - chr3 = ord(string[int((i + 1) / 2)]) & 255 - else: - chr2 = float('NaN') - chr3 = float('NaN') - - i += 3 - - # python dont support bit operation with NaN like javascript - enc1 = chr1 >> 2 - enc2 = ((chr1 & 3) << 4) | (chr2 >> 4 if not math.isnan(chr2) else 0) - enc3 = ((chr2 & 15 if not math.isnan(chr2) else 0) << 2) | (chr3 >> 6 if not math.isnan(chr3) else 0) - enc4 = (chr3 if not math.isnan(chr3) else 0) & 63 - - if math.isnan(chr2): - enc3 = 64 - enc4 = 64 - elif math.isnan(chr3): - enc4 = 64 - - output += self.keyStr[enc1] + self.keyStr[enc2] + self.keyStr[enc3] + self.keyStr[enc4] - - return output - - def compressToUTF16(self, string): - - if string is None: - return '' - - output = '' - c = 0 - current = 0 - status = 0 - - string = self.compress(string) - - for i in range(len(string)): 
- c = ord(string[i]) - - if status == 0: - status += 1 - output += chr(((c >> 1) + 32)) - current = (c & 1) << 14 - elif status == 1: - status += 1 - output += chr(((current + (c >> 2)) + 32)) - current = (c & 3) << 13 - elif status == 2: - status += 1 - output += chr(((current + (c >> 3)) + 32)) - current = (c & 7) << 12 - elif status == 3: - status += 1 - output += chr(((current + (c >> 4)) + 32)) - current = (c & 15) << 11 - elif status == 4: - status += 1 - output += chr(((current + (c >> 5)) + 32)) - current = (c & 31) << 10 - elif status == 5: - status += 1 - output += chr(((current + (c >> 6)) + 32)) - current = (c & 63) << 9 - elif status == 6: - status += 1 - output += chr(((current + (c >> 7)) + 32)) - current = (c & 127) << 8 - elif status == 7: - status += 1 - output += chr(((current + (c >> 8)) + 32)) - current = (c & 255) << 7 - elif status == 8: - status += 1 - output += chr(((current + (c >> 9)) + 32)) - current = (c & 511) << 6 - elif status == 9: - status += 1 - output += chr(((current + (c >> 10)) + 32)) - current = (c & 1023) << 5 - elif status == 10: - status += 1 - output += chr(((current + (c >> 11)) + 32)) - current = (c & 2047) << 4 - elif status == 11: - status += 1 - output += chr(((current + (c >> 12)) + 32)) - current = (c & 4095) << 3 - elif status == 12: - status += 1 - output += chr(((current + (c >> 13)) + 32)) - current = (c & 8191) << 2 - elif status == 13: - status += 1 - output += chr(((current + (c >> 14)) + 32)) - current = (c & 16383) << 1 - elif status == 14: - status += 1 - output += chr(((current + (c >> 15)) + 32)) - output += chr((c & 32767) + 32) - - status = 0 - - output += chr(current + 32) - - return output - - #written by https://github.com/v-python - def decompressFromUTF16(self, string): - if not string: - return "" - - output = "" - status = 0 - i = 0 - - while i < len(string): - c = ord(string[i]) - 32 - i += 1 - - if status == 0: - status = 1 - current = c << 1 - elif status == 1: - status = 2 - output += 
chr(current + (c >> 14)) - current = (c & 16383) << 2 - elif status == 2: - status = 3 - output += chr(current + (c >> 13)) - current = (c & 8191) << 3 - elif status == 3: - status = 4 - output += chr(current + (c >> 12)) - current = (c & 4095) << 4 - elif status == 4: - status = 5 - output += chr(current + (c >> 11)) - current = (c & 2047) << 5 - elif status == 5: - status = 6 - output += chr(current + (c >> 10)) - current = (c & 1023) << 6 - elif status == 6: - status = 7 - output += chr(current + (c >> 9)) - current = (c & 511) << 7 - elif status == 7: - status = 8 - output += chr(current + (c >> 8)) - current = (c & 255) << 8 - elif status == 8: - status = 9 - output += chr(current + (c >> 7)) - current = (c & 127) << 9 - elif status == 9: - status = 10 - output += chr(current + (c >> 6)) - current = (c & 63) << 10 - elif status == 10: - status = 11 - output += chr(current + (c >> 5)) - current = (c & 31) << 11 - elif status == 11: - status = 12 - output += chr(current + (c >> 4)) - current = (c & 15) << 12 - elif status == 12: - status = 13 - output += chr(current + (c >> 3)) - current = (c & 7) << 13 - elif status == 13: - status = 14 - output += chr(current + (c >> 2)) - current = (c & 3) << 14 - elif status == 14: - status = 15 - output += chr(current + (c >> 1)) - current = (c & 1) << 15 - elif status == 15: - status = 0 - output += chr(current + c) - current = (c & 1) << 15 - - return self.decompress(output) - - def decompress(self, compressed): - - if (compressed is None) or (compressed == ''): - return '' - - dictionary = {} - enlargeIn = 4 - dictSize = 4 - numBits = 3 - (entry, result, w, c) = ('', '', '', '') - (i, nnext, bits, resb, maxpower, power) = (0, 0, 0, 0, 0, 0) - - data_string = compressed - data_val = ord(compressed[0]) - data_position = 32768 - data_index = 1 - - for i in range(3): - #dictionary[i] = i - dictionary[i] = '' - - bits = 0 - maxpower = pow(2, 2) - power = 1 - - while power != maxpower: - resb = data_val & data_position - 
data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - nnext = bits - if nnext == 0: - bits = 0 - maxpower = pow(2, 8) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - c = chr(bits) - elif nnext == 1: - bits = 0 - maxpower = pow(2, 16) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - c = chr(bits) - elif nnext == 2: - return '' - - dictionary[3] = c - result = c - w = result - - while True: - if data_index > len(data_string): - return '' - - bits = 0 - maxpower = pow(2, numBits) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - c = bits - - if c == 0: - bits = 0 - maxpower = pow(2, 8) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 - - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - dictionary[dictSize] = chr(bits) - dictSize += 1 - c = dictSize - 1 - enlargeIn -= 1 - elif c == 1: - bits = 0 - maxpower = pow(2, 16) - power = 1 - - while power != maxpower: - resb = data_val & data_position - data_position >>= 1 - - if data_position == 0: - data_position = 32768 - data_val = ord(data_string[data_index]) - data_index += 1 
- - bits |= (1 if resb > 0 else 0) * power - power <<= 1 - - dictionary[dictSize] = chr(bits) - dictSize += 1 - c = dictSize - 1 - enlargeIn -= 1 - elif c == 2: - return result - - if enlargeIn == 0: - enlargeIn = pow(2, numBits) - numBits += 1 - - if c in dictionary: - entry = dictionary[c] - else: - if c == dictSize: - entry = w + w[0] - else: - return None - - result += entry - - dictionary[dictSize] = w + entry[0] - dictSize += 1 - enlargeIn -= 1 - - w = entry - - if enlargeIn == 0: - enlargeIn = pow(2, numBits) - numBits += 1 - - def decompresFromBase64(self, iinput): - if iinput is None: - return '' - - output = "" - ol = 0 - output_ = '' - - i = 0 - - iinput = re.sub(r'[^A-Za-z0-9\+\/\=]', '', iinput) - - while i < len(iinput): - enc1 = self.keyStr.index(iinput[i]) - i += 1 - enc2 = self.keyStr.index(iinput[i]) - i += 1 - enc3 = self.keyStr.index(iinput[i]) - i += 1 - enc4 = self.keyStr.index(iinput[i]) - i += 1 - - chr1 = (enc1 << 2) | (enc2 >> 4) - chr2 = ((enc2 & 15) << 4) | (enc3 >> 2) - chr3 = ((enc3 & 3) << 6) | enc4 - - if (ol % 2) == 0: - output_ = chr1 << 8 - - if enc3 != 64: - output += chr(output_ | chr2) - - if enc4 != 64: - output_ = chr3 << 8 - else: - output = output + chr(output_ | chr1) - - if enc3 != 64: - output_ = chr2 << 8 - - if enc4 != 64: - output += chr(output_ | chr3) - - ol += 3 - - return self.decompress(output) diff --git a/lzstring/__init__.py b/lzstring/__init__.py new file mode 100644 index 0000000..cf91d62 --- /dev/null +++ b/lzstring/__init__.py @@ -0,0 +1,446 @@ + #!/usr/bin/python + # -*- coding: utf-8 -*- + +from __future__ import division +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import absolute_import +from builtins import range +from builtins import int +from builtins import chr +from future import standard_library +standard_library.install_aliases() +from builtins import object +import math +import re + + +keyStrBase64 = 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" +keyStrUriSafe = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$" +baseReverseDic = {}; + +class Object(object): + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + +def getBaseValue(alphabet, character): + if alphabet not in baseReverseDic: + baseReverseDic[alphabet] = {} + for i in range(len(alphabet)): + baseReverseDic[alphabet][alphabet[i]] = i + return baseReverseDic[alphabet][character] + + +def _compress(uncompressed, bitsPerChar, getCharFromInt): + if (uncompressed is None): + return "" + + context_dictionary = {} + context_dictionaryToCreate= {} + context_c = "" + context_wc = "" + context_w = "" + context_enlargeIn = 2 # Compensate for the first entry which should not count + context_dictSize = 3 + context_numBits = 2 + context_data = [] + context_data_val = 0 + context_data_position = 0 + + for ii in range(len(uncompressed)): + if isinstance(uncompressed, (bytes)): + context_c = chr(uncompressed[ii]) + else: + context_c = uncompressed[ii] + if context_c not in context_dictionary: + context_dictionary[context_c] = context_dictSize + context_dictSize += 1 + context_dictionaryToCreate[context_c] = True + + context_wc = context_w + context_c + if context_wc in context_dictionary: + context_w = context_wc + else: + if context_w in context_dictionaryToCreate: + if ord(context_w[0]) < 256: + for i in range(context_numBits): + context_data_val = (context_data_val << 1) + if context_data_position == bitsPerChar-1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = ord(context_w[0]) + for i in range(8): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + 
context_data_position += 1 + value = value >> 1 + + else: + value = 1 + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | value + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = 0 + value = ord(context_w[0]) + for i in range(16): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = value >> 1 + context_enlargeIn -= 1 + if context_enlargeIn == 0: + context_enlargeIn = math.pow(2, context_numBits) + context_numBits += 1 + del context_dictionaryToCreate[context_w] + else: + value = context_dictionary[context_w] + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = value >> 1 + + context_enlargeIn -= 1 + if context_enlargeIn == 0: + context_enlargeIn = math.pow(2, context_numBits) + context_numBits += 1 + + # Add wc to the dictionary. + context_dictionary[context_wc] = context_dictSize + context_dictSize += 1 + context_w = str(context_c) + + # Output the code for w. 
+ if context_w != "": + if context_w in context_dictionaryToCreate: + if ord(context_w[0]) < 256: + for i in range(context_numBits): + context_data_val = (context_data_val << 1) + if context_data_position == bitsPerChar-1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = ord(context_w[0]) + for i in range(8): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = value >> 1 + else: + value = 1 + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | value + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = 0 + value = ord(context_w[0]) + for i in range(16): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = value >> 1 + context_enlargeIn -= 1 + if context_enlargeIn == 0: + context_enlargeIn = math.pow(2, context_numBits) + context_numBits += 1 + del context_dictionaryToCreate[context_w] + else: + value = context_dictionary[context_w] + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = value >> 1 + + context_enlargeIn -= 1 + if context_enlargeIn == 0: + context_enlargeIn = math.pow(2, context_numBits) + context_numBits += 1 + + # Mark the end of 
the stream + value = 2 + for i in range(context_numBits): + context_data_val = (context_data_val << 1) | (value & 1) + if context_data_position == bitsPerChar - 1: + context_data_position = 0 + context_data.append(getCharFromInt(context_data_val)) + context_data_val = 0 + else: + context_data_position += 1 + value = value >> 1 + + # Flush the last char + while True: + context_data_val = (context_data_val << 1) + if context_data_position == bitsPerChar - 1: + context_data.append(getCharFromInt(context_data_val)) + break + else: + context_data_position += 1 + + return "".join(context_data) + + +def _decompress(length, resetValue, getNextValue): + dictionary = {} + enlargeIn = 4 + dictSize = 4 + numBits = 3 + entry = "" + result = [] + + data = Object( + val=getNextValue(0), + position=resetValue, + index=1 + ) + + for i in range(3): + dictionary[i] = i + + bits = 0 + maxpower = math.pow(2, 2) + power = 1 + + while power != maxpower: + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue + data.val = getNextValue(data.index) + data.index += 1 + + bits |= power if resb > 0 else 0 + power <<= 1; + + next = bits + if next == 0: + bits = 0 + maxpower = math.pow(2, 8) + power = 1 + while power != maxpower: + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue + data.val = getNextValue(data.index) + data.index += 1 + bits |= power if resb > 0 else 0 + power <<= 1 + c = chr(bits) + elif next == 1: + bits = 0 + maxpower = math.pow(2, 16) + power = 1 + while power != maxpower: + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue; + data.val = getNextValue(data.index) + data.index += 1 + bits |= power if resb > 0 else 0 + power <<= 1 + c = chr(bits) + elif next == 2: + return "" + + dictionary[3] = c + w = c + result.append(c) + counter = 0 + while True: + counter += 1 + if data.index > length: + return "" + + 
bits = 0 + maxpower = math.pow(2, numBits) + power = 1 + while power != maxpower: + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue; + data.val = getNextValue(data.index) + data.index += 1 + bits |= power if resb > 0 else 0 + power <<= 1 + + c = bits + if c == 0: + bits = 0 + maxpower = math.pow(2, 8) + power = 1 + while power != maxpower: + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue + data.val = getNextValue(data.index) + data.index += 1 + bits |= power if resb > 0 else 0 + power <<= 1 + + dictionary[dictSize] = chr(bits) + dictSize += 1 + c = dictSize - 1 + enlargeIn -= 1 + elif c == 1: + bits = 0 + maxpower = math.pow(2, 16) + power = 1 + while power != maxpower: + resb = data.val & data.position + data.position >>= 1 + if data.position == 0: + data.position = resetValue; + data.val = getNextValue(data.index) + data.index += 1 + bits |= power if resb > 0 else 0 + power <<= 1 + dictionary[dictSize] = chr(bits) + dictSize += 1 + c = dictSize - 1 + enlargeIn -= 1 + elif c == 2: + return "".join(result) + + + if enlargeIn == 0: + enlargeIn = math.pow(2, numBits) + numBits += 1 + + if c in dictionary: + entry = dictionary[c] + else: + if c == dictSize: + entry = w + w[0] + else: + return None + result.append(entry) + + # Add w+entry[0] to the dictionary. 
class LZString(object):
    """Static entry points for the supported output encodings.

    Every ``compressTo*`` method has a matching ``decompressFrom*`` method.
    The decompress methods return ``""`` for ``None`` input and ``None`` for
    empty input.
    """

    @staticmethod
    def compress(uncompressed):
        """Compress to a string carrying 16 payload bits per character."""
        return _compress(uncompressed, 16, chr)

    @staticmethod
    def compressToUint8Array(uncompressed):
        """Compress to ``bytes``, 8 payload bits per byte."""
        # Going through bytearray gives real bytes on Python 2 as well, where
        # bytes([...]) would produce the text representation of the list.
        return bytes(bytearray(ord(x) for x in _compress(uncompressed, 8, chr)))

    @staticmethod
    def compressToUTF16(uncompressed):
        """Compress to 15-bit characters shifted up by 32, plus a trailing space."""
        if uncompressed is None:
            return ""
        return _compress(uncompressed, 15, lambda a: chr(a + 32)) + " "

    @staticmethod
    def compressToBase64(uncompressed):
        """Compress to Base64 text padded with ``=`` to a multiple of four."""
        if uncompressed is None:
            return ""
        res = _compress(uncompressed, 6, lambda a: keyStrBase64[a])
        # To produce valid Base64
        end = len(res) % 4
        if end > 0:
            res += "=" * (4 - end)
        return res

    @staticmethod
    def compressToEncodedURIComponent(uncompressed):
        """Compress to text drawn from the ``keyStrUriSafe`` alphabet."""
        if uncompressed is None:
            return ""
        return _compress(uncompressed, 6, lambda a: keyStrUriSafe[a])

    @staticmethod
    def decompress(compressed):
        """Reverse ``compress``."""
        if compressed is None:
            return ""
        if compressed == "":
            return None
        return _decompress(len(compressed), 32768, lambda index: ord(compressed[index]))

    @staticmethod
    def decompressFromUint8Array(compressed):
        """Reverse ``compressToUint8Array``."""
        if compressed is None:
            return ""
        # len() rather than == "": an empty bytes object never equals "".
        if len(compressed) == 0:
            return None
        # bytearray yields integers when indexed on both Python 2 and 3.
        data = bytearray(compressed)
        return _decompress(len(data), 128, lambda index: data[index])

    @staticmethod
    def decompressFromUTF16(compressed):
        """Reverse ``compressToUTF16``."""
        if compressed is None:
            return ""
        if compressed == "":
            return None
        # ord() is required: indexing a string yields a one-character string,
        # and subtracting 32 from it raises TypeError.
        return _decompress(len(compressed), 16384, lambda index: ord(compressed[index]) - 32)

    @staticmethod
    def decompressFromBase64(compressed):
        """Reverse ``compressToBase64``."""
        if compressed is None:
            return ""
        if compressed == "":
            return None
        return _decompress(len(compressed), 32, lambda index: getBaseValue(keyStrBase64, compressed[index]))

    @staticmethod
    def decompressFromEncodedURIComponent(compressed):
        """Reverse ``compressToEncodedURIComponent``.

        Spaces are turned back into ``+`` before decoding.
        """
        if compressed is None:
            return ""
        if compressed == "":
            return None
        compressed = compressed.replace(" ", "+")
        return _decompress(len(compressed), 32, lambda index: getBaseValue(keyStrUriSafe, compressed[index]))
from setuptools import setup

import io

# Read the long description inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with io.open('README.rst', encoding='utf8') as readme:
    long_description = readme.read()

setup(
    name='lzstring',
    version='1.0.4',
    description='lz-string for python',
    author='Geza Kovacs',
    author_email='geza0kovacs@gmail.com',
    packages=['lzstring'],
    package_dir={'lzstring': 'lzstring'},
    package_data={},
    long_description=long_description,
    url='https://github.com/gkovacs/lz-string-python',
    download_url='https://github.com/gkovacs/lz-string-python',
    keywords=['lz-string', 'lzstring', 'compression'],
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Natural Language :: English',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.1',
        'Programming Language :: Python :: 3.2',
        'Programming Language :: Python :: 3.3',
    ],
    # The package imports `builtins` and `future.standard_library`.
    install_requires=['future>=0.14.0'],
)
- - # generated with original js lib - jsLzStringBase64 = 'r6ABsK6KaAD2aLCADWBfgBPQ9oCAlAZAvgDobEARlB4QAEOAjAUxAGd4BL5AZ4BMBPAQiAAA' - jsLzStringBase64Json = 'N4Ig5gNg9gzjCGAnAniAXKALgS0xApuiPgB7wC2ADgQASSwIogA0IA4tHACLYBu6WXASIBlFu04wAMthiYBEhgFEAdpiYYQASS6i2AWSniRURJgCCMPYfEcGAFXyJyozPBUATJB5pt8Kp3gIbAAvfB99JABrAFdKGil3MBj4MEJWcwBjRCgVZBc0EBEDIwyAIzLEfH5CrREAeRoADiaAdgBONABGdqaANltJLnwAMwVKJHgicxpyfDcAWnJouJoIJJS05hoYmHCaTCgabPx4THxZlfj1lWTU/BgaGBjMgAsaeEeuKEyAISgoFEAHSDBgifD4cwQGBQdAAbXYNlYAA0bABdAC+rDscHBhEKy0QsUoIAxZLJQAAA==' - - print('String for encode: ' + s) - print() - - print('Compress to base64:') - base2 = x.compressToBase64(s) - print('result: ' + base2) - print('result js: ' + jsLzStringBase64) - print('equals: ' + str(base2 == jsLzStringBase64)) - - print() - - print('Decompress from base64:') - print('result: ' + x.decompresFromBase64(base2)) - print('result from js: ' + x.decompresFromBase64(jsLzStringBase64)) - - print() - - jsonString = '{"glossary":{"title":"example glossary","GlossDiv":{"title":"S","GlossList":{"GlossEntry":{"ID":"SGML","SortAs":"SGML","GlossTerm":"Standard Generalized Markup Language","Acronym":"SGML","Abbrev":"ISO 8879:1986","GlossDef":{"para":"A meta-markup language, used to create markup languages such as DocBook.","GlossSeeAlso":["GML","XML"]},"GlossSee":"markup"}}}}}' - - print('Compress json to base64:') - jresult = x.compressToBase64(jsonString) - print('result: ' + jresult) - print() - print('result js: ' + jsLzStringBase64Json) - print() - print('equals: ' + str(jresult == jsLzStringBase64Json)) - - print() - - print('Decompress json from base64:') - pprint.pprint(json.loads(x.decompresFromBase64(jsLzStringBase64Json))) + #!/usr/bin/python + # -*- coding: utf-8 -*- + +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from builtins import str +from future import standard_library +standard_library.install_aliases() 
import json
import lzstring
import pprint


def main():
    """Print a side-by-side comparison with strings produced by the JS library.

    Two inputs are used, a short non-ASCII sentence and a JSON document.  Each
    is compressed to Base64, compared with the JavaScript result and then
    decompressed again.  Nothing is asserted; the results are only printed.
    """
    codec = lzstring.LZString()

    text = 'Žluťoučký kůň úpěl ďábelské ódy!'

    # generated with original js lib
    js_text_b64 = 'r6ABsK6KaAD2aLCADWBfgBPQ9oCAlAZAvgDobEARlB4QAEOAjAUxAGd4BL5AZ4BMBPAQiAAA'
    js_json_b64 = 'N4Ig5gNg9gzjCGAnAniAXKALgS0xApuiPgB7wC2ADgQASSwIogA0IA4tHACLYBu6WXASIBlFu04wAMthiYBEhgFEAdpiYYQASS6i2AWSniRURJgCCMPYfEcGAFXyJyozPBUATJB5pt8Kp3gIbAAvfB99JABrAFdKGil3MBj4MEJWcwBjRCgVZBc0EBEDIwyAIzLEfH5CrREAeRoADiaAdgBONABGdqaANltJLnwAMwVKJHgicxpyfDcAWnJouJoIJJS05hoYmHCaTCgabPx4THxZlfj1lWTU/BgaGBjMgAsaeEeuKEyAISgoFEAHSDBgifD4cwQGBQdAAbXYNlYAA0bABdAC+rDscHBhEKy0QsUoIAxZLJQAAA=='

    print('String for encode: ' + text)
    print()

    # Plain text: compress, compare, decompress.
    print('Compress to base64:')
    text_b64 = codec.compressToBase64(text)
    print('result: ' + text_b64)
    print('result js: ' + js_text_b64)
    matches = text_b64 == js_text_b64
    print('equals: ' + str(matches))

    print()

    print('Decompress from base64:')
    restored = codec.decompressFromBase64(text_b64)
    print('result: ' + restored)
    restored_from_js = codec.decompressFromBase64(js_text_b64)
    print('result from js: ' + restored_from_js)

    print()

    document = '{"glossary":{"title":"example glossary","GlossDiv":{"title":"S","GlossList":{"GlossEntry":{"ID":"SGML","SortAs":"SGML","GlossTerm":"Standard Generalized Markup Language","Acronym":"SGML","Abbrev":"ISO 8879:1986","GlossDef":{"para":"A meta-markup language, used to create markup languages such as DocBook.","GlossSeeAlso":["GML","XML"]},"GlossSee":"markup"}}}}}'

    # JSON document: same sequence of steps.
    print('Compress json to base64:')
    document_b64 = codec.compressToBase64(document)
    print('result: ' + document_b64)
    print()
    print('result js: ' + js_json_b64)
    print()
    matches = document_b64 == js_json_b64
    print('equals: ' + str(matches))

    print()

    print('Decompress json from base64:')
    decoded = json.loads(codec.decompressFromBase64(js_json_b64))
    pprint.pprint(decoded)


if __name__ == '__main__':
    main()