Module: LZString

Includes:
Base64, Custom, EncodedURI, UTF16, Uint8Array
Defined in:
lib/lzstring.rb,
lib/lzstring/uri.rb,
lib/lzstring/utf16.rb,
lib/lzstring/base64.rb,
lib/lzstring/custom.rb,
lib/lzstring/version.rb,
lib/lzstring/compress.rb,
lib/lzstring/decompress.rb,
lib/lzstring/encoded_uri.rb,
lib/lzstring/uint8_array.rb

Overview

Implementation of LZString compression algorithm for string compression

Defined Under Namespace

Modules: Base64, Custom, EncodedURI, URI, UTF16, Uint8Array

Classes: Error

Constant Summary collapse

VERSION =

Current version of the LZString gem

"1.0.0".freeze

Constants included from EncodedURI

EncodedURI::KEY_STR_URI_SAFE

Class Method Summary collapse

Methods included from Base64

get_base_value, key_str_base64

Class Method Details

._compress(uncompressed, bits_per_char) {|Integer| ... } ⇒ String

Internal method for compression

Parameters:

  • uncompressed (String)

    Input string to compress

  • bits_per_char (Integer)

    Number of bits per character in the output

Yields:

  • (Integer)

    Block that converts an integer to a character

Yield Parameters:

  • code (Integer)

    Integer to convert to a character

Yield Returns:

  • (String)

    Character representation of the integer

Returns:

  • (String)

    Compressed string



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/lzstring/compress.rb', line 9

# Core compressor shared by every output format.
#
# The text is processed as a sequence of UTF-16 code units, so a character
# above U+FFFF is fed through the dictionary as a high/low surrogate pair.
# Emitting only the low 16 bits of such a code point (the previous behaviour)
# silently turned it into an unrelated character. Output for text that stays
# within the Basic Multilingual Plane is unchanged.
#
# Stream layout, least-significant bit first within each field:
# * a new code unit below 128: token 0 in the current width, then 8 bits;
# * any other new code unit:   token 1 in the current width, then 16 bits;
# * a known phrase:            its dictionary index in the current width;
# * end of stream:             token 2 in the current width, then zero padding.
#
# NOTE(review): the reference JavaScript encoder is understood to use the
# 8-bit form for units below 256 rather than 128; the threshold is kept as
# it was here - confirm before changing it.
#
# @param uncompressed [String, nil] text to compress
# @param bits_per_char [Integer] number of bits packed into each yielded value
# @yieldparam code [Integer] packed value holding +bits_per_char+ bits
# @yieldreturn [String] character that represents +code+ in the output
# @return [String] the block results joined together; "" for nil or empty input
# @raise [ArgumentError] when the input contains bytes that are not valid UTF-8
def self._compress(uncompressed, bits_per_char)
  return "" if uncompressed.nil? || uncompressed.empty?

  text = uncompressed.to_s.dup.force_encoding(Encoding::UTF_8)

  # Split into UTF-16 code units; String#ord raises ArgumentError on
  # malformed bytes, as the per-character lookup did before.
  units = text.each_char.flat_map do |char|
    code_point = char.ord
    if code_point > 0xFFFF
      offset = code_point - 0x10000
      [0xD800 | (offset >> 10), 0xDC00 | (offset & 0x3FF)]
    else
      [code_point]
    end
  end

  output = []
  buffer = 0
  filled = 0
  last_slot = bits_per_char - 1

  # Appends the +count+ low bits of +value+ (LSB first) to the bit buffer and
  # hands every completed group of bits_per_char bits to the caller's block.
  write_bits = lambda do |value, count|
    count.times do
      buffer = (buffer << 1) | (value & 1)
      if filled == last_slot
        filled = 0
        output.push(yield(buffer))
        buffer = 0
      else
        filled += 1
      end
      value >>= 1
    end
  end

  dictionary = {}
  pending = {} # single-unit keys whose literal has not been written yet => code unit
  dict_size = 3
  enlarge_in = 2 # compensates for the first entry, which should not count
  num_bits = 2
  phrase = "".b

  # Counts one emitted token and widens the token size when the budget runs out.
  grow = lambda do
    enlarge_in -= 1
    if enlarge_in.zero?
      enlarge_in = 1 << num_bits
      num_bits += 1
    end
  end

  # Writes the token for +key+: a literal the first time a code unit is
  # emitted, its dictionary index afterwards.
  emit_phrase = lambda do |key|
    if pending.key?(key)
      unit = pending.delete(key)
      if unit < 128
        write_bits.call(0, num_bits)
        write_bits.call(unit, 8)
      else
        write_bits.call(1, num_bits)
        write_bits.call(unit, 16)
      end
      grow.call
    else
      write_bits.call(dictionary[key], num_bits)
    end
    grow.call
  end

  units.each do |unit|
    # Fixed-width binary keys keep phrase concatenation unambiguous and can
    # represent surrogates, which a UTF-8 string cannot.
    symbol = [unit].pack("n")

    unless dictionary.key?(symbol)
      dictionary[symbol] = dict_size
      dict_size += 1
      pending[symbol] = unit
    end

    candidate = phrase + symbol
    if dictionary.key?(candidate)
      phrase = candidate
    else
      emit_phrase.call(phrase)
      dictionary[candidate] = dict_size
      dict_size += 1
      phrase = symbol
    end
  end

  # Output the code for the phrase still being matched.
  emit_phrase.call(phrase) unless phrase.empty?

  # Mark the end of the stream.
  write_bits.call(2, num_bits)

  # Flush the last value, padding with zero bits up to bits_per_char.
  loop do
    buffer <<= 1
    if filled == last_slot
      output.push(yield(buffer))
      break
    end
    filled += 1
  end

  output.join
end

._decompress(length, reset_value, &get_next_value) ⇒ String

Internal method for decompression

Parameters:

  • length (Integer)

    The length of compressed string

  • reset_value (Integer)

    The buffer size used for decompression

  • get_next_value (Block)

    Block that returns character code of character at given position

Returns:

  • (String)

    Decompressed string



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# File 'lib/lzstring/decompress.rb', line 7

# Core decompressor shared by every input format.
#
# Dictionary entries are kept as arrays of UTF-16 code units and only turned
# into text when the result is returned. A high surrogate immediately followed
# by a low surrogate is recombined into the character above U+FFFF it encodes;
# converting each 16-bit literal on its own (the previous behaviour) replaced
# both halves with "?". A surrogate without a partner still becomes "?".
#
# @param length [Integer] number of values available from the block
# @param reset_value [Integer] bit mask selecting the first bit of each value
# @param get_next_value [Proc] called with an index, returns the Integer value
#   stored at that position of the compressed input
# @return [String, nil] decompressed UTF-8 text, "" when +length+ is zero or
#   the stream starts with the end marker, nil when the stream is malformed
#   or any StandardError is raised while decoding
def self._decompress(length, reset_value, &get_next_value)
  return "" if length.zero?

  begin
    data_val = get_next_value.call(0)
    data_position = reset_value
    data_index = 1

    # Reads +count+ bits, least-significant first, fetching the next input
    # value as soon as the current one is used up.
    read_bits = lambda do |count|
      value = 0
      count.times do |shift|
        bit_set = (data_val & data_position).positive?
        data_position >>= 1
        if data_position.zero?
          data_position = reset_value
          data_val = get_next_value.call(data_index)
          data_index += 1
        end
        value |= (1 << shift) if bit_set
      end
      value
    end

    # Converts UTF-16 code units to a UTF-8 string.
    render = lambda do |units|
      text = String.new(encoding: Encoding::UTF_8)
      index = 0
      while index < units.length
        unit = units[index]
        following = units[index + 1]
        if unit.between?(0xD800, 0xDBFF) && following && following.between?(0xDC00, 0xDFFF)
          combined = 0x10000 + ((unit - 0xD800) << 10) + (following - 0xDC00)
          text << combined.chr(Encoding::UTF_8)
          index += 2
        else
          # A lone surrogate has no UTF-8 form; keep the "?" fallback.
          text << (unit.between?(0xD800, 0xDFFF) ? "?" : unit.chr(Encoding::UTF_8))
          index += 1
        end
      end
      text
    end

    # Slots 0..2 are the reserved tokens (8-bit literal, 16-bit literal, end
    # of stream); they are never looked up as phrases.
    dictionary = { 0 => [0], 1 => [1], 2 => [2] }
    enlarge_in = 4
    dict_size = 4
    num_bits = 3

    # The first token is always 2 bits wide.
    first_token = read_bits.call(2)
    return "" if first_token == 2

    w = case first_token
        when 0 then [read_bits.call(8)]
        when 1 then [read_bits.call(16)]
        end

    result = []
    # w is nil only for the invalid first token 3; decoding then fails (nil)
    # as soon as the next phrase has to be built from it.
    result.concat(w) if w
    dictionary[3] = w

    loop do
      return render.call(result) if data_index > length

      code = read_bits.call(num_bits)

      case code
      when 0, 1
        # New literal: store it and continue with its freshly assigned index.
        dictionary[dict_size] = [read_bits.call(code.zero? ? 8 : 16)]
        code = dict_size
        dict_size += 1
        enlarge_in -= 1
      when 2
        return render.call(result)
      end

      if enlarge_in.zero?
        enlarge_in = 2**num_bits
        num_bits += 1
      end

      entry = dictionary[code]
      if entry.nil?
        # Only the index about to be assigned may be referenced before it
        # exists; it stands for the previous phrase plus its own first unit.
        return nil unless code == dict_size

        entry = w + w.first(1)
      end

      result.concat(entry)

      # Add w + entry[0] to the dictionary.
      dictionary[dict_size] = w + entry.first(1)
      dict_size += 1
      enlarge_in -= 1

      if enlarge_in.zero?
        enlarge_in = 2**num_bits
        num_bits += 1
      end

      w = entry
    end
  rescue StandardError
    # Malformed input is reported as nil instead of raising.
    nil
  end
end

.compress(input) ⇒ String

Standard compression using raw format

Parameters:

  • input (String)

    String to compress

Returns:

  • (String)

    Compressed string



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/lzstring.rb', line 19

# Compresses +input+ into the raw format, packing 16 bits into each output
# character.
#
# Every packed value is turned into a UTF-8 character with Integer#chr. A
# value outside 0..0x10FFFF, or one for which +chr+ raises RangeError or
# ArgumentError, is written as "?" instead.
#
# @param input [String, nil] string to compress
# @return [String] compressed string; "" for nil or empty input, and "" when
#   any other StandardError is raised during compression
def self.compress(input)
  return "" if input.nil? || input.empty?

  begin
    # Work on a UTF-8 tagged copy so the caller's string is left untouched.
    text = input.to_s.dup.force_encoding(Encoding::UTF_8)

    _compress(text, 16) do |code|
      next "?" unless code.between?(0, 0x10FFFF)

      code.chr(Encoding::UTF_8)
    rescue RangeError, ArgumentError
      # The value cannot be represented as a UTF-8 character.
      "?"
    end
  rescue StandardError
    # Return empty string on error, matching JavaScript's behavior
    ""
  end
end

.compress_to_base64(input) ⇒ String

Make module methods available at the class level. Compress a string to base64 encoding.

Parameters:

  • input (String)

    String to compress

Returns:

  • (String)

    Base64 compressed string



98
99
100
# File 'lib/lzstring/base64.rb', line 98

# Module-level convenience wrapper; forwards the call to
# Base64.compress_to_base64.
#
# @param input [String] string to compress
# @return [String] Base64 compressed string
def self.compress_to_base64(input)
  Base64.compress_to_base64(input)
end

.compress_to_custom(input, key_str) ⇒ String

Make module methods available at the class level. Compress a string using a custom character set.

Parameters:

  • input (String)

    String to compress

  • key_str (String)

    Custom character set to use for encoding

Returns:

  • (String)

    Compressed string using custom encoding



123
124
125
# File 'lib/lzstring/custom.rb', line 123

# Module-level convenience wrapper; forwards the call to
# Custom.compress_to_custom.
#
# @param input [String] string to compress
# @param key_str [String] custom character set to use for encoding
# @return [String] compressed string using the custom encoding
def self.compress_to_custom(input, key_str)
  Custom.compress_to_custom(input, key_str)
end

.compress_to_encoded_uri_component(input) ⇒ Object

Make module methods available at the class level



78
79
80
# File 'lib/lzstring/uri.rb', line 78

# Module-level convenience wrapper; forwards the call to
# URI.compress_to_encoded_uri_component and returns its result unchanged.
#
# NOTE(review): the return type is not documented for this entry; presumably
# a URI-safe String - confirm against URI.compress_to_encoded_uri_component.
#
# @param input [String] string to compress
def self.compress_to_encoded_uri_component(input)
  URI.compress_to_encoded_uri_component(input)
end

.compress_to_uint8_array(input) ⇒ Array<Integer>

Make module methods available at the class level. Compress a string to Uint8Array format.

Parameters:

  • input (String)

    String to compress

Returns:

  • (Array<Integer>)

    Array of 8-bit integers



136
137
138
# File 'lib/lzstring/uint8_array.rb', line 136

# Module-level convenience wrapper; forwards the call to
# Uint8Array.compress_to_uint8_array.
#
# @param input [String] string to compress
# @return [Array<Integer>] array of 8-bit integers
def self.compress_to_uint8_array(input)
  Uint8Array.compress_to_uint8_array(input)
end

.compress_to_utf16(input) ⇒ String

Make module methods available at the class level. Compress a string to UTF-16 encoding.

Parameters:

  • input (String)

    String to compress

Returns:

  • (String)

    UTF-16 compressed string



92
93
94
# File 'lib/lzstring/utf16.rb', line 92

# Module-level convenience wrapper; forwards the call to
# UTF16.compress_to_utf16.
#
# @param input [String] string to compress
# @return [String] UTF-16 compressed string
def self.compress_to_utf16(input)
  UTF16.compress_to_utf16(input)
end

.compressToEncodedURIComponent ⇒ String

Make module methods available at the class level. Compress a string to URI-component safe encoding.

Parameters:

  • input (String)

    String to compress

Returns:

  • (String)

    URI-component safe compressed string



91
92
93
# File 'lib/lzstring/uri.rb', line 91

# Module-level convenience wrapper; forwards the call to
# URI.compress_to_encoded_uri_component.
#
# @param input [String] string to compress
# @return [String] URI-component safe compressed string
def self.compress_to_encoded_uri_component(input)
  URI.compress_to_encoded_uri_component(input)
end

.convert_from_uint8_array(uint8array, legacy = false) ⇒ String

Convert a Uint8Array to a string

Parameters:

  • uint8array (Array<Integer>)

    Array of 8-bit integers

  • legacy (Boolean) (defaults to: false)

    Whether to use legacy mode

Returns:

  • (String)

    String representation of the Uint8Array



160
161
162
# File 'lib/lzstring/uint8_array.rb', line 160

# Module-level convenience wrapper; forwards the call to
# Uint8Array.convert_from_uint8_array.
#
# @param uint8array [Array<Integer>] array of 8-bit integers
# @param legacy [Boolean] whether to use legacy mode (defaults to false)
# @return [String] string representation of the Uint8Array
def self.convert_from_uint8_array(uint8array, legacy = false)
  Uint8Array.convert_from_uint8_array(uint8array, legacy)
end

.convert_to_uint8_array(string, legacy = false) ⇒ Array<Integer>

Convert a string to a Uint8Array

Parameters:

  • string (String)

    String to convert

  • legacy (Boolean) (defaults to: false)

    Whether to use legacy mode

Returns:

  • (Array<Integer>)

    Array of 8-bit integers



152
153
154
# File 'lib/lzstring/uint8_array.rb', line 152

# Module-level convenience wrapper; forwards the call to
# Uint8Array.convert_to_uint8_array.
#
# @param string [String] string to convert
# @param legacy [Boolean] whether to use legacy mode (defaults to false)
# @return [Array<Integer>] array of 8-bit integers
def self.convert_to_uint8_array(string, legacy = false)
  Uint8Array.convert_to_uint8_array(string, legacy)
end

.decompress(compressed_str) ⇒ String?

Standard decompression using raw format

Parameters:

  • compressed_str (String)

    Compressed string

Returns:

  • (String, nil)

    Decompressed string or nil if decompression fails



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/lzstring.rb', line 49

# Decompresses a string produced by the raw format, reading 16 bits from
# each input character.
#
# The input is split into characters once up front. Looking characters up
# with String#[] and re-measuring String#length for every index is linear on
# multibyte UTF-8 text, which made decompression quadratic in the input size.
# Characters are still converted with String#ord only when they are actually
# read, so malformed input fails at the same point as before.
#
# @param compressed_str [String, nil] compressed string
# @return [String, nil] decompressed string; "" for nil or empty input; nil
#   when decompression fails or any StandardError is raised
def self.decompress(compressed_str)
  return "" if compressed_str.nil? || compressed_str.empty?

  begin
    # Work on a UTF-8 tagged copy so the caller's string is left untouched.
    input = compressed_str.to_s.dup.force_encoding(Encoding::UTF_8)
    chars = input.chars
    char_count = chars.length

    # Use 32768 as the reset value, same as JavaScript
    result = _decompress(char_count, 32_768) do |index|
      # Positions past the end read as zero bits.
      index < char_count ? chars[index].ord : 0
    end

    if result.is_a?(String)
      result.force_encoding(Encoding::UTF_8)

      # Replace invalid byte sequences rather than returning a broken string.
      unless result.valid_encoding?
        result = result.encode(Encoding::UTF_8,
                               Encoding::UTF_8,
                               invalid: :replace,
                               undef: :replace,
                               replace: "?")
      end
    end

    result
  rescue StandardError
    # Return nil on error, matching JavaScript's behavior
    nil
  end
end

.decompress_from_base64(input) ⇒ String?

Decompress a string from base64 encoding

Parameters:

  • input (String)

    Base64 compressed string

Returns:

  • (String, nil)

    Decompressed string or nil if decompression fails



105
106
107
# File 'lib/lzstring/base64.rb', line 105

# Module-level convenience wrapper; forwards the call to
# Base64.decompress_from_base64.
#
# @param input [String] Base64 compressed string
# @return [String, nil] decompressed string or nil if decompression fails
def self.decompress_from_base64(input)
  Base64.decompress_from_base64(input)
end

.decompress_from_custom(input, key_str) ⇒ String?

Decompress a string using a custom character set

Parameters:

  • input (String)

    Compressed string using custom encoding

  • key_str (String)

    Custom character set used for encoding

Returns:

  • (String, nil)

    Decompressed string or nil if decompression fails



131
132
133
# File 'lib/lzstring/custom.rb', line 131

# Module-level convenience wrapper; forwards the call to
# Custom.decompress_from_custom.
#
# @param input [String] compressed string using the custom encoding
# @param key_str [String] custom character set used for encoding
# @return [String, nil] decompressed string or nil if decompression fails
def self.decompress_from_custom(input, key_str)
  Custom.decompress_from_custom(input, key_str)
end

.decompress_from_encoded_uri_component(input) ⇒ String?

Decompress a string from URI-component safe encoding

Parameters:

  • input (String)

    URI-component safe compressed string

Returns:

  • (String, nil)

    Decompressed string or nil if decompression fails



85
86
87
# File 'lib/lzstring/uri.rb', line 85

# Module-level convenience wrapper; forwards the call to
# URI.decompress_from_encoded_uri_component.
#
# @param input [String] URI-component safe compressed string
# @return [String, nil] decompressed string or nil if decompression fails
def self.decompress_from_encoded_uri_component(input)
  URI.decompress_from_encoded_uri_component(input)
end

.decompress_from_uint8_array(uint8array, legacy = false) ⇒ String?

Decompress a Uint8Array to a string

Parameters:

  • uint8array (Array<Integer>)

    Array of 8-bit integers

  • legacy (Boolean) (defaults to: false)

    Whether to use legacy mode

Returns:

  • (String, nil)

    Decompressed string or nil if decompression fails



144
145
146
# File 'lib/lzstring/uint8_array.rb', line 144

# Module-level convenience wrapper; forwards the call to
# Uint8Array.decompress_from_uint8_array.
#
# @param uint8array [Array<Integer>] array of 8-bit integers
# @param legacy [Boolean] whether to use legacy mode (defaults to false)
# @return [String, nil] decompressed string or nil if decompression fails
def self.decompress_from_uint8_array(uint8array, legacy = false)
  Uint8Array.decompress_from_uint8_array(uint8array, legacy)
end

.decompress_from_utf16(input) ⇒ String?

Decompress a string from UTF-16 encoding

Parameters:

  • input (String)

    UTF-16 compressed string

Returns:

  • (String, nil)

    Decompressed string or nil if decompression fails



99
100
101
# File 'lib/lzstring/utf16.rb', line 99

# Module-level convenience wrapper; forwards the call to
# UTF16.decompress_from_utf16.
#
# @param input [String] UTF-16 compressed string
# @return [String, nil] decompressed string or nil if decompression fails
def self.decompress_from_utf16(input)
  UTF16.decompress_from_utf16(input)
end

.decompressFromEncodedURIComponent ⇒ String?

Decompress a string from URI-component safe encoding

Parameters:

  • input (String)

    URI-component safe compressed string

Returns:

  • (String, nil)

    Decompressed string or nil if decompression fails



92
93
94
# File 'lib/lzstring/uri.rb', line 92

# Module-level convenience wrapper; forwards the call to
# URI.decompress_from_encoded_uri_component.
#
# @param input [String] URI-component safe compressed string
# @return [String, nil] decompressed string or nil if decompression fails
def self.decompress_from_encoded_uri_component(input)
  URI.decompress_from_encoded_uri_component(input)
end