|
|
|
@ -103,14 +103,10 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
|
|
|
|
|
# go through each qstr and print it out
|
|
|
|
|
for _, _, qstr in qstrs.values():
|
|
|
|
|
all_strings.append(qstr)
|
|
|
|
|
all_strings_concat = "".join(all_strings).encode("utf-8")
|
|
|
|
|
all_strings_concat = "".join(all_strings)
|
|
|
|
|
counts = collections.Counter(all_strings_concat)
|
|
|
|
|
# add other values
|
|
|
|
|
for i in range(256):
|
|
|
|
|
if i not in counts:
|
|
|
|
|
counts[i] = 0
|
|
|
|
|
cb = huffman.codebook(counts.items())
|
|
|
|
|
values = bytearray()
|
|
|
|
|
values = []
|
|
|
|
|
length_count = {}
|
|
|
|
|
renumbered = 0
|
|
|
|
|
last_l = None
|
|
|
|
@ -124,26 +120,27 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
|
|
|
|
|
if last_l:
|
|
|
|
|
renumbered <<= (l - last_l)
|
|
|
|
|
canonical[ch] = '{0:0{width}b}'.format(renumbered, width=l)
|
|
|
|
|
if chr(ch) in C_ESCAPES:
|
|
|
|
|
s = C_ESCAPES[chr(ch)]
|
|
|
|
|
else:
|
|
|
|
|
s = chr(ch)
|
|
|
|
|
print("//", ch, s, counts[ch], canonical[ch], renumbered)
|
|
|
|
|
s = C_ESCAPES.get(ch, ch)
|
|
|
|
|
print("//", ord(ch), s, counts[ch], canonical[ch], renumbered)
|
|
|
|
|
renumbered += 1
|
|
|
|
|
last_l = l
|
|
|
|
|
lengths = bytearray()
|
|
|
|
|
for i in range(1, max(length_count) + 1):
|
|
|
|
|
print("// length count", length_count)
|
|
|
|
|
for i in range(1, max(length_count) + 2):
|
|
|
|
|
lengths.append(length_count.get(i, 0))
|
|
|
|
|
print("// values", values, "lengths", len(lengths), lengths)
|
|
|
|
|
print("// estimated total memory size", len(lengths) + 2*len(values) + sum(len(cb[u]) for u in all_strings_concat))
|
|
|
|
|
print("//", values, lengths)
|
|
|
|
|
values_type = "uint16_t" if max(ord(u) for u in values) > 255 else "uint8_t"
|
|
|
|
|
with open(compression_filename, "w") as f:
|
|
|
|
|
f.write("const uint8_t lengths[] = {{ {} }};\n".format(", ".join(map(str, lengths))))
|
|
|
|
|
f.write("const uint8_t values[256] = {{ {} }};\n".format(", ".join(map(str, values))))
|
|
|
|
|
f.write("const {} values[] = {{ {} }};\n".format(values_type, ", ".join(str(ord(u)) for u in values)))
|
|
|
|
|
return values, lengths
|
|
|
|
|
|
|
|
|
|
def decompress(encoding_table, length, encoded):
|
|
|
|
|
values, lengths = encoding_table
|
|
|
|
|
#print(l, encoded)
|
|
|
|
|
dec = bytearray(length)
|
|
|
|
|
dec = []
|
|
|
|
|
this_byte = 0
|
|
|
|
|
this_bit = 7
|
|
|
|
|
b = encoded[this_byte]
|
|
|
|
@ -173,14 +170,14 @@ def decompress(encoding_table, length, encoded):
|
|
|
|
|
searched_length += lengths[bit_length]
|
|
|
|
|
|
|
|
|
|
v = values[searched_length + bits - max_code]
|
|
|
|
|
dec[i] = v
|
|
|
|
|
return dec
|
|
|
|
|
dec.append(v)
|
|
|
|
|
return ''.join(dec)
|
|
|
|
|
|
|
|
|
|
def compress(encoding_table, decompressed):
|
|
|
|
|
if not isinstance(decompressed, bytes):
|
|
|
|
|
if not isinstance(decompressed, str):
|
|
|
|
|
raise TypeError()
|
|
|
|
|
values, lengths = encoding_table
|
|
|
|
|
enc = bytearray(len(decompressed) * 2)
|
|
|
|
|
enc = bytearray(len(decompressed) * 3)
|
|
|
|
|
#print(decompressed)
|
|
|
|
|
#print(lengths)
|
|
|
|
|
current_bit = 7
|
|
|
|
@ -228,7 +225,7 @@ def compress(encoding_table, decompressed):
|
|
|
|
|
if current_bit != 7:
|
|
|
|
|
current_byte += 1
|
|
|
|
|
if current_byte > len(decompressed):
|
|
|
|
|
print("Note: compression increased length", repr(decompressed.decode('utf-8')), len(decompressed), current_byte, file=sys.stderr)
|
|
|
|
|
print("Note: compression increased length", repr(decompressed), len(decompressed), current_byte, file=sys.stderr)
|
|
|
|
|
return enc[:current_byte]
|
|
|
|
|
|
|
|
|
|
def qstr_escape(qst):
|
|
|
|
@ -347,9 +344,9 @@ def print_qstr_data(encoding_table, qcfgs, qstrs, i18ns):
|
|
|
|
|
total_text_compressed_size = 0
|
|
|
|
|
for original, translation in i18ns:
|
|
|
|
|
translation_encoded = translation.encode("utf-8")
|
|
|
|
|
compressed = compress(encoding_table, translation_encoded)
|
|
|
|
|
compressed = compress(encoding_table, translation)
|
|
|
|
|
total_text_compressed_size += len(compressed)
|
|
|
|
|
decompressed = decompress(encoding_table, len(translation_encoded), compressed).decode("utf-8")
|
|
|
|
|
decompressed = decompress(encoding_table, len(translation_encoded), compressed)
|
|
|
|
|
for c in C_ESCAPES:
|
|
|
|
|
decompressed = decompressed.replace(c, C_ESCAPES[c])
|
|
|
|
|
print("TRANSLATION(\"{}\", {}, {{ {} }}) // {}".format(original, len(translation_encoded)+1, ", ".join(["0x{:02x}".format(x) for x in compressed]), decompressed))
|
|
|
|
|