From c4f3a02b3bd3709fd0e07ed07b372b3d5ec3815d Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Tue, 6 Aug 2019 07:38:49 -0500 Subject: [PATCH] makeqstrdata: permit longer "compressed" outputs It is possible for this routine to expand some inputs, and in fact it does for certain strings in the proposed Korean translation of CircuitPython (#1858). I did not determine what the maximum expansion is -- it's probably modest, like len()/7+2 bytes or something -- so I tried to just make enc[] an adequate over-allocation, and then ensured that all the strings in the proposed ko.po now worked. The worst actual expansion seems to be a string that goes from 65 UTF-8-encoded bytes to 68 compressed bytes (+4.6%). Only a few out of all strings are reported as non-compressed. --- py/makeqstrdata.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index 5b5ec1c37..8e3835d86 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -180,7 +180,7 @@ def compress(encoding_table, decompressed): if not isinstance(decompressed, bytes): raise TypeError() values, lengths = encoding_table - enc = bytearray(len(decompressed)) + enc = bytearray(len(decompressed) * 2) #print(decompressed) #print(lengths) current_bit = 7 @@ -227,6 +227,8 @@ def compress(encoding_table, decompressed): current_bit -= 1 if current_bit != 7: current_byte += 1 + if current_byte > len(decompressed): + print("Note: compression increased length", repr(decompressed.decode('utf-8')), len(decompressed), current_byte, file=sys.stderr) return enc[:current_byte] def qstr_escape(qst):