Add errors, use strings

kristjanvalur · kristjanvalur · commit 886819534d40 · 2023-09-16T11:53:48.000Z
diff --git a/tests/resp.py b/tests/resp.py
@@ -6,19 +6,37 @@
 CRNL = b"\r\n"
 
 
-class VerbatimString(bytes):
+class VerbatimStr(str):
     """
     A string that is encoded as a resp3 verbatim string
     """
 
-    def __new__(cls, value: bytes, hint: str) -> "VerbatimString":
-        return bytes.__new__(cls, value)
+    def __new__(cls, value: str, hint: str) -> "VerbatimStr":
+        return str.__new__(cls, value)
 
-    def __init__(self, value: bytes, hint: str) -> None:
+    def __init__(self, value: str, hint: str) -> None:
         self.hint = hint
 
     def __repr__(self) -> str:
-        return f"VerbatimString({super().__repr__()}, {self.hint!r})"
+        return f"VerbatimStr({super().__repr__()}, {self.hint!r})"
+
+
+class ErrorStr(str):
+    """
+    A str holding an error message; its upper-cased code is kept in `.code`
+    """
+
+    def __new__(cls, code: str, value: str) -> "ErrorStr":
+        return str.__new__(cls, value)  # only the message is the str payload
+
+    def __init__(self, code: str, value: str) -> None:
+        self.code = code.upper()  # codes are normalized to upper case
+
+    def __repr__(self) -> str:
+        return f"ErrorStr({self.code!r}, {super().__repr__()})"
+
+    def __str__(self) -> str:
+        return f"{self.code} {super().__str__()}"  # "CODE message" form
 
 
 class PushData(list):
@@ -30,19 +48,39 @@ def __repr__(self) -> str:
         return f"PushData({super().__repr__()})"
 
 
+class Attribute(dict):
+    """
+    A special type of map indicating data from an attribute response
+    """
+
+    def __repr__(self) -> str:
+        return f"Attribute({super().__repr__()})"
+
+
 class RespEncoder:
     """
-    A class for simple RESP protocol encoding for unit tests
+    A simple RESP protocol encoder class for unit tests
     """
 
-    def __init__(self, protocol: int = 2, encoding: str = "utf-8") -> None:
+    def __init__(self, protocol: int = 2) -> None:
         self.protocol = protocol
-        self.encoding = encoding
+
+    def has_crnl(self, value: bytes) -> bool:
+        """Return True if value contains a CR or an NL byte."""
+        return b"\r" in value or b"\n" in value
+
+    def strip_crnl(self, value: bytes) -> bytes:
+        """Return a copy of value with every CR and NL byte removed."""
+        return value.replace(b"\r", b"").replace(b"\n", b"")
+
+    def encodestrip(self, value: str) -> bytes:
+        return self.strip_crnl(value.encode())  # utf-8 bytes, CR/NL dropped
 
     def encode(self, data: Any, hint: Optional[str] = None) -> bytes:
         if isinstance(data, dict):
             if self.protocol > 2:
-                result = f"%{len(data)}\r\n".encode()
+                code = "|" if isinstance(data, Attribute) else "%"
+                result = f"{code}{len(data)}\r\n".encode()
                 for key, val in data.items():
                     result += self.encode(key) + self.encode(val)
                 return result
@@ -54,10 +92,8 @@ def encode(self, data: Any, hint: Optional[str] = None) -> bytes:
                 return self.encode(mylist)
 
         elif isinstance(data, list):
-            if isinstance(data, PushData) and self.protocol > 2:
-                result = f">{len(data)}\r\n".encode()
-            else:
-                result = f"*{len(data)}\r\n".encode()
+            code = ">" if isinstance(data, PushData) and self.protocol > 2 else "*"
+            result = f"{code}{len(data)}\r\n".encode()
             for val in data:
                 result += self.encode(val)
             return result
@@ -71,11 +107,18 @@ def encode(self, data: Any, hint: Optional[str] = None) -> bytes:
             else:
                 return self.encode(list(data))
 
+        elif isinstance(data, ErrorStr):
+            enc = str(data).encode()
+            if self.protocol > 2:
+                if len(enc) > 80 or self.has_crnl(enc):
+                    return f"!{len(enc)}\r\n".encode() + enc + b"\r\n"
+            return b"-" + self.strip_crnl(enc) + b"\r\n"
+
         elif isinstance(data, str):
-            enc = data.encode(self.encoding)
+            enc = data.encode()
             # long strings or strings with control characters must be encoded as bulk
             # strings
-            if hint or len(enc) > 20 or b"\r" in enc or b"\n" in enc:
+            if hint or len(enc) > 80 or self.has_crnl(enc):
                 return self.encode_bulkstr(enc, hint)
             return b"+" + enc + b"\r\n"
 
@@ -158,7 +201,7 @@ def resp_parse(
 
     elif code == b"+":  # simple string
         # we decode them automatically
-        yield arg.decode(), rest
+        yield arg.decode(errors="surrogateescape"), rest
 
     elif code == b"$":  # bulk string
         count = int(arg)
@@ -168,8 +211,9 @@ def resp_parse(
             assert incoming is not None
             rest += incoming
         bulkstr = rest[:count]
-        # bulk strings are not decoded, could contain binary data
-        yield bulkstr, rest[expect:]
+        # we decode them automatically.  Can be encoded
+        # back to binary if necessary with "surrogateescape"
+        yield bulkstr.decode(errors="surrogateescape"), rest[expect:]
 
     elif code == b"=":  # verbatim strings
         count = int(arg)
@@ -179,9 +223,9 @@ def resp_parse(
             assert incoming is not None
             rest += incoming
         hint = rest[:3]
-        result = rest[4 : (count + 4)]
-        # verbatim strings are not decoded, could contain binary data
-        yield VerbatimString(result, hint.decode()), rest[expect:]
+        result = rest[4: (count + 4)]
+        yield VerbatimStr(result.decode(errors="surrogateescape"),
+                          hint.decode()), rest[expect:]
 
     elif code in b"*>":  # array or push data
         count = int(arg)
@@ -214,7 +258,7 @@ def resp_parse(
             result_set.add(value)
         yield result_set, rest
 
-    elif code == b"%":  # map
+    elif code in b"%|":  # map or attribute
         count = int(arg)
         result_map = {}
         for _ in range(count):
@@ -232,10 +276,29 @@ def resp_parse(
                     parsed = parser.send(incoming)
             value, rest = parsed
             result_map[key] = value
+        if code == b"|":
+            yield Attribute(result_map), rest
         yield result_map, rest
+
+    elif code == b"-":  # error
+        # we decode them automatically
+        decoded = arg.decode(errors="surrogateescape")
+        code, value = decoded.split(" ", 1)
+        yield ErrorStr(code, value), rest
+
+    elif code == b"!":  # resp3 error
+        count = int(arg)
+        expect = count + 2  # +2 for the trailing CRNL
+        while len(rest) < expect:
+            incoming = yield (None)
+            assert incoming is not None
+            rest += incoming
+        bulkstr = rest[:count]
+        decoded = bulkstr.decode(errors="surrogateescape")
+        code, value = decoded.split(" ", 1)
+        yield ErrorStr(code, value), rest[expect:]
+
     else:
-        if code in b"-!":
-            raise NotImplementedError(f"resp opcode '{code.decode()}' not implemented")
         raise ValueError(f"Unknown opcode '{code.decode()}'")
 
 
diff --git a/tests/test_resp.py b/tests/test_resp.py
@@ -1,6 +1,14 @@
 import pytest
 
-from .resp import PushData, VerbatimString, encode, parse_all, parse_chunks
+from .resp import (
+    Attribute,
+    ErrorStr,
+    PushData,
+    VerbatimStr,
+    encode,
+    parse_all,
+    parse_chunks,
+)
 
 
 @pytest.fixture(params=[2, 3])
@@ -13,9 +21,9 @@ def test_simple_str(self):
         assert encode("foo") == b"+foo\r\n"
 
     def test_long_str(self):
-        text = "fooling around with the sword in the mud"
-        assert len(text) == 40
-        assert encode(text) == b"$40\r\n" + text.encode() + b"\r\n"
+        text = 3 * "fooling around with the sword in the mud"
+        assert len(text) == 120
+        assert encode(text) == b"$120\r\n" + text.encode() + b"\r\n"
 
     # test strings with control characters
     def test_str_with_ctrl_chars(self):
@@ -66,6 +74,13 @@ def test_map(self, resp_version):
         else:
             assert data == b"%2\r\n:1\r\n:2\r\n:3\r\n:4\r\n"
 
+    def test_attribute(self, resp_version):
+        data = encode(Attribute({1: 2, 3: 4}), protocol=resp_version)
+        if resp_version == 2:
+            assert data == b"*4\r\n:1\r\n:2\r\n:3\r\n:4\r\n"
+        else:
+            assert data == b"|2\r\n:1\r\n:2\r\n:3\r\n:4\r\n"
+
     def test_nested_array(self):
         assert encode([1, [2, 3]]) == b"*2\r\n:1\r\n*2\r\n:2\r\n:3\r\n"
 
@@ -103,6 +118,14 @@ def test_bool(self, resp_version):
         else:
             assert data == b"f\r\n"
 
+    def test_errorstr(self, resp_version):
+        err = ErrorStr("foo", "bar\r\nbaz")
+        data = encode(err, protocol=resp_version)
+        if resp_version == 2:
+            assert data == b"-FOO barbaz\r\n"
+        else:
+            assert data == b"!12\r\nFOO bar\r\nbaz\r\n"
+
 
 @pytest.mark.parametrize("chunk_size", [0, 1, 2, -2])
 class TestParser:
@@ -111,7 +134,7 @@ def breakup_bytes(self, data, chunk_size=2):
         if chunk_size < 0:
             insert_empty = True
             chunk_size = -chunk_size
-        chunks = [data[i : i + chunk_size] for i in range(0, len(data), chunk_size)]
+        chunks = [data[i: i + chunk_size] for i in range(0, len(data), chunk_size)]
         if insert_empty:
             empty = len(chunks) * [b""]
             chunks = [item for pair in zip(chunks, empty) for item in pair]
@@ -154,7 +177,7 @@ def test_incomplete_list(self, chunk_size):
     def test_invalid_token(self, chunk_size):
         with pytest.raises(ValueError):
             self.parse_data(chunk_size, b")foo\r\n")
-        with pytest.raises(NotImplementedError):
+        with pytest.raises(ValueError):
             self.parse_data(chunk_size, b"!foo\r\n")
 
     def test_multiple_ints(self, chunk_size):
@@ -185,12 +208,30 @@ def test_simple_string(self, chunk_size):
 
     def test_bulk_string(self, chunk_size):
         parsed = parse_all(b"$3\r\nfoo\r\nbar")
-        assert parsed == ([b"foo"], b"bar")
+        assert parsed == (["foo"], b"bar")
 
     def test_bulk_string_with_ctrl_chars(self, chunk_size):
         parsed = self.parse_data(chunk_size, b"$8\r\nfoo\r\nbar\r\n")
-        assert parsed == ([b"foo\r\nbar"], b"")
+        assert parsed == (["foo\r\nbar"], b"")
 
-    def test_verbatim_string(self, chunk_size):
+    def test_verbatimstr(self, chunk_size):
         parsed = self.parse_data(chunk_size, b"=3\r\ntxt:foo\r\nbar")
-        assert parsed == ([VerbatimString(b"foo", "txt")], b"bar")
+        assert parsed == ([VerbatimStr("foo", "txt")], b"bar")
+
+    def test_errorstr(self, chunk_size):
+        parsed = self.parse_data(chunk_size, b"-FOO bar\r\nbaz")
+        assert parsed == ([ErrorStr("foo", "bar")], b"baz")
+
+    def test_errorstr_resp3(self, chunk_size):
+        parsed = self.parse_data(chunk_size, b"!12\r\nFOO bar\r\nbaz\r\n")
+        assert parsed == ([ErrorStr("foo", "bar\r\nbaz")], b"")
+
+    def test_attribute_map(self, chunk_size):
+        parsed = self.parse_data(chunk_size, b"|2\r\n:1\r\n:2\r\n:3\r\n:4\r\n")
+        assert parsed == ([Attribute({1: 2, 3: 4})], b"")
+
+    def test_surrogateescape(self, chunk_size):
+        data = b"foo\xff"
+        parsed = self.parse_data(chunk_size, b"$4\r\n" + data + b"\r\nbar")
+        assert parsed == ([data.decode(errors="surrogateescape")], b"bar")
+        assert parsed[0][0].encode("utf-8", "surrogateescape") == data