From 6e9f8035a1a49ac096bc2eae6ec4637b48e29048 Mon Sep 17 00:00:00 2001
From: Felix Lange <fjl@twurst.com>
Date: Tue, 14 Apr 2015 16:09:06 +0200
Subject: [PATCH] rlp: stricter validation of canonical integer format

All integers (including size information in type tags) need to be
encoded using the smallest possible encoding. This commit expands the
stricter validation introduced for *big.Int in commit 59597d23a5ee268
to all integer types and size tags.
---
 rlp/decode.go      | 97 ++++++++++++++++++++++++++++++----------------
 rlp/decode_test.go | 38 ++++++++++++++----
 2 files changed, 93 insertions(+), 42 deletions(-)

diff --git a/rlp/decode.go b/rlp/decode.go
index 42be31a2d..6bd13aa8f 100644
--- a/rlp/decode.go
+++ b/rlp/decode.go
@@ -109,7 +109,9 @@ func (err *decodeError) Error() string {
 func wrapStreamError(err error, typ reflect.Type) error {
 	switch err {
 	case ErrCanonInt:
-		return &decodeError{msg: "canon int error appends zero's", typ: typ}
+		return &decodeError{msg: "non-canonical integer (leading zero bytes)", typ: typ}
+	case ErrCanonSize:
+		return &decodeError{msg: "non-canonical size information", typ: typ}
 	case ErrExpectedList:
 		return &decodeError{msg: "expected input list", typ: typ}
 	case ErrExpectedString:
@@ -195,12 +197,10 @@ func decodeBigInt(s *Stream, val reflect.Value) error {
 		i = new(big.Int)
 		val.Set(reflect.ValueOf(i))
 	}
-
-	// Reject big integers which are zero appended
+	// Reject leading zero bytes
 	if len(b) > 0 && b[0] == 0 {
 		return wrapStreamError(ErrCanonInt, val.Type())
 	}
-
 	i.SetBytes(b)
 	return nil
 }
@@ -270,7 +270,7 @@ func decodeListSlice(s *Stream, val reflect.Value, elemdec decoder) error {
 func decodeListArray(s *Stream, val reflect.Value, elemdec decoder) error {
 	size, err := s.List()
 	if err != nil {
-		return err
+		return wrapStreamError(err, val.Type())
 	}
 	if size == 0 {
 		zero(val, 0)
@@ -474,10 +474,11 @@ var (
 	// has been reached during streaming.
 	EOL = errors.New("rlp: end of list")
 
-	// Other errors
+	// Actual Errors
 	ErrExpectedString = errors.New("rlp: expected String or Byte")
 	ErrExpectedList   = errors.New("rlp: expected List")
-	ErrCanonInt       = errors.New("rlp: expected Int")
+	ErrCanonInt       = errors.New("rlp: non-canonical (leading zero bytes) integer")
+	ErrCanonSize      = errors.New("rlp: non-canonical size information")
 	ErrElemTooLarge   = errors.New("rlp: element is larger than containing list")
 	ErrValueTooLarge  = errors.New("rlp: value size exceeds available input length")
 
@@ -519,6 +520,7 @@ type Stream struct {
 	kind    Kind   // kind of value ahead
 	size    uint64 // size of value ahead
 	byteval byte   // value of single byte in type tag
+	kinderr error  // error from last readKind
 	stack   []listpos
 }
 
@@ -619,13 +621,21 @@ func (s *Stream) uint(maxbits int) (uint64, error) {
 	}
 	switch kind {
 	case Byte:
+		if s.byteval == 0 {
+			return 0, ErrCanonInt
+		}
 		s.kind = -1 // rearm Kind
 		return uint64(s.byteval), nil
 	case String:
 		if size > uint64(maxbits/8) {
 			return 0, errUintOverflow
 		}
-		return s.readUint(byte(size))
+		v, err := s.readUint(byte(size))
+		if err == ErrCanonSize {
+			// Adjust error because we're not reading a size right now.
+			err = ErrCanonInt
+		}
+		return v, err
 	default:
 		return 0, ErrExpectedString
 	}
@@ -729,6 +739,7 @@ func (s *Stream) Reset(r io.Reader, inputLimit uint64) {
 	s.stack = s.stack[:0]
 	s.size = 0
 	s.kind = -1
+	s.kinderr = nil
 	if s.uintbuf == nil {
 		s.uintbuf = make([]byte, 8)
 	}
@@ -751,32 +762,31 @@ func (s *Stream) Kind() (kind Kind, size uint64, err error) {
 		tos = &s.stack[len(s.stack)-1]
 	}
 	if s.kind < 0 {
+		s.kinderr = nil
 		// Don't read further if we're at the end of the
 		// innermost list.
 		if tos != nil && tos.pos == tos.size {
 			return 0, 0, EOL
 		}
-		kind, size, err = s.readKind()
-		if err != nil {
-			return 0, 0, err
-		}
-		s.kind, s.size = kind, size
-	}
-	// Make sure size is reasonable. This is done always
-	// so Kind returns the same error when called multiple times.
-	if tos == nil {
-		// At toplevel, check that the value is smaller
-		// than the remaining input length.
-		if s.limited && s.size > s.remaining {
-			return 0, 0, ErrValueTooLarge
-		}
-	} else {
-		// Inside a list, check that the value doesn't overflow the list.
-		if s.size > tos.size-tos.pos {
-			return 0, 0, ErrElemTooLarge
+		s.kind, s.size, s.kinderr = s.readKind()
+		if s.kinderr == nil {
+			if tos == nil {
+				// At toplevel, check that the value is smaller
+				// than the remaining input length.
+				if s.limited && s.size > s.remaining {
+					s.kinderr = ErrValueTooLarge
+				}
+			} else {
+				// Inside a list, check that the value doesn't overflow the list.
+				if s.size > tos.size-tos.pos {
+					s.kinderr = ErrElemTooLarge
+				}
+			}
 		}
 	}
-	return s.kind, s.size, nil
+	// Note: this might return a sticky error generated
+	// by an earlier call to readKind.
+	return s.kind, s.size, s.kinderr
 }
 
 func (s *Stream) readKind() (kind Kind, size uint64, err error) {
@@ -805,6 +815,9 @@ func (s *Stream) readKind() (kind Kind, size uint64, err error) {
 		// would be encoded as 0xB90400 followed by the string. The range of
 		// the first byte is thus [0xB8, 0xBF].
 		size, err = s.readUint(b - 0xB7)
+		if err == nil && size < 56 {
+			err = ErrCanonSize
+		}
 		return String, size, err
 	case b < 0xF8:
 		// If the total payload of a list
@@ -821,24 +834,40 @@ func (s *Stream) readKind() (kind Kind, size uint64, err error) {
 		// the concatenation of the RLP encodings of the items. The
 		// range of the first byte is thus [0xF8, 0xFF].
 		size, err = s.readUint(b - 0xF7)
+		if err == nil && size < 56 {
+			err = ErrCanonSize
+		}
 		return List, size, err
 	}
 }
 
 func (s *Stream) readUint(size byte) (uint64, error) {
-	if size == 1 {
+	switch size {
+	case 0:
+		s.kind = -1 // rearm Kind
+		return 0, nil
+	case 1:
 		b, err := s.readByte()
 		if err == io.EOF {
 			err = io.ErrUnexpectedEOF
 		}
 		return uint64(b), err
+	default:
+		start := int(8 - size)
+		for i := 0; i < start; i++ {
+			s.uintbuf[i] = 0
+		}
+		if err := s.readFull(s.uintbuf[start:]); err != nil {
+			return 0, err
+		}
+		if s.uintbuf[start] == 0 {
+			// Note: readUint is also used to decode integer
+			// values. The error needs to be adjusted to become
+			// ErrCanonInt in this case.
+			return 0, ErrCanonSize
+		}
+		return binary.BigEndian.Uint64(s.uintbuf), nil
 	}
-	start := int(8 - size)
-	for i := 0; i < start; i++ {
-		s.uintbuf[i] = 0
-	}
-	err := s.readFull(s.uintbuf[start:])
-	return binary.BigEndian.Uint64(s.uintbuf), err
 }
 
 func (s *Stream) readFull(buf []byte) (err error) {
diff --git a/rlp/decode_test.go b/rlp/decode_test.go
index e5c7f3761..aa410de92 100644
--- a/rlp/decode_test.go
+++ b/rlp/decode_test.go
@@ -21,16 +21,11 @@ func TestStreamKind(t *testing.T) {
 		{"7F", Byte, 0},
 		{"80", String, 0},
 		{"B7", String, 55},
-		{"B800", String, 0},
 		{"B90400", String, 1024},
-		{"BA000400", String, 1024},
-		{"BB00000400", String, 1024},
 		{"BFFFFFFFFFFFFFFFFF", String, ^uint64(0)},
 		{"C0", List, 0},
 		{"C8", List, 8},
 		{"F7", List, 55},
-		{"F800", List, 0},
-		{"F804", List, 4},
 		{"F90400", List, 1024},
 		{"FFFFFFFFFFFFFFFFFF", List, ^uint64(0)},
 	}
@@ -85,7 +80,7 @@ func TestStreamErrors(t *testing.T) {
 		string
 		calls
 		newStream func([]byte) *Stream // uses bytes.Reader if nil
-		error
+		error     error
 	}{
 		{"C0", calls{"Bytes"}, nil, ErrExpectedString},
 		{"C0", calls{"Uint"}, nil, ErrExpectedString},
@@ -98,6 +93,21 @@ func TestStreamErrors(t *testing.T) {
 		{"00", calls{"ListEnd"}, nil, errNotInList},
 		{"C401020304", calls{"List", "Uint", "ListEnd"}, nil, errNotAtEOL},
 
+		// Leading zero bytes are rejected when reading integers.
+		{"00", calls{"Uint"}, nil, ErrCanonInt},
+		{"820002", calls{"Uint"}, nil, ErrCanonInt},
+
+		// Size tags must use the smallest possible encoding.
+		// Leading zero bytes in the size tag are also rejected.
+		{"B800", calls{"Kind"}, withoutInputLimit, ErrCanonSize},
+		{"B90000", calls{"Kind"}, withoutInputLimit, ErrCanonSize},
+		{"B90055", calls{"Kind"}, withoutInputLimit, ErrCanonSize},
+		{"BA0002FFFF", calls{"Bytes"}, withoutInputLimit, ErrCanonSize},
+		{"F800", calls{"Kind"}, withoutInputLimit, ErrCanonSize},
+		{"F90000", calls{"Kind"}, withoutInputLimit, ErrCanonSize},
+		{"F90055", calls{"Kind"}, withoutInputLimit, ErrCanonSize},
+		{"FA0002FFFF", calls{"List"}, withoutInputLimit, ErrCanonSize},
+
 		// Expected EOF
 		{"", calls{"Kind"}, nil, io.EOF},
 		{"", calls{"Uint"}, nil, io.EOF},
@@ -170,10 +180,12 @@ testfor:
 					want = test.error.Error()
 				}
 				if err != want {
+					t.Log(test)
 					t.Errorf("test %d: last call (%s) error mismatch\ngot:  %s\nwant: %s",
 						i, call, err, test.error)
 				}
 			} else if err != "<nil>" {
+				t.Log(test)
 				t.Errorf("test %d: call %d (%s) unexpected error: %q", i, j, call, err)
 				continue testfor
 			}
@@ -289,15 +301,20 @@ var decodeTests = []decodeTest{
 	{input: "8405050505", ptr: new(uint32), value: uint32(0x05050505)},
 	{input: "850505050505", ptr: new(uint32), error: "rlp: input string too long for uint32"},
 	{input: "C0", ptr: new(uint32), error: "rlp: expected input string or byte for uint32"},
+	{input: "00", ptr: new(uint32), error: "rlp: non-canonical integer (leading zero bytes) for uint32"},
+	{input: "820004", ptr: new(uint32), error: "rlp: non-canonical integer (leading zero bytes) for uint32"},
+	{input: "B8020004", ptr: new(uint32), error: "rlp: non-canonical size information for uint32"},
 
 	// slices
 	{input: "C0", ptr: new([]uint), value: []uint{}},
 	{input: "C80102030405060708", ptr: new([]uint), value: []uint{1, 2, 3, 4, 5, 6, 7, 8}},
+	{input: "F8020004", ptr: new([]uint), error: "rlp: non-canonical size information for []uint"},
 
 	// arrays
 	{input: "C0", ptr: new([5]uint), value: [5]uint{}},
 	{input: "C50102030405", ptr: new([5]uint), value: [5]uint{1, 2, 3, 4, 5}},
 	{input: "C6010203040506", ptr: new([5]uint), error: "rlp: input list has too many elements for [5]uint"},
+	{input: "F8020004", ptr: new([5]uint), error: "rlp: non-canonical size information for [5]uint"},
 
 	// byte slices
 	{input: "01", ptr: new([]byte), value: []byte{1}},
@@ -352,7 +369,7 @@ var decodeTests = []decodeTest{
 	// big ints
 	{input: "01", ptr: new(*big.Int), value: big.NewInt(1)},
 	{input: "89FFFFFFFFFFFFFFFFFF", ptr: new(*big.Int), value: veryBigInt},
-	{input: "820001", ptr: new(big.Int), error: "rlp: canon int error appends zero's for *big.Int"},
+	{input: "820001", ptr: new(big.Int), error: "rlp: non-canonical integer (leading zero bytes) for *big.Int"},
 	{input: "10", ptr: new(big.Int), value: *big.NewInt(16)}, // non-pointer also works
 	{input: "C0", ptr: new(*big.Int), error: "rlp: expected input string or byte for *big.Int"},
 
@@ -366,6 +383,11 @@ var decodeTests = []decodeTest{
 		value: recstruct{1, &recstruct{2, &recstruct{3, nil}}},
 	},
 
+	{
+		input: "83222222",
+		ptr:   new(simplestruct),
+		error: "rlp: expected input list for rlp.simplestruct",
+	},
 	{
 		input: "C3010101",
 		ptr:   new(simplestruct),
@@ -378,7 +400,7 @@ var decodeTests = []decodeTest{
 	},
 
 	// pointers
-	{input: "00", ptr: new(*uint), value: uintp(0)},
+	{input: "00", ptr: new(*[]byte), value: &[]byte{0}},
 	{input: "80", ptr: new(*uint), value: (*uint)(nil)},
 	{input: "C0", ptr: new(*uint), value: (*uint)(nil)},
 	{input: "07", ptr: new(*uint), value: uintp(7)},
-- 
GitLab