diff --git a/websocket_test.go b/websocket_test.go index f4073bce64d7fbfba80c06b409720b13c45df76c..8d18c738bb73c1ca7d940aed309b50ba19269de9 100644 --- a/websocket_test.go +++ b/websocket_test.go @@ -776,6 +776,7 @@ func benchConn(b *testing.B, echo, stream bool, size int) { func BenchmarkConn(b *testing.B) { sizes := []int{ 2, + 16, 32, 512, 4096, diff --git a/xor.go b/xor.go index 5a68e81d990b2782cf886658a93006a5e8087df6..a58a72f473199a023b8322fbecf8890fcf482176 100644 --- a/xor.go +++ b/xor.go @@ -13,10 +13,10 @@ import ( // to be used for masking in the key. This is so that // unmasking can be performed without the entire frame. func fastXOR(key [4]byte, keyPos int, b []byte) int { - // If the payload is greater than 16 bytes, then it's worth + // If the payload is greater than or equal to 16 bytes, then it's worth // masking 8 bytes at a time. // Optimization from https://github.com/golang/go/issues/31586#issuecomment-485530859 - if len(b) > 16 { + if len(b) >= 16 { // We first create a key that is 8 bytes long // and is aligned on the position correctly. var alignedKey [8]byte @@ -25,6 +25,86 @@ func fastXOR(key [4]byte, keyPos int, b []byte) int { } k := binary.LittleEndian.Uint64(alignedKey[:]) + // Then we xor until b is less than 128 bytes. + for len(b) >= 128 { + v := binary.LittleEndian.Uint64(b) + binary.LittleEndian.PutUint64(b, v^k) + v = binary.LittleEndian.Uint64(b[8:]) + binary.LittleEndian.PutUint64(b[8:], v^k) + v = binary.LittleEndian.Uint64(b[16:]) + binary.LittleEndian.PutUint64(b[16:], v^k) + v = binary.LittleEndian.Uint64(b[24:]) + binary.LittleEndian.PutUint64(b[24:], v^k) + v = binary.LittleEndian.Uint64(b[32:]) + binary.LittleEndian.PutUint64(b[32:], v^k) + v = binary.LittleEndian.Uint64(b[40:]) + binary.LittleEndian.PutUint64(b[40:], v^k) + v = binary.LittleEndian.Uint64(b[48:]) + binary.LittleEndian.PutUint64(b[48:], v^k) + v = binary.LittleEndian.Uint64(b[56:]) + binary.LittleEndian.PutUint64(b[56:], v^k) + v = binary.LittleEndian.Uint64(b[64:]) + binary.LittleEndian.PutUint64(b[64:], v^k) + v = binary.LittleEndian.Uint64(b[72:]) + binary.LittleEndian.PutUint64(b[72:], v^k) + v = binary.LittleEndian.Uint64(b[80:]) + binary.LittleEndian.PutUint64(b[80:], v^k) + v = binary.LittleEndian.Uint64(b[88:]) + binary.LittleEndian.PutUint64(b[88:], v^k) + v = binary.LittleEndian.Uint64(b[96:]) + binary.LittleEndian.PutUint64(b[96:], v^k) + v = binary.LittleEndian.Uint64(b[104:]) + binary.LittleEndian.PutUint64(b[104:], v^k) + v = binary.LittleEndian.Uint64(b[112:]) + binary.LittleEndian.PutUint64(b[112:], v^k) + v = binary.LittleEndian.Uint64(b[120:]) + binary.LittleEndian.PutUint64(b[120:], v^k) + b = b[128:] + } + + // Then we xor until b is less than 64 bytes. + for len(b) >= 64 { + v := binary.LittleEndian.Uint64(b) + binary.LittleEndian.PutUint64(b, v^k) + v = binary.LittleEndian.Uint64(b[8:]) + binary.LittleEndian.PutUint64(b[8:], v^k) + v = binary.LittleEndian.Uint64(b[16:]) + binary.LittleEndian.PutUint64(b[16:], v^k) + v = binary.LittleEndian.Uint64(b[24:]) + binary.LittleEndian.PutUint64(b[24:], v^k) + v = binary.LittleEndian.Uint64(b[32:]) + binary.LittleEndian.PutUint64(b[32:], v^k) + v = binary.LittleEndian.Uint64(b[40:]) + binary.LittleEndian.PutUint64(b[40:], v^k) + v = binary.LittleEndian.Uint64(b[48:]) + binary.LittleEndian.PutUint64(b[48:], v^k) + v = binary.LittleEndian.Uint64(b[56:]) + binary.LittleEndian.PutUint64(b[56:], v^k) + b = b[64:] + } + + // Then we xor until b is less than 32 bytes. + for len(b) >= 32 { + v := binary.LittleEndian.Uint64(b) + binary.LittleEndian.PutUint64(b, v^k) + v = binary.LittleEndian.Uint64(b[8:]) + binary.LittleEndian.PutUint64(b[8:], v^k) + v = binary.LittleEndian.Uint64(b[16:]) + binary.LittleEndian.PutUint64(b[16:], v^k) + v = binary.LittleEndian.Uint64(b[24:]) + binary.LittleEndian.PutUint64(b[24:], v^k) + b = b[32:] + } + + // Then we xor until b is less than 16 bytes. + for len(b) >= 16 { + v := binary.LittleEndian.Uint64(b) + binary.LittleEndian.PutUint64(b, v^k) + v = binary.LittleEndian.Uint64(b[8:]) + binary.LittleEndian.PutUint64(b[8:], v^k) + b = b[16:] + } + // Then we xor until b is less than 8 bytes. for len(b) >= 8 { v := binary.LittleEndian.Uint64(b) diff --git a/xor_test.go b/xor_test.go index c3adaf580a499bb02891ac6da38143d19013ce3e..634af606ac0184ef6ba244a7fb168f2e7403d1ca 100644 --- a/xor_test.go +++ b/xor_test.go @@ -36,6 +36,7 @@ func basixXOR(maskKey [4]byte, pos int, b []byte) int { func BenchmarkXOR(b *testing.B) { sizes := []int{ 2, + 16, 32, 512, 4096,