diff --git a/README.md b/README.md
index df20c581a5c868cdeda72acacff26f903669ad1c..8420bdbd4232bb7fb8f20144d5bbc149332cc12e 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ go get nhooyr.io/websocket
 - Minimal and idiomatic API
 - First class [context.Context](https://blog.golang.org/context) support
 - Fully passes the WebSocket [autobahn-testsuite](https://github.com/crossbario/autobahn-testsuite)
-- [Single dependency](https://pkg.go.dev/nhooyr.io/websocket?tab=imports)
+- [Zero dependencies](https://pkg.go.dev/nhooyr.io/websocket?tab=imports)
 - JSON and protobuf helpers in the [wsjson](https://pkg.go.dev/nhooyr.io/websocket/wsjson) and [wspb](https://pkg.go.dev/nhooyr.io/websocket/wspb) subpackages
 - Zero alloc reads and writes
 - Concurrent writes
@@ -112,7 +112,6 @@ Advantages of nhooyr.io/websocket:
   - Gorilla's implementation is slower and uses [unsafe](https://golang.org/pkg/unsafe/).
 - Full [permessage-deflate](https://tools.ietf.org/html/rfc7692) compression extension support
   - Gorilla only supports no context takeover mode
-  - We use [klauspost/compress](https://github.com/klauspost/compress) for much lower memory usage ([gorilla/websocket#203](https://github.com/gorilla/websocket/issues/203))
 - [CloseRead](https://pkg.go.dev/nhooyr.io/websocket#Conn.CloseRead) helper ([gorilla/websocket#492](https://github.com/gorilla/websocket/issues/492))
 - Actively maintained ([gorilla/websocket#370](https://github.com/gorilla/websocket/issues/370))
 
diff --git a/accept.go b/accept.go
index 66379b5d979d9b7e9b06b7569b63d9c089bfe1af..f038dec9c8941bc9ee1f201baf1807aa3059c8a7 100644
--- a/accept.go
+++ b/accept.go
@@ -51,7 +51,7 @@ type AcceptOptions struct {
 	OriginPatterns []string
 
 	// CompressionMode controls the compression mode.
-	// Defaults to CompressionNoContextTakeover.
+	// Defaults to CompressionDisabled.
 	//
 	// See docs on CompressionMode for details.
 	CompressionMode CompressionMode
diff --git a/accept_test.go b/accept_test.go
index 9b18d8e11e3d57beaebd09a20add28c6be5a5d2b..f7bc669356a7f301974d896be5c64959ab51ba26 100644
--- a/accept_test.go
+++ b/accept_test.go
@@ -55,7 +55,9 @@ func TestAccept(t *testing.T) {
 		r.Header.Set("Sec-WebSocket-Key", "meow123")
 		r.Header.Set("Sec-WebSocket-Extensions", "permessage-deflate; harharhar")
 
-		_, err := Accept(w, r, nil)
+		_, err := Accept(w, r, &AcceptOptions{
+			CompressionMode: CompressionContextTakeover,
+		})
 		assert.Contains(t, err, `unsupported permessage-deflate parameter`)
 	})
 
diff --git a/autobahn_test.go b/autobahn_test.go
index e56a4912db8fa0d4a77930bf371b24640720d727..d53159a048db6471f2ab98c621dd625f2e0ed738 100644
--- a/autobahn_test.go
+++ b/autobahn_test.go
@@ -61,7 +61,9 @@ func TestAutobahn(t *testing.T) {
 				ctx, cancel := context.WithTimeout(context.Background(), time.Minute*5)
 				defer cancel()
 
-				c, _, err := websocket.Dial(ctx, fmt.Sprintf(wstestURL+"/runCase?case=%v&agent=main", i), nil)
+				c, _, err := websocket.Dial(ctx, fmt.Sprintf(wstestURL+"/runCase?case=%v&agent=main", i), &websocket.DialOptions{
+					CompressionMode: websocket.CompressionContextTakeover,
+				})
 				assert.Success(t, err)
 				err = wstest.EchoLoop(ctx, c)
 				t.Logf("echoLoop: %v", err)
diff --git a/compress.go b/compress.go
index 63d961b4d50fd8dfdcc7ab4003de793f42b53e8b..f49d9e5d34b534fc4d7cb8628f0d3d275455bfa6 100644
--- a/compress.go
+++ b/compress.go
@@ -3,49 +3,47 @@
 package websocket
 
 import (
+	"compress/flate"
 	"io"
 	"net/http"
 	"sync"
-
-	"github.com/klauspost/compress/flate"
 )
 
 // CompressionMode represents the modes available to the deflate extension.
 // See https://tools.ietf.org/html/rfc7692
-//
-// A compatibility layer is implemented for the older deflate-frame extension used
-// by safari. See https://tools.ietf.org/html/draft-tyoshino-hybi-websocket-perframe-deflate-06
-// It will work the same in every way except that we cannot signal to the peer we
-// want to use no context takeover on our side, we can only signal that they should.
-// It is however currently disabled due to Safari bugs. See https://github.com/nhooyr/websocket/issues/218
 type CompressionMode int
 
 const (
-	// CompressionNoContextTakeover grabs a new flate.Reader and flate.Writer as needed
-	// for every message. This applies to both server and client side.
+	// CompressionDisabled disables the deflate extension.
 	//
-	// This means less efficient compression as the sliding window from previous messages
-	// will not be used but the memory overhead will be lower if the connections
-	// are long lived and seldom used.
+	// Use this if you are using a predominantly binary protocol with very
+	// little duplication in between messages or CPU and memory are more
+	// important than bandwidth.
 	//
-	// The message will only be compressed if greater than 512 bytes.
-	CompressionNoContextTakeover CompressionMode = iota
+	// This is the default.
+	CompressionDisabled CompressionMode = iota
 
-	// CompressionContextTakeover uses a flate.Reader and flate.Writer per connection.
-	// This enables reusing the sliding window from previous messages.
+	// CompressionContextTakeover uses a 32 kB sliding window and flate.Writer per connection.
+	// It reuses the sliding window from previous messages.
 	// As most WebSocket protocols are repetitive, this can be very efficient.
-	// It carries an overhead of 8 kB for every connection compared to CompressionNoContextTakeover.
+	// It carries an overhead of 32 kB + 1.2 MB for every connection compared to CompressionNoContextTakeover.
+	//
+	// Once https://github.com/golang/go/issues/36919 is fixed, it will carry
+	// 65 kB of overhead instead.
 	//
 	// If the peer negotiates NoContextTakeover on the client or server side, it will be
 	// used instead as this is required by the RFC.
 	CompressionContextTakeover
 
-	// CompressionDisabled disables the deflate extension.
+	// CompressionNoContextTakeover grabs a new flate.Reader and flate.Writer as needed
+	// for every message. This applies to both server and client side.
 	//
-	// Use this if you are using a predominantly binary protocol with very
-	// little duplication in between messages or CPU and memory are more
-	// important than bandwidth.
-	CompressionDisabled
+	// This means less efficient compression as the sliding window from previous messages
+	// will not be used but the memory overhead will be lower if the connections
+	// are long lived and seldom used.
+	//
+	// The message will only be compressed if greater than 512 bytes.
+	CompressionNoContextTakeover
 )
 
 func (m CompressionMode) opts() *compressionOptions {
@@ -146,6 +144,22 @@ func putFlateReader(fr io.Reader) {
 	flateReaderPool.Put(fr)
 }
 
+var flateWriterPool sync.Pool
+
+func getFlateWriter(w io.Writer) *flate.Writer {
+	fw, ok := flateWriterPool.Get().(*flate.Writer)
+	if !ok {
+		fw, _ = flate.NewWriter(w, flate.BestSpeed)
+		return fw
+	}
+	fw.Reset(w)
+	return fw
+}
+
+func putFlateWriter(w *flate.Writer) {
+	flateWriterPool.Put(w)
+}
+
 type slidingWindow struct {
 	buf []byte
 }
diff --git a/conn_test.go b/conn_test.go
index c2c4129236d362bbc0bdda55091508ccaf781b11..4bab5adf4eaee9f30e6056619faa427421d4faa2 100644
--- a/conn_test.go
+++ b/conn_test.go
@@ -37,7 +37,7 @@ func TestConn(t *testing.T) {
 		t.Parallel()
 
 		compressionMode := func() websocket.CompressionMode {
-			return websocket.CompressionMode(xrand.Int(int(websocket.CompressionDisabled) + 1))
+			return websocket.CompressionMode(xrand.Int(int(websocket.CompressionNoContextTakeover) + 1)) // bound on the last declared mode so every mode can be drawn
 		}
 
 		for i := 0; i < 5; i++ {
@@ -389,7 +389,7 @@ func BenchmarkConn(b *testing.B) {
 			mode: websocket.CompressionDisabled,
 		},
 		{
-			name: "compress",
+			name: "compressContextTakeover",
 			mode: websocket.CompressionContextTakeover,
 		},
 		{
diff --git a/dial.go b/dial.go
index 2b25e3517d666f5740c4905c9790c8897dd06eb7..9ec9044422807f79b14281a5ce8e6dc4a6e89b55 100644
--- a/dial.go
+++ b/dial.go
@@ -35,7 +35,7 @@ type DialOptions struct {
 	Subprotocols []string
 
 	// CompressionMode controls the compression mode.
-	// Defaults to CompressionNoContextTakeover.
+	// Defaults to CompressionDisabled.
 	//
 	// See docs on CompressionMode for details.
 	CompressionMode CompressionMode
diff --git a/go.mod b/go.mod
index c5f1a20f59c2a962838c451cc62e242187e8ce27..d4bca92321d7bc80008004d96d75890fd32e20d3 100644
--- a/go.mod
+++ b/go.mod
@@ -10,6 +10,5 @@ require (
 	github.com/golang/protobuf v1.3.5
 	github.com/google/go-cmp v0.4.0
 	github.com/gorilla/websocket v1.4.1
-	github.com/klauspost/compress v1.10.3
 	golang.org/x/time v0.0.0-20191024005414-555d28b269f0
 )
diff --git a/go.sum b/go.sum
index 155c3013266a0905123fb82524f0cf5b1d169854..1344e958e465b2b93b2dee49f03aca26ccf0dcae 100644
--- a/go.sum
+++ b/go.sum
@@ -29,8 +29,6 @@ github.com/gorilla/websocket v1.4.1 h1:q7AeDBpnBk8AogcD4DSag/Ukw/KV+YhzLj2bP5HvK
 github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/json-iterator/go v1.1.9 h1:9yzud/Ht36ygwatGx56VwCZtlI/2AD15T1X2sjSuGns=
 github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
-github.com/klauspost/compress v1.10.3 h1:OP96hzwJVBIHYU52pVTI6CczrxPvrGfgqF9N5eTO0Q8=
-github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
 github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y=
 github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
 github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
diff --git a/write.go b/write.go
index 2210cf817a35e587e53235eb1204bdf0ffc323f4..b1c57c1b9c35ffb04665b2146167403e74e0fa93 100644
--- a/write.go
+++ b/write.go
@@ -12,7 +12,7 @@ import (
 	"io"
 	"time"
 
-	"github.com/klauspost/compress/flate"
+	"compress/flate"
 
 	"nhooyr.io/websocket/internal/errd"
 )
@@ -76,8 +76,8 @@ type msgWriterState struct {
 	opcode opcode
 	flate  bool
 
-	trimWriter *trimLastFourBytesWriter
-	dict       slidingWindow
+	trimWriter  *trimLastFourBytesWriter
+	flateWriter *flate.Writer
 }
 
 func newMsgWriterState(c *Conn) *msgWriterState {
@@ -96,7 +96,9 @@ func (mw *msgWriterState) ensureFlate() {
 		}
 	}
 
-	mw.dict.init(8192)
+	if mw.flateWriter == nil {
+		mw.flateWriter = getFlateWriter(mw.trimWriter)
+	}
 	mw.flate = true
 }
 
@@ -153,6 +155,13 @@ func (mw *msgWriterState) reset(ctx context.Context, typ MessageType) error {
 	return nil
 }
 
+func (mw *msgWriterState) putFlateWriter() {
+	if mw.flateWriter != nil {
+		putFlateWriter(mw.flateWriter)
+		mw.flateWriter = nil
+	}
+}
+
 // Write writes the given bytes to the WebSocket connection.
 func (mw *msgWriterState) Write(p []byte) (_ int, err error) {
 	err = mw.writeMu.lock(mw.ctx)
@@ -177,12 +186,7 @@ func (mw *msgWriterState) Write(p []byte) (_ int, err error) {
 	}
 
 	if mw.flate {
-		err = flate.StatelessDeflate(mw.trimWriter, p, false, mw.dict.buf)
-		if err != nil {
-			return 0, err
-		}
-		mw.dict.write(p)
-		return len(p), nil
+		return mw.flateWriter.Write(p)
 	}
 
 	return mw.write(p)
@@ -207,13 +211,20 @@ func (mw *msgWriterState) Close() (err error) {
 	}
 	defer mw.writeMu.unlock()
 
+	if mw.flate {
+		err = mw.flateWriter.Flush()
+		if err != nil {
+			return fmt.Errorf("failed to flush flate: %w", err)
+		}
+	}
+
 	_, err = mw.c.writeFrame(mw.ctx, true, mw.flate, mw.opcode, nil)
 	if err != nil {
 		return fmt.Errorf("failed to write fin frame: %w", err)
 	}
 
 	if mw.flate && !mw.flateContextTakeover() {
-		mw.dict.close()
+		mw.putFlateWriter()
 	}
 	mw.mu.unlock()
 	return nil
@@ -226,7 +237,7 @@ func (mw *msgWriterState) close() {
 	}
 
 	mw.writeMu.forceLock()
-	mw.dict.close()
+	mw.putFlateWriter()
 }
 
 func (c *Conn) writeControl(ctx context.Context, opcode opcode, p []byte) error {