Queues can shrink now, more tests, benchmarking in progress.

Peter H. Froehlich, 8 years ago
Parent commit: f407d31118
4 changed files with 204 additions and 87 deletions
  1. BENCH.md (+48 -0)
  2. README.md (+61 -69)
  3. queue/queue.go (+39 -18)
  4. queue/queue_test.go (+56 -0)

BENCH.md (+48 -0)

@@ -0,0 +1,48 @@
+# Benchmarks and Test Cases
+
+It seems that Go's otherwise excellent
+[testing](https://golang.org/pkg/testing/)
+infrastructure is susceptible to certain "contamination effects"
+in which the presence (or absence) of *test cases* influences the
+performance measured by *benchmarks*.
+Check it out:
+
+```
+$ benchstat without.txt with.txt
+name               old time/op    new time/op    delta
+PushFrontQueue-2     85.5µs ± 1%    97.7µs ± 1%  +14.31%  (p=0.000 n=10+10)
+PushFrontList-2       159µs ± 0%     167µs ± 1%   +4.93%  (p=0.000 n=9+10)
+PushBackQueue-2      85.9µs ± 1%    98.3µs ± 1%  +14.38%  (p=0.000 n=10+10)
+PushBackList-2        159µs ± 0%     166µs ± 1%   +4.91%  (p=0.000 n=9+9)
+PushBackChannel-2     117µs ± 1%     146µs ± 2%  +25.01%  (p=0.000 n=10+10)
+RandomQueue-2         153µs ± 0%     174µs ± 3%  +13.69%  (p=0.000 n=9+9)
+RandomList-2          284µs ± 1%     297µs ± 1%   +4.68%  (p=0.000 n=10+8)
+
+name               old alloc/op   new alloc/op   delta
+PushFrontQueue-2     40.9kB ± 0%    40.9kB ± 0%     ~     (all equal)
+PushFrontList-2      57.4kB ± 0%    57.4kB ± 0%     ~     (all equal)
+PushBackQueue-2      40.9kB ± 0%    40.9kB ± 0%     ~     (all equal)
+PushBackList-2       57.4kB ± 0%    57.4kB ± 0%     ~     (all equal)
+PushBackChannel-2    24.7kB ± 0%    24.7kB ± 0%     ~     (all equal)
+RandomQueue-2        45.7kB ± 0%    45.7kB ± 0%     ~     (all equal)
+RandomList-2         90.8kB ± 0%    90.8kB ± 0%     ~     (all equal)
+
+name               old allocs/op  new allocs/op  delta
+PushFrontQueue-2      1.03k ± 0%     1.03k ± 0%     ~     (all equal)
+PushFrontList-2       2.05k ± 0%     2.05k ± 0%     ~     (all equal)
+PushBackQueue-2       1.03k ± 0%     1.03k ± 0%     ~     (all equal)
+PushBackList-2        2.05k ± 0%     2.05k ± 0%     ~     (all equal)
+PushBackChannel-2     1.03k ± 0%     1.03k ± 0%     ~     (all equal)
+RandomQueue-2         1.63k ± 0%     1.63k ± 0%     ~     (all equal)
+RandomList-2          3.24k ± 0%     3.24k ± 0%     ~     (all equal)
+```
+
+The *only* difference between `old time` and `new time` is that the
+latter runs had two additional *test cases* in `queue_test.go`.
+For some reason the mere presence of those test cases makes *all*
+benchmarks appear worse.
+If I comment them out and run the *exact same* benchmarks, everything
+is back to normal.
+For now I am reporting the worse results in the official `README` just
+to be conservative, but I believe something needs to be fixed on the Go
+side of things.
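*(Editorial note, not part of the commit: the `without.txt` and `with.txt` inputs above appear to come from running the README's benchmark command, `go test -bench=. -benchmem -count=10`, once with the new `TestGrowShrink` test cases commented out and once with them present, redirecting the output to the respective file before feeding both files to `benchstat`.)*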

README.md (+61 -69)

@@ -4,18 +4,18 @@
 [![Go Report Card](https://goreportcard.com/badge/github.com/phf/go-queue)](https://goreportcard.com/report/github.com/phf/go-queue)
 
 A double-ended queue (aka "deque") built on top of a slice.
-All operations except pushes are constant-time; pushes are
-*amortized* constant-time.
+All operations are (amortized) constant time.
 Benchmarks compare favorably to
 [container/list](https://golang.org/pkg/container/list/) as
 well as to Go's channels.
 
-I tried to stick to the conventions
-[container/list](https://golang.org/pkg/container/list/) seems to
-follow even though I disagree with them (see
+I tried to stick to the conventions established by
+[container/list](https://golang.org/pkg/container/list/)
+even though I disagree with them (see
 [`RANT.md`](https://github.com/phf/go-queue/blob/master/RANT.md)
 for details).
-In other words, it's ready for the standard library (hah!).
+In other words, this data structure is ready for the standard
+library (hah!).
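*(Editorial example, not part of the commit: a minimal usage sketch of the queue described above, using only the exported API exercised in `queue_test.go` — `PushFront`, `PushBack`, `PopFront`, `PopBack`, `Len` — and the import path reported by the benchmark runs.)*

```go
package main

import (
	"fmt"

	"github.com/phf/go-queue/queue"
)

func main() {
	var q queue.Queue // the zero value is ready to use (see lazyInit)
	q.PushBack("middle")
	q.PushBack("back")
	q.PushFront("front")
	fmt.Println(q.Len())      // 3
	fmt.Println(q.PopFront()) // front
	fmt.Println(q.PopBack())  // back
	fmt.Println(q.PopFront()) // middle
}
```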
 
 ## Background
 
@@ -39,86 +39,78 @@ insertion.
 
 ## Performance
 
-The benchmarks are not very sophisticated but we seem to be *almost*
-twice as fast as [container/list](https://golang.org/pkg/container/list/)
-([speedup](https://en.wikipedia.org/wiki/Speedup) of 1.85-1.93).
-We're also a bit faster than Go's channels (speedup of 1.38).
-Here are the numbers from my (ancient) home machine:
+**Please read
+[`BENCH.md`](https://github.com/phf/go-queue/blob/master/BENCH.md)
+for some perspective.
+The numbers below are most likely "contaminated" in a way that makes
+our queues appear *worse* than they are.**
+
+Here are the numbers for my (ancient) home machine:
 
 ```
-$ go test -bench . -benchmem
-BenchmarkPushFrontQueue-2    	   20000	     85886 ns/op	   40944 B/op	    1035 allocs/op
-BenchmarkPushFrontList-2     	   10000	    158998 ns/op	   57392 B/op	    2049 allocs/op
-BenchmarkPushBackQueue-2     	   20000	     85189 ns/op	   40944 B/op	    1035 allocs/op
-BenchmarkPushBackList-2      	   10000	    160718 ns/op	   57392 B/op	    2049 allocs/op
-BenchmarkPushBackChannel-2   	   10000	    117610 ns/op	   24672 B/op	    1026 allocs/op
-BenchmarkRandomQueue-2       	   10000	    144867 ns/op	   45720 B/op	    1632 allocs/op
-BenchmarkRandomList-2        	    5000	    278965 ns/op	   90824 B/op	    3243 allocs/op
-PASS
-ok  	github.com/phf/go-queue/queue	12.472s
+$ go test -bench=. -benchmem -count=10 >bench.txt
+$ benchstat bench.txt
+name               time/op
+PushFrontQueue-2   97.7µs ± 1%
+PushFrontList-2     163µs ± 1%
+PushBackQueue-2    98.0µs ± 1%
+PushBackList-2      165µs ± 3%
+PushBackChannel-2   145µs ± 1%
+RandomQueue-2       172µs ± 1%
+RandomList-2        292µs ± 1%
+GrowShrinkQueue-2   121µs ± 1%
+GrowShrinkList-2    174µs ± 1%
+
+name               alloc/op
+PushFrontQueue-2   40.9kB ± 0%
+PushFrontList-2    57.4kB ± 0%
+PushBackQueue-2    40.9kB ± 0%
+PushBackList-2     57.4kB ± 0%
+PushBackChannel-2  24.7kB ± 0%
+RandomQueue-2      45.7kB ± 0%
+RandomList-2       90.8kB ± 0%
+GrowShrinkQueue-2  57.2kB ± 0%
+GrowShrinkList-2   57.4kB ± 0%
+
+name               allocs/op
+PushFrontQueue-2    1.03k ± 0%
+PushFrontList-2     2.05k ± 0%
+PushBackQueue-2     1.03k ± 0%
+PushBackList-2      2.05k ± 0%
+PushBackChannel-2   1.03k ± 0%
+RandomQueue-2       1.63k ± 0%
+RandomList-2        3.24k ± 0%
+GrowShrinkQueue-2   1.04k ± 0%
+GrowShrinkList-2    2.05k ± 0%
 $ go version
 go version go1.7.5 linux/amd64
-$ uname -p
-AMD Athlon(tm) 64 X2 Dual Core Processor 6000+
-$ date
-Sat Apr 22 11:26:40 EDT 2017
+$ cat /proc/cpuinfo | grep "model name" | uniq
+model name	: AMD Athlon(tm) 64 X2 Dual Core Processor 6000+
 ```
 
-(Note that the number of allocations seems off: since we grow by doubling
-we should only allocate memory O(log n) times.)
+That's a [speedup](https://en.wikipedia.org/wiki/Speedup) of
+1.45-1.70
+over [container/list](https://golang.org/pkg/container/list/) and a speedup of
+1.48
+over Go's channels.
+We also consistently allocate less memory in fewer allocations than
+[container/list](https://golang.org/pkg/container/list/).
+(Note that the number of allocations seems off: since we grow by *doubling*
+we should only allocate memory *O(log n)* times.)
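*(Editorial note, not part of the commit: the speedups quoted above follow directly from the table, e.g. PushFrontList/PushFrontQueue = 163µs/97.7µs ≈ 1.67, RandomList/RandomQueue = 292µs/172µs ≈ 1.70, and PushBackChannel/PushBackQueue = 145µs/98.0µs ≈ 1.48 for the channel comparison.)*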
+
 The same benchmarks on my (slightly more recent) laptop:
 
 ```
-$ go test -bench=. -benchmem
-PASS
-BenchmarkPushFrontQueue-4 	   10000	    107377 ns/op	   40944 B/op	    1035 allocs/op
-BenchmarkPushFrontList-4  	   10000	    205141 ns/op	   57392 B/op	    2049 allocs/op
-BenchmarkPushBackQueue-4  	   10000	    107339 ns/op	   40944 B/op	    1035 allocs/op
-BenchmarkPushBackList-4   	   10000	    204100 ns/op	   57392 B/op	    2049 allocs/op
-BenchmarkPushBackChannel-4	   10000	    174319 ns/op	   24672 B/op	    1026 allocs/op
-BenchmarkRandomQueue-4    	   10000	    190498 ns/op	   45720 B/op	    1632 allocs/op
-BenchmarkRandomList-4     	    5000	    364802 ns/op	   90825 B/op	    3243 allocs/op
-ok  	github.com/phf/go-queue/queue	11.881s
-$ go version
-go version go1.6.2 linux/amd64
-$ cat /proc/cpuinfo | grep "model name" | uniq
-model name	: AMD A10-4600M APU with Radeon(tm) HD Graphics
-$ date
-Fri Apr 28 17:20:57 EDT 2017
+TODO
 ```
 
-So that's a [speedup](https://en.wikipedia.org/wiki/Speedup) of 1.90 over
-[container/list](https://golang.org/pkg/container/list/) and of 1.62 over
-Go's channels.
 The same benchmarks on an old
 [Raspberry Pi Model B Rev 1](https://en.wikipedia.org/wiki/Raspberry_Pi):
 
 ```
-$ go test -bench . -benchmem
-PASS
-BenchmarkPushFrontQueue     2000            788316 ns/op           16469 B/op         12 allocs/op
-BenchmarkPushFrontList      1000           2629835 ns/op           33904 B/op       1028 allocs/op
-BenchmarkPushBackQueue      2000            776663 ns/op           16469 B/op         12 allocs/op
-BenchmarkPushBackList       1000           2817162 ns/op           33877 B/op       1028 allocs/op
-BenchmarkPushBackChannel    2000           1229474 ns/op            8454 B/op          1 allocs/op
-BenchmarkRandomQueue        2000           1325947 ns/op           16469 B/op         12 allocs/op
-BenchmarkRandomList          500           4929491 ns/op           53437 B/op       1627 allocs/op
-ok      github.com/phf/go-queue/queue   17.798s
-$ go version
-go version go1.3.3 linux/arm
-$ cat /proc/cpuinfo | grep "model name"
-model name      : ARMv6-compatible processor rev 7 (v6l)
-$ date
-Sat Apr 22 18:04:16 UTC 2017
+TODO
 ```
 
-So that's a [speedup](https://en.wikipedia.org/wiki/Speedup) of
-**3.34**-**3.72** over
-[container/list](https://golang.org/pkg/container/list/) and of 1.58 over
-Go's channels.
-(Also the number of allocations seems to be correct here, maybe the memory
-allocator in older versions of Go was more sane if less performant?)
-
 ### Go's channels as queues
 
 Go's channels *used* to beat our queue implementation by about 22%

queue/queue.go (+39 -18)

@@ -2,13 +2,10 @@
 // Use of this source code is governed by a BSD-style license
 // that can be found in the LICENSE file.
 
-// Package queue implements a double-ended queue abstraction on
-// top of a slice/array. All operations are constant time except
-// for PushFront and PushBack which are amortized constant time.
-//
-// We are almost twice as fast as container/list at the price of
-// potentially wasting some memory because we grow by doubling.
-// We are also faster than Go's channels by a smaller margin.
+// Package queue implements a double-ended queue (aka "deque") on top
+// of a slice. All operations are (amortized) constant time.
+// Benchmarks compare favorably to container/list as well as to Go's
+// channels.
 package queue
 
 import (
@@ -52,8 +49,7 @@ func (q *Queue) Init() *Queue {
 // I am mostly doing this because container/list does the same thing.
 // Personally I think it's a little wasteful because every single
 // PushFront/PushBack is going to pay the overhead of calling this.
-// But that's the price for making zero values useful immediately,
-// something Go folks apparently like a lot.
+// But that's the price for making zero values useful immediately.
 func (q *Queue) lazyInit() {
 	if q.rep == nil {
 		q.Init()
@@ -75,13 +71,18 @@ func (q *Queue) full() bool {
 	return q.length == len(q.rep)
 }
 
-// grow doubles the size of queue q's underlying slice/array.
+// sparse reports whether queue q holds more than one element yet fills less than a quarter of its slice.
+func (q *Queue) sparse() bool {
+	return 1 < q.length && q.length < len(q.rep)/4
+}
+
+// grow doubles the size of queue q's underlying slice.
 func (q *Queue) grow() {
-	bigger := make([]interface{}, q.length*2)
+	bigger := make([]interface{}, len(q.rep)*2)
 	// Kudos to Rodrigo Moraes, see https://gist.github.com/moraes/2141121
 	// Kudos to Dariusz Górecki, see https://github.com/eapache/queue/commit/334cc1b02398be651373851653017e6cbf588f9e
 	n := copy(bigger, q.rep[q.front:])
-	copy(bigger[n:], q.rep[:q.front])
+	copy(bigger[n:], q.rep[:q.back])
 	// The above replaced the "obvious" for loop and is a bit tricky.
 	// First note that q.front == q.back if we're full; if that wasn't
 	// true, things would be more complicated. Second recall that for
@@ -93,13 +94,34 @@ func (q *Queue) grow() {
 	q.back = q.length
 }
 
-// lazyGrow grows the underlying slice/array if necessary.
+// lazyGrow grows the underlying slice if necessary.
 func (q *Queue) lazyGrow() {
 	if q.full() {
 		q.grow()
 	}
 }
 
+// shrink halves the size of queue q's underlying slice.
+func (q *Queue) shrink() {
+	smaller := make([]interface{}, len(q.rep)/2)
+	if q.front < q.back {
+		copy(smaller, q.rep[q.front:q.back])
+	} else {
+		n := copy(smaller, q.rep[q.front:])
+		copy(smaller[n:], q.rep[:q.back])
+	}
+	q.rep = smaller
+	q.front = 0
+	q.back = q.length
+}
+
+// lazyShrink shrinks the underlying slice if advisable.
+func (q *Queue) lazyShrink() {
+	if q.sparse() {
+		q.shrink()
+	}
+}
+
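*(Editorial sketch, not part of the commit: `sparse` only triggers a shrink below a quarter of capacity, while `grow` only triggers at full capacity; the gap between the two thresholds presumably keeps an alternating push/pop workload at a power-of-two boundary from reallocating on every operation. The toy program below models just that policy — the starting capacity, the helper names, and the naive "shrink at half full" alternative are invented for illustration.)*

```go
// Toy model of the resizing policy in queue.go: double when full, halve when
// sparse (more than one element but less than a quarter of capacity in use).
package main

import "fmt"

// simulate grows a toy queue until it has just doubled its capacity, then
// alternates one pop and one push many times, counting reallocations.
func simulate(shouldShrink func(length, capacity int) bool) int {
	capacity, length, reallocs := 8, 0, 0
	push := func() {
		if length == capacity { // full: grow by doubling
			capacity *= 2
			reallocs++
		}
		length++
	}
	pop := func() {
		length--
		if shouldShrink(length, capacity) { // sparse: shrink by halving
			capacity /= 2
			reallocs++
		}
	}
	for length < 9 { // the ninth push forces a grow from 8 to 16
		push()
	}
	reallocs = 0 // count only reallocations caused by the oscillation below
	for i := 0; i < 1000; i++ {
		pop()
		push()
	}
	return reallocs
}

func main() {
	quarter := func(length, capacity int) bool { return 1 < length && length < capacity/4 }
	half := func(length, capacity int) bool { return 1 < length && length <= capacity/2 }
	fmt.Println("shrink below 1/4 full:", simulate(quarter)) // 0: the gap absorbs the oscillation
	fmt.Println("shrink at 1/2 full:   ", simulate(half))    // 2000: every pop shrinks, every push regrows
}
```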
 // String returns a string representation of queue q formatted
 // from front to back.
 func (q *Queue) String() string {
@@ -161,18 +183,16 @@ func (q *Queue) PushBack(v interface{}) {
 	q.length++
 }
 
-// Both PopFront and PopBack set the newly free slot to nil
-// in an attempt to be nice to the garbage collector.
-
 // PopFront removes and returns the first element of queue q or nil.
 func (q *Queue) PopFront() interface{} {
 	if q.empty() {
 		return nil
 	}
 	v := q.rep[q.front]
-	q.rep[q.front] = nil
+	q.rep[q.front] = nil // be nice to GC
 	q.front = q.inc(q.front)
 	q.length--
+	q.lazyShrink()
 	return v
 }
 
@@ -183,7 +203,8 @@ func (q *Queue) PopBack() interface{} {
 	}
 	q.back = q.dec(q.back)
 	v := q.rep[q.back]
-	q.rep[q.back] = nil
+	q.rep[q.back] = nil // be nice to GC
 	q.length--
+	q.lazyShrink()
 	return v
 }

queue/queue_test.go (+56 -0)

@@ -113,6 +113,35 @@ func TestZeroValue(t *testing.T) {
 	}
 }
 
+func TestGrowShrink1(t *testing.T) {
+	var q Queue
+	for i := 0; i < size; i++ {
+		q.PushBack(i)
+		ensureLength(t, &q, i+1)
+	}
+	for i := 0; q.Len() > 0; i++ {
+		x := q.PopFront().(int)
+		if x != i {
+			t.Errorf("q.PopFront() = %d, want %d", x, i)
+		}
+		ensureLength(t, &q, size-i-1)
+	}
+}
+func TestGrowShrink2(t *testing.T) {
+	var q Queue
+	for i := 0; i < size; i++ {
+		q.PushFront(i)
+		ensureLength(t, &q, i+1)
+	}
+	for i := 0; q.Len() > 0; i++ {
+		x := q.PopBack().(int)
+		if x != i {
+			t.Errorf("q.PopBack() = %d, want %d", x, i)
+		}
+		ensureLength(t, &q, size-i-1)
+	}
+}
+
 const size = 1024
 
 func BenchmarkPushFrontQueue(b *testing.B) {
@@ -171,6 +200,7 @@ func makeRands() {
 }
 func BenchmarkRandomQueue(b *testing.B) {
 	makeRands()
+	b.ResetTimer() // exclude the makeRands setup from the measurement
 	for i := 0; i < b.N; i++ {
 		var q Queue
 		for n := 0; n < size; n++ {
@@ -191,6 +221,7 @@ func BenchmarkRandomQueue(b *testing.B) {
 }
 func BenchmarkRandomList(b *testing.B) {
 	makeRands()
+	b.ResetTimer() // exclude the makeRands setup from the measurement
 	for i := 0; i < b.N; i++ {
 		var q list.List
 		for n := 0; n < size; n++ {
@@ -213,3 +244,28 @@ func BenchmarkRandomList(b *testing.B) {
 		}
 	}
 }
+
+func BenchmarkGrowShrinkQueue(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		var q Queue
+		for n := 0; n < size; n++ {
+			q.PushBack(i)
+		}
+		for n := 0; n < size; n++ {
+			q.PopFront()
+		}
+	}
+}
+func BenchmarkGrowShrinkList(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		var q list.List
+		for n := 0; n < size; n++ {
+			q.PushBack(i)
+		}
+		for n := 0; n < size; n++ {
+			if e := q.Front(); e != nil {
+				q.Remove(e)
+			}
+		}
+	}
+}