diff options
| author | byte2016 <[email protected]> | 2018-06-12 19:55:38 +0800 |
|---|---|---|
| committer | byte2016 <[email protected]> | 2018-06-12 19:55:38 +0800 |
| commit | 76f2c13d7c27d7419af79ea0bdc7ab7717b6935b (patch) | |
| tree | aa2ca741501d40990b892d504a1cc3b7defe57aa /tests/unit/hyperloglog.tcl | |
Diffstat (limited to 'tests/unit/hyperloglog.tcl')
| -rw-r--r-- | tests/unit/hyperloglog.tcl | 185 |
1 files changed, 185 insertions, 0 deletions
diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl new file mode 100644 index 0000000..7d36b7a --- /dev/null +++ b/tests/unit/hyperloglog.tcl @@ -0,0 +1,185 @@ +start_server {tags {"hll"}} { + test {HyperLogLog self test passes} { + catch {r pfselftest} e + set e + } {OK} + + test {PFADD without arguments creates an HLL value} { + r pfadd hll + r exists hll + } {1} + + test {Approximated cardinality after creation is zero} { + r pfcount hll + } {0} + + test {PFADD returns 1 when at least 1 reg was modified} { + r pfadd hll a b c + } {1} + + test {PFADD returns 0 when no reg was modified} { + r pfadd hll a b c + } {0} + + test {PFADD works with empty string (regression)} { + r pfadd hll "" + } + + # Note that the self test stresses much better the + # cardinality estimation error. We are testing just the + # command implementation itself here. + test {PFCOUNT returns approximated cardinality of set} { + r del hll + set res {} + r pfadd hll 1 2 3 4 5 + lappend res [r pfcount hll] + # Call it again to test cached value invalidation. + r pfadd hll 6 7 8 8 9 10 + lappend res [r pfcount hll] + set res + } {5 10} + + test {HyperLogLogs are promote from sparse to dense} { + r del hll + r config set hll-sparse-max-bytes 3000 + set n 0 + while {$n < 100000} { + set elements {} + for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]} + incr n 100 + r pfadd hll {*}$elements + set card [r pfcount hll] + set err [expr {abs($card-$n)}] + assert {$err < (double($card)/100)*5} + if {$n < 1000} { + assert {[r pfdebug encoding hll] eq {sparse}} + } elseif {$n > 10000} { + assert {[r pfdebug encoding hll] eq {dense}} + } + } + } + + test {HyperLogLog sparse encoding stress test} { + for {set x 0} {$x < 1000} {incr x} { + r del hll1 hll2 + set numele [randomInt 100] + set elements {} + for {set j 0} {$j < $numele} {incr j} { + lappend elements [expr rand()] + } + # Force dense representation of hll2 + r pfadd hll2 + r pfdebug todense hll2 + r pfadd hll1 {*}$elements + r pfadd hll2 {*}$elements + assert {[r pfdebug encoding hll1] eq {sparse}} + assert {[r pfdebug encoding hll2] eq {dense}} + # Cardinality estimated should match exactly. + assert {[r pfcount hll1] eq [r pfcount hll2]} + } + } + + test {Corrupted sparse HyperLogLogs are detected: Additionl at tail} { + r del hll + r pfadd hll a b c + r append hll "hello" + set e {} + catch {r pfcount hll} e + set e + } {*INVALIDOBJ*} + + test {Corrupted sparse HyperLogLogs are detected: Broken magic} { + r del hll + r pfadd hll a b c + r setrange hll 0 "0123" + set e {} + catch {r pfcount hll} e + set e + } {*WRONGTYPE*} + + test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} { + r del hll + r pfadd hll a b c + r setrange hll 4 "x" + set e {} + catch {r pfcount hll} e + set e + } {*WRONGTYPE*} + + test {Corrupted dense HyperLogLogs are detected: Wrong length} { + r del hll + r pfadd hll a b c + r setrange hll 4 "\x00" + set e {} + catch {r pfcount hll} e + set e + } {*WRONGTYPE*} + + test {PFADD, PFCOUNT, PFMERGE type checking works} { + r set foo bar + catch {r pfadd foo 1} e + assert_match {*WRONGTYPE*} $e + catch {r pfcount foo} e + assert_match {*WRONGTYPE*} $e + catch {r pfmerge bar foo} e + assert_match {*WRONGTYPE*} $e + catch {r pfmerge foo bar} e + assert_match {*WRONGTYPE*} $e + } + + test {PFMERGE results on the cardinality of union of sets} { + r del hll hll1 hll2 hll3 + r pfadd hll1 a b c + r pfadd hll2 b c d + r pfadd hll3 c d e + r pfmerge hll hll1 hll2 hll3 + r pfcount hll + } {5} + + test {PFCOUNT multiple-keys merge returns cardinality of union #1} { + r del hll1 hll2 hll3 + for {set x 1} {$x < 10000} {incr x} { + r pfadd hll1 "foo-$x" + r pfadd hll2 "bar-$x" + r pfadd hll3 "zap-$x" + + set card [r pfcount hll1 hll2 hll3] + set realcard [expr {$x*3}] + set err [expr {abs($card-$realcard)}] + assert {$err < (double($card)/100)*5} + } + } + + test {PFCOUNT multiple-keys merge returns cardinality of union #2} { + r del hll1 hll2 hll3 + set elements {} + for {set x 1} {$x < 10000} {incr x} { + for {set j 1} {$j <= 3} {incr j} { + set rint [randomInt 20000] + r pfadd hll$j $rint + lappend elements $rint + } + } + set realcard [llength [lsort -unique $elements]] + set card [r pfcount hll1 hll2 hll3] + set err [expr {abs($card-$realcard)}] + assert {$err < (double($card)/100)*5} + } + + test {PFDEBUG GETREG returns the HyperLogLog raw registers} { + r del hll + r pfadd hll 1 2 3 + llength [r pfdebug getreg hll] + } {16384} + + test {PFADD / PFCOUNT cache invalidation works} { + r del hll + r pfadd hll a b c + r pfcount hll + assert {[r getrange hll 15 15] eq "\x00"} + r pfadd hll a b c + assert {[r getrange hll 15 15] eq "\x00"} + r pfadd hll 1 2 3 + assert {[r getrange hll 15 15] eq "\x80"} + } +} |
