summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWilliam Ahern <[email protected]>2016-02-22 21:31:44 -0800
committerWilliam Ahern <[email protected]>2016-02-22 21:31:44 -0800
commit7ae6b2e6e4bf096dfc23736ae9587598ea5a8e23 (patch)
treea943ed987d446eb466bdce6510454f4242af7083
parent6f9379e9ffccb465d0e19db97011bf8568fd4999 (diff)
parentd9d3e5dd2fdd9738133eeeae43cb05c3c92f2355 (diff)
Merge branch 'portable_bitops' of git://github.com/nmathewson/timeout into nmathewson-portable_bitops
-rw-r--r--bitops.c249
-rw-r--r--timeout.c32
2 files changed, 269 insertions, 12 deletions
diff --git a/bitops.c b/bitops.c
new file mode 100644
index 0000000..d8325db
--- /dev/null
+++ b/bitops.c
@@ -0,0 +1,249 @@
+#include <stdint.h>
+#ifdef _MSC_VER
+#include <intrin.h> /* _BitScanForward, _BitScanReverse */
+#endif
+
+/* First define ctz and clz functions; these are compiler-dependent if
+ * you want them to be fast. */
+#if defined(__GNUC__) && !defined(TIMEOUT_DISABLE_GNUC_BITOPS)
+
+/* On GCC and clang and some others, we can use __builtin functions. They
+ * are not defined for n==0, but timeout.s never calls them with n==0. */
+
+#define ctz64(n) __builtin_ctzll(n)
+#define clz64(n) __builtin_clzll(n)
+#if LONG_BITS == 32
+#define ctz32(n) __builtin_ctzl(n)
+#define clz32(n) __builtin_clzl(n)
+#else
+#define ctz32(n) __builtin_ctz(n)
+#define clz32(n) __builtin_clz(n)
+#endif
+
+#elif defined(_MSC_VER) && !defined(TIMEOUT_DISABLE_MSVC_BITOPS)
+
+/* On MSVC, we have these handy functions. We can ignore their return
+ * values, since we will never supply val == 0. */
+
+static __inline int ctz32(unsigned long val)
+{
+ DWORD zeros = 0;
+ _BitScanForward(&zeros, val);
+ return zeros;
+}
+static __inline int clz32(unsigned long val)
+{
+ DWORD zeros = 0;
+ _BitScanReverse(&zeros, val);
+ return zeros;
+}
+#ifdef _WIN64
+/* According to the documentation, these only exist on Win64. */
+static __inline int ctz64(uint64_t val)
+{
+ DWORD zeros = 0;
+ _BitScanForward64(&zeros, val);
+ return zeros;
+}
+static __inline int clz64(uint64_t val)
+{
+ DWORD zeros = 0;
+ _BitScanReverse64(&zeros, val);
+ return zeros;
+}
+#else
+static __inline int ctz64(uint64_t val)
+{
+ uint32_t lo = (uint32_t) val;
+ uint32_t hi = (uint32_t) (val >> 32);
+ return lo ? ctz32(lo) : 32 + ctz32(hi);
+}
+static __inline int clz64(uint64_t val)
+{
+ uint32_t lo = (uint32_t) val;
+ uint32_t hi = (uint32_t) (val >> 32);
+ return hi ? clz32(hi) : 32 + clz32(lo);
+}
+#endif
+
+/* End of MSVC case. */
+
+#else
+
+/* TODO: There are more clever ways to do this in the generic case. */
+
+
+#define process_(one, cz_bits, bits) \
+ if (x < ( one << (cz_bits - bits))) { rv += bits; x <<= bits; }
+
+#define process64(bits) process_((UINT64_C(1)), 64, (bits))
+static inline int clz64(uint64_t x)
+{
+ int rv = 0;
+
+ process64(32);
+ process64(16);
+ process64(8);
+ process64(4);
+ process64(2);
+ process64(1);
+ return rv;
+}
+#define process32(bits) process_((UINT32_C(1)), 32, (bits))
+static inline int clz32(uint32_t x)
+{
+ int rv = 0;
+
+ process32(16);
+ process32(8);
+ process32(4);
+ process32(2);
+ process32(1);
+ return rv;
+}
+
+#undef process_
+#undef process32
+#undef process64
+#define process_(one, bits) \
+ if ((x & ((one << (bits))-1)) == 0) { rv += bits; x >>= bits; }
+
+#define process64(bits) process_((UINT64_C(1)), bits)
+static inline int ctz64(uint64_t x)
+{
+ int rv = 0;
+
+ process64(32);
+ process64(16);
+ process64(8);
+ process64(4);
+ process64(2);
+ process64(1);
+ return rv;
+}
+
+#define process32(bits) process_((UINT32_C(1)), bits)
+static inline int ctz32(uint32_t x)
+{
+ int rv = 0;
+
+ process32(16);
+ process32(8);
+ process32(4);
+ process32(2);
+ process32(1);
+ return rv;
+}
+
+#undef process32
+#undef process64
+#undef process_
+
+/* End of generic case */
+
+#endif /* End of defining ctz */
+
+#ifdef TEST_BITOPS
+#include <stdio.h>
+#include <stdlib.h>
+
+static uint64_t testcases[] = {
+ 13371337 * 10,
+ 100,
+ 385789752,
+ 82574,
+ (((uint64_t)1)<<63) + (((uint64_t)1)<<31) + 10101
+};
+
+static int
+naive_clz(int bits, uint64_t v)
+{
+ int r = 0;
+ uint64_t bit = ((uint64_t)1) << (bits-1);
+ while (bit && 0 == (v & bit)) {
+ r++;
+ bit >>= 1;
+ }
+ /* printf("clz(%d,%lx) -> %d\n", bits, v, r); */
+ return r;
+}
+
+static int
+naive_ctz(int bits, uint64_t v)
+{
+ int r = 0;
+ uint64_t bit = 1;
+ while (bit && 0 == (v & bit)) {
+ r++;
+ bit <<= 1;
+ if (r == bits)
+ break;
+ }
+ /* printf("ctz(%d,%lx) -> %d\n", bits, v, r); */
+ return r;
+}
+
+static int
+check(uint64_t vv)
+{
+ uint32_t v32 = (uint32_t) vv;
+
+ if (vv == 0)
+ return 1; /* c[tl]z64(0) is undefined. */
+
+ if (ctz64(vv) != naive_ctz(64, vv)) {
+ printf("mismatch with ctz64: %d\n", ctz64(vv));
+ exit(1);
+ return 0;
+ }
+ if (clz64(vv) != naive_clz(64, vv)) {
+ printf("mismatch with clz64: %d\n", clz64(vv));
+ exit(1);
+ return 0;
+ }
+
+ if (v32 == 0)
+ return 1; /* c[lt]z(0) is undefined. */
+
+ if (ctz32(v32) != naive_ctz(32, v32)) {
+ printf("mismatch with ctz32: %d\n", ctz32(v32));
+ exit(1);
+ return 0;
+ }
+ if (clz32(v32) != naive_clz(32, v32)) {
+ printf("mismatch with clz32: %d\n", clz32(v32));
+ exit(1);
+ return 0;
+ }
+ return 1;
+}
+
+int
+main(int c, char **v)
+{
+ unsigned int i;
+ const unsigned int n = sizeof(testcases)/sizeof(testcases[0]);
+ int result = 0;
+
+ for (i = 0; i <= 63; ++i) {
+ uint64_t x = 1 << i;
+ if (!check(x))
+ result = 1;
+ --x;
+ if (!check(x))
+ result = 1;
+ }
+
+ for (i = 0; i < n; ++i) {
+ if (! check(testcases[i]))
+ result = 1;
+ }
+ if (result) {
+ puts("FAIL");
+ } else {
+ puts("OK");
+ }
+ return result;
+}
+#endif
+
diff --git a/timeout.c b/timeout.c
index fa23fc4..88aaa02 100644
--- a/timeout.c
+++ b/timeout.c
@@ -122,17 +122,25 @@
#define WHEEL_MASK (WHEEL_LEN - 1)
#define TIMEOUT_MAX ((TIMEOUT_C(1) << (WHEEL_BIT * WHEEL_NUM)) - 1)
+#include "bitops.c"
+
#if WHEEL_BIT == 6
+#define ctz(n) ctz64(n)
+#define clz(n) clz64(n)
+#define fls(n) ((int)(64 - clz64(n)))
+#else
+#define ctz(n) ctz32(n)
+#define clz(n) clz32(n)
+#define fls(n) ((int)(32 - clz32(n)))
+#endif
+#if WHEEL_BIT == 6
#define WHEEL_C(n) UINT64_C(n)
#define WHEEL_PRIu PRIu64
#define WHEEL_PRIx PRIx64
typedef uint64_t wheel_t;
-#define ctz(n) __builtin_ctzll(n)
-#define fls(n) ((int)(sizeof (long long) * CHAR_BIT) - __builtin_clzll(n))
-
#elif WHEEL_BIT == 5
#define WHEEL_C(n) UINT32_C(n)
@@ -141,9 +149,6 @@ typedef uint64_t wheel_t;
typedef uint32_t wheel_t;
-#define ctz(n) __builtin_ctzl(n)
-#define fls(n) ((int)(sizeof (long) * CHAR_BIT) - __builtin_clzl(n))
-
#elif WHEEL_BIT == 4
#define WHEEL_C(n) UINT16_C(n)
@@ -152,9 +157,6 @@ typedef uint32_t wheel_t;
typedef uint16_t wheel_t;
-#define ctz(n) __builtin_ctz(n)
-#define fls(n) ((int)(sizeof (int) * CHAR_BIT) - __builtin_clz(n))
-
#elif WHEEL_BIT == 3
#define WHEEL_C(n) UINT8_C(n)
@@ -163,9 +165,6 @@ typedef uint16_t wheel_t;
typedef uint8_t wheel_t;
-#define ctz(n) __builtin_ctz(n)
-#define fls(n) ((int)(sizeof (int) * CHAR_BIT) - __builtin_clz(n))
-
#else
#error invalid WHEEL_BIT value
#endif
@@ -300,6 +299,7 @@ static inline reltime_t timeout_rem(struct timeouts *T, struct timeout *to) {
static inline int timeout_wheel(timeout_t timeout) {
+ /* must be called with timeout != 0, so fls input is nonzero */
return (fls(MIN(timeout, TIMEOUT_MAX)) - 1) / WHEEL_BIT;
} /* timeout_wheel() */
@@ -322,6 +322,11 @@ static void timeouts_sched(struct timeouts *T, struct timeout *to, timeout_t exp
if (expires > T->curtime) {
rem = timeout_rem(T, to);
+ /* rem is nonzero since:
+ * rem == timeout_rem(T,to),
+ * == to->expires - T->curtime
+ * and above we have expires > T->curtime.
+ */
wheel = timeout_wheel(rem);
slot = timeout_slot(wheel, to->expires);
@@ -417,6 +422,7 @@ TIMEOUT_PUBLIC void timeouts_update(struct timeouts *T, abstime_t curtime) {
}
while (pending & T->pending[wheel]) {
+ /* ctz input cannot be zero: loop condition. */
int slot = ctz(pending & T->pending[wheel]);
TAILQ_CONCAT(&todo, &T->wheel[wheel][slot], tqe);
T->pending[wheel] &= ~(UINT64_C(1) << slot);
@@ -493,6 +499,8 @@ static timeout_t timeouts_int(struct timeouts *T) {
if (T->pending[wheel]) {
slot = WHEEL_MASK & (T->curtime >> (wheel * WHEEL_BIT));
+ /* ctz input cannot be zero: T->pending[wheel] is
+ * nonzero, so rotr() is nonzero. */
_timeout = (ctz(rotr(T->pending[wheel], slot)) + !!wheel) << (wheel * WHEEL_BIT);
/* +1 to higher order wheels as those timeouts are one rotation in the future (otherwise they'd be on a lower wheel or expired) */