diff options
| author | William Ahern <[email protected]> | 2016-02-22 21:31:44 -0800 |
|---|---|---|
| committer | William Ahern <[email protected]> | 2016-02-22 21:31:44 -0800 |
| commit | 7ae6b2e6e4bf096dfc23736ae9587598ea5a8e23 (patch) | |
| tree | a943ed987d446eb466bdce6510454f4242af7083 | |
| parent | 6f9379e9ffccb465d0e19db97011bf8568fd4999 (diff) | |
| parent | d9d3e5dd2fdd9738133eeeae43cb05c3c92f2355 (diff) | |
Merge branch 'portable_bitops' of git://github.com/nmathewson/timeout into nmathewson-portable_bitops
| -rw-r--r-- | bitops.c | 249 | ||||
| -rw-r--r-- | timeout.c | 32 |
2 files changed, 269 insertions, 12 deletions
diff --git a/bitops.c b/bitops.c new file mode 100644 index 0000000..d8325db --- /dev/null +++ b/bitops.c @@ -0,0 +1,249 @@ +#include <stdint.h> +#ifdef _MSC_VER +#include <intrin.h> /* _BitScanForward, _BitScanReverse */ +#endif + +/* First define ctz and clz functions; these are compiler-dependent if + * you want them to be fast. */ +#if defined(__GNUC__) && !defined(TIMEOUT_DISABLE_GNUC_BITOPS) + +/* On GCC and clang and some others, we can use __builtin functions. They + * are not defined for n==0, but timeout.s never calls them with n==0. */ + +#define ctz64(n) __builtin_ctzll(n) +#define clz64(n) __builtin_clzll(n) +#if LONG_BITS == 32 +#define ctz32(n) __builtin_ctzl(n) +#define clz32(n) __builtin_clzl(n) +#else +#define ctz32(n) __builtin_ctz(n) +#define clz32(n) __builtin_clz(n) +#endif + +#elif defined(_MSC_VER) && !defined(TIMEOUT_DISABLE_MSVC_BITOPS) + +/* On MSVC, we have these handy functions. We can ignore their return + * values, since we will never supply val == 0. */ + +static __inline int ctz32(unsigned long val) +{ + DWORD zeros = 0; + _BitScanForward(&zeros, val); + return zeros; +} +static __inline int clz32(unsigned long val) +{ + DWORD zeros = 0; + _BitScanReverse(&zeros, val); + return zeros; +} +#ifdef _WIN64 +/* According to the documentation, these only exist on Win64. */ +static __inline int ctz64(uint64_t val) +{ + DWORD zeros = 0; + _BitScanForward64(&zeros, val); + return zeros; +} +static __inline int clz64(uint64_t val) +{ + DWORD zeros = 0; + _BitScanReverse64(&zeros, val); + return zeros; +} +#else +static __inline int ctz64(uint64_t val) +{ + uint32_t lo = (uint32_t) val; + uint32_t hi = (uint32_t) (val >> 32); + return lo ? ctz32(lo) : 32 + ctz32(hi); +} +static __inline int clz64(uint64_t val) +{ + uint32_t lo = (uint32_t) val; + uint32_t hi = (uint32_t) (val >> 32); + return hi ? clz32(hi) : 32 + clz32(lo); +} +#endif + +/* End of MSVC case. */ + +#else + +/* TODO: There are more clever ways to do this in the generic case. */ + + +#define process_(one, cz_bits, bits) \ + if (x < ( one << (cz_bits - bits))) { rv += bits; x <<= bits; } + +#define process64(bits) process_((UINT64_C(1)), 64, (bits)) +static inline int clz64(uint64_t x) +{ + int rv = 0; + + process64(32); + process64(16); + process64(8); + process64(4); + process64(2); + process64(1); + return rv; +} +#define process32(bits) process_((UINT32_C(1)), 32, (bits)) +static inline int clz32(uint32_t x) +{ + int rv = 0; + + process32(16); + process32(8); + process32(4); + process32(2); + process32(1); + return rv; +} + +#undef process_ +#undef process32 +#undef process64 +#define process_(one, bits) \ + if ((x & ((one << (bits))-1)) == 0) { rv += bits; x >>= bits; } + +#define process64(bits) process_((UINT64_C(1)), bits) +static inline int ctz64(uint64_t x) +{ + int rv = 0; + + process64(32); + process64(16); + process64(8); + process64(4); + process64(2); + process64(1); + return rv; +} + +#define process32(bits) process_((UINT32_C(1)), bits) +static inline int ctz32(uint32_t x) +{ + int rv = 0; + + process32(16); + process32(8); + process32(4); + process32(2); + process32(1); + return rv; +} + +#undef process32 +#undef process64 +#undef process_ + +/* End of generic case */ + +#endif /* End of defining ctz */ + +#ifdef TEST_BITOPS +#include <stdio.h> +#include <stdlib.h> + +static uint64_t testcases[] = { + 13371337 * 10, + 100, + 385789752, + 82574, + (((uint64_t)1)<<63) + (((uint64_t)1)<<31) + 10101 +}; + +static int +naive_clz(int bits, uint64_t v) +{ + int r = 0; + uint64_t bit = ((uint64_t)1) << (bits-1); + while (bit && 0 == (v & bit)) { + r++; + bit >>= 1; + } + /* printf("clz(%d,%lx) -> %d\n", bits, v, r); */ + return r; +} + +static int +naive_ctz(int bits, uint64_t v) +{ + int r = 0; + uint64_t bit = 1; + while (bit && 0 == (v & bit)) { + r++; + bit <<= 1; + if (r == bits) + break; + } + /* printf("ctz(%d,%lx) -> %d\n", bits, v, r); */ + return r; +} + +static int +check(uint64_t vv) +{ + uint32_t v32 = (uint32_t) vv; + + if (vv == 0) + return 1; /* c[tl]z64(0) is undefined. */ + + if (ctz64(vv) != naive_ctz(64, vv)) { + printf("mismatch with ctz64: %d\n", ctz64(vv)); + exit(1); + return 0; + } + if (clz64(vv) != naive_clz(64, vv)) { + printf("mismatch with clz64: %d\n", clz64(vv)); + exit(1); + return 0; + } + + if (v32 == 0) + return 1; /* c[lt]z(0) is undefined. */ + + if (ctz32(v32) != naive_ctz(32, v32)) { + printf("mismatch with ctz32: %d\n", ctz32(v32)); + exit(1); + return 0; + } + if (clz32(v32) != naive_clz(32, v32)) { + printf("mismatch with clz32: %d\n", clz32(v32)); + exit(1); + return 0; + } + return 1; +} + +int +main(int c, char **v) +{ + unsigned int i; + const unsigned int n = sizeof(testcases)/sizeof(testcases[0]); + int result = 0; + + for (i = 0; i <= 63; ++i) { + uint64_t x = 1 << i; + if (!check(x)) + result = 1; + --x; + if (!check(x)) + result = 1; + } + + for (i = 0; i < n; ++i) { + if (! check(testcases[i])) + result = 1; + } + if (result) { + puts("FAIL"); + } else { + puts("OK"); + } + return result; +} +#endif + @@ -122,17 +122,25 @@ #define WHEEL_MASK (WHEEL_LEN - 1) #define TIMEOUT_MAX ((TIMEOUT_C(1) << (WHEEL_BIT * WHEEL_NUM)) - 1) +#include "bitops.c" + #if WHEEL_BIT == 6 +#define ctz(n) ctz64(n) +#define clz(n) clz64(n) +#define fls(n) ((int)(64 - clz64(n))) +#else +#define ctz(n) ctz32(n) +#define clz(n) clz32(n) +#define fls(n) ((int)(32 - clz32(n))) +#endif +#if WHEEL_BIT == 6 #define WHEEL_C(n) UINT64_C(n) #define WHEEL_PRIu PRIu64 #define WHEEL_PRIx PRIx64 typedef uint64_t wheel_t; -#define ctz(n) __builtin_ctzll(n) -#define fls(n) ((int)(sizeof (long long) * CHAR_BIT) - __builtin_clzll(n)) - #elif WHEEL_BIT == 5 #define WHEEL_C(n) UINT32_C(n) @@ -141,9 +149,6 @@ typedef uint64_t wheel_t; typedef uint32_t wheel_t; -#define ctz(n) __builtin_ctzl(n) -#define fls(n) ((int)(sizeof (long) * CHAR_BIT) - __builtin_clzl(n)) - #elif WHEEL_BIT == 4 #define WHEEL_C(n) UINT16_C(n) @@ -152,9 +157,6 @@ typedef uint32_t wheel_t; typedef uint16_t wheel_t; -#define ctz(n) __builtin_ctz(n) -#define fls(n) ((int)(sizeof (int) * CHAR_BIT) - __builtin_clz(n)) - #elif WHEEL_BIT == 3 #define WHEEL_C(n) UINT8_C(n) @@ -163,9 +165,6 @@ typedef uint16_t wheel_t; typedef uint8_t wheel_t; -#define ctz(n) __builtin_ctz(n) -#define fls(n) ((int)(sizeof (int) * CHAR_BIT) - __builtin_clz(n)) - #else #error invalid WHEEL_BIT value #endif @@ -300,6 +299,7 @@ static inline reltime_t timeout_rem(struct timeouts *T, struct timeout *to) { static inline int timeout_wheel(timeout_t timeout) { + /* must be called with timeout != 0, so fls input is nonzero */ return (fls(MIN(timeout, TIMEOUT_MAX)) - 1) / WHEEL_BIT; } /* timeout_wheel() */ @@ -322,6 +322,11 @@ static void timeouts_sched(struct timeouts *T, struct timeout *to, timeout_t exp if (expires > T->curtime) { rem = timeout_rem(T, to); + /* rem is nonzero since: + * rem == timeout_rem(T,to), + * == to->expires - T->curtime + * and above we have expires > T->curtime. + */ wheel = timeout_wheel(rem); slot = timeout_slot(wheel, to->expires); @@ -417,6 +422,7 @@ TIMEOUT_PUBLIC void timeouts_update(struct timeouts *T, abstime_t curtime) { } while (pending & T->pending[wheel]) { + /* ctz input cannot be zero: loop condition. */ int slot = ctz(pending & T->pending[wheel]); TAILQ_CONCAT(&todo, &T->wheel[wheel][slot], tqe); T->pending[wheel] &= ~(UINT64_C(1) << slot); @@ -493,6 +499,8 @@ static timeout_t timeouts_int(struct timeouts *T) { if (T->pending[wheel]) { slot = WHEEL_MASK & (T->curtime >> (wheel * WHEEL_BIT)); + /* ctz input cannot be zero: T->pending[wheel] is + * nonzero, so rotr() is nonzero. */ _timeout = (ctz(rotr(T->pending[wheel], slot)) + !!wheel) << (wheel * WHEEL_BIT); /* +1 to higher order wheels as those timeouts are one rotation in the future (otherwise they'd be on a lower wheel or expired) */ |
