From d30b79a2fe8fb940ded4265882572bc9602606e9 Mon Sep 17 00:00:00 2001 From: fumingwei Date: Tue, 5 Sep 2023 20:32:16 +0800 Subject: bugfix:性能优化:自旋锁伪共享 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/fieldstat_dynamic.cpp | 34 +++--- src/fieldstat_internal.h | 10 +- src/line_protocol_output.cpp | 6 +- test/src/CMakeLists.txt | 5 +- test/src/gtest_dynamic_benchmark.cpp | 216 +++++++++++++++++++++++++++++++++++ 5 files changed, 253 insertions(+), 18 deletions(-) create mode 100644 test/src/gtest_dynamic_benchmark.cpp diff --git a/src/fieldstat_dynamic.cpp b/src/fieldstat_dynamic.cpp index 7b2cb52..275a91d 100644 --- a/src/fieldstat_dynamic.cpp +++ b/src/fieldstat_dynamic.cpp @@ -25,10 +25,11 @@ struct fieldstat_dynamic_instance * fieldstat_dynamic_instance_new(const char *n instance->n_thread = n_thread; instance->n_thread_dynamic_metric = (struct dynamic_metric **)calloc(instance->n_thread, sizeof(struct dynamic_metric *)); - instance->uthash_locks = (pthread_spinlock_t*)calloc(n_thread, sizeof(pthread_spinlock_t)); + instance->uthash_locks = (struct uthash_spinlock *)calloc(n_thread, sizeof(struct uthash_spinlock)); for(int i = 0; i < n_thread; i++) { - pthread_spin_init(instance->uthash_locks + i, PTHREAD_PROCESS_SHARED); + struct uthash_spinlock *uthash_lock = instance->uthash_locks + i; + pthread_spin_init(&(uthash_lock->lock), PTHREAD_PROCESS_SHARED); } return instance; @@ -101,7 +102,8 @@ void fieldstat_dynamic_instance_free(struct fieldstat_dynamic_instance *instance { for(i = 0; i < instance->n_thread; i++) { - pthread_spin_destroy(instance->uthash_locks + i); + struct uthash_spinlock *uthash_lock = instance->uthash_locks + i; + pthread_spin_destroy(&(uthash_lock->lock)); } free((void *)instance->uthash_locks); instance->uthash_locks = NULL; @@ -342,9 +344,11 @@ static struct metric * read_dynamic_metric(struct fieldstat_dynamic_instance *in { return NULL; } - pthread_spin_lock(instance->uthash_locks + 
thread_id); + + struct uthash_spinlock *uthash_lock = instance->uthash_locks + thread_id; + pthread_spin_lock(&(uthash_lock->lock)); HASH_FIND(hh, *head, dynamic_metric_key, dynamic_metric_keylen, find); - pthread_spin_unlock(instance->uthash_locks + thread_id); + pthread_spin_unlock(&(uthash_lock->lock)); if(find == NULL) { return NULL; @@ -420,9 +424,10 @@ static struct metric * create_dynamic_table_metric(struct fieldstat_dynamic_inst value->metrics[i] = metric; } - pthread_spin_lock(instance->uthash_locks + thread_id); + struct uthash_spinlock *uthash_lock = instance->uthash_locks + thread_id; + pthread_spin_lock(&(uthash_lock->lock)); HASH_ADD_KEYPTR(hh, *head, value->metric_key, value->metric_keylen, value); - pthread_spin_unlock(instance->uthash_locks + thread_id); + pthread_spin_unlock(&(uthash_lock->lock)); return value->metrics[column_id]; } @@ -470,9 +475,10 @@ static struct metric * create_dynamic_metric(struct fieldstat_dynamic_instance * } *(insert->metrics) = metric; - pthread_spin_lock(instance->uthash_locks + thread_id); + struct uthash_spinlock *uthash_lock = instance->uthash_locks + thread_id; + pthread_spin_lock(&(uthash_lock->lock)); HASH_ADD_KEYPTR(hh, *head, insert->metric_key, insert->metric_keylen, insert); - pthread_spin_unlock(instance->uthash_locks + thread_id); + pthread_spin_unlock(&(uthash_lock->lock)); return metric; } @@ -583,9 +589,10 @@ static struct metric **read_dynamic_row_metrics( head = &instance->n_thread_dynamic_metric[thread_id]; - pthread_spin_lock(instance->uthash_locks + thread_id); + struct uthash_spinlock *uthash_lock = instance->uthash_locks + thread_id; + pthread_spin_lock(&(uthash_lock->lock)); HASH_FIND(hh, *head, metric_key, metric_keylen, find); - pthread_spin_unlock(instance->uthash_locks + thread_id); + pthread_spin_unlock(&(uthash_lock->lock)); if(find == NULL) { return NULL; @@ -650,9 +657,10 @@ static struct metric **create_dynamic_table_row_metrics( value->metrics[i] = metric; } - 
pthread_spin_lock(instance->uthash_locks + thread_id); + struct uthash_spinlock *uthash_lock = instance->uthash_locks + thread_id; + pthread_spin_lock(&(uthash_lock->lock)); HASH_ADD_KEYPTR(hh, *head, value->metric_key, value->metric_keylen, value); - pthread_spin_unlock(instance->uthash_locks + thread_id); + pthread_spin_unlock(&(uthash_lock->lock)); return value->metrics; } diff --git a/src/fieldstat_internal.h b/src/fieldstat_internal.h index e471116..132f9f5 100644 --- a/src/fieldstat_internal.h +++ b/src/fieldstat_internal.h @@ -66,7 +66,7 @@ #define METRIC_SIZE 1024 - +#define CACHE_LINE_SIZE 64 enum field_calc_algo { @@ -221,6 +221,11 @@ struct dynamic_metric UT_hash_handle hh; }; +struct uthash_spinlock +{ + pthread_spinlock_t lock; +} __attribute__((aligned(CACHE_LINE_SIZE))); + struct fieldstat_dynamic_instance { char name[INSTANCE_NAME_LEN]; @@ -241,7 +246,8 @@ struct fieldstat_dynamic_instance struct dynamic_metric **n_thread_dynamic_metric; int n_thread; int output_type; // 0b0000:not output, 0b1000:output file, 0b0100:output line_protocol, 0b0010: output statsd, 0b0001: output prometheus - pthread_spinlock_t *uthash_locks; + //pthread_spinlock_t *uthash_locks; + struct uthash_spinlock *uthash_locks; }; void prometheus_endpoint_instance_output(struct http_request_s* request); diff --git a/src/line_protocol_output.cpp b/src/line_protocol_output.cpp index 9110205..3c687ed 100644 --- a/src/line_protocol_output.cpp +++ b/src/line_protocol_output.cpp @@ -481,7 +481,9 @@ int line_protocol_dynamic_metric_output(struct fieldstat_dynamic_instance *insta for(int i = 0; i < instance->n_thread; i++) { std::vector line_buf_to_send; - pthread_spin_lock(instance->uthash_locks + i); + struct uthash_spinlock *uthash_lock = instance->uthash_locks + i; + + pthread_spin_lock(&(uthash_lock->lock)); head = &instance->n_thread_dynamic_metric[i]; HASH_ITER(hh, *head, dyn_metric, tmp_dyn_metric) { @@ -502,7 +504,7 @@ int line_protocol_dynamic_metric_output(struct 
fieldstat_dynamic_instance *insta /* copy the line_buf as str to vector line_buf_to_send */ line_buf_to_send.push_back(std::string(line_buf)); } - pthread_spin_unlock(instance->uthash_locks + i); + pthread_spin_unlock(&(uthash_lock->lock)); for (std::vector::iterator it = line_buf_to_send.begin(); it != line_buf_to_send.end(); ++it) { diff --git a/test/src/CMakeLists.txt b/test/src/CMakeLists.txt index 03311e2..a1acdb6 100644 --- a/test/src/CMakeLists.txt +++ b/test/src/CMakeLists.txt @@ -19,4 +19,7 @@ add_executable(gtest_dynamic_fieldstat_output ${SRC} gtest_dynamic_fieldstat_out target_link_libraries(gtest_dynamic_fieldstat_output gtest-static) add_executable(gtest_fieldstat_output_file_instance ${SRC} gtest_fieldstat_output_file_instance.cpp) -target_link_libraries(gtest_fieldstat_output_file_instance gtest-static) \ No newline at end of file +target_link_libraries(gtest_fieldstat_output_file_instance gtest-static) + +add_executable(gtest_dynamic_benchmark ${SRC} gtest_dynamic_benchmark.cpp) +target_link_libraries(gtest_dynamic_benchmark gtest-static) \ No newline at end of file diff --git a/test/src/gtest_dynamic_benchmark.cpp b/test/src/gtest_dynamic_benchmark.cpp new file mode 100644 index 0000000..01e22e2 --- /dev/null +++ b/test/src/gtest_dynamic_benchmark.cpp @@ -0,0 +1,216 @@ +#include +#include +#include +#include +#include "fieldstat.h" +#include "fieldstat_internal.h" +#include "cJSON.h" +#include + +/* +spinlock, output interval: 1ms + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | | Thread num 1 | Thread num 2 | Thread num 4 | Thread num 8 | Thread num 16 | Thread num 32 | Thread num 64 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | operate 
counter | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 1 | 45 | 17 | 18 | 28 | 48 | 41 | 35 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 10 | 21 | 19 | 19 | 17 | 13 | 10 | 9 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 100 | 49 | 33 | 33 | 34 | 32 | 32 | 30 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 1,000 | 217 | 284 | 321 | 719 | 536 | 803 | 1202 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 10,000 | 1,961 | 2,618 | 3,742 | 7,068 | 9,943 | 19,171 | 16,970 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 100,000 | 19,413 | 29,401 | 35,992 | 65,337 | 97,261 | 135,835 | 197,634 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 1,000,000 | 193,769 | 278,782 | 359,098 | 883,753 | 1,493,073 | 1,592,048 | 1,974,276 | + 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 10,000,000 | 1,948,891 | 3,861,912 | 5,796,965 | 7,507,053 | 11,502,201 | 17,721,148 | 20,532,196 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 100,000,000 | 19,425,541 | 30,764,254 | 57,262,235 | 97,336,038 | 117,999,613 | 156,962,386 | 203,782,206 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +/* +no spinlock, output interval: 1ms + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | | Thread num 1 | Thread num 2 | Thread num 4 | Thread num 8 | Thread num 16 | Thread num 32 | Thread num 64 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | operate counter | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 1 | 39 | 22 | 25 | 81 | 65 | 41 | 45 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 10 | 
34 | 12 | 13 | 8 | 11 | 11 | 9 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 100 | 30 | 32 | 30 | 29 | 30 | 30 | 27 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 1,000 | 201 | 205 | 201 | 205 | 207 | 204 | 202 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 10,000 | 1,928 | 1,912 | 1,901 | 1,905 | 1,901 | 19,215 | 1,967 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 100,000 | 18,742 | 18,674 | 18,917 | 18,801 | 18,815 | 193,246 | 23,323 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 1,000,000 | 187,415 | 189,673 | 188,229 | 188,012 | 187,757 | 193,246 | 309,948 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 10,000,000 | 1,879,720 | 1,888,791 | 1,880,554 | 1,883,515 | 1,882,172 | 1,892,792 | 2,681,721 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 100,000,000 | 18,784,181 | 18,827,600 | 18,812,896 | 18,875,007 | 18,833,475 | 18,811,591 | 26,965,032 | + 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ +/* +spinlock with Struct Alignment, output interval: 1ms + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | | Thread num 1 | Thread num 2 | Thread num 4 | Thread num 8 | Thread num 16 | Thread num 32 | Thread num 64 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | operate counter | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | operation duration(us) | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 1 | 46 | 18 | 19 | 22 | 40 | 53 | 38 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 10 | 25 | 13 | 14 | 12 | 12 | 10 | 9 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 100 | 33 | 31 | 33 | 30 | 32 | 30 | 30 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 1,000 | 212 | 207 | 536 | 205 | 207 | 207 | 204 | + 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 10,000 | 1,882 | 1,895 | 4,333 | 1,890 | 1,899 | 1,898 | 1,907 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 100,000 | 18,751 | 18,823 | 32,792 | 19,100 | 18,839 | 18,860 | 20,919 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 1,000,000 | 187,274 | 187,520 | 187,618 | 188,000 | 188,097 | 191,853 | 248,770 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 10,000,000 | 1,873,169 | 1,874,701 | 1,875,510 | 1,875,648 | 1,878,946 | 1,919,397 | 2,523,767 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + | 100,000,000 | 18,776,370 | 18,781,233 | 18,767,859 | 18,768,590 | 18,830,596 | 18,799,717 | 27,003,506 | + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +*/ + +struct thread_para +{ + int loops; + int thread_id; + long long duration; + struct fieldstat_dynamic_instance *instance; +}; + +long long current_timestamp() +{ + struct timeval te; + gettimeofday(&te, NULL); + return te.tv_sec * 1000LL * 1000LL + te.tv_usec; +} + +void _worker_thread_one_metric(void *arg) +{ + struct thread_para *para = (struct thread_para*)arg; + + 
int loops = para->loops; + int thread_id = para->thread_id; + struct fieldstat_dynamic_instance *instance = para->instance; + + int ret = 0; + long long start_time, end_time; + + start_time = current_timestamp(); + for(int i = 0; i < loops; i++) + { + ret = fieldstat_dynamic_metric_value_incrby(instance, FIELD_TYPE_GAUGE, + "Active_sessions", 10, + NULL, 0, thread_id); + EXPECT_EQ(0, ret); + } + end_time =current_timestamp(); + para->duration = end_time - start_time; + + return; +} +void *worker_thread_one_metric(void *arg) +{ + _worker_thread_one_metric(arg); + usleep(1000 * 100); + return NULL; +} + +static void fieldstat_dynamic_benchmark(int n_thread, int n_loops) +{ + int ret = 0; + //int n_thread = 4; + //int n_loops = 10; + struct fieldstat_dynamic_instance *instance = NULL; + struct thread_para para[n_thread]; + pthread_t thread_ids[n_thread]; + long long sum_durations = 0; + + instance = fieldstat_dynamic_instance_new("firewall", n_thread); + ret = fieldstat_dynamic_set_line_protocol_server(instance, "127.0.0.1", 8700); + EXPECT_EQ(0, ret); + // ret = fieldstat_dynamic_set_output_interval(instance, 1); + // EXPECT_EQ(0, ret); + + fieldstat_dynamic_instance_start(instance); + + for(int i = 0; i < n_thread; i++) + { + para[i].loops = n_loops; + para[i].instance = instance; + para[i].thread_id = i; + } + + for(int i = 0; i < n_thread; i++) + { + ret = pthread_create(&(thread_ids[i]), NULL, worker_thread_one_metric, &(para[i])); + EXPECT_EQ(0, ret); + } + + void *temp; + for(int i = 0; i < n_thread; i++) + { + pthread_join(thread_ids[i], (void**)&temp); + sum_durations += para[i].duration; + //printf("Thread id:%d, operate counter:%d, duration:%lldus\n", i, n_loops, para[i].duration); + } + printf("Thread num:%d, operate counter:%d, duration:%lldus\n", n_thread, n_loops, sum_durations/n_thread); + sleep(2); + fieldstat_dynamic_instance_free(instance); +} + +// TEST(FeildStatDynamicAPI, NThread64Counter10Million) +// { +// int n_thread = 0; +// int n_loops = 0; 
+ +// for(int i = 0; i < 7; i++) +// { +// n_thread = 1 << i; +// for(int j = 0; j < 9; j++) +// { +// n_loops = (int)pow(10, (double)j); +// fieldstat_dynamic_benchmark(n_thread, n_loops); +// } +// } +// } + +TEST(FeildStatDynamicAPI, AllConditions) +{ + int n_thread = 0; + int n_loops = 0; + + for(int i = 0; i < 7; i++) + { + n_thread = 1 << i; + for(int j = 0; j < 9; j++) + { + n_loops = (int)pow(10, (double)j); + fieldstat_dynamic_benchmark(n_thread, n_loops); + } + } +} + + + +int main(int argc, char *argv[]) +{ + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file -- cgit v1.2.3