summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Grant Limberg <[email protected]> 2023-05-16 11:56:58 -0700
committer: GitHub <[email protected]> 2023-05-16 11:56:58 -0700
commit: adfbbc3fb00becc578afc0645c60b1de3d84bb4c (patch)
tree: 1bc22da8a3a9ac7db73482e903450834a400e044
parent: f621261ff919278d631d566591a2eaf7c4c918d8 (diff)
Controller Metrics & Network Config Request Fix (#2003)
* add new metrics for network config request queue size and sso expirations
* move sso expiration to its own thread in the controller
* fix potential undefined behavior when modifying a set
-rw-r--r-- controller/EmbeddedNetworkController.cpp 69
-rw-r--r-- controller/EmbeddedNetworkController.hpp 4
-rw-r--r-- node/Metrics.cpp 9
-rw-r--r-- node/Metrics.hpp 6
-rw-r--r-- osdep/BlockingQueue.hpp 5
5 files changed, 65 insertions, 28 deletions
diff --git a/controller/EmbeddedNetworkController.cpp b/controller/EmbeddedNetworkController.cpp
index 1d5cee01..914cad47 100644
--- a/controller/EmbeddedNetworkController.cpp
+++ b/controller/EmbeddedNetworkController.cpp
@@ -468,6 +468,8 @@ EmbeddedNetworkController::EmbeddedNetworkController(Node *node,const char *ztPa
_path(dbPath),
_sender((NetworkController::Sender *)0),
_db(this),
+ _ssoExpiryRunning(true),
+ _ssoExpiry(std::thread(&EmbeddedNetworkController::_ssoExpiryThread, this)),
_rc(rc)
{
}
@@ -476,8 +478,11 @@ EmbeddedNetworkController::~EmbeddedNetworkController()
{
std::lock_guard<std::mutex> l(_threads_l);
_queue.stop();
- for(auto t=_threads.begin();t!=_threads.end();++t)
+ for(auto t=_threads.begin();t!=_threads.end();++t) {
t->join();
+ }
+ _ssoExpiryRunning = false;
+ _ssoExpiry.join();
}
void EmbeddedNetworkController::setSSORedirectURL(const std::string &url) {
@@ -1543,7 +1548,7 @@ void EmbeddedNetworkController::_request(
*(reinterpret_cast<InetAddress *>(&(r->target))) = t;
if (v.ss_family == t.ss_family)
*(reinterpret_cast<InetAddress *>(&(r->via))) = v;
- ++nc->routeCount;
+ ++nc->routeCount;
}
}
}
@@ -1765,10 +1770,9 @@ void EmbeddedNetworkController::_startThreads()
const long hwc = std::max((long)std::thread::hardware_concurrency(),(long)1);
for(long t=0;t<hwc;++t) {
_threads.emplace_back([this]() {
- std::vector<_MemberStatusKey> expired;
- nlohmann::json network, member;
for(;;) {
_RQEntry *qe = (_RQEntry *)0;
+ Metrics::network_config_request_queue_size = _queue.size();
auto timedWaitResult = _queue.get(qe, 1000);
if (timedWaitResult == BlockingQueue<_RQEntry *>::STOP) {
break;
@@ -1782,37 +1786,46 @@ void EmbeddedNetworkController::_startThreads()
fprintf(stderr,"ERROR: exception in controller request handling thread: unknown exception" ZT_EOL_S);
}
delete qe;
+ qe = nullptr;
}
}
+ }
+ });
+ }
+}
- expired.clear();
- int64_t now = OSUtils::now();
- {
- std::lock_guard<std::mutex> l(_expiringSoon_l);
- for(auto s=_expiringSoon.begin();s!=_expiringSoon.end();) {
- const int64_t when = s->first;
- if (when <= now) {
- // The user may have re-authorized, so we must actually look it up and check.
- network.clear();
- member.clear();
- if (_db.get(s->second.networkId, network, s->second.nodeId, member)) {
- int64_t authenticationExpiryTime = (int64_t)OSUtils::jsonInt(member["authenticationExpiryTime"], 0);
- if (authenticationExpiryTime <= now) {
- expired.push_back(s->second);
- }
- }
- _expiringSoon.erase(s++);
- } else {
- // Don't bother going further into the future than necessary.
- break;
+void EmbeddedNetworkController::_ssoExpiryThread() {
+ while(_ssoExpiryRunning) {
+ std::vector<_MemberStatusKey> expired;
+ nlohmann::json network, member;
+ int64_t now = OSUtils::now();
+ {
+ std::lock_guard<std::mutex> l(_expiringSoon_l);
+ for(auto s=_expiringSoon.begin();s!=_expiringSoon.end();) {
+ Metrics::sso_expiration_checks++;
+ const int64_t when = s->first;
+ if (when <= now) {
+ // The user may have re-authorized, so we must actually look it up and check.
+ network.clear();
+ member.clear();
+ if (_db.get(s->second.networkId, network, s->second.nodeId, member)) {
+ int64_t authenticationExpiryTime = (int64_t)OSUtils::jsonInt(member["authenticationExpiryTime"], 0);
+ if (authenticationExpiryTime <= now) {
+ expired.push_back(s->second);
}
}
- }
- for(auto e=expired.begin();e!=expired.end();++e) {
- onNetworkMemberDeauthorize(nullptr, e->networkId, e->nodeId);
+ s = _expiringSoon.erase(s);
+ } else {
+ // Don't bother going further into the future than necessary.
+ break;
}
}
- });
+ }
+ for(auto e=expired.begin();e!=expired.end();++e) {
+ Metrics::sso_member_deauth++;
+ onNetworkMemberDeauthorize(nullptr, e->networkId, e->nodeId);
+ }
+ std::this_thread::sleep_for(std::chrono::milliseconds(500));
}
}
diff --git a/controller/EmbeddedNetworkController.hpp b/controller/EmbeddedNetworkController.hpp
index 4f2e20e0..97692fa4 100644
--- a/controller/EmbeddedNetworkController.hpp
+++ b/controller/EmbeddedNetworkController.hpp
@@ -81,6 +81,7 @@ public:
private:
void _request(uint64_t nwid,const InetAddress &fromAddr,uint64_t requestPacketId,const Identity &identity,const Dictionary<ZT_NETWORKCONFIG_METADATA_DICT_CAPACITY> &metaData);
void _startThreads();
+ void _ssoExpiryThread();
std::string networkUpdateFromPostData(uint64_t networkID, const std::string &body);
@@ -138,6 +139,9 @@ private:
std::vector<std::thread> _threads;
std::mutex _threads_l;
+ bool _ssoExpiryRunning;
+ std::thread _ssoExpiry;
+
std::unordered_map< _MemberStatusKey,_MemberStatus,_MemberStatusHash > _memberStatus;
std::mutex _memberStatus_l;
diff --git a/node/Metrics.cpp b/node/Metrics.cpp
index 62345476..633c1b85 100644
--- a/node/Metrics.cpp
+++ b/node/Metrics.cpp
@@ -206,6 +206,15 @@ namespace ZeroTier {
prometheus::simpleapi::counter_metric_t member_deauths
{"controller_member_deauth_count", "number of network member deauths"};
+ prometheus::simpleapi::gauge_metric_t network_config_request_queue_size
+ { "controller_network_config_request_queue", "number of entries in the request queue for network configurations" };
+
+ prometheus::simpleapi::counter_metric_t sso_expiration_checks
+ { "controller_sso_expiration_checks", "number of sso expiration checks done" };
+
+ prometheus::simpleapi::counter_metric_t sso_member_deauth
+ { "controller_sso_timeouts", "number of sso timeouts" };
+
#ifdef ZT_CONTROLLER_USE_LIBPQ
// Central Controller Metrics
prometheus::simpleapi::counter_metric_t pgsql_mem_notification
diff --git a/node/Metrics.hpp b/node/Metrics.hpp
index 66b97c0d..492a6f9e 100644
--- a/node/Metrics.hpp
+++ b/node/Metrics.hpp
@@ -123,6 +123,10 @@ namespace ZeroTier {
extern prometheus::simpleapi::counter_metric_t member_auths;
extern prometheus::simpleapi::counter_metric_t member_deauths;
+ extern prometheus::simpleapi::gauge_metric_t network_config_request_queue_size;
+ extern prometheus::simpleapi::counter_metric_t sso_expiration_checks;
+ extern prometheus::simpleapi::counter_metric_t sso_member_deauth;
+
#ifdef ZT_CONTROLLER_USE_LIBPQ
// Central Controller Metrics
extern prometheus::simpleapi::counter_metric_t pgsql_mem_notification;
@@ -132,6 +136,8 @@ namespace ZeroTier {
extern prometheus::simpleapi::counter_metric_t redis_net_notification;
extern prometheus::simpleapi::counter_metric_t redis_node_checkin;
+
+
// Central DB Pool Metrics
extern prometheus::simpleapi::counter_metric_t conn_counter;
extern prometheus::simpleapi::counter_metric_t max_pool_size;
diff --git a/osdep/BlockingQueue.hpp b/osdep/BlockingQueue.hpp
index cce37a04..f3caff99 100644
--- a/osdep/BlockingQueue.hpp
+++ b/osdep/BlockingQueue.hpp
@@ -116,6 +116,11 @@ public:
return OK;
}
+ inline size_t size() const {
+ std::unique_lock<std::mutex> lock(m);
+ return q.size();
+ }
+
private:
std::queue<T> q;
mutable std::mutex m;