summaryrefslogtreecommitdiff
path: root/node
diff options
context:
space:
mode:
author Grant Limberg <[email protected]> 2023-05-04 11:12:55 -0700
committer GitHub <[email protected]> 2023-05-04 11:12:55 -0700
commit 00d55fc4b407eb91382ea412f99b007631f923b5 (patch)
tree 8becc0cd56f03088781c66063557d601cf18f537 /node
parent 74dc41c7c73669f5575851c830050747e332e38d (diff)
Metrics consolidation (#1997)
* Rename zt_packet_incoming -> zt_packet. Also consolidate zt_peer_packets into a single metric with tx and rx labels. Same for ztc_tcp_data and ztc_udp_data.
* Further collapse tcp & udp into metric labels for zt_data
* Fix zt_data metric description
* zt_peer_packets description fix
* Consolidate incoming/outgoing network packets to a single metric
* zt_packet_incoming_error -> zt_packet_error
* Disable peer metrics for central controllers. Can change in the future if needed, but given the traffic our controllers serve, that's going to be a *lot* of data.
* Disable peer metrics for controllers pt 2
Diffstat (limited to 'node')
-rw-r--r-- node/Metrics.cpp 34
-rw-r--r-- node/Metrics.hpp 15
-rw-r--r-- node/Network.cpp 10
-rw-r--r-- node/Network.hpp 2
-rw-r--r-- node/Peer.cpp 70
-rw-r--r-- node/Peer.hpp 2
6 files changed, 74 insertions, 59 deletions
diff --git a/node/Metrics.cpp b/node/Metrics.cpp
index e20f06c3..ba168bcc 100644
--- a/node/Metrics.cpp
+++ b/node/Metrics.cpp
@@ -25,7 +25,7 @@ namespace ZeroTier {
namespace Metrics {
// Packet Type Counts
prometheus::simpleapi::counter_family_t packets
- { "zt_packet_incoming", "incoming packet type counts"};
+ { "zt_packet", "incoming packet type counts"};
// Incoming packets
prometheus::simpleapi::counter_metric_t pkt_nop_in
@@ -118,7 +118,7 @@ namespace ZeroTier {
// Packet Error Counts
prometheus::simpleapi::counter_family_t packet_errors
- { "zt_packet_incoming_error", "incoming packet errors"};
+ { "zt_packet_error", "incoming packet errors"};
// Incoming Error Counts
prometheus::simpleapi::counter_metric_t pkt_error_obj_not_found_in
@@ -157,25 +157,26 @@ namespace ZeroTier {
{ packet_errors.Add({{"error_type", "internal_server_error"}, {"direction", "tx"}}) };
// Data Sent/Received Metrics
- prometheus::simpleapi::counter_metric_t udp_send
- { "zt_udp_data_sent", "number of bytes ZeroTier has sent via UDP" };
+ prometheus::simpleapi::counter_family_t data
+ { "zt_data", "number of bytes ZeroTier has transmitted or received" };
prometheus::simpleapi::counter_metric_t udp_recv
- { "zt_udp_data_recv", "number of bytes ZeroTier has received via UDP" };
+ { data.Add({{"protocol","udp"},{"direction","rx"}}) };
+ prometheus::simpleapi::counter_metric_t udp_send
+ { data.Add({{"protocol","udp"},{"direction","tx"}}) };
prometheus::simpleapi::counter_metric_t tcp_send
- { "zt_tcp_data_sent", "number of bytes ZeroTier has sent via TCP" };
+ { data.Add({{"protocol","tcp"},{"direction", "tx"}}) };
prometheus::simpleapi::counter_metric_t tcp_recv
- { "zt_tcp_data_recv", "number of bytes ZeroTier has received via TCP" };
+ { data.Add({{"protocol","tcp"},{"direction", "rx"}}) };
// Network Metrics
prometheus::simpleapi::gauge_metric_t network_num_joined
{ "zt_num_networks", "number of networks this instance is joined to" };
prometheus::simpleapi::gauge_family_t network_num_multicast_groups
- { "zt_network_multcast_groups_subscribed", "number of multicast groups networks are subscribed to" };
- prometheus::simpleapi::counter_family_t network_incoming_packets
- { "zt_network_incoming_packets", "number of incoming packets per network" };
- prometheus::simpleapi::counter_family_t network_outgoing_packets
- { "zt_network_outgoing_packets", "number of outgoing packets per network" };
-
+ { "zt_network_multicast_groups_subscribed", "number of multicast groups networks are subscribed to" };
+ prometheus::simpleapi::counter_family_t network_packets
+ { "zt_network_packets", "number of incoming/outgoing packets per network" };
+
+#ifndef ZT_NO_PEER_METRICS
// PeerMetrics
prometheus::CustomFamily<prometheus::Histogram<uint64_t>> &peer_latency =
prometheus::Builder<prometheus::Histogram<uint64_t>>()
@@ -185,12 +186,11 @@ namespace ZeroTier {
prometheus::simpleapi::gauge_family_t peer_path_count
{ "zt_peer_path_count", "number of paths to peer" };
- prometheus::simpleapi::counter_family_t peer_incoming_packets
- { "zt_peer_incoming_packets", "number of incoming packets from a peer" };
- prometheus::simpleapi::counter_family_t peer_outgoing_packets
- { "zt_peer_outgoing_packets", "number of outgoing packets to a peer" };
+ prometheus::simpleapi::counter_family_t peer_packets
+ { "zt_peer_packets", "number of packets to/from a peer" };
prometheus::simpleapi::counter_family_t peer_packet_errors
{ "zt_peer_packet_errors" , "number of incoming packet errors from a peer" };
+#endif
// General Controller Metrics
prometheus::simpleapi::gauge_metric_t network_count
diff --git a/node/Metrics.hpp b/node/Metrics.hpp
index f78a0f15..66b97c0d 100644
--- a/node/Metrics.hpp
+++ b/node/Metrics.hpp
@@ -96,23 +96,24 @@ namespace ZeroTier {
extern prometheus::simpleapi::counter_metric_t pkt_error_internal_server_error_out;
// Data Sent/Received Metrics
+ extern prometheus::simpleapi::counter_family_t data;
extern prometheus::simpleapi::counter_metric_t udp_send;
extern prometheus::simpleapi::counter_metric_t udp_recv;
extern prometheus::simpleapi::counter_metric_t tcp_send;
extern prometheus::simpleapi::counter_metric_t tcp_recv;
// Network Metrics
- extern prometheus::simpleapi::gauge_metric_t network_num_joined;
- extern prometheus::simpleapi::gauge_family_t network_num_multicast_groups;
- extern prometheus::simpleapi::counter_family_t network_incoming_packets;
- extern prometheus::simpleapi::counter_family_t network_outgoing_packets;
+ extern prometheus::simpleapi::gauge_metric_t network_num_joined;
+ extern prometheus::simpleapi::gauge_family_t network_num_multicast_groups;
+ extern prometheus::simpleapi::counter_family_t network_packets;
+#ifndef ZT_NO_PEER_METRICS
// Peer Metrics
extern prometheus::CustomFamily<prometheus::Histogram<uint64_t>> &peer_latency;
- extern prometheus::simpleapi::gauge_family_t peer_path_count;
- extern prometheus::simpleapi::counter_family_t peer_incoming_packets;
- extern prometheus::simpleapi::counter_family_t peer_outgoing_packets;
+ extern prometheus::simpleapi::gauge_family_t peer_path_count;
+ extern prometheus::simpleapi::counter_family_t peer_packets;
extern prometheus::simpleapi::counter_family_t peer_packet_errors;
+#endif
// General Controller Metrics
extern prometheus::simpleapi::gauge_metric_t network_count;
diff --git a/node/Network.cpp b/node/Network.cpp
index 10436aed..1e77e463 100644
--- a/node/Network.cpp
+++ b/node/Network.cpp
@@ -569,10 +569,10 @@ Network::Network(const RuntimeEnvironment *renv,void *tPtr,uint64_t nwid,void *u
_netconfFailure(NETCONF_FAILURE_NONE),
_portError(0),
_num_multicast_groups{Metrics::network_num_multicast_groups.Add({{"network_id", _nwidStr}})},
- _incoming_packets_accpeted{Metrics::network_incoming_packets.Add({{"network_id", _nwidStr},{"accepted","yes"}})},
- _incoming_packets_dropped{Metrics::network_incoming_packets.Add({{"network_id", _nwidStr},{"accepted","no"}})},
- _outgoing_packets_accepted{Metrics::network_outgoing_packets.Add({{"network_id", _nwidStr},{"accepted","yes"}})},
- _outgoing_packets_dropped{Metrics::network_outgoing_packets.Add({{"network_id", _nwidStr},{"accepted","no"}})}
+ _incoming_packets_accepted{Metrics::network_packets.Add({{"direction","rx"},{"network_id", _nwidStr},{"accepted","yes"}})},
+ _incoming_packets_dropped{Metrics::network_packets.Add({{"direction","rx"},{"network_id", _nwidStr},{"accepted","no"}})},
+ _outgoing_packets_accepted{Metrics::network_packets.Add({{"direction","tx"},{"network_id", _nwidStr},{"accepted","yes"}})},
+ _outgoing_packets_dropped{Metrics::network_packets.Add({{"direction","tx"},{"network_id", _nwidStr},{"accepted","no"}})}
{
for(int i=0;i<ZT_NETWORK_MAX_INCOMING_UPDATES;++i) {
_incomingConfigChunks[i].ts = 0;
@@ -837,7 +837,7 @@ int Network::filterIncomingPacket(
}
if (accept) {
- _incoming_packets_accpeted++;
+ _incoming_packets_accepted++;
if (cc) {
Packet outp(cc,RR->identity.address(),Packet::VERB_EXT_FRAME);
outp.append(_id);
diff --git a/node/Network.hpp b/node/Network.hpp
index 676e5556..a3bce14a 100644
--- a/node/Network.hpp
+++ b/node/Network.hpp
@@ -483,7 +483,7 @@ private:
AtomicCounter __refCount;
prometheus::simpleapi::gauge_metric_t _num_multicast_groups;
- prometheus::simpleapi::counter_metric_t _incoming_packets_accpeted;
+ prometheus::simpleapi::counter_metric_t _incoming_packets_accepted;
prometheus::simpleapi::counter_metric_t _incoming_packets_dropped;
prometheus::simpleapi::counter_metric_t _outgoing_packets_accepted;
prometheus::simpleapi::counter_metric_t _outgoing_packets_dropped;
diff --git a/node/Peer.cpp b/node/Peer.cpp
index a08bebbf..6fcf193d 100644
--- a/node/Peer.cpp
+++ b/node/Peer.cpp
@@ -28,35 +28,37 @@ namespace ZeroTier {
static unsigned char s_freeRandomByteCounter = 0;
-Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Identity &peerIdentity) :
- RR(renv),
- _lastReceive(0),
- _lastNontrivialReceive(0),
- _lastTriedMemorizedPath(0),
- _lastDirectPathPushSent(0),
- _lastDirectPathPushReceive(0),
- _lastCredentialRequestSent(0),
- _lastWhoisRequestReceived(0),
- _lastCredentialsReceived(0),
- _lastTrustEstablishedPacketReceived(0),
- _lastSentFullHello(0),
- _lastEchoCheck(0),
- _freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter),
- _vProto(0),
- _vMajor(0),
- _vMinor(0),
- _vRevision(0),
- _id(peerIdentity),
- _directPathPushCutoffCount(0),
- _echoRequestCutoffCount(0),
- _localMultipathSupported(false),
- _lastComputedAggregateMeanLatency(0),
- _peer_latency{Metrics::peer_latency.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}}, std::vector<uint64_t>{1,3,6,10,30,60,100,300,600,1000})},
- _alive_path_count{Metrics::peer_path_count.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())},{"status","alive"}})},
- _dead_path_count{Metrics::peer_path_count.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())},{"status","dead"}})},
- _incoming_packet{Metrics::peer_incoming_packets.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})},
- _outgoing_packet{Metrics::peer_outgoing_packets.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})},
- _packet_errors{Metrics::peer_packet_errors.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})}
+Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Identity &peerIdentity)
+ : RR(renv)
+ , _lastReceive(0)
+ , _lastNontrivialReceive(0)
+ , _lastTriedMemorizedPath(0)
+ , _lastDirectPathPushSent(0)
+ , _lastDirectPathPushReceive(0)
+ , _lastCredentialRequestSent(0)
+ , _lastWhoisRequestReceived(0)
+ , _lastCredentialsReceived(0)
+ , _lastTrustEstablishedPacketReceived(0)
+ , _lastSentFullHello(0)
+ , _lastEchoCheck(0)
+ , _freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter)
+ , _vProto(0)
+ , _vMajor(0)
+ , _vMinor(0)
+ , _vRevision(0)
+ , _id(peerIdentity)
+ , _directPathPushCutoffCount(0)
+ , _echoRequestCutoffCount(0)
+ , _localMultipathSupported(false)
+ , _lastComputedAggregateMeanLatency(0)
+#ifndef ZT_NO_PEER_METRICS
+ , _peer_latency{Metrics::peer_latency.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}}, std::vector<uint64_t>{1,3,6,10,30,60,100,300,600,1000})}
+ , _alive_path_count{Metrics::peer_path_count.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())},{"status","alive"}})}
+ , _dead_path_count{Metrics::peer_path_count.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())},{"status","dead"}})}
+ , _incoming_packet{Metrics::peer_packets.Add({{"direction", "rx"},{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})}
+ , _outgoing_packet{Metrics::peer_packets.Add({{"direction", "tx"},{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})}
+ , _packet_errors{Metrics::peer_packet_errors.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})}
+#endif
{
if (!myIdentity.agree(peerIdentity,_key)) {
throw ZT_EXCEPTION_INVALID_ARGUMENT;
@@ -97,7 +99,9 @@ void Peer::received(
default:
break;
}
+#ifndef ZT_NO_PEER_METRICS
_incoming_packet++;
+#endif
recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now);
if (trustEstablished) {
@@ -569,6 +573,7 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
deletionOccurred = false;
}
}
+#ifndef ZT_NO_PEER_METRICS
uint16_t alive_path_count_tmp = 0, dead_path_count_tmp = 0;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
@@ -582,8 +587,11 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
}
_alive_path_count = alive_path_count_tmp;
_dead_path_count = dead_path_count_tmp;
+#endif
}
+#ifndef ZT_NO_PEER_METRICS
_peer_latency.Observe(latency(now));
+#endif
return sent;
}
@@ -658,7 +666,9 @@ void Peer::resetWithinScope(void *tPtr,InetAddress::IpScope scope,int inetAddres
void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now)
{
+#ifndef ZT_NO_PEER_METRICS
_outgoing_packet++;
+#endif
if (_localMultipathSupported && _bond) {
_bond->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now);
}
@@ -666,7 +676,9 @@ void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t pack
void Peer::recordIncomingInvalidPacket(const SharedPtr<Path>& path)
{
+#ifndef ZT_NO_PEER_METRICS
_packet_errors++;
+#endif
if (_localMultipathSupported && _bond) {
_bond->recordIncomingInvalidPacket(path);
}
diff --git a/node/Peer.hpp b/node/Peer.hpp
index cd6b871f..d03e8f88 100644
--- a/node/Peer.hpp
+++ b/node/Peer.hpp
@@ -599,12 +599,14 @@ private:
SharedPtr<Bond> _bond;
+#ifndef ZT_NO_PEER_METRICS
prometheus::Histogram<uint64_t> &_peer_latency;
prometheus::simpleapi::gauge_metric_t _alive_path_count;
prometheus::simpleapi::gauge_metric_t _dead_path_count;
prometheus::simpleapi::counter_metric_t _incoming_packet;
prometheus::simpleapi::counter_metric_t _outgoing_packet;
prometheus::simpleapi::counter_metric_t _packet_errors;
+#endif
};
} // namespace ZeroTier