RATE: 函数,(当前采集值 - 上一个采集值) / 采集时间差
Name的详细说明链接
Part 1
Name
- 如果 combine_connection_states == true
system.net.udp4.connections
system.net.udp6.connections
system.net.tcp4.established
- The number of TCP IPv4 established connections.
- connection/second
- system.net.tcp4.opening
- The number of TCP IPv4 opening connections.
- connection/second
- system.net.tcp4.closing
- The number of TCP IPv4 closing connections.
- connection/second
- system.net.tcp4.listening
- The number of TCP IPv4 listening connections.
- connection/second
system.net.tcp4.time_wait
system.net.tcp6.established
- The number of TCP IPv6 established connections.
- connection/second
- system.net.tcp6.opening
- The number of TCP IPv6 opening connections.
- connection/second
- system.net.tcp6.closing
- The number of TCP IPv6 closing connections.
- connection/second
- system.net.tcp6.listening
- The number of TCP IPv6 listening connections.
- connection/second
- system.net.tcp6.time_wait
- 如果 combine_connection_states == false
system.net.udp4.connections
system.net.udp6.connections
system.net.tcp4.estab
system.net.tcp4.syn_sent
system.net.tcp4.syn_recv
system.net.tcp4.fin_wait_1
system.net.tcp4.fin_wait_2
system.net.tcp4.time_wait
system.net.tcp4.unconn
system.net.tcp4.close
system.net.tcp4.close_wait
system.net.tcp4.closing
system.net.tcp4.listen
system.net.tcp4.time_wait
计算公式
# `combine_connection_states` is read from the instance configuration.
# When the key is absent, this code falls back to True (combined states).
# NOTE(review): the original note here said the config file defaults to
# false, which contradicts the fallback below -- confirm against the shipped
# configuration file.
self._combine_connection_states = instance.get('combine_connection_states', True)

if self._combine_connection_states:
    # Combined mode: raw TCP states are folded into five buckets
    # (established / opening / closing / listening / time_wait).
    # Keys are (protocol + IP version, state bucket); values are metric names.
    self.cx_state_gauge = {
        ('udp4', 'connections'): 'system.net.udp4.connections',
        ('udp6', 'connections'): 'system.net.udp6.connections',
        ('tcp4', 'established'): 'system.net.tcp4.established',
        ('tcp4', 'opening'): 'system.net.tcp4.opening',
        ('tcp4', 'closing'): 'system.net.tcp4.closing',
        ('tcp4', 'listening'): 'system.net.tcp4.listening',
        ('tcp4', 'time_wait'): 'system.net.tcp4.time_wait',
        ('tcp6', 'established'): 'system.net.tcp6.established',
        ('tcp6', 'opening'): 'system.net.tcp6.opening',
        ('tcp6', 'closing'): 'system.net.tcp6.closing',
        ('tcp6', 'listening'): 'system.net.tcp6.listening',
        ('tcp6', 'time_wait'): 'system.net.tcp6.time_wait',
    }
    # Per data source: raw state name (or psutil constant) -> state bucket.
    self.tcp_states = {
        "ss": {
            "ESTAB": "established",
            "SYN-SENT": "opening",
            "SYN-RECV": "opening",
            "FIN-WAIT-1": "closing",
            "FIN-WAIT-2": "closing",
            "TIME-WAIT": "time_wait",
            "UNCONN": "closing",
            "CLOSE-WAIT": "closing",
            "LAST-ACK": "closing",
            "LISTEN": "listening",
            "CLOSING": "closing",
        },
        "netstat": {
            "ESTABLISHED": "established",
            "SYN_SENT": "opening",
            "SYN_RECV": "opening",
            "FIN_WAIT1": "closing",
            "FIN_WAIT2": "closing",
            "TIME_WAIT": "time_wait",
            "CLOSE": "closing",
            "CLOSE_WAIT": "closing",
            "LAST_ACK": "closing",
            "LISTEN": "listening",
            "CLOSING": "closing",
        },
        "psutil": {
            psutil.CONN_ESTABLISHED: "established",
            psutil.CONN_SYN_SENT: "opening",
            psutil.CONN_SYN_RECV: "opening",
            psutil.CONN_FIN_WAIT1: "closing",
            psutil.CONN_FIN_WAIT2: "closing",
            psutil.CONN_TIME_WAIT: "time_wait",
            psutil.CONN_CLOSE: "closing",
            psutil.CONN_CLOSE_WAIT: "closing",
            psutil.CONN_LAST_ACK: "closing",
            psutil.CONN_LISTEN: "listening",
            psutil.CONN_CLOSING: "closing",
            psutil.CONN_NONE: "connections",  # CONN_NONE is always returned for udp connections
        },
    }
else:
    # Detailed mode: one state key per raw TCP state.
    # Keys are (protocol + IP version, state key); values are metric names.
    self.cx_state_gauge = {
        ('udp4', 'connections'): 'system.net.udp4.connections',
        ('udp6', 'connections'): 'system.net.udp6.connections',
        ('tcp4', 'estab'): 'system.net.tcp4.estab',
        ('tcp4', 'syn_sent'): 'system.net.tcp4.syn_sent',
        ('tcp4', 'syn_recv'): 'system.net.tcp4.syn_recv',
        ('tcp4', 'fin_wait_1'): 'system.net.tcp4.fin_wait_1',
        ('tcp4', 'fin_wait_2'): 'system.net.tcp4.fin_wait_2',
        ('tcp4', 'time_wait'): 'system.net.tcp4.time_wait',
        ('tcp4', 'unconn'): 'system.net.tcp4.unconn',
        ('tcp4', 'close'): 'system.net.tcp4.close',
        ('tcp4', 'close_wait'): 'system.net.tcp4.close_wait',
        ('tcp4', 'closing'): 'system.net.tcp4.closing',
        ('tcp4', 'listen'): 'system.net.tcp4.listen',
        # NOTE(review): last_ack maps to the time_wait metric name rather
        # than a dedicated last_ack metric -- confirm this is intended.
        ('tcp4', 'last_ack'): 'system.net.tcp4.time_wait',
        ('tcp6', 'estab'): 'system.net.tcp6.estab',
        ('tcp6', 'syn_sent'): 'system.net.tcp6.syn_sent',
        ('tcp6', 'syn_recv'): 'system.net.tcp6.syn_recv',
        ('tcp6', 'fin_wait_1'): 'system.net.tcp6.fin_wait_1',
        ('tcp6', 'fin_wait_2'): 'system.net.tcp6.fin_wait_2',
        ('tcp6', 'time_wait'): 'system.net.tcp6.time_wait',
        ('tcp6', 'unconn'): 'system.net.tcp6.unconn',
        ('tcp6', 'close'): 'system.net.tcp6.close',
        ('tcp6', 'close_wait'): 'system.net.tcp6.close_wait',
        ('tcp6', 'closing'): 'system.net.tcp6.closing',
        ('tcp6', 'listen'): 'system.net.tcp6.listen',
        # NOTE(review): same last_ack -> time_wait mapping as for tcp4.
        ('tcp6', 'last_ack'): 'system.net.tcp6.time_wait',
    }
    # Per data source: raw state name (or psutil constant) -> state key.
    self.tcp_states = {
        "ss": {
            "ESTAB": "estab",
            "SYN-SENT": "syn_sent",
            "SYN-RECV": "syn_recv",
            "FIN-WAIT-1": "fin_wait_1",
            "FIN-WAIT-2": "fin_wait_2",
            "TIME-WAIT": "time_wait",
            "UNCONN": "unconn",
            "CLOSE-WAIT": "close_wait",
            "LAST-ACK": "last_ack",
            "LISTEN": "listen",
            "CLOSING": "closing",
        },
        "netstat": {
            "ESTABLISHED": "estab",
            "SYN_SENT": "syn_sent",
            "SYN_RECV": "syn_recv",
            "FIN_WAIT1": "fin_wait_1",
            "FIN_WAIT2": "fin_wait_2",
            "TIME_WAIT": "time_wait",
            "CLOSE": "close",
            "CLOSE_WAIT": "close_wait",
            "LAST_ACK": "last_ack",
            "LISTEN": "listen",
            "CLOSING": "closing",
        },
        "psutil": {
            psutil.CONN_ESTABLISHED: "estab",
            psutil.CONN_SYN_SENT: "syn_sent",
            psutil.CONN_SYN_RECV: "syn_recv",
            psutil.CONN_FIN_WAIT1: "fin_wait_1",
            psutil.CONN_FIN_WAIT2: "fin_wait_2",
            psutil.CONN_TIME_WAIT: "time_wait",
            psutil.CONN_CLOSE: "close",
            psutil.CONN_CLOSE_WAIT: "close_wait",
            psutil.CONN_LAST_ACK: "last_ack",
            psutil.CONN_LISTEN: "listen",
            psutil.CONN_CLOSING: "closing",
            psutil.CONN_NONE: "connections",  # CONN_NONE is always returned for udp connections
        },
    }
def _parse_linux_cx_state(lines, tcp_states, state_col, protocol=None, ip_version=None):
    """
    Parse the output of the command that retrieves the connection state (either `ss` or `netstat`).

    Relies on `cx_state_gauge` (the (protocol, state) -> metric name mapping)
    being available in the enclosing scope.

    lines: output lines to classify; callers strip header rows beforehand.
    tcp_states: mapping from the raw state string in the output to the state
        key used in `cx_state_gauge`.
    state_col: index of the whitespace-separated column holding the TCP state.
    protocol: 'tcp' or 'udp' when the caller already knows the protocol.
    ip_version: IP version suffix (e.g. '4' or '6') when known; otherwise it
        is derived from the first column ('tcp6'/'udp6' mean IPv6, anything
        else IPv4).

    Returns a dict metric_name -> value
    """
    # Start every known metric at 0 so states with no connections still report.
    metrics = dict.fromkeys(cx_state_gauge.values(), 0)
    for line in lines:
        cols = line.split()
        if not cols:
            # Blank line: nothing to classify.
            continue
        if cols[0].startswith('tcp') or protocol == 'tcp':
            if ip_version:
                proto = "tcp{0}".format(ip_version)
            else:
                proto = "tcp6" if cols[0] == "tcp6" else "tcp4"
            # A row may lack the state column entirely; skip it instead of
            # raising IndexError. States missing from `tcp_states` are ignored.
            if len(cols) > state_col and cols[state_col] in tcp_states:
                metric = cx_state_gauge[proto, tcp_states[cols[state_col]]]
                metrics[metric] += 1
        elif cols[0].startswith('udp') or protocol == 'udp':
            if ip_version:
                proto = "udp{0}".format(ip_version)
            else:
                proto = "udp6" if cols[0] == "udp6" else "udp4"
            # UDP rows are all counted under the single 'connections' key.
            metric = cx_state_gauge[proto, 'connections']
            metrics[metric] += 1
    return metrics
数据来源
如果存在 ss 命令,则使用 ss 命令
# `ss` is invoked once per (IP version, protocol) pair:
#   - per IP version, because its output has no built-in way of telling the
#     IP versions apart;
#   - per protocol, because on some systems (e.g. Ubuntu 14.04) a bug makes
#     it print `tcp` even for `udp` sockets.
for ip_version in ('4', '6'):
    for protocol in ('tcp', 'udp'):
        ss_command = ["ss", "-n", "-" + protocol[0], "-a", "-" + ip_version]
        ss_stdout, _, _ = get_subprocess_output(ss_command, self.log)

        # The first output line is the column header; the state is column 0.
        rows = ss_stdout.splitlines()[1:]
        metrics = _parse_linux_cx_state(
            rows,
            tcp_states['ss'],
            0,
            protocol=protocol,
            ip_version=ip_version,
        )

        # Only send the metrics which match this iteration's protocol and IP version.
        for state_key, metric_name in cx_state_gauge.items():
            family = state_key[0]
            if not (family.startswith(protocol) and family.endswith(ip_version)):
                continue
            self.gauge(metric_name, metrics.get(metric_name), tags=custom_tags)
# ss -n -t -a -4 > output
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 2048 127.0.0.1:10248 *:*
LISTEN 0 511 127.0.0.1:2376 *:*
TIME-WAIT 0 0 127.0.0.1:44180 127.0.0.1:19900
TIME-WAIT 0 0 127.0.0.1:43092 127.0.0.1:19900
ESTAB 0 0 127.0.0.1:46606 127.0.0.1:24224
ESTAB 0 0 127.0.0.1:46604 127.0.0.1:24224
如果没有 ss 命令,则使用 netstat 命令
# A single `netstat` call covers TCP and UDP for both IP versions.
netstat_command = ["netstat", "-n", "-u", "-t", "-a"]
netstat_stdout, _, _ = get_subprocess_output(netstat_command, self.log)

# The first two output lines are headers; the state is column 5.
connection_rows = netstat_stdout.splitlines()[2:]
state_counts = _parse_linux_cx_state(connection_rows, tcp_states['netstat'], 5)

for metric_name in state_counts:
    self.gauge(metric_name, state_counts[metric_name], tags=custom_tags)
# netstat -n -u -t -a
Active Internet connections (servers and established)
Proto Recv-Q Send-Q Local Address Foreign Address State
tcp 0 0 127.0.0.1:11211 0.0.0.0:* LISTEN
tcp 0 0 0.0.0.0:55214 0.0.0.0:* LISTEN
tcp 0 0 172.31.22.22:22 125.35.101.186:60326 ESTABLISHED
tcp 0 0 172.31.22.22:35926 52.37.207.158:8443 ESTABLISHED
tcp 0 0 172.31.22.22:58345 172.31.22.228:22
tcp6 0 0 :::111 :::* LISTEN
udp 0 0 0.0.0.0:33113 0.0.0.0:*
udp 0 0 127.0.0.1:875 0.0.0.0:*
udp6 0 0 :::956 :::*
Part 2
Name
- system.net.bytes_rcvd
- The number of bytes received on a device per second.
- bytes/second
- system.net.bytes_sent
- The number of bytes sent from a device per second.
- bytes/second
- system.net.packets_in.count
- The number of packets of data received by the interface.
- packets/second
- system.net.packets_in.error
- The number of packet receive errors detected by the device driver.
- errors/second
- system.net.packets_out.count
- The number of packets of data transmitted by the interface.
- packets/second
- system.net.packets_out.error
- The number of packet transmit errors detected by the device driver.
- errors/second
计算公式
system.net.bytes_rcvd = RATE(line[0])
system.net.bytes_sent = RATE(line[8])
system.net.packets_in.count = RATE(line[1])
system.net.packets_in.error = RATE(line[2] + line[3])
system.net.packets_out.count = RATE(line[9])
system.net.packets_out.error = RATE(line[10] + line[11])
数据来源
# cat /proc/net/dev
Inter-| Receive | Transmit
face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed
eth0: 37808188785 50895609 0 0 0 0 0 0 42329915428 43474989 0 0 0 0 0 0
lo: 2547864 1526 0 0 0 0 0 0 2547864 1526 0 0 0 0 0 0
Part 3
Name
- system.net.tcp.retrans_segs
- The number of TCP segments retransmitted.
- segment/second
- gauge (metrics type)
- system.net.tcp.retrans_segs.count
- Total number of retransmitted TCP segments.
- segment (unit)
- count (metrics type)
- system.net.tcp.in_segs
- The number of TCP segments received.
- segment/second
- system.net.tcp.in_segs.count
- Total number of received TCP segments.
- segment
- system.net.tcp.out_segs
- The number of TCP segments transmitted
- segment/second
- system.net.tcp.out_segs.count
- Total number of transmitted TCP segments.
- system.net.tcp.listen_overflows
- The number of times connections have overflowed the accept buffer. Available since Agent v5.14.0.
- system.net.tcp.listen_overflows.count
- Total number of times connections have overflowed the accept buffer.
- system.net.tcp.listen_drops
- The number of times connections have dropped out of listen. Available since Agent v5.14.0.
- system.net.tcp.listen_drops.count
- Total number of times connections have dropped out of listen.
- system.net.tcp.backlog_drops
- The number of packets dropped because there wasn't room in the TCP backlog. Available since Agent v5.14.0.
- packet
- system.net.tcp.backlog_drops.count
- Total number of packets dropped because there wasn't room in the TCP backlog.
- system.net.tcp.failed_retransmits
- The number of packets that failed to be retransmitted.
- packet
- system.net.tcp.failed_retransmits.count
- Total number of packets that failed to be retransmitted.
- packet
- system.net.udp.in_datagrams
- system.net.udp.no_ports
- system.net.udp.in_errors
- system.net.udp.out_datagrams
- system.net.udp.rcv_buf_errors
- system.net.udp.snd_buf_errors
- system.net.udp.in_csum_errors
计算公式
# Parse /proc/net/netstat and /proc/net/snmp into
# netstat_data[category][counter_name] = raw string value.
# Both files consist of line pairs: a header line ("Category: name name ...")
# followed by a value line ("Category: value value ...").
netstat_data = {}
for f in ['netstat', 'snmp']:
    proc_data_path = "/proc/net/{}".format(f)
    with open(proc_data_path, 'r') as netstat:
        while True:
            n_header = netstat.readline()
            if not n_header:
                # readline() returns '' at end of file; without this check
                # the loop would never terminate.
                break
            n_data = netstat.readline()

            h_parts = n_header.strip().split(' ')
            h_values = n_data.strip().split(' ')
            # First token is the category with a trailing ':' (e.g. "Tcp:").
            ns_category = h_parts[0][:-1]
            netstat_data[ns_category] = {}
            # Turn the data into a dictionary
            for idx, hpart in enumerate(h_parts[1:]):
                netstat_data[ns_category][hpart] = h_values[idx + 1]

# Kernel counter name -> metric name, grouped by category.
nstat_metrics_names = {
    'Tcp': {
        'RetransSegs': 'system.net.tcp.retrans_segs',
        'InSegs': 'system.net.tcp.in_segs',
        'OutSegs': 'system.net.tcp.out_segs',
    },
    'TcpExt': {
        'ListenOverflows': 'system.net.tcp.listen_overflows',
        'ListenDrops': 'system.net.tcp.listen_drops',
        'TCPBacklogDrop': 'system.net.tcp.backlog_drops',
        'TCPRetransFail': 'system.net.tcp.failed_retransmits',
    },
    'Udp': {
        'InDatagrams': 'system.net.udp.in_datagrams',
        'NoPorts': 'system.net.udp.no_ports',
        'InErrors': 'system.net.udp.in_errors',
        'OutDatagrams': 'system.net.udp.out_datagrams',
        'RcvbufErrors': 'system.net.udp.rcv_buf_errors',
        'SndbufErrors': 'system.net.udp.snd_buf_errors',
        'InCsumErrors': 'system.net.udp.in_csum_errors',
    },
}

# For every counter that is present in the parsed data, derive two metrics:
# `<name>` and `<name>.count`.
# RATE(...) and COUNT(...) are this document's shorthand, not functions
# defined in this snippet. RATE is described at the top of the document;
# COUNT presumably denotes the change of the counter between two
# collections -- TODO confirm.
for k in nstat_metrics_names:
    for met in nstat_metrics_names[k]:
        if met in netstat_data.get(k, {}):
            metric_name = nstat_metrics_names[k][met]
            metric_value = RATE(netstat_data[k][met])
            metric_name_count = '{}.count'.format(nstat_metrics_names[k][met])
            metric_value_count = COUNT(netstat_data[k][met])
数据来源
# cat /proc/net/netstat
TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected TCPSackShifted TCPSackMerged TCPSackShiftFallback TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK TCPSYNChallenge TCPFastOpenActive TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets
TcpExt: 0 0 2339 10475 244 0 0 0 0 0 38300 0 0 0 0 8541 796678 135 58187 0 0 3332364 75541088 1648038116 0 16967265 2463630 10559734 8247650 0 27371 0 6 192 0 208 151 375 3560 12680 703 0 2293 584 44991 2801 2763 106644 158411 75645 0 1454 4 3474 60168 200 86628 1266 150196 25912 0 5951 0 0 0 0 10 27773 2126 0 0 14392 93650 262596 0 0 0 0 0 0 0 24 3842949 320719 0 197 1492 754 0 0 0 0 0 757 0
IpExt: InNoRoutes InTruncatedPkts InMcastPkts OutMcastPkts InBcastPkts OutBcastPkts InOctets OutOctets InMcastOctets OutMcastOctets InBcastOctets OutBcastOctets InCsumErrors InNoECTPkts InECT1Pkts InECT0Pkts InCEPkts
IpExt: 1 0 0 0 0 0 37594794075 41702562633 0 0 0 0 0 49599031 17 527514 908
# cat /proc/net/snmp
Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates
Ip: 1 64 46556382 0 0 0 0 0 46556381 42706303 0 0 0 0 0 0 0 0 0
Icmp: InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps
Icmp: 678 420 0 663 0 0 0 0 0 15 0 0 0 0 4049 0 3801 0 0 0 0 248 0 0 0 0 0
IcmpMsg: InType0 InType3 OutType3 OutType8
IcmpMsg: 15 663 3801 248
Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors
Tcp: 1 200 120000 -1 224478 308563 23717 183361 75 45638370 56328617 186532 778 360402 25
Udp: InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors
Udp: 913432 3211 0 917405 0 0 0
UdpLite: InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors
UdpLite: 0 0 0 0 0 0 0