Browse code

Add whisper-fix-cpu-metrics.

Xavier G authored on 24/01/2018 15:40:23
Showing 1 changed file

1 1
new file mode 100755
... ...
@@ -0,0 +1,35 @@
#!/bin/bash
# Fix obviously erroneous CPU metrics in Graphite+collectd Whisper files.
#
# Usage: whisper-fix-cpu-metrics [WHISPER_ROOT]
#   WHISPER_ROOT  directory holding one sub-directory per host
#                 (default: /var/lib/graphite/whisper/collectd/)
#
# For every host, each cpu-*/cpu-*.wsp file is dumped with whisper-dump and
# scanned for values above 110 * <number of cpu-* directories>. Nothing is
# modified: for each affected file, a "whisper-update" command line that would
# overwrite the abnormal points is printed, with the file path relative to the
# host directory.
#
# Exit codes:
#   50  cannot enter WHISPER_ROOT
#   49  cannot enter a host directory
#   40  cannot return from a host directory

# Without nullglob, an unmatched pattern stays literal and whisper-dump would
# be run on a file named "cpu-*/cpu-*.wsp".
shopt -s nullglob

whisper_root=${1:-/var/lib/graphite/whisper/collectd/}
cd -- "${whisper_root}" || exit 50

for host in *; do
	# Stray regular files in the root must not abort the whole run.
	[[ -d "${host}" ]] || continue

	echo "Treating host ${host}:"
	cd -- "${host}" || exit 49

	# Count only the per-CPU directories that the file glob below can reach.
	cpu_count=$(find . -maxdepth 1 -type d -name 'cpu-*' | wc -l)
	((cpu_threshold = 110 * cpu_count))
	echo "    ${host} has ${cpu_count} CPUs, considering a threshold of ${cpu_threshold}"

	for file in cpu-*/cpu-*.wsp; do
		echo "    Treating ${file}:"
		# The threshold and the file name travel through the environment
		# rather than being spliced into the Perl source, so unusual
		# characters in a path cannot alter the program or its printf format.
		whisper-dump "${file}" |
			CPU_THRESHOLD="${cpu_threshold}" WSP_FILE="${file}" perl -lnE '
			# Skip non-data lines; data lines look like "index: timestamp, value":
			next unless m#^\d+: (\d+), ([0-9.]+)$#;
			my ($timestamp, $value) = ($1, $2);
			# Queue every timestamp holding an abnormal value, so that a run of
			# consecutive bad points is fixed entirely, not only its last point:
			if ($value > $ENV{CPU_THRESHOLD}) {
				push(@pending, $timestamp);
				next;
			}
			# The first sane value after the run replaces all queued points.
			# Abnormal points with no sane value after them are left alone.
			push(@fixes, map { sprintf(q[%d:%f], $_, $value) } @pending);
			@pending = ();
			END {
				if (@fixes) {
					printf(qq[        whisper-update %s %s\n], $ENV{WSP_FILE}, join(q[ ], @fixes));
				}
			}'
	done
	cd - > /dev/null || exit 40
done