File: /usr/lib/check_mk_agent/plugins/mk_filestats_2.py
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Copyright (C) 2019 Checkmk GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
# Do not test generated 2.x files
# fmt: off
# type: ignore
r"""Check_MK Agent Plugin: mk_filestats
This is a Check_MK Agent plugin. If configured, it will be called by the
agent without any arguments.
usage: mk_filestats [OPTIONS]
Options:
-h, --help Show this help message and exit
-v, -vv Increase verbosity
-c, --config-file Read config file
(default: $MK_CONFDIR/filestats.cfg)
Details:
This plugin is configured using ini-style configuration files, i.e. a file with
sections started by lines of the form '[Section Name]' and consisting of
'key: value' (key-colon-space-value) lines.
Every section will be processed individually, and this processing can be
described by the following four phases:
1. Input:
This phase will gather (iterators returning) the files the plugin will
    initially be aware of.
    Option keys concerning this phase are prefixed 'input_'. Currently, there
is only one option available (which therefore must be present):
* ``input_patterns'':
Here you can specify one or more *globbing* patterns. If more than one
      pattern is provided, they will be split according to shell rules
(using shlex.split). Every matching file will be dealt with, every
      matching folder will be recursively searched for *all* files.
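    For illustration (paths hypothetical), an input definition could look
    like this; the quotes keep the second pattern intact despite the space:
        input_patterns: /var/log/*.log "/var/spool/my app/*"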
2. Filtering:
This phase will filter the input files according to filters provided using
the option keys starting with 'filter_' of the corresponding configuration
    section. The following are available (note that regex filters will always
    be applied before other types of filters):
* ``filter_regex: regular_expression''
Only further process a file, if its full path matches the given regular
expression. Everything following the characters 'filter_regex: ' will
      be considered one single regular expression.
* ``filter_regex_inverse: regular_expression''
Only further process a file, if its full path *does not* match the given
regular expression.
* ``filter_size: specification''
Only further process a file, if its size in bytes matches the provided
specification. The specification consists of one of the operators '>',
'<', '>=', '<=' and '==', directly followed by an integer.
      E.g.: 'filter_size: <43008' will only match files smaller than 42 KiB.
* ``filter_age: specification''
Only further process a file, if its age in seconds matches the filter.
See ``filter_size''.
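    For illustration (values hypothetical), the following filters keep only
    paths ending in '.log' that are larger than 1 MiB and younger than a day:
        filter_regex: .*\.log$
        filter_size: >1048576
        filter_age: <86400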
3. Grouping
It is possible to group files within a file group further into subgroups
using grouping criteria. The supported options are:
* ``grouping_regex: regular_expression''
Assign a file to a subgroup if its full path matches the given regular
expression.
A separate service is created for each subgroup, prefixed with its parent
group name (i.e. <parent group name> <subgroup name>). The order in which
subgroups and corresponding patterns are specified matters: rules are
processed in the given order.
Files that match the specified subgroups are shown as a separate service
and are excluded from the parent file group.
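    For illustration (names hypothetical): with the default subgroups
    delimiter '@', a section
        [logfiles@errors]
        grouping_regex: .*error.*
    creates a service 'logfiles errors' for the matching files and excludes
    them from the 'logfiles' service.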
4. Output
You can choose from three different ways the output will be aggregated:
* ``output: file_stats''
      Output the full information for every single file that is processed.
This is the default.
* ``output: count_only''
Only count the files matching all of the provided filters. Unless
required for the filtering operation, no stat call on the files is
made.
* ``output: extremes_only''
Only report the youngest, oldest, smallest, and biggest files. In case
      checks only require this information, we can significantly reduce data.
* ``output: single_file''
      Monitor a single file and send its metrics. If an input_pattern of a .cfg section
      matches multiple files, the agent sends one subsection per file.
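    Putting the phases together, a minimal illustrative section reads:
        [general_logs]
        input_patterns: /var/log/*.log
        filter_age: <86400
        output: count_only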
You should find an example configuration file at
'../cfg_examples/filestats.cfg' relative to this file.
"""
from __future__ import absolute_import
__version__ = "2.3.0p38"
import collections
import ConfigParser
import errno
import glob
import logging
import operator
import os
import re
import shlex
import sys
import time
def ensure_str(s):
if sys.version_info[0] >= 3:
        if isinstance(s, bytes):
            return s.decode("utf-8")
else:
if isinstance(s, unicode): # pylint: disable=undefined-variable
return s.encode("utf-8")
return s
def ensure_text(s):
if sys.version_info[0] >= 3:
        if isinstance(s, bytes):
            return s.decode("utf-8")
else:
if isinstance(s, str):
return s.decode("utf-8")
return s
DEFAULT_CFG_FILE = os.path.join(os.getenv("MK_CONFDIR", ""), "filestats.cfg")
DEFAULT_CFG_SECTION = {"output": "file_stats", "subgroups_delimiter": "@"}
FILTER_SPEC_PATTERN = re.compile("(?P<operator>[<>=]+)(?P<value>.+)")
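# For example, a spec ">=1024" parses into operator ">=" and value "1024";
# an operator not listed in COMPARATORS (defined below) is rejected later
# with a ValueError in AbstractNumericFilter.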
LOGGER = logging.getLogger(__name__)
def parse_arguments(argv=None):
if argv is None:
argv = sys.argv[1:]
parsed_args = {}
if "-h" in argv or "--help" in argv:
sys.stderr.write(ensure_str(__doc__))
sys.exit(0)
if "-v" in argv or "--verbose" in argv:
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
elif "-vv" in argv or "--verbose" in argv:
logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(lineno)s: %(message)s")
else:
LOGGER.propagate = False
parsed_args["cfg_file"] = DEFAULT_CFG_FILE
for opt in ("-c", "--config-file"):
if opt in argv:
try:
parsed_args["cfg_file"] = argv[argv.index(opt) + 1]
except IndexError:
sys.stderr.write("missing value for option %r\n" % opt)
sys.exit(1)
return parsed_args
class FileStat(object):
"""Wrapper around os.stat
Only call os.stat once.
"""
@classmethod
def from_path(cls, raw_file_path, file_path):
LOGGER.debug("Creating FileStat(%r)", raw_file_path)
try:
file_stat = os.stat(raw_file_path)
except OSError, exc:
# report on errors, regard failure as 'file'
stat_status = "file vanished" if exc.errno == errno.ENOENT else str(exc)
return cls(file_path, stat_status)
try:
size = int(file_stat.st_size)
except ValueError, exc:
stat_status = str(exc)
return cls(file_path, stat_status)
try:
m_time = int(file_stat.st_mtime)
age = int(time.time()) - m_time
except ValueError, exc:
stat_status = str(exc)
return cls(file_path, stat_status, size)
return cls(file_path, "ok", size, age, m_time)
def __init__(self, file_path, stat_status, size=None, age=None, m_time=None):
super(FileStat, self).__init__()
self.file_path = file_path
self.stat_status = stat_status
self.size = size
self.age = age
self._m_time = m_time
def dumps(self):
data = {
"type": "file",
"path": self.file_path,
"stat_status": self.stat_status,
"size": self.size,
"age": self.age,
"mtime": self._m_time,
}
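        # One emitted line looks like this (illustrative values; py2 dict
        # repr does not guarantee key order):
        #   {'type': 'file', 'path': u'/var/log/syslog', 'stat_status': 'ok',
        #    'size': 1024, 'age': 42, 'mtime': 1700000000}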
return repr(data)
# .
# .--Input---------------------------------------------------------------.
# | ___ _ |
# | |_ _|_ __ _ __ _ _| |_ |
# | | || '_ \| '_ \| | | | __| |
# | | || | | | |_) | |_| | |_ |
# | |___|_| |_| .__/ \__,_|\__| |
# | |_| |
# +----------------------------------------------------------------------+
# | |
# '----------------------------------------------------------------------'
def _sanitize_path(raw_file_path):
    # raw_file_path is the value returned by iglob. This value cannot be typed meaningfully:
# * if the path is utf-8 decodable, python2: unicode
# * if the path is not utf-8 decodable, python2: str
# * python3: str, possibly with surrogates aka it can only be encoded again like so:
# str_with_surrogates.encode('utf-8', 'surrogateescape')
if sys.version_info[0] >= 3:
return raw_file_path.encode("utf-8", "surrogateescape").decode("utf-8", "replace")
if isinstance(raw_file_path, unicode): # type: ignore[name-defined] # noqa: F821 pylint: disable=undefined-variable
return raw_file_path
return raw_file_path.decode("utf-8", "replace")
class PatternIterator(object):
"""Recursively iterate over all files"""
def __init__(self, pattern_list, filters):
super(PatternIterator, self).__init__()
self._patterns = [os.path.abspath(os.path.expanduser(p)) for p in pattern_list]
self._regex_filters = [f for f in filters if isinstance(f, RegexFilter)]
self._numerical_filters = [f for f in filters if isinstance(f, AbstractNumericFilter)]
def _file_stats(self, raw_file_paths):
for raw_file_path in raw_file_paths:
file_path = _sanitize_path(raw_file_path)
if not all(f.matches(file_path) for f in self._regex_filters):
LOGGER.debug("File %r does not match any regex filter", raw_file_path)
continue
file_stat = FileStat.from_path(raw_file_path, file_path)
if not all(f.matches(file_stat) for f in self._numerical_filters):
LOGGER.debug("File %r does not match any numerical filter", raw_file_path)
continue
LOGGER.debug("File %s matches all regex and numerical filters", raw_file_path)
yield file_stat
def __iter__(self):
for pattern in self._patterns:
LOGGER.info("processing pattern: %r", pattern)
# pattern needs to be a unicode/python3 str. Otherwise things might go sour, for instance:
# If we pass "*".encode("utf-8"), then a non-UTF-8 filesystem may no longer realize that
# b'\x2A' refers to a wildcard. Instead iglob is responsible for conversion.
for item in glob.iglob(pattern):
if os.path.isdir(item):
# equivalent to `find -type f`
for currentpath, _folders, file_names in os.walk(item):
for file_stat in self._file_stats(
[os.path.join(currentpath, fn) for fn in file_names]
):
yield file_stat
else:
for file_stat in self._file_stats([item]):
yield file_stat
def get_file_iterator(config):
"""get a FileStat iterator"""
input_specs = [(k[6:], v) for k, v in config.items() if k.startswith("input_")]
if not input_specs:
raise ValueError("missing input definition")
if len(input_specs) != 1: # currently not supported
raise ValueError("multiple input definitions: %r" % input_specs)
variety, spec_string = input_specs[0]
if variety != "patterns":
raise ValueError("unknown input type: %r" % variety)
patterns = shlex.split(spec_string)
return PatternIterator(patterns, get_file_filters(config))
# .
# .--Filtering-----------------------------------------------------------.
# | _____ _ _ _ _ |
# | | ___(_) | |_ ___ _ __(_)_ __ __ _ |
# | | |_ | | | __/ _ \ '__| | '_ \ / _` | |
# | | _| | | | || __/ | | | | | | (_| | |
# | |_| |_|_|\__\___|_| |_|_| |_|\__, | |
# | |___/ |
# +----------------------------------------------------------------------+
# | |
# '----------------------------------------------------------------------'
COMPARATORS = {
"<": operator.lt,
"<=": operator.le,
">": operator.gt,
">=": operator.ge,
"==": operator.eq,
}
class AbstractNumericFilter(object):
"""Common code for filtering by comparing integers"""
def __init__(self, spec_string):
super(AbstractNumericFilter, self).__init__()
match = FILTER_SPEC_PATTERN.match(spec_string)
if match is None:
raise ValueError("unable to parse filter spec: %r" % spec_string)
spec = match.groupdict()
comp = COMPARATORS.get(spec["operator"])
if comp is None:
raise ValueError("unknown operator for numeric filter: %r" % spec["operator"])
reference = int(spec["value"])
self._matches_value = lambda actual: comp(int(actual), reference)
def matches(self, filestat):
raise NotImplementedError()
class SizeFilter(AbstractNumericFilter):
def matches(self, filestat):
"""apply AbstractNumericFilter ti file size"""
size = filestat.size
if size is not None and size != "null":
return self._matches_value(size)
# Don't return vanished files.
# Other cases are a problem, and should be included
return filestat.stat_status != "file vanished"
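    # Illustrative: with spec "<43008" (i.e. smaller than 42 KiB),
    #   SizeFilter("<43008").matches(FileStat("/f", "ok", size=1024))   # True
    #   SizeFilter("<43008").matches(FileStat("/f", "ok", size=65536))  # False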
class AgeFilter(AbstractNumericFilter):
def matches(self, filestat):
"""apply AbstractNumericFilter ti file age"""
age = filestat.age
if age is not None and age != "null":
return self._matches_value(age)
# Don't return vanished files.
# Other cases are a problem, and should be included
return filestat.stat_status != "file vanished"
class RegexFilter(object):
def __init__(self, regex_pattern):
super(RegexFilter, self).__init__()
LOGGER.debug("initializing with pattern: %r", regex_pattern)
self._regex = re.compile(ensure_text(regex_pattern), re.UNICODE)
def matches(self, file_path):
return bool(self._regex.match(file_path))
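    # Note: re.match anchors at the beginning of the full path, so matching
    # on a suffix requires a leading wildcard, e.g. u'.*\.log$'.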
class InverseRegexFilter(RegexFilter):
def matches(self, file_path):
return not self._regex.match(file_path)
def get_file_filters(config):
filter_specs = ((k[7:], v) for k, v in config.items() if k.startswith("filter_"))
filters = []
for variety, spec_string in filter_specs:
LOGGER.debug("found filter spec: %r", (variety, spec_string))
try:
filter_type = {
"regex": RegexFilter,
"regex_inverse": InverseRegexFilter,
"size": SizeFilter,
"age": AgeFilter,
}[variety]
except KeyError:
raise ValueError("unknown filter type: %r" % variety)
filters.append(filter_type(spec_string))
# add regex filters first to save stat calls
return sorted(filters, key=lambda x: not isinstance(x, RegexFilter))
# .
# .--Grouping------------------------------------------------------------.
# | ____ _ |
# | / ___|_ __ ___ _ _ _ __ (_)_ __ __ _ |
# | | | _| '__/ _ \| | | | '_ \| | '_ \ / _` | |
# | | |_| | | | (_) | |_| | |_) | | | | | (_| | |
# | \____|_| \___/ \__,_| .__/|_|_| |_|\__, | |
# | |_| |___/ |
# +----------------------------------------------------------------------+
# | |
# '----------------------------------------------------------------------'
def parse_grouping_config(
config,
raw_config_section_name,
options,
subgroups_delimiter,
):
parent_group_name, child_group_name = raw_config_section_name.split(subgroups_delimiter, 1)
for option in options:
if option.startswith("grouping_"):
grouping_type = option.split("_", 1)[1]
grouping_rule = config.get(raw_config_section_name, option)
LOGGER.info("found subgroup: %s", raw_config_section_name)
return parent_group_name, (
child_group_name,
{
"type": grouping_type,
"rule": grouping_rule,
},
)
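# Illustrative: a section '[logfiles@errors]' (delimiter '@') containing
# 'grouping_regex: .*error.*' is parsed into
#   ("logfiles", ("errors", {"type": "regex", "rule": ".*error.*"}))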
def _grouping_construct_group_name(parent_group_name, child_group_name=""):
"""allow the user to format the service name using '%s'.
>>> _grouping_construct_group_name('aard %s vark', 'banana')
'aard banana vark'
>>> _grouping_construct_group_name('aard %s vark %s %s', 'banana')
'aard banana vark %s %s'
>>> _grouping_construct_group_name('aard %s vark')
'aard %s vark'
>>> _grouping_construct_group_name('aard %s', '')
'aard %s'
"""
format_specifiers_count = parent_group_name.count("%s")
if not format_specifiers_count:
return ("%s %s" % (parent_group_name, child_group_name)).strip()
if not child_group_name:
return parent_group_name
return (
parent_group_name % ((child_group_name,) + ("%s",) * (format_specifiers_count - 1))
).strip()
def _get_matching_child_group(single_file, grouping_conditions):
for child_group_name, grouping_condition in grouping_conditions:
if re.match(grouping_condition["rule"], single_file.file_path):
return child_group_name
return ""
def grouping_multiple_groups(config_section_name, files_iter, grouping_conditions):
"""create multiple groups per section if the agent is configured
for grouping. each group is shown as a separate service. if a file
does not belong to a group, it is added to the section."""
parent_group_name = config_section_name
    # Initialise dict with parent and child group because they should be in the section
# with 0 count if there are no files for them.
grouped_files = {
"": [], # parent
} # type: dict[str, list[FileStat]]
grouped_files.update(dict((g[0], []) for g in grouping_conditions))
for single_file in files_iter:
matching_child_group = _get_matching_child_group(single_file, grouping_conditions)
grouped_files[matching_child_group].append(single_file)
for matching_child_group, files in grouped_files.items():
yield (
_grouping_construct_group_name(parent_group_name, matching_child_group),
files,
)
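# Illustrative: for a parent section 'logs' with one subgroup 'errors', this
# yields the services 'logs' (files matching no subgroup) and 'logs errors'.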
def grouping_single_group(config_section_name, files_iter, _grouping_conditions):
"""create one single group per section"""
group_name = config_section_name
yield group_name, files_iter
def get_grouper(grouping_conditions):
if grouping_conditions:
return grouping_multiple_groups
return grouping_single_group
# .
# .--Output--------------------------------------------------------------.
# | ___ _ _ |
# | / _ \ _ _| |_ _ __ _ _| |_ |
# | | | | | | | | __| '_ \| | | | __| |
# | | |_| | |_| | |_| |_) | |_| | |_ |
# | \___/ \__,_|\__| .__/ \__,_|\__| |
# | |_| |
# +----------------------------------------------------------------------+
# | |
# '----------------------------------------------------------------------'
def output_aggregator_count_only(group_name, files_iter):
yield "[[[count_only %s]]]" % group_name
count = sum(1 for __ in files_iter)
yield repr({"type": "summary", "count": count})
def output_aggregator_file_stats(group_name, files_iter):
yield "[[[file_stats %s]]]" % group_name
count = 0
for count, filestat in enumerate(files_iter, 1):
yield filestat.dumps()
yield repr({"type": "summary", "count": count})
def output_aggregator_extremes_only(group_name, files_iter):
yield "[[[extremes_only %s]]]" % group_name
files = list(files_iter)
count = len(files)
if not count:
yield repr({"type": "summary", "count": count})
return
min_age = max_age = min_size = max_size = files[0]
for filestat in files[1:]:
if filestat.age is None:
continue
if min_age.age is None or filestat.age < min_age.age:
min_age = filestat
if max_age.age is None or filestat.age > max_age.age:
max_age = filestat
if min_size.size is None or filestat.size < min_size.size:
min_size = filestat
if max_size.size is None or filestat.size > max_size.size:
max_size = filestat
for extreme_file in set((min_age, max_age, min_size, max_size)):
yield extreme_file.dumps()
yield repr({"type": "summary", "count": count})
def output_aggregator_single_file(group_name, files_iter):
for lazy_file in files_iter:
count_format_specifiers = group_name.count("%s")
if count_format_specifiers == 0:
subsection_name = group_name
else:
subsection_name = group_name % (
(lazy_file.file_path,) + (("%s",) * (count_format_specifiers - 1))
)
yield "[[[single_file %s]]]" % subsection_name
yield lazy_file.dumps()
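# Illustrative: a group name 'mylog %s' yields one subsection per matching
# file, e.g. '[[[single_file mylog /var/log/foo.log]]]' (path hypothetical).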
def get_output_aggregator(config):
output_spec = config.get("output")
try:
return {
"count_only": output_aggregator_count_only,
"extremes_only": output_aggregator_extremes_only,
"file_stats": output_aggregator_file_stats,
"single_file": output_aggregator_single_file,
}[output_spec]
except KeyError:
raise ValueError("unknown 'output' spec: %r" % output_spec)
def write_output(groups, output_aggregator):
for group_name, group_files_iter in groups:
for line in output_aggregator(group_name, group_files_iter):
sys.stdout.write("%s\n" % line)
# .
# .--Main----------------------------------------------------------------.
# | __ __ _ |
# | | \/ | __ _(_)_ __ |
# | | |\/| |/ _` | | '_ \ |
# | | | | | (_| | | | | | |
# | |_| |_|\__,_|_|_| |_| |
# | |
# +----------------------------------------------------------------------+
# | |
# '----------------------------------------------------------------------'
def iter_config_section_dicts(cfg_file=None):
if cfg_file is None:
cfg_file = DEFAULT_CFG_FILE
# FIXME: Python 2.6 has no OrderedDict at all, it is only available in a separate ordereddict
# package, but we simply can't assume that this is installed on the client!
config = ConfigParser.ConfigParser(
DEFAULT_CFG_SECTION,
dict_type=collections.OrderedDict,
)
LOGGER.debug("trying to read %r", cfg_file)
files_read = config.read(cfg_file)
LOGGER.info("read configration file(s): %r", files_read)
parsed_config = {}
for raw_cfg_section_name in config.sections():
options = config.options(raw_cfg_section_name)
subgroups_delimiter = config.get(raw_cfg_section_name, "subgroups_delimiter")
if subgroups_delimiter not in raw_cfg_section_name:
consolidated_cfg_section_name = raw_cfg_section_name
parsed_config[consolidated_cfg_section_name] = dict((
k, config.get(raw_cfg_section_name, k)) for k in options)
continue
parent_group_name, parsed_grouping_config = parse_grouping_config(
config,
raw_cfg_section_name,
options,
subgroups_delimiter,
)
consolidated_cfg_section_name = parent_group_name
# TODO: The below suppressions are due to the fact that typing the parsed config properly
# requires a more sophisticated type (perhaps a class) and a bigger refactoring to validate
# the options and dispatch immediately after parsing is complete.
parsed_config[consolidated_cfg_section_name].setdefault(
"grouping",
[], # type: ignore[arg-type]
).append( # type: ignore[attr-defined]
parsed_grouping_config
)
for consolidated_cfg_section_name, parsed_option in parsed_config.items():
yield consolidated_cfg_section_name, parsed_option
def main():
args = parse_arguments()
sys.stdout.write("<<<filestats:sep(0)>>>\n")
for config_section_name, config in iter_config_section_dicts(args["cfg_file"]):
# 1 input and
# 2 filtering
filtered_files = get_file_iterator(config)
# 3 grouping
grouping_conditions = config.get("grouping")
grouper = get_grouper(grouping_conditions)
groups = grouper(config_section_name, filtered_files, grouping_conditions)
# 4 output
output_aggregator = get_output_aggregator(config)
write_output(groups, output_aggregator)
if __name__ == "__main__":
main()