mirror of
https://github.com/ansible-collections/community.general.git
synced 2024-09-14 20:13:21 +02:00
Nagios: added 'acknowledge' and 'schedule_forced_svc_check' action (#820)
* bugfix for None-type error * add acknowledge and service_check options * fix whitespace issues * documentation fix * fix version error * changelog fix * Update plugins/modules/monitoring/nagios.py Co-authored-by: Felix Fontein <felix@fontein.de> * fix int convert error * Update plugins/modules/monitoring/nagios.py Co-authored-by: Felix Fontein <felix@fontein.de> * indentation fix Co-authored-by: Goetheyn Tony <tony.goetheyn@digipolis.gent> Co-authored-by: Felix Fontein <felix@fontein.de>
This commit is contained in:
parent
ea1fb83b0c
commit
7310a34b55
2 changed files with 179 additions and 8 deletions
|
@ -0,0 +1,3 @@
|
|||
minor_changes:
|
||||
- nagios - add the ``acknowledge`` action (https://github.com/ansible-collections/community.general/pull/820).
|
||||
- nagios - add the ``service_check`` action (https://github.com/ansible-collections/community.general/pull/820).
|
|
@ -22,10 +22,10 @@ description:
|
|||
- All actions require the I(host) parameter to be given explicitly. In playbooks you can use the C({{inventory_hostname}}) variable to refer
|
||||
to the host the playbook is currently running on.
|
||||
- You can specify multiple services at once by separating them with commas, .e.g., C(services=httpd,nfs,puppet).
|
||||
- When specifying what service to handle there is a special service value, I(host), which will handle alerts/downtime for the I(host itself),
|
||||
- When specifying what service to handle there is a special service value, I(host), which will handle alerts/downtime/acknowledge for the I(host itself),
|
||||
e.g., C(service=host). This keyword may not be given with other services at the same time.
|
||||
I(Setting alerts/downtime for a host does not affect alerts/downtime for any of the services running on it.) To schedule downtime for all
|
||||
services on particular host use keyword "all", e.g., C(service=all).
|
||||
I(Setting alerts/downtime/acknowledge for a host does not affect alerts/downtime/acknowledge for any of the services running on it.)
|
||||
To schedule downtime for all services on particular host use keyword "all", e.g., C(service=all).
|
||||
- When using the C(nagios) module you will need to specify your Nagios server using the C(delegate_to) parameter.
|
||||
options:
|
||||
action:
|
||||
|
@ -33,10 +33,11 @@ options:
|
|||
- Action to take.
|
||||
- servicegroup options were added in 2.0.
|
||||
- delete_downtime options were added in 2.2.
|
||||
- The C(acknowledge) and C(service_check) actions were added in community.general 1.2.0.
|
||||
required: true
|
||||
choices: [ "downtime", "delete_downtime", "enable_alerts", "disable_alerts", "silence", "unsilence",
|
||||
"silence_nagios", "unsilence_nagios", "command", "servicegroup_service_downtime",
|
||||
"servicegroup_host_downtime" ]
|
||||
"servicegroup_host_downtime", "acknowledge", "service_check" ]
|
||||
host:
|
||||
description:
|
||||
- Host to operate on in Nagios.
|
||||
|
@ -48,11 +49,11 @@ options:
|
|||
author:
|
||||
description:
|
||||
- Author to leave downtime comments as.
|
||||
Only usable with the C(downtime) action.
|
||||
Only usable with the C(downtime) and C(acknowledge) action.
|
||||
default: Ansible
|
||||
comment:
|
||||
description:
|
||||
- Comment for C(downtime) action.
|
||||
- Comment for C(downtime) and C(acknowledge)action.
|
||||
default: Scheduling downtime
|
||||
start:
|
||||
description:
|
||||
|
@ -68,7 +69,7 @@ options:
|
|||
description:
|
||||
- What to manage downtime/alerts for. Separate multiple services with commas.
|
||||
C(service) is an alias for C(services).
|
||||
B(Required) option when using the C(downtime), C(enable_alerts), and C(disable_alerts) actions.
|
||||
B(Required) option when using the C(downtime), C(acknowledge), C(service_check), C(enable_alerts), and C(disable_alerts) actions.
|
||||
aliases: [ "service" ]
|
||||
required: true
|
||||
servicegroup:
|
||||
|
@ -156,6 +157,27 @@ EXAMPLES = '''
|
|||
service: host
|
||||
comment: Planned maintenance
|
||||
|
||||
- name: acknowledge an HOST with a particular comment
|
||||
community.general.nagios:
|
||||
action: acknowledge
|
||||
service: host
|
||||
host: '{{ inventory_hostname }}'
|
||||
comment: 'power outage - see casenr 12345'
|
||||
|
||||
- name: acknowledge an active service problem for the httpd service with a particular comment
|
||||
community.general.nagios:
|
||||
action: acknowledge
|
||||
service: httpd
|
||||
host: '{{ inventory_hostname }}'
|
||||
comment: 'service crashed - see casenr 12345'
|
||||
|
||||
- name: acknowledge an passive service problem for snmp trap with a particular comment
|
||||
community.general.nagios:
|
||||
action: service_check
|
||||
service: snmp
|
||||
host: '{{ inventory_hostname }}'
|
||||
comment: 'switch problem - see casenr 12345'
|
||||
|
||||
- name: Enable SMART disk alerts
|
||||
community.general.nagios:
|
||||
action: enable_alerts
|
||||
|
@ -256,6 +278,8 @@ def main():
|
|||
'command',
|
||||
'servicegroup_host_downtime',
|
||||
'servicegroup_service_downtime',
|
||||
'acknowledge',
|
||||
'service_check',
|
||||
]
|
||||
|
||||
module = AnsibleModule(
|
||||
|
@ -284,6 +308,7 @@ def main():
|
|||
##################################################################
|
||||
# Required args per action:
|
||||
# downtime = (minutes, service, host)
|
||||
# acknowledge = (service, host)
|
||||
# (un)silence = (host)
|
||||
# (enable/disable)_alerts = (service, host)
|
||||
# command = command
|
||||
|
@ -322,6 +347,18 @@ def main():
|
|||
if action in ['command']:
|
||||
if not command:
|
||||
module.fail_json(msg='no command passed for command action')
|
||||
######################################################################
|
||||
if action == 'acknowledge':
|
||||
# Make sure there's an actual service selected
|
||||
if not services:
|
||||
module.fail_json(msg='no service selected to acknowledge')
|
||||
|
||||
##################################################################
|
||||
if action == 'service_check':
|
||||
# Make sure there's an actual service selected
|
||||
if not services:
|
||||
module.fail_json(msg='no service selected to check')
|
||||
|
||||
##################################################################
|
||||
if not cmdfile:
|
||||
module.fail_json(msg='unable to locate nagios.cfg')
|
||||
|
@ -358,7 +395,10 @@ class Nagios(object):
|
|||
self.comment = kwargs['comment']
|
||||
self.host = kwargs['host']
|
||||
self.servicegroup = kwargs['servicegroup']
|
||||
if kwargs['start'] is not None:
|
||||
self.start = int(kwargs['start'])
|
||||
else:
|
||||
self.start = None
|
||||
self.minutes = kwargs['minutes']
|
||||
self.cmdfile = kwargs['cmdfile']
|
||||
self.command = kwargs['command']
|
||||
|
@ -448,6 +488,44 @@ class Nagios(object):
|
|||
|
||||
return dt_str
|
||||
|
||||
def _fmt_ack_str(self, cmd, host, author=None,
|
||||
comment=None, svc=None, sticky=0, notify=1, persistent=0):
|
||||
"""
|
||||
Format an external-command acknowledge string.
|
||||
|
||||
cmd - Nagios command ID
|
||||
host - Host schedule downtime on
|
||||
author - Name to file the downtime as
|
||||
comment - Reason for running this command (upgrade, reboot, etc)
|
||||
svc - Service to schedule downtime for, omit when for host downtime
|
||||
sticky - the acknowledgement will remain until the host returns to an UP state if set to 1
|
||||
notify - a notification will be sent out to contacts
|
||||
persistent - survive across restarts of the Nagios process
|
||||
|
||||
Syntax: [submitted] COMMAND;<host_name>;[<service_description>]
|
||||
<sticky>;<notify>;<persistent>;<author>;<comment>
|
||||
"""
|
||||
|
||||
entry_time = self._now()
|
||||
hdr = "[%s] %s;%s;" % (entry_time, cmd, host)
|
||||
|
||||
if not author:
|
||||
author = self.author
|
||||
|
||||
if not comment:
|
||||
comment = self.comment
|
||||
|
||||
if svc is not None:
|
||||
ack_args = [svc, str(sticky), str(notify), str(persistent), author, comment]
|
||||
else:
|
||||
# Downtime for a host if no svc specified
|
||||
ack_args = [str(sticky), str(notify), str(persistent), author, comment]
|
||||
|
||||
ack_arg_str = ";".join(ack_args)
|
||||
ack_str = hdr + ack_arg_str + "\n"
|
||||
|
||||
return ack_str
|
||||
|
||||
def _fmt_dt_del_str(self, cmd, host, svc=None, start=None, comment=None):
|
||||
"""
|
||||
Format an external-command downtime deletion string.
|
||||
|
@ -489,6 +567,31 @@ class Nagios(object):
|
|||
|
||||
return dt_del_str
|
||||
|
||||
def _fmt_chk_str(self, cmd, host, svc=None, start=None):
|
||||
"""
|
||||
Format an external-command downtime deletion string.
|
||||
|
||||
cmd - Nagios command ID
|
||||
host - Host to check service from
|
||||
svc - Service to check
|
||||
start - check time
|
||||
|
||||
Syntax: [submitted] COMMAND;<host_name>;[<service_description>];<check_time>
|
||||
"""
|
||||
|
||||
entry_time = self._now()
|
||||
hdr = "[%s] %s;%s;" % (entry_time, cmd, host)
|
||||
|
||||
if start is None:
|
||||
start = entry_time + 3
|
||||
|
||||
chk_args = [svc, str(start)]
|
||||
|
||||
chk_arg_str = ";".join(chk_args)
|
||||
chk_str = hdr + chk_arg_str + "\n"
|
||||
|
||||
return chk_str
|
||||
|
||||
def _fmt_notif_str(self, cmd, host=None, svc=None):
|
||||
"""
|
||||
Format an external-command notification string.
|
||||
|
@ -552,6 +655,62 @@ class Nagios(object):
|
|||
dt_cmd_str = self._fmt_dt_str(cmd, host, minutes, start=start)
|
||||
self._write_command(dt_cmd_str)
|
||||
|
||||
def acknowledge_svc_problem(self, host, services=None):
|
||||
"""
|
||||
This command is used to acknowledge a particular
|
||||
service problem.
|
||||
|
||||
By acknowledging the current problem, future notifications
|
||||
for the same servicestate are disabled
|
||||
|
||||
Syntax: ACKNOWLEDGE_SVC_PROBLEM;<host_name>;<service_description>;
|
||||
<sticky>;<notify>;<persistent>;<author>;<comment>
|
||||
"""
|
||||
|
||||
cmd = "ACKNOWLEDGE_SVC_PROBLEM"
|
||||
|
||||
if services is None:
|
||||
services = []
|
||||
|
||||
for service in services:
|
||||
ack_cmd_str = self._fmt_ack_str(cmd, host, svc=service)
|
||||
self._write_command(ack_cmd_str)
|
||||
|
||||
def acknowledge_host_problem(self, host):
|
||||
"""
|
||||
This command is used to acknowledge a particular
|
||||
host problem.
|
||||
|
||||
By acknowledging the current problem, future notifications
|
||||
for the same servicestate are disabled
|
||||
|
||||
Syntax: ACKNOWLEDGE_HOST_PROBLEM;<host_name>;<sticky>;<notify>;
|
||||
<persistent>;<author>;<comment>
|
||||
"""
|
||||
|
||||
cmd = "ACKNOWLEDGE_HOST_PROBLEM"
|
||||
ack_cmd_str = self._fmt_ack_str(cmd, host)
|
||||
self._write_command(ack_cmd_str)
|
||||
|
||||
def schedule_forced_svc_check(self, host, services=None):
|
||||
"""
|
||||
This command is used to check a particular
|
||||
service .
|
||||
|
||||
Schedules a forced active check of a particular service
|
||||
|
||||
Syntax: SCHEDULE_FORCED_SVC_CHECK;<host_name>;<service_description>;<check_time>
|
||||
"""
|
||||
|
||||
cmd = "SCHEDULE_FORCED_SVC_CHECK"
|
||||
|
||||
if services is None:
|
||||
services = []
|
||||
|
||||
for service in services:
|
||||
chk_cmd_str = self._fmt_chk_str(cmd, host, svc=service)
|
||||
self._write_command(chk_cmd_str)
|
||||
|
||||
def schedule_host_svc_downtime(self, host, minutes=30, start=None):
|
||||
"""
|
||||
This command is used to schedule downtime for
|
||||
|
@ -1020,6 +1179,12 @@ class Nagios(object):
|
|||
minutes=self.minutes,
|
||||
start=self.start)
|
||||
|
||||
elif self.action == 'acknowledge':
|
||||
if self.services == 'host':
|
||||
self.acknowledge_host_problem(self.host)
|
||||
else:
|
||||
self.acknowledge_svc_problem(self.host, services=self.services)
|
||||
|
||||
elif self.action == 'delete_downtime':
|
||||
if self.services == 'host':
|
||||
self.delete_host_downtime(self.host)
|
||||
|
@ -1028,6 +1193,9 @@ class Nagios(object):
|
|||
else:
|
||||
self.delete_host_downtime(self.host, services=self.services)
|
||||
|
||||
elif self.action == 'service_check':
|
||||
self.schedule_forced_svc_check(self.host, services=self.services)
|
||||
|
||||
elif self.action == "servicegroup_host_downtime":
|
||||
if self.servicegroup:
|
||||
self.schedule_servicegroup_host_downtime(servicegroup=self.servicegroup, minutes=self.minutes, start=self.start)
|
||||
|
|
Loading…
Reference in a new issue