mirror of
https://github.com/ansible-collections/community.general.git
synced 2024-09-14 20:13:21 +02:00
Add reboot action plugin (#35205)
* Update docs * Add reboot action plugin Refactor win_reboot so it is subclassed from reboot * Use new connection methods * Test fixes * Use better uptime command for Linux Use who -b to get the last time the system was booted rather than uptime, which changes every second. * Use distribution specific commands and flags Query the managed node to determine its distribution, then set the appropriate command and flags. * Tune debug messages a bit * Update module docs with details about pre_reboot_delay * Ensure that post_reboot_delay is a positive number * Remove the stringification * Add integration tests * Make sure aliases are honored * Handle systems that have an incorrect last boot time SystemD and fake-hwclock do not properly set the last boot time and instead always set it to epoch. Use a different command if that is the case. * Copyright and encoding fixes * Minor fixes based on feedback * Add exponential backoff to success check method * Update integration test Skip the integration test if it would try to reboot the control node. We need a new mechanism to account for this scenario in ansible-test, so tests must currently be run manually for this plugin. * Update integration test Skip the integration test if it would try to reboot the control node. We need a new mechanism to account for this scenario in ansible-test, so tests must currently be run manually for this plugin. * Fail early when running with local connection * Update docs based on feedback * minor refactoring, state mgmt changes
This commit is contained in:
parent
60e3af42d5
commit
4d9218cec4
5 changed files with 459 additions and 175 deletions
81
lib/ansible/modules/system/reboot.py
Normal file
81
lib/ansible/modules/system/reboot.py
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Copyright: (c) 2018, Ansible Project
|
||||||
|
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
|
||||||
|
|
||||||
|
from __future__ import (absolute_import, division, print_function)
|
||||||
|
__metaclass__ = type
|
||||||
|
|
||||||
|
# Module metadata consumed by Ansible's documentation tooling.
ANSIBLE_METADATA = {'metadata_version': '1.1',
                    'status': ['preview'],
                    'supported_by': 'core'}

# YAML description of the module and its options.
DOCUMENTATION = r'''
module: reboot
short_description: Reboot a machine
description:
    - Reboot a machine, wait for it to go down, come back up, and respond to commands.
version_added: "2.7"
options:
  pre_reboot_delay:
    description:
      - Seconds for shutdown to wait before requesting reboot.
      - On Linux and macOS, this is converted to minutes and rounded down. If less than 60, it will be set to 0.
      - On Solaris and FreeBSD, this will be seconds.
    default: 0
    type: int
  post_reboot_delay:
    description:
      - Seconds to wait after the reboot was successful and the connection was re-established.
      - This is useful if you want to wait for something to settle despite your connection already working.
    default: 0
    type: int
  reboot_timeout:
    description:
      - Maximum seconds to wait for machine to reboot and respond to a test command.
      - This timeout is evaluated separately for both network connection and test command success so the
        maximum execution time for the module is twice this amount.
    default: 600
    type: int
  connect_timeout:
    description:
      - Maximum seconds to wait for a successful connection to the managed hosts before trying again.
      - If unspecified, the default setting for the underlying connection plugin is used.
    type: int
  test_command:
    description:
      - Command to run on the rebooted host and expect success from to determine the machine is ready for
        further tasks.
    default: whoami
    type: str
  msg:
    description:
      - Message to display to users before reboot.
    default: Reboot initiated by Ansible
    type: str
author:
    - Matt Davis (@nitzmahone)
    - Sam Doran (@samdoran)
'''

# Usage examples shown in the generated module documentation.
EXAMPLES = r'''
# Unconditionally reboot the machine with all defaults
- reboot:

# Reboot a slow machine that might have lots of updates to apply
- reboot:
    reboot_timeout: 3600
'''

# Description of the values the module returns.
RETURN = r'''
rebooted:
  description: true if the machine was rebooted
  returned: always
  type: bool
  sample: true
elapsed:
  description: The number of seconds that elapsed waiting for the system to be rebooted.
  returned: always
  type: int
  sample: 23
'''
|
287
lib/ansible/plugins/action/reboot.py
Normal file
287
lib/ansible/plugins/action/reboot.py
Normal file
|
@ -0,0 +1,287 @@
|
||||||
|
# (c) 2016-2018, Matt Davis <mdavis@ansible.com>
|
||||||
|
# (c) 2018, Sam Doran <sdoran@redhat.com>
|
||||||
|
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
|
||||||
|
|
||||||
|
from __future__ import (absolute_import, division, print_function)
|
||||||
|
__metaclass__ = type
|
||||||
|
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
from ansible.errors import AnsibleError
|
||||||
|
from ansible.plugins.action import ActionBase
|
||||||
|
from ansible.module_utils._text import to_native, to_text
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
from __main__ import display
|
||||||
|
except ImportError:
|
||||||
|
from ansible.utils.display import Display
|
||||||
|
display = Display()
|
||||||
|
|
||||||
|
|
||||||
|
class TimedOutException(Exception):
    """Raised when a retried action does not succeed before its deadline."""
|
||||||
|
|
||||||
|
|
||||||
|
class ActionModule(ActionBase):
    """Action plugin that reboots a managed node and waits for it to return.

    The plugin records the node's boot time, issues a platform-specific
    shutdown command, then polls until the reported boot time changes and a
    test command succeeds. Behaviour is tuned through the ``DEFAULT_*`` class
    constants and the task arguments read in the individual methods.
    """

    TRANSFERS_FILES = False

    DEFAULT_REBOOT_TIMEOUT = 600
    DEFAULT_CONNECT_TIMEOUT = None
    DEFAULT_PRE_REBOOT_DELAY = 0
    DEFAULT_POST_REBOOT_DELAY = 0
    DEFAULT_TEST_COMMAND = 'whoami'
    DEFAULT_BOOT_TIME_COMMAND = 'who -b'
    DEFAULT_REBOOT_MESSAGE = 'Reboot initiated by Ansible'
    DEFAULT_SHUTDOWN_COMMAND = 'shutdown'
    DEFAULT_SUDOABLE = True

    # Maps a removed argument name to the Ansible version that dropped it.
    DEPRECATED_ARGS = {}

    # Shutdown executable keyed by the lower-cased output of ``uname``.
    SHUTDOWN_COMMANDS = {
        'linux': DEFAULT_SHUTDOWN_COMMAND,
        'freebsd': DEFAULT_SHUTDOWN_COMMAND,
        'sunos': '/usr/sbin/shutdown',
        'darwin': '/sbin/shutdown',
    }

    # Argument templates keyed like SHUTDOWN_COMMANDS; filled in by construct_command().
    SHUTDOWN_COMMAND_ARGS = {
        'linux': '-r {delay_min} "{message}"',
        'freebsd': '-r +{delay_sec}s "{message}"',
        'sunos': '-y -g {delay_sec} -r "{message}"',
        'darwin': '-r +{delay_min_macos} "{message}"'
    }

    def __init__(self, *args, **kwargs):
        """Initialise the base action and the per-run state attributes."""
        super(ActionModule, self).__init__(*args, **kwargs)

        self._original_connection_timeout = None
        self._previous_boot_time = None

    def deprecated_args(self):
        """Warn about every entry of DEPRECATED_ARGS present in the task arguments."""
        for arg, version in self.DEPRECATED_ARGS.items():
            if self._task.args.get(arg) is not None:
                display.warning("Since Ansible %s, %s is no longer a valid option for %s" % (version, arg, self._task.action))

    def construct_command(self):
        """Build the full reboot command line for the managed node.

        Runs ``uname`` on the node and uses its lower-cased output to pick the
        shutdown executable and argument template; unknown platforms fall
        back to the Linux entries.

        Returns:
            The command string to execute on the managed node.
        """
        # Determine the system distribution in order to use the correct shutdown command arguments
        uname_result = self._low_level_execute_command('uname')
        distribution = uname_result['stdout'].strip().lower()

        # Both lookups must fall back to their own table's Linux entry; using the
        # argument template as the command name would yield an invalid command line.
        shutdown_command = self.SHUTDOWN_COMMANDS.get(distribution, self.SHUTDOWN_COMMANDS['linux'])
        shutdown_command_args = self.SHUTDOWN_COMMAND_ARGS.get(distribution, self.SHUTDOWN_COMMAND_ARGS['linux'])

        pre_reboot_delay = int(self._task.args.get('pre_reboot_delay', self.DEFAULT_PRE_REBOOT_DELAY))
        if pre_reboot_delay < 0:
            pre_reboot_delay = 0

        # Convert seconds to minutes for Linux. If less than 60, set it to 0 except for macOS which will
        # sever the connection too quickly if set to 0, so set that to 1.
        # We could simplify this by setting them both to 1, but I think of all the time that
        # people will lose waiting for that extra 1 minute delay and want to give them their
        # lives back.
        delay_min = pre_reboot_delay // 60
        # 'or' (not bitwise '|') so only a zero delay is bumped to one minute.
        delay_min_macos = delay_min or 1
        msg = self._task.args.get('msg', self.DEFAULT_REBOOT_MESSAGE)

        shutdown_command_args = shutdown_command_args.format(
            delay_sec=pre_reboot_delay,
            delay_min=delay_min,
            delay_min_macos=delay_min_macos,
            message=msg,
        )

        reboot_command = '%s %s' % (shutdown_command, shutdown_command_args)
        return reboot_command

    def get_system_boot_time(self):
        """Return the stripped output of the boot time command on the managed node.

        Raises:
            AnsibleError: if the boot time command exits with a non-zero code.
        """
        command_result = self._low_level_execute_command(self.DEFAULT_BOOT_TIME_COMMAND, sudoable=self.DEFAULT_SUDOABLE)

        # For single board computers, e.g., Raspberry Pi, that lack a real time clock and are using fake-hwclock
        # launched by systemd, the update of utmp/wtmp is not done correctly.
        # Fall back to using uptime -s for those systems.
        # https://github.com/systemd/systemd/issues/6057
        if '1970-01-01 00:00' in command_result['stdout']:
            command_result = self._low_level_execute_command('uptime -s', sudoable=self.DEFAULT_SUDOABLE)

        if command_result['rc'] != 0:
            # command_result is a dict, so the return code must be read by key.
            raise AnsibleError("%s: failed to get host boot time info, rc: %d, stdout: %s, stderr: %s"
                               % (self._task.action, command_result['rc'],
                                  to_native(command_result['stdout']), to_native(command_result['stderr'])))

        return command_result['stdout'].strip()

    def check_boot_time(self):
        """Raise unless the node reports a boot time different from the recorded one.

        Intended to be retried by do_until_success_or_timeout(): any exception
        (connection failure, unchanged or empty boot time) means "not yet".
        """
        display.vvv("%s: attempting to get system boot time" % self._task.action)
        connect_timeout = self._task.args.get('connect_timeout', self._task.args.get('connect_timeout_sec', self.DEFAULT_CONNECT_TIMEOUT))

        # override connection timeout from defaults to custom value
        if connect_timeout:
            try:
                self._connection.set_option("connection_timeout", connect_timeout)
                self._connection.reset()
            except AttributeError:
                display.warning("Connection plugin does not allow the connection timeout to be overridden")

        # Any failure here propagates to the caller, which treats it as a retryable miss.
        current_boot_time = self.get_system_boot_time()

        # FreeBSD returns an empty string immediately before reboot so adding a length
        # check to prevent prematurely assuming system has rebooted
        if not current_boot_time or current_boot_time == self._previous_boot_time:
            raise Exception("boot time has not changed")

    def run_test_command(self, **kwargs):
        """Run the post-reboot test command on the managed node.

        Returns:
            A dict whose ``msg`` is the command's stdout when it succeeds.

        Raises:
            RuntimeError: if the command exits non-zero, so that
                do_until_success_or_timeout() retries it.
        """
        test_command = self._task.args.get('test_command', self.DEFAULT_TEST_COMMAND)
        display.vvv("%s: attempting post-reboot test command '%s'" % (self._task.action, test_command))
        command_result = self._low_level_execute_command(test_command, sudoable=self.DEFAULT_SUDOABLE)

        if command_result['rc'] != 0:
            # A failure must raise: the retry loop ignores return values and only
            # reacts to exceptions.
            raise RuntimeError('test command failed: %s %s' % (
                to_native(command_result['stderr']), to_native(command_result['stdout'])))

        result = {}
        result['msg'] = to_native(command_result['stdout'])
        return result

    def do_until_success_or_timeout(self, action, reboot_timeout, action_desc):
        """Call ``action`` repeatedly until it stops raising or the timeout expires.

        Args:
            action: callable taking no arguments; any exception counts as a failure.
            reboot_timeout: maximum number of seconds to keep retrying.
            action_desc: label used in debug and timeout messages; falsy disables
                the debug output.

        Raises:
            TimedOutException: if ``action`` never succeeded within ``reboot_timeout``.
        """
        max_end_time = datetime.utcnow() + timedelta(seconds=reboot_timeout)

        fail_count = 0
        max_fail_sleep = 12

        while datetime.utcnow() < max_end_time:
            try:
                action()
                if action_desc:
                    display.debug('%s: %s success' % (self._task.action, action_desc))
                return
            except Exception as e:
                # Use exponential backoff with a max timeout, plus a little bit of randomness
                random_int = random.randint(0, 1000) / 1000
                fail_sleep = 2 ** fail_count + random_int
                if fail_sleep > max_fail_sleep:
                    fail_sleep = max_fail_sleep + random_int
                if action_desc:
                    display.debug("{0}: {1} fail '{2}', retrying in {3:.4} seconds...".format(self._task.action, action_desc, to_text(e), fail_sleep))
                fail_count += 1
                time.sleep(fail_sleep)

        raise TimedOutException('Timed out waiting for %s' % (action_desc))

    def perform_reboot(self):
        """Issue the reboot command and report whether it was accepted.

        Returns:
            A dict with ``start`` (UTC datetime taken after the command returned)
            and ``failed``; on failure it also carries ``rebooted`` and ``msg``.
        """
        display.debug("%s: rebooting server" % self._task.action)

        remote_command = self.construct_command()
        reboot_result = self._low_level_execute_command(remote_command, sudoable=self.DEFAULT_SUDOABLE)
        result = {}
        result['start'] = datetime.utcnow()

        if reboot_result['rc'] != 0:
            result['failed'] = True
            result['rebooted'] = False
            result['msg'] = "Shutdown command failed. Error was %s, %s" % (
                to_native(reboot_result['stdout'].strip()), to_native(reboot_result['stderr'].strip()))
            return result

        result['failed'] = False

        # attempt to store the original connection_timeout option var so it can be reset after
        self._original_connection_timeout = None
        try:
            self._original_connection_timeout = self._connection.get_option('connection_timeout')
        except AnsibleError:
            display.debug("%s: connect_timeout connection option has not been set" % self._task.action)

        post_reboot_delay = int(self._task.args.get('post_reboot_delay', self._task.args.get('post_reboot_delay_sec', self.DEFAULT_POST_REBOOT_DELAY)))
        if post_reboot_delay < 0:
            post_reboot_delay = 0

        # NOTE(review): this sleep runs right after the shutdown command is issued,
        # before the reboot is validated - confirm this is the intended placement.
        if post_reboot_delay != 0:
            display.vvv("%s: waiting an additional %d seconds" % (self._task.action, post_reboot_delay))
            time.sleep(post_reboot_delay)

        return result

    def validate_reboot(self):
        """Wait for the boot time to change and the test command to succeed.

        Returns:
            A dict with ``rebooted`` and ``changed`` set to True on success, or
            with ``failed``, ``rebooted`` and ``msg`` when a wait timed out.
        """
        display.debug('%s: Validating reboot' % self._task.action)
        result = {}

        try:
            # keep on checking system boot_time with short connection responses
            reboot_timeout = int(self._task.args.get('reboot_timeout', self._task.args.get('reboot_timeout_sec', self.DEFAULT_REBOOT_TIMEOUT)))
            connect_timeout = self._task.args.get('connect_timeout', self._task.args.get('connect_timeout_sec', self.DEFAULT_CONNECT_TIMEOUT))
            self.do_until_success_or_timeout(self.check_boot_time, reboot_timeout, action_desc="boot_time check")

            if connect_timeout:
                # reset the connection to clear the custom connection timeout
                try:
                    # Restore the value saved by perform_reboot(), not the custom one.
                    self._connection.set_option("connection_timeout", self._original_connection_timeout)
                    self._connection.reset()
                except (AnsibleError, AttributeError) as e:
                    display.debug("Failed to reset connection_timeout back to default: %s" % to_text(e))

            # finally run test command to ensure everything is working
            # FUTURE: add a stability check (system must remain up for N seconds) to deal with self-multi-reboot updates
            self.do_until_success_or_timeout(self.run_test_command, reboot_timeout, action_desc="post-reboot test command")

            result['rebooted'] = True
            result['changed'] = True

        except TimedOutException as toex:
            result['failed'] = True
            result['rebooted'] = True
            result['msg'] = to_text(toex)
            return result

        return result

    def run(self, tmp=None, task_vars=None):
        """Entry point: reboot the managed node and wait for it to come back.

        Returns:
            The task result dict; it includes ``elapsed`` (whole seconds) whenever
            the reboot command was issued, plus ``rebooted``/``changed``/``failed``
            as set by the individual steps.
        """
        self._supports_check_mode = True
        self._supports_async = True

        # If running with local connection, fail so we don't reboot ourself
        if self._connection.transport == 'local':
            msg = 'Running {0} with local connection would reboot the control node.'.format(self._task.action)
            return dict(changed=False, elapsed=0, rebooted=False, failed=True, msg=msg)

        if self._play_context.check_mode:
            return dict(changed=True, elapsed=0, rebooted=True)

        if task_vars is None:
            task_vars = dict()

        self.deprecated_args()

        result = super(ActionModule, self).run(tmp, task_vars)

        if result.get('skipped', False) or result.get('failed', False):
            return result

        # Get current boot time
        try:
            self._previous_boot_time = self.get_system_boot_time()
        except Exception as e:
            result['failed'] = True
            # Use the documented 'rebooted' key, consistent with every other return path.
            result['rebooted'] = False
            result['msg'] = to_text(e)
            return result

        # Initiate reboot
        reboot_result = self.perform_reboot()

        if reboot_result['failed']:
            result = reboot_result
            elapsed = datetime.utcnow() - reboot_result['start']
            # total_seconds() keeps whole days that timedelta.seconds would drop.
            result['elapsed'] = int(elapsed.total_seconds())
            return result

        # Make sure reboot was successful
        result = self.validate_reboot()

        elapsed = datetime.utcnow() - reboot_result['start']
        result['elapsed'] = int(elapsed.total_seconds())

        return result
|
|
@ -1,16 +1,15 @@
|
||||||
# (c) 2016, Matt Davis <mdavis@ansible.com>
|
# (c) 2018, Matt Davis <mdavis@ansible.com>
|
||||||
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
|
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
|
||||||
|
|
||||||
from __future__ import (absolute_import, division, print_function)
|
from __future__ import (absolute_import, division, print_function)
|
||||||
__metaclass__ = type
|
__metaclass__ = type
|
||||||
|
|
||||||
import time
|
from datetime import datetime
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
from ansible.errors import AnsibleError
|
from ansible.errors import AnsibleError
|
||||||
from ansible.plugins.action import ActionBase
|
from ansible.plugins.action import ActionBase
|
||||||
from ansible.module_utils._text import to_native
|
from ansible.module_utils._text import to_native
|
||||||
|
from ansible.plugins.action.reboot import ActionModule as RebootActionModule
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from __main__ import display
|
from __main__ import display
|
||||||
|
@ -23,201 +22,66 @@ class TimedOutException(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class ActionModule(ActionBase):
|
class ActionModule(RebootActionModule, ActionBase):
|
||||||
TRANSFERS_FILES = False
|
TRANSFERS_FILES = False
|
||||||
|
|
||||||
DEFAULT_REBOOT_TIMEOUT = 600
|
|
||||||
DEFAULT_CONNECT_TIMEOUT = 5
|
DEFAULT_CONNECT_TIMEOUT = 5
|
||||||
DEFAULT_PRE_REBOOT_DELAY = 2
|
DEFAULT_PRE_REBOOT_DELAY = 2
|
||||||
DEFAULT_POST_REBOOT_DELAY = 0
|
DEFAULT_BOOT_TIME_COMMAND = "(Get-WmiObject -ClassName Win32_OperatingSystem).LastBootUpTime"
|
||||||
DEFAULT_TEST_COMMAND = 'whoami'
|
DEFAULT_SHUTDOWN_COMMAND_ARGS = '/r /t %d /c "%s"'
|
||||||
DEFAULT_REBOOT_MESSAGE = 'Reboot initiated by Ansible.'
|
DEFAULT_SUDOABLE = False
|
||||||
|
|
||||||
def get_system_uptime(self):
|
DEPRECATED_ARGS = {
|
||||||
uptime_command = "(Get-WmiObject -ClassName Win32_OperatingSystem).LastBootUpTime"
|
'shutdown_timeout': '2.5',
|
||||||
(rc, stdout, stderr) = self._connection.exec_command(uptime_command)
|
'shutdown_timeout_sec': '2.5',
|
||||||
|
}
|
||||||
|
|
||||||
if rc != 0:
|
def construct_command(self):
|
||||||
raise Exception("win_reboot: failed to get host uptime info, rc: %d, stdout: %s, stderr: %s"
|
shutdown_command = self.DEFAULT_SHUTDOWN_COMMAND
|
||||||
% (rc, stdout, stderr))
|
pre_reboot_delay = int(self._task.args.get('pre_reboot_delay', self._task.args.get('pre_reboot_delay_sec', self.DEFAULT_PRE_REBOOT_DELAY)))
|
||||||
|
msg = self._task.args.get('msg', self.DEFAULT_REBOOT_MESSAGE)
|
||||||
|
shutdown_command_args = self.DEFAULT_SHUTDOWN_COMMAND_ARGS % (pre_reboot_delay, msg)
|
||||||
|
|
||||||
return stdout
|
reboot_command = '%s %s' % (shutdown_command, shutdown_command_args)
|
||||||
|
return reboot_command
|
||||||
|
|
||||||
def do_until_success_or_timeout(self, what, timeout, what_desc, fail_sleep=1):
|
def perform_reboot(self):
|
||||||
max_end_time = datetime.utcnow() + timedelta(seconds=timeout)
|
display.debug("Rebooting server")
|
||||||
|
|
||||||
exc = ""
|
remote_command = self.construct_command()
|
||||||
while datetime.utcnow() < max_end_time:
|
reboot_result = self._low_level_execute_command(remote_command, sudoable=self.DEFAULT_SUDOABLE)
|
||||||
try:
|
|
||||||
what()
|
|
||||||
if what_desc:
|
|
||||||
display.debug("win_reboot: %s success" % what_desc)
|
|
||||||
return
|
|
||||||
except Exception as e:
|
|
||||||
exc = e
|
|
||||||
if what_desc:
|
|
||||||
display.debug("win_reboot: %s fail '%s' (expected), retrying in %d seconds..." % (what_desc, to_native(e), fail_sleep))
|
|
||||||
time.sleep(fail_sleep)
|
|
||||||
|
|
||||||
raise TimedOutException("timed out waiting for %s: %s" % (what_desc, exc))
|
pre_reboot_delay = int(self._task.args.get('pre_reboot_delay', self._task.args.get('pre_reboot_delay_sec', self.DEFAULT_PRE_REBOOT_DELAY)))
|
||||||
|
|
||||||
def run(self, tmp=None, task_vars=None):
|
|
||||||
|
|
||||||
self._supports_check_mode = True
|
|
||||||
self._supports_async = True
|
|
||||||
|
|
||||||
if self._play_context.check_mode:
|
|
||||||
return dict(changed=True, elapsed=0, rebooted=True)
|
|
||||||
|
|
||||||
if task_vars is None:
|
|
||||||
task_vars = dict()
|
|
||||||
|
|
||||||
result = super(ActionModule, self).run(tmp, task_vars)
|
|
||||||
del tmp # tmp no longer has any effect
|
|
||||||
|
|
||||||
if result.get('skipped', False) or result.get('failed', False):
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Handle timeout parameters and its alias
|
|
||||||
deprecated_args = {
|
|
||||||
'shutdown_timeout': '2.5',
|
|
||||||
'shutdown_timeout_sec': '2.5',
|
|
||||||
}
|
|
||||||
for arg, version in deprecated_args.items():
|
|
||||||
if self._task.args.get(arg) is not None:
|
|
||||||
display.warning("Since Ansible %s, %s is no longer used with win_reboot" % (version, arg))
|
|
||||||
|
|
||||||
if self._task.args.get('connect_timeout') is not None:
|
|
||||||
connect_timeout = int(self._task.args.get('connect_timeout', self.DEFAULT_CONNECT_TIMEOUT))
|
|
||||||
else:
|
|
||||||
connect_timeout = int(self._task.args.get('connect_timeout_sec', self.DEFAULT_CONNECT_TIMEOUT))
|
|
||||||
|
|
||||||
if self._task.args.get('reboot_timeout') is not None:
|
|
||||||
reboot_timeout = int(self._task.args.get('reboot_timeout', self.DEFAULT_REBOOT_TIMEOUT))
|
|
||||||
else:
|
|
||||||
reboot_timeout = int(self._task.args.get('reboot_timeout_sec', self.DEFAULT_REBOOT_TIMEOUT))
|
|
||||||
|
|
||||||
if self._task.args.get('pre_reboot_delay') is not None:
|
|
||||||
pre_reboot_delay = int(self._task.args.get('pre_reboot_delay', self.DEFAULT_PRE_REBOOT_DELAY))
|
|
||||||
else:
|
|
||||||
pre_reboot_delay = int(self._task.args.get('pre_reboot_delay_sec', self.DEFAULT_PRE_REBOOT_DELAY))
|
|
||||||
|
|
||||||
if self._task.args.get('post_reboot_delay') is not None:
|
|
||||||
post_reboot_delay = int(self._task.args.get('post_reboot_delay', self.DEFAULT_POST_REBOOT_DELAY))
|
|
||||||
else:
|
|
||||||
post_reboot_delay = int(self._task.args.get('post_reboot_delay_sec', self.DEFAULT_POST_REBOOT_DELAY))
|
|
||||||
|
|
||||||
test_command = str(self._task.args.get('test_command', self.DEFAULT_TEST_COMMAND))
|
|
||||||
msg = str(self._task.args.get('msg', self.DEFAULT_REBOOT_MESSAGE))
|
|
||||||
|
|
||||||
# Get current uptime
|
|
||||||
try:
|
|
||||||
before_uptime = self.get_system_uptime()
|
|
||||||
except Exception as e:
|
|
||||||
result['failed'] = True
|
|
||||||
result['reboot'] = False
|
|
||||||
result['msg'] = to_native(e)
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Initiate reboot
|
|
||||||
display.vvv("rebooting server")
|
|
||||||
(rc, stdout, stderr) = self._connection.exec_command('shutdown /r /t %d /c "%s"' % (pre_reboot_delay, msg))
|
|
||||||
|
|
||||||
# Test for "A system shutdown has already been scheduled. (1190)" and handle it gracefully
|
# Test for "A system shutdown has already been scheduled. (1190)" and handle it gracefully
|
||||||
if rc == 1190 or (rc != 0 and b"(1190)" in stderr):
|
if reboot_result['rc'] == 1190 or (reboot_result['rc'] != 0 and b"(1190)" in reboot_result['stderr']):
|
||||||
display.warning('A scheduled reboot was pre-empted by Ansible.')
|
display.warning('A scheduled reboot was pre-empted by Ansible.')
|
||||||
|
|
||||||
# Try to abort (this may fail if it was already aborted)
|
# Try to abort (this may fail if it was already aborted)
|
||||||
(rc, stdout1, stderr1) = self._connection.exec_command('shutdown /a')
|
result1 = self._low_level_execute_command('shutdown /a', sudoable=self.DEFAULT_SUDOABLE)
|
||||||
|
# (reboot_result['rc'], stdout1, stderr1) = self._connection.exec_command('shutdown /a')
|
||||||
|
|
||||||
# Initiate reboot again
|
# Initiate reboot again
|
||||||
(rc, stdout2, stderr2) = self._connection.exec_command('shutdown /r /t %d' % pre_reboot_delay)
|
result2 = self._connection.exec_command('shutdown /r /t %d' % pre_reboot_delay)
|
||||||
stdout += stdout1 + stdout2
|
# (reboot_result['rc'], stdout2, stderr2) = self._connection.exec_command('shutdown /r /t %d' % pre_reboot_delay)
|
||||||
stderr += stderr1 + stderr2
|
|
||||||
|
|
||||||
if rc != 0:
|
stdout = reboot_result['stdout'] + result1['stdout'] + result2['stdout']
|
||||||
|
stderr = reboot_result['stderr'] + result1['stderr'] + result2['stderr']
|
||||||
|
|
||||||
|
result = {}
|
||||||
|
if reboot_result['rc'] != 0:
|
||||||
result['failed'] = True
|
result['failed'] = True
|
||||||
result['rebooted'] = False
|
result['rebooted'] = False
|
||||||
result['msg'] = "Shutdown command failed, error text was '%s'" % to_native(stderr)
|
result['msg'] = "Shutdown command failed, error was: %s %s" % (to_native(stdout.strip()), to_native('stderr'.strip()))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
start = datetime.now()
|
result['failed'] = False
|
||||||
|
result['start'] = datetime.utcnow()
|
||||||
|
|
||||||
# Get the original connection_timeout option var so it can be reset after
|
# Get the original connection_timeout option var so it can be reset after
|
||||||
connection_timeout_orig = None
|
|
||||||
try:
|
try:
|
||||||
connection_timeout_orig = self._connection.get_option('connection_timeout')
|
self._original_connection_timeout = self._connection.get_option('connection_timeout')
|
||||||
except AnsibleError:
|
except AnsibleError:
|
||||||
display.debug("win_reboot: connection_timeout connection option has not been set")
|
display.debug("%s: connect_timeout connection option has not been set" % self._task.action)
|
||||||
|
|
||||||
try:
|
|
||||||
# keep on checking system uptime with short connection responses
|
|
||||||
def check_uptime():
|
|
||||||
display.vvv("attempting to get system uptime")
|
|
||||||
|
|
||||||
# override connection timeout from defaults to custom value
|
|
||||||
try:
|
|
||||||
self._connection.set_option("connection_timeout",
|
|
||||||
connect_timeout)
|
|
||||||
self._connection.reset()
|
|
||||||
except AttributeError:
|
|
||||||
display.warning("Connection plugin does not allow the "
|
|
||||||
"connection timeout to be overridden")
|
|
||||||
|
|
||||||
# try and get uptime
|
|
||||||
try:
|
|
||||||
current_uptime = self.get_system_uptime()
|
|
||||||
except Exception as e:
|
|
||||||
raise e
|
|
||||||
|
|
||||||
if current_uptime == before_uptime:
|
|
||||||
raise Exception("uptime has not changed")
|
|
||||||
|
|
||||||
self.do_until_success_or_timeout(check_uptime, reboot_timeout, what_desc="reboot uptime check success")
|
|
||||||
|
|
||||||
# reset the connection to clear the custom connection timeout
|
|
||||||
try:
|
|
||||||
self._connection.set_option("connection_timeout",
|
|
||||||
connection_timeout_orig)
|
|
||||||
self._connection.reset()
|
|
||||||
except (AnsibleError, AttributeError) as e:
|
|
||||||
display.debug("Failed to reset connection_timeout back to default: %s" % to_native(e))
|
|
||||||
|
|
||||||
# finally run test command to ensure everything is working
|
|
||||||
def run_test_command():
|
|
||||||
display.vvv("attempting post-reboot test command '%s'" % test_command)
|
|
||||||
try:
|
|
||||||
(rc, stdout, stderr) = self._connection.exec_command(test_command)
|
|
||||||
except Exception as e:
|
|
||||||
# in case of a failure trying to execute the command
|
|
||||||
# (another reboot occurred) we need to reset the connection
|
|
||||||
# to make sure we are not re-using the same shell id
|
|
||||||
try:
|
|
||||||
self._connection.reset()
|
|
||||||
except AttributeError:
|
|
||||||
pass
|
|
||||||
raise
|
|
||||||
else:
|
|
||||||
if rc != 0:
|
|
||||||
raise Exception("test command failed, stdout: '%s', stderr: '%s', rc: %d"
|
|
||||||
% (stdout, stderr, rc))
|
|
||||||
|
|
||||||
# FUTURE: add a stability check (system must remain up for N seconds) to deal with self-multi-reboot updates
|
|
||||||
|
|
||||||
self.do_until_success_or_timeout(run_test_command, reboot_timeout, what_desc="post-reboot test command success")
|
|
||||||
|
|
||||||
result['rebooted'] = True
|
|
||||||
result['changed'] = True
|
|
||||||
|
|
||||||
except TimedOutException as toex:
|
|
||||||
result['failed'] = True
|
|
||||||
result['rebooted'] = True
|
|
||||||
result['msg'] = to_native(toex)
|
|
||||||
|
|
||||||
if post_reboot_delay != 0:
|
|
||||||
display.vvv("win_reboot: waiting an additional %d seconds" % post_reboot_delay)
|
|
||||||
time.sleep(post_reboot_delay)
|
|
||||||
|
|
||||||
elapsed = datetime.now() - start
|
|
||||||
result['elapsed'] = elapsed.seconds
|
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
2
test/integration/targets/reboot/aliases
Normal file
2
test/integration/targets/reboot/aliases
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
# No current way to split controller and test node
|
||||||
|
unsupported
|
50
test/integration/targets/reboot/tasks/main.yml
Normal file
50
test/integration/targets/reboot/tasks/main.yml
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
- block:
|
||||||
|
# This block can be removed once we have a mechanism in ansible-test to separate
|
||||||
|
# the control node from the managed node.
|
||||||
|
- block:
|
||||||
|
- name: Write temp file for sanity checking this is not the controller
|
||||||
|
copy:
|
||||||
|
content: 'I am the control node'
|
||||||
|
dest: /tmp/Anything-Nutlike-Nuzzle-Plow-Overdue
|
||||||
|
delegate_to: localhost
|
||||||
|
|
||||||
|
- name: See if the temp file exists on the managed node
|
||||||
|
stat:
|
||||||
|
path: /tmp/Anything-Nutlike-Nuzzle-Plow-Overdue
|
||||||
|
register: controller_temp_file
|
||||||
|
|
||||||
|
- name: EXPECT FAILURE | Check if the managed node is the control node
|
||||||
|
assert:
|
||||||
|
msg: >
|
||||||
|
This test must be run manually by modifying the inventory file to point
|
||||||
|
"{{ inventory_hostname }}" at a remote host rather than "{{ ansible_host }}".
|
||||||
|
Skipping reboot test.
|
||||||
|
that:
|
||||||
|
- not controller_temp_file.stat.exists
|
||||||
|
|
||||||
|
- name: Get current boot time
|
||||||
|
command: who -b
|
||||||
|
register: before_boot_time
|
||||||
|
|
||||||
|
- name: Reboot with default settings
|
||||||
|
reboot:
|
||||||
|
register: reboot_result
|
||||||
|
|
||||||
|
- name: Get current boot time
|
||||||
|
command: who -b
|
||||||
|
register: after_boot_time
|
||||||
|
|
||||||
|
- name: Ensure system was actually rebooted
|
||||||
|
assert:
|
||||||
|
that:
|
||||||
|
- reboot_result is changed
|
||||||
|
- reboot_result.elapsed > 10
|
||||||
|
- before_boot_time.stdout != after_boot_time.stdout
|
||||||
|
|
||||||
|
always:
|
||||||
|
- name: Cleanup temp file
|
||||||
|
file:
|
||||||
|
path: /tmp/Anything-Nutlike-Nuzzle-Plow-Overdue
|
||||||
|
state: absent
|
||||||
|
|
||||||
|
when: ansible_virtualization_type | default('') != 'docker'
|
Loading…
Reference in a new issue