summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dan Callaghan <dcallagh@redhat.com>, 2014-06-25 13:32:05 +1000
committer: Gerrit Code Review <gerrit@beaker-project.org>, 2014-06-26 05:40:57 +0000
commit: 0e69642aff512b0a8dbec875abd904534ce77042 (patch)
tree: 5b8428f748e944cbdc63843997504891dc06378a
parent: 715bbb42ade0043437bb32aa0a2aeedb2bb535e5 (diff)
beaker-provision: enforce timeout for fetching distro tree images
A sane timeout is particularly important for fetching images because, when beaker-provision shuts down, it waits for all commands to complete normally. That means it will wait for the images to be fetched -- which could take forever if no timeout is enforced and the server is misbehaving.

Bug: 1094553
Change-Id: Iaff0313f88b12f1ce708fc0a1b3e85c1fe08dc32
-rw-r--r-- IntegrationTests/labcontroller-test.cfg 1
-rwxr-xr-x IntegrationTests/src/bkr/inttest/http_server.py 17
-rw-r--r-- IntegrationTests/src/bkr/inttest/labcontroller/test_provision.py 59
-rw-r--r-- LabController/src/bkr/labcontroller/default.conf 3
-rw-r--r-- LabController/src/bkr/labcontroller/netboot.py 5
5 files changed, 71 insertions, 14 deletions
diff --git a/IntegrationTests/labcontroller-test.cfg b/IntegrationTests/labcontroller-test.cfg
index e23f85c..ed2edb3 100644
--- a/IntegrationTests/labcontroller-test.cfg
+++ b/IntegrationTests/labcontroller-test.cfg
@@ -20,6 +20,7 @@ TFTP_ROOT = "/tmp/beaker-tests-tftpboot"
URL_DOMAIN = "localhost"
RENEW_SESSION_INTERVAL= 10
SLEEP_TIME = 5
+IMAGE_FETCH_TIMEOUT = 2
LOG_MAXBYTES = 2150
LOG_BACKUPCOUNT=5
diff --git a/IntegrationTests/src/bkr/inttest/http_server.py b/IntegrationTests/src/bkr/inttest/http_server.py
index dd0fe8b..7581630 100755
--- a/IntegrationTests/src/bkr/inttest/http_server.py
+++ b/IntegrationTests/src/bkr/inttest/http_server.py
@@ -13,10 +13,18 @@ It also treats the following paths specially:
/redirect/<status>/<path>
Responds with a <status> redirect to <path>.
+
+/error/<status>[/...]
+ Responds with a <status> error. Extra path information is ignored.
+
+/slow/<delay>[/...]
+ Waits <delay> seconds and then responds with a dummy response body. Extra
+ path information is ignored.
"""
import os, os.path
import re
+import time
import shutil
import urlparse
import wsgiref.util, wsgiref.simple_server
@@ -37,6 +45,15 @@ class Application(object):
start_response('%s Redirected' % m.group(1), [('Location',
wsgiref.util.application_uri(environ) + m.group(2))])
return []
+ m = re.match(r'/error/(\d+)(/?.*)$', path_info)
+ if m:
+ start_response('%s Error' % m.group(1), [])
+ return []
+ m = re.match(r'/slow/(\d+)(/?.*)$', path_info)
+ if m:
+ time.sleep(int(m.group(1)))
+ start_response('204 No Content', [])
+ return []
localpath = os.path.join(self.basepath, path_info.lstrip('/'))
if os.path.isdir(localpath) and not environ['PATH_INFO'].endswith('/'):
start_response('301 Moved', [('Location',
diff --git a/IntegrationTests/src/bkr/inttest/labcontroller/test_provision.py b/IntegrationTests/src/bkr/inttest/labcontroller/test_provision.py
index 5447352..c1950a9 100644
--- a/IntegrationTests/src/bkr/inttest/labcontroller/test_provision.py
+++ b/IntegrationTests/src/bkr/inttest/labcontroller/test_provision.py
@@ -4,13 +4,15 @@
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
+import sys
import time
import logging
+import pkg_resources
from turbogears.database import session
from nose.plugins.skip import SkipTest
from bkr.server.model import LabController, PowerType, CommandStatus
from bkr.labcontroller.config import get_conf
-from bkr.inttest import data_setup
+from bkr.inttest import data_setup, Process
from bkr.inttest.assertions import wait_for_condition
from bkr.inttest.labcontroller import LabControllerTestCase, processes, \
daemons_running_externally
@@ -18,12 +20,13 @@ from bkr.server.model import System, User
log = logging.getLogger(__name__)
-def wait_for_commands_completed(system, timeout):
- def _commands_completed():
+def wait_for_commands_to_finish(system, timeout):
+ def _commands_finished():
with session.begin():
session.expire_all()
- return system.command_queue[0].status == CommandStatus.completed
- wait_for_condition(_commands_completed, timeout=timeout)
+ return system.command_queue[0].status in \
+ (CommandStatus.completed, CommandStatus.failed)
+ wait_for_condition(_commands_finished, timeout=timeout)
def assert_command_is_delayed(command, min_delay, timeout):
"""
@@ -98,7 +101,7 @@ class PowerTest(LabControllerTestCase):
system.power.power_id = u'' # make power script not sleep
system.power.delay_until = None
system.action_power(action=u'off', service=u'testdata')
- wait_for_commands_completed(system, timeout=10)
+ wait_for_commands_to_finish(system, timeout=10)
finally:
provision_output = provision_process.finish_output_capture()
self.assertIn('Entering quiescent period, delaying 1 seconds for '
@@ -110,7 +113,7 @@ class PowerTest(LabControllerTestCase):
system = System.by_id(system.id, User.by_user_name('admin'))
system.power.power_quiescent_period = 10
system.action_power(action=u'on', service=u'testdata')
- wait_for_commands_completed(system, timeout=15)
+ wait_for_commands_to_finish(system, timeout=15)
finally:
provision_output = provision_process.finish_output_capture()
self.assertIn('Entering quiescent period', provision_output)
@@ -131,7 +134,7 @@ class PowerTest(LabControllerTestCase):
system.power.power_id = u'' # make power script not sleep
system.power.delay_until = None
system.action_power(action=u'off', service=u'testdata')
- wait_for_commands_completed(system, timeout=10)
+ wait_for_commands_to_finish(system, timeout=10)
finally:
provision_output = provision_process.finish_output_capture()
self.assertIn('Entering quiescent period, delaying 1 seconds for '
@@ -143,7 +146,7 @@ class PowerTest(LabControllerTestCase):
with session.begin():
system = System.by_id(system.id, User.by_user_name('admin'))
system.action_power(action=u'off', service=u'testdata')
- wait_for_commands_completed(system, timeout=10)
+ wait_for_commands_to_finish(system, timeout=10)
finally:
provision_output = provision_process.finish_output_capture()
self.assertNotIn('Entering queiscent period', provision_output)
@@ -166,7 +169,7 @@ class PowerTest(LabControllerTestCase):
system.action_power(action=u'off', service=u'testdata')
system.action_power(action=u'off', service=u'testdata')
system.action_power(action=u'off', service=u'testdata')
- wait_for_commands_completed(system, timeout=5 * power_sleep)
+ wait_for_commands_to_finish(system, timeout=5 * power_sleep)
with session.begin():
session.expire_all()
self.assertEquals(system.command_queue[0].status, CommandStatus.completed)
@@ -191,7 +194,7 @@ class PowerTest(LabControllerTestCase):
system.power.power_id = u'' # make power script not sleep
system.power.power_passwd = None
system.action_power(action=u'off', service=u'testdata')
- wait_for_commands_completed(system, timeout=2 * get_conf().get('SLEEP_TIME'))
+ wait_for_commands_to_finish(system, timeout=2 * get_conf().get('SLEEP_TIME'))
finally:
provision_output = provision_process.finish_output_capture()
# The None type is passed in from the db. Later in the code it is converted
@@ -211,9 +214,41 @@ class PowerTest(LabControllerTestCase):
system.power.power_id = u'' # make power script not sleep
system.power.power_passwd = u'dontleakmebro'
system.action_power(action=u'off', service=u'testdata')
- wait_for_commands_completed(system, timeout=2 * get_conf().get('SLEEP_TIME'))
+ wait_for_commands_to_finish(system, timeout=2 * get_conf().get('SLEEP_TIME'))
finally:
provision_output = provision_process.finish_output_capture()
self.assert_('Handling command' in provision_output, provision_output)
self.assert_('Launching power script' in provision_output, provision_output)
self.assert_(system.power.power_passwd not in provision_output, provision_output)
+
+class ConfigureNetbootTest(LabControllerTestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ cls.distro_server = Process('http_server.py', args=[sys.executable,
+ pkg_resources.resource_filename('bkr.inttest', 'http_server.py'),
+ '--base', '/notexist'],
+ listen_port=19998)
+ cls.distro_server.start()
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.distro_server.stop()
+
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1094553
+ def test_timeout_is_enforced_for_fetching_images(self):
+ with session.begin():
+ lc = self.get_lc()
+ system = data_setup.create_system(arch=u'x86_64', lab_controller=lc)
+ distro_tree = data_setup.create_distro_tree(arch=u'x86_64',
+ lab_controllers=[lc],
+ # /slow/600 means the response will be delayed 10 minutes
+ urls=['http://localhost:19998/slow/600/'])
+ system.configure_netboot(distro_tree=distro_tree,
+ kernel_options=u'', service=u'testdata')
+ wait_for_commands_to_finish(system, timeout=(2 * get_conf().get('SLEEP_TIME')
+ + get_conf().get('IMAGE_FETCH_TIMEOUT')))
+ self.assertEquals(system.command_queue[0].action, u'configure_netboot')
+ self.assertEquals(system.command_queue[0].status, CommandStatus.failed)
+ self.assertEquals(system.command_queue[0].new_value,
+ u'URLError: <urlopen error timed out>')
diff --git a/LabController/src/bkr/labcontroller/default.conf b/LabController/src/bkr/labcontroller/default.conf
index c5a60df..abe4f0a 100644
--- a/LabController/src/bkr/labcontroller/default.conf
+++ b/LabController/src/bkr/labcontroller/default.conf
@@ -4,6 +4,9 @@ PROXY_PID_FILE = "/var/run/beaker-lab-controller/beaker-proxy.pid"
# How long to sleep between polls.
SLEEP_TIME = 20
+# Timeout for fetching distro images.
+IMAGE_FETCH_TIMEOUT = 120
+
# How often to renew our session on the server
RENEW_SESSION_INTERVAL = 300
diff --git a/LabController/src/bkr/labcontroller/netboot.py b/LabController/src/bkr/labcontroller/netboot.py
index e28aa87..76c743b 100644
--- a/LabController/src/bkr/labcontroller/netboot.py
+++ b/LabController/src/bkr/labcontroller/netboot.py
@@ -110,12 +110,13 @@ def fetch_images(distro_tree_id, kernel_url, initrd_url, fqdn):
raise
# No luck there, so try something else...
+ timeout = get_conf().get('IMAGE_FETCH_TIMEOUT')
logger.debug('Fetching kernel %s for %s', kernel_url, fqdn)
with atomically_replaced_file(os.path.join(images_dir, 'kernel')) as dest:
- siphon(urllib2.urlopen(kernel_url), dest)
+ siphon(urllib2.urlopen(kernel_url, timeout=timeout), dest)
logger.debug('Fetching initrd %s for %s', initrd_url, fqdn)
with atomically_replaced_file(os.path.join(images_dir, 'initrd')) as dest:
- siphon(urllib2.urlopen(initrd_url), dest)
+ siphon(urllib2.urlopen(initrd_url, timeout=timeout), dest)
def have_images(fqdn):
return os.path.exists(os.path.join(get_tftp_root(), 'images', fqdn))