Replace master with feature/zuulv3

Change-Id: I99650ec1637f7864829600ec0e8feb11a5350c53
James E. Blair 2018-01-18 10:12:12 -08:00
commit 46706ae06b
130 changed files with 8677 additions and 7752 deletions

.gitignore

@ -12,6 +12,5 @@ doc/build/*
zuul/versioninfo
dist/
venv/
nodepool.yaml
*~
.*.swp


@ -2,3 +2,4 @@
host=review.openstack.org
port=29418
project=openstack-infra/nodepool.git


@ -1,26 +1,3 @@
- job:
name: nodepool-functional
parent: legacy-dsvm-base
run: playbooks/nodepool-functional/run.yaml
post-run: playbooks/nodepool-functional/post.yaml
timeout: 5400
required-projects:
- openstack-infra/devstack-gate
- openstack-infra/nodepool
- job:
name: nodepool-functional-src
parent: legacy-dsvm-base
run: playbooks/nodepool-functional-src/run.yaml
post-run: playbooks/nodepool-functional-src/post.yaml
timeout: 5400
required-projects:
- openstack-infra/devstack-gate
- openstack-infra/glean
- openstack-infra/nodepool
- openstack-infra/shade
- openstack/diskimage-builder
- job:
name: nodepool-functional-py35
parent: legacy-dsvm-base
@ -44,16 +21,23 @@
- openstack-infra/shade
- openstack/diskimage-builder
- job:
name: nodepool-zuul-functional
parent: legacy-base
run: playbooks/nodepool-zuul-functional/run.yaml
post-run: playbooks/nodepool-zuul-functional/post.yaml
timeout: 1800
required-projects:
- openstack-infra/nodepool
- openstack-infra/zuul
- project:
name: openstack-infra/nodepool
check:
jobs:
- tox-docs
- tox-cover
- tox-pep8
- tox-py27
- nodepool-functional:
voting: false
- nodepool-functional-src:
voting: false
- tox-py35
- nodepool-functional-py35:
voting: false
- nodepool-functional-py35-src:
@ -61,7 +45,7 @@
gate:
jobs:
- tox-pep8
- tox-py27
post:
- tox-py35
experimental:
jobs:
- publish-openstack-python-branch-tarball
- nodepool-zuul-functional


@ -47,29 +47,6 @@ If the cloud being used has no default_floating_pool defined in nova.conf,
you will need to define a pool name using the nodepool yaml file to use
floating ips.
Set up database for interactive testing:
.. code-block:: bash
mysql -u root
mysql> create database nodepool;
mysql> GRANT ALL ON nodepool.* TO 'nodepool'@'localhost';
mysql> flush privileges;
Set up database for unit tests:
.. code-block:: bash
mysql -u root
mysql> grant all privileges on *.* to 'openstack_citest'@'localhost' identified by 'openstack_citest' with grant option;
mysql> flush privileges;
mysql> create database openstack_citest;
Note that the script tools/test-setup.sh can be used for the step
above.
Export variable for your ssh key so you can log into the created instances:
.. code-block:: bash
@ -83,7 +60,7 @@ to contain your data):
export STATSD_HOST=127.0.0.1
export STATSD_PORT=8125
nodepoold -d -c tools/fake.yaml
nodepool-launcher -d -c tools/fake.yaml
All logging ends up in stdout.
@ -92,9 +69,3 @@ Use the following tool to check on progress:
.. code-block:: bash
nodepool image-list
After each run (the fake nova provider is only in-memory):
.. code-block:: bash
mysql> delete from snapshot_image; delete from node;


@ -1,8 +1,8 @@
# This is a cross-platform list tracking distribution packages needed by tests;
# see http://docs.openstack.org/infra/bindep/ for additional information.
mysql-client [test]
mysql-server [test]
libffi-devel [platform:rpm]
libffi-dev [platform:dpkg]
python-dev [platform:dpkg test]
python-devel [platform:rpm test]
zookeeperd [platform:dpkg test]


@ -3,6 +3,3 @@ kpartx
debootstrap
yum-utils
zookeeperd
zypper
# workarond for https://bugs.launchpad.net/ubuntu/+source/zypper/+bug/1639428
gnupg2


@ -14,8 +14,6 @@
# License for the specific language governing permissions and limitations
# under the License.
NODEPOOL_KEY=$HOME/.ssh/id_nodepool
NODEPOOL_KEY_NAME=root
NODEPOOL_PUBKEY=$HOME/.ssh/id_nodepool.pub
NODEPOOL_INSTALL=$HOME/nodepool-venv
NODEPOOL_CACHE_GET_PIP=/opt/stack/cache/files/get-pip.py
@ -34,7 +32,7 @@ function install_shade {
# BUT - install shade into a virtualenv so that we don't have issues
# with OpenStack constraints affecting the shade dependency install.
# This particularly shows up with os-client-config
$NODEPOOL_INSTALL/bin/pip install -e $DEST/shade
$NODEPOOL_INSTALL/bin/pip install $DEST/shade
fi
}
@ -45,7 +43,7 @@ function install_diskimage_builder {
GITBRANCH["diskimage-builder"]=$DISKIMAGE_BUILDER_REPO_REF
git_clone_by_name "diskimage-builder"
setup_dev_lib "diskimage-builder"
$NODEPOOL_INSTALL/bin/pip install -e $DEST/diskimage-builder
$NODEPOOL_INSTALL/bin/pip install $DEST/diskimage-builder
fi
}
@ -56,38 +54,30 @@ function install_glean {
GITBRANCH["glean"]=$GLEAN_REPO_REF
git_clone_by_name "glean"
setup_dev_lib "glean"
$NODEPOOL_INSTALL/bin/pip install -e $DEST/glean
$NODEPOOL_INSTALL/bin/pip install $DEST/glean
fi
}
# Install nodepool code
function install_nodepool {
virtualenv $NODEPOOL_INSTALL
if python3_enabled; then
VENV="virtualenv -p python${PYTHON3_VERSION}"
else
VENV="virtualenv -p python${PYTHON2_VERSION}"
fi
$VENV $NODEPOOL_INSTALL
install_shade
install_diskimage_builder
install_glean
setup_develop $DEST/nodepool
$NODEPOOL_INSTALL/bin/pip install -e $DEST/nodepool
$NODEPOOL_INSTALL/bin/pip install $DEST/nodepool
}
# requires some globals from devstack, which *might* not be stable api
# points. If things break, investigate changes in those globals first.
function nodepool_create_keypairs {
if [[ ! -f $NODEPOOL_KEY ]]; then
ssh-keygen -f $NODEPOOL_KEY -P ""
fi
cat > /tmp/ssh_wrapper <<EOF
#!/bin/bash -ex
sudo -H -u stack ssh -o StrictHostKeyChecking=no -i $NODEPOOL_KEY root@\$@
EOF
sudo chmod 0755 /tmp/ssh_wrapper
}
function nodepool_write_elements {
sudo mkdir -p $(dirname $NODEPOOL_CONFIG)/elements/nodepool-setup/install.d
sudo mkdir -p $(dirname $NODEPOOL_CONFIG)/elements/nodepool-setup/root.d
@ -118,7 +108,6 @@ EOF
function nodepool_write_config {
sudo mkdir -p $(dirname $NODEPOOL_CONFIG)
sudo mkdir -p $(dirname $NODEPOOL_SECURE)
local dburi=$(database_connection_url nodepool)
cat > /tmp/logging.conf <<EOF
[formatters]
@ -178,12 +167,7 @@ EOF
sudo mv /tmp/logging.conf $NODEPOOL_LOGGING
cat > /tmp/secure.conf << EOF
[database]
# The mysql password here may be different depending on your
# devstack install, you should double check it (the devstack var
# is MYSQL_PASSWORD and if unset devstack should prompt you for
# the value).
dburi: $dburi
# Empty
EOF
sudo mv /tmp/secure.conf $NODEPOOL_SECURE
@ -197,131 +181,129 @@ EOF
if [ -f $NODEPOOL_CACHE_GET_PIP ] ; then
DIB_GET_PIP="DIB_REPOLOCATION_pip_and_virtualenv: file://$NODEPOOL_CACHE_GET_PIP"
fi
if [ -f /etc/ci/mirror_info.sh ] ; then
source /etc/ci/mirror_info.sh
if [ -f /etc/nodepool/provider ] ; then
source /etc/nodepool/provider
NODEPOOL_MIRROR_HOST=${NODEPOOL_MIRROR_HOST:-mirror.$NODEPOOL_REGION.$NODEPOOL_CLOUD.openstack.org}
NODEPOOL_MIRROR_HOST=$(echo $NODEPOOL_MIRROR_HOST|tr '[:upper:]' '[:lower:]')
NODEPOOL_CENTOS_MIRROR=${NODEPOOL_CENTOS_MIRROR:-http://$NODEPOOL_MIRROR_HOST/centos}
NODEPOOL_DEBIAN_MIRROR=${NODEPOOL_DEBIAN_MIRROR:-http://$NODEPOOL_MIRROR_HOST/debian}
NODEPOOL_UBUNTU_MIRROR=${NODEPOOL_UBUNTU_MIRROR:-http://$NODEPOOL_MIRROR_HOST/ubuntu}
DIB_DISTRIBUTION_MIRROR_CENTOS="DIB_DISTRIBUTION_MIRROR: $NODEPOOL_CENTOS_MIRROR"
DIB_DISTRIBUTION_MIRROR_DEBIAN="DIB_DISTRIBUTION_MIRROR: $NODEPOOL_DEBIAN_MIRROR"
DIB_DISTRIBUTION_MIRROR_FEDORA="DIB_DISTRIBUTION_MIRROR: $NODEPOOL_FEDORA_MIRROR"
DIB_DISTRIBUTION_MIRROR_UBUNTU="DIB_DISTRIBUTION_MIRROR: $NODEPOOL_UBUNTU_MIRROR"
DIB_DEBOOTSTRAP_EXTRA_ARGS="DIB_DEBOOTSTRAP_EXTRA_ARGS: '--no-check-gpg'"
fi
NODEPOOL_CENTOS_7_MIN_READY=1
NODEPOOL_DEBIAN_JESSIE_MIN_READY=1
# TODO(pabelanger): Remove fedora-25 after fedora-26 is online
NODEPOOL_FEDORA_25_MIN_READY=1
NODEPOOL_FEDORA_26_MIN_READY=1
NODEPOOL_UBUNTU_TRUSTY_MIN_READY=1
NODEPOOL_UBUNTU_XENIAL_MIN_READY=1
if $NODEPOOL_PAUSE_CENTOS_7_DIB ; then
NODEPOOL_CENTOS_7_MIN_READY=0
fi
if $NODEPOOL_PAUSE_DEBIAN_JESSIE_DIB ; then
NODEPOOL_DEBIAN_JESSIE_MIN_READY=0
fi
if $NODEPOOL_PAUSE_FEDORA_25_DIB ; then
NODEPOOL_FEDORA_25_MIN_READY=0
fi
if $NODEPOOL_PAUSE_FEDORA_26_DIB ; then
NODEPOOL_FEDORA_26_MIN_READY=0
fi
if $NODEPOOL_PAUSE_UBUNTU_TRUSTY_DIB ; then
NODEPOOL_UBUNTU_TRUSTY_MIN_READY=0
fi
if $NODEPOOL_PAUSE_UBUNTU_XENIAL_DIB ; then
NODEPOOL_UBUNTU_XENIAL_MIN_READY=0
fi
cat > /tmp/nodepool.yaml <<EOF
# You will need to make and populate this path as necessary,
# cloning nodepool does not do this. Further in this doc we have an
# example element.
elements-dir: $(dirname $NODEPOOL_CONFIG)/elements
images-dir: $NODEPOOL_DIB_BASE_PATH/images
# The mysql password here may be different depending on your
# devstack install, you should double check it (the devstack var
# is MYSQL_PASSWORD and if unset devstack should prompt you for
# the value).
dburi: '$dburi'
zookeeper-servers:
- host: localhost
port: 2181
gearman-servers:
- host: localhost
port: 8991
zmq-publishers: []
# Need to have at least one target for node allocations, but
# this does not need to be a jenkins target.
targets:
- name: dummy
assign-via-gearman: True
cron:
cleanup: '*/1 * * * *'
check: '*/15 * * * *'
labels:
- name: centos-7
image: centos-7
min-ready: 1
providers:
- name: devstack
min-ready: $NODEPOOL_CENTOS_7_MIN_READY
- name: debian-jessie
image: debian-jessie
min-ready: 1
providers:
- name: devstack
min-ready: $NODEPOOL_DEBIAN_JESSIE_MIN_READY
- name: fedora-25
min-ready: $NODEPOOL_FEDORA_25_MIN_READY
- name: fedora-26
image: fedora-26
min-ready: 1
providers:
- name: devstack
- name: opensuse-423
image: opensuse-423
min-ready: 1
providers:
- name: devstack
min-ready: $NODEPOOL_FEDORA_26_MIN_READY
- name: ubuntu-trusty
image: ubuntu-trusty
min-ready: 1
providers:
- name: devstack
min-ready: $NODEPOOL_UBUNTU_TRUSTY_MIN_READY
- name: ubuntu-xenial
image: ubuntu-xenial
min-ready: 1
providers:
- name: devstack
min-ready: $NODEPOOL_UBUNTU_XENIAL_MIN_READY
providers:
- name: devstack
region-name: '$REGION_NAME'
cloud: devstack
api-timeout: 60
# Long boot timeout to deal with potentially nested virt.
boot-timeout: 600
launch-timeout: 900
max-servers: 5
rate: 0.25
images:
diskimages:
- name: centos-7
min-ram: 1024
name-filter: 'nodepool'
username: devuser
private-key: $NODEPOOL_KEY
config-drive: true
key-name: $NODEPOOL_KEY_NAME
- name: debian-jessie
min-ram: 512
name-filter: 'nodepool'
username: devuser
private-key: $NODEPOOL_KEY
config-drive: true
key-name: $NODEPOOL_KEY_NAME
- name: fedora-25
config-drive: true
- name: fedora-26
min-ram: 1024
name-filter: 'nodepool'
username: devuser
private-key: $NODEPOOL_KEY
config-drive: true
key-name: $NODEPOOL_KEY_NAME
- name: opensuse-423
min-ram: 1024
name-filter: 'nodepool'
username: devuser
private-key: $NODEPOOL_KEY
config-drive: true
key-name: $NODEPOOL_KEY_NAME
- name: ubuntu-trusty
min-ram: 512
name-filter: 'nodepool'
username: devuser
private-key: $NODEPOOL_KEY
config-drive: true
key-name: $NODEPOOL_KEY_NAME
- name: ubuntu-xenial
min-ram: 512
name-filter: 'nodepool'
username: devuser
private-key: $NODEPOOL_KEY
config-drive: true
key-name: $NODEPOOL_KEY_NAME
pools:
- name: main
max-servers: 5
labels:
- name: centos-7
diskimage: centos-7
min-ram: 1024
flavor-name: 'nodepool'
console-log: True
- name: debian-jessie
diskimage: debian-jessie
min-ram: 512
flavor-name: 'nodepool'
console-log: True
- name: fedora-25
diskimage: fedora-25
min-ram: 1024
flavor-name: 'nodepool'
console-log: True
- name: fedora-26
diskimage: fedora-26
min-ram: 1024
flavor-name: 'nodepool'
console-log: True
- name: ubuntu-trusty
diskimage: ubuntu-trusty
min-ram: 512
flavor-name: 'nodepool'
console-log: True
- name: ubuntu-xenial
diskimage: ubuntu-xenial
min-ram: 512
flavor-name: 'nodepool'
console-log: True
diskimages:
- name: centos-7
@ -369,6 +351,26 @@ diskimages:
$DIB_GLEAN_INSTALLTYPE
$DIB_GLEAN_REPOLOCATION
$DIB_GLEAN_REPOREF
- name: fedora-25
pause: $NODEPOOL_PAUSE_FEDORA_25_DIB
rebuild-age: 86400
elements:
- fedora-minimal
- vm
- simple-init
- devuser
- openssh-server
- nodepool-setup
release: 25
env-vars:
TMPDIR: $NODEPOOL_DIB_BASE_PATH/tmp
DIB_CHECKSUM: '1'
DIB_IMAGE_CACHE: $NODEPOOL_DIB_BASE_PATH/cache
DIB_DEV_USER_AUTHORIZED_KEYS: $NODEPOOL_PUBKEY
$DIB_GET_PIP
$DIB_GLEAN_INSTALLTYPE
$DIB_GLEAN_REPOLOCATION
$DIB_GLEAN_REPOREF
- name: fedora-26
pause: $NODEPOOL_PAUSE_FEDORA_26_DIB
rebuild-age: 86400
@ -380,27 +382,6 @@ diskimages:
- openssh-server
- nodepool-setup
release: 26
env-vars:
TMPDIR: $NODEPOOL_DIB_BASE_PATH/tmp
DIB_CHECKSUM: '1'
DIB_IMAGE_CACHE: $NODEPOOL_DIB_BASE_PATH/cache
DIB_DEV_USER_AUTHORIZED_KEYS: $NODEPOOL_PUBKEY
$DIB_DISTRIBUTION_MIRROR_FEDORA
$DIB_GET_PIP
$DIB_GLEAN_INSTALLTYPE
$DIB_GLEAN_REPOLOCATION
$DIB_GLEAN_REPOREF
- name: opensuse-423
pause: $NODEPOOL_PAUSE_OPENSUSE_423_DIB
rebuild-age: 86400
elements:
- opensuse-minimal
- vm
- simple-init
- devuser
- openssh-server
- nodepool-setup
release: 42.3
env-vars:
TMPDIR: $NODEPOOL_DIB_BASE_PATH/tmp
DIB_CHECKSUM: '1'
@ -474,27 +455,22 @@ cache:
floating-ip: 5
server: 5
port: 5
# TODO(pabelanger): Remove once glean fully supports IPv6.
client:
force_ipv4: True
EOF
sudo mv /tmp/clouds.yaml /etc/openstack/clouds.yaml
mkdir -p $HOME/.cache/openstack/
}
# Initialize database
# Create configs
# Setup custom flavor
function configure_nodepool {
# build a dedicated keypair for nodepool to use with guests
nodepool_create_keypairs
# write the nodepool config
nodepool_write_config
# write the elements
nodepool_write_elements
# builds a fresh db
recreate_database nodepool
}
function start_nodepool {
@ -513,24 +489,19 @@ function start_nodepool {
openstack --os-project-name demo --os-username demo security group rule create --ingress --protocol tcp --dst-port 1:65535 --remote-ip 0.0.0.0/0 default
openstack --os-project-name demo --os-username demo security group rule create --ingress --protocol udp --dst-port 1:65535 --remote-ip 0.0.0.0/0 default
fi
# create root keypair to use with glean for devstack cloud.
nova --os-project-name demo --os-username demo \
keypair-add --pub-key $NODEPOOL_PUBKEY $NODEPOOL_KEY_NAME
export PATH=$NODEPOOL_INSTALL/bin:$PATH
# start gearman server
run_process geard "$NODEPOOL_INSTALL/bin/geard -p 8991 -d"
# run a fake statsd so we test stats sending paths
export STATSD_HOST=localhost
export STATSD_PORT=8125
run_process statsd "/usr/bin/socat -u udp-recv:$STATSD_PORT -"
run_process nodepool "$NODEPOOL_INSTALL/bin/nodepoold -c $NODEPOOL_CONFIG -s $NODEPOOL_SECURE -l $NODEPOOL_LOGGING -d"
# Ensure our configuration is valid.
$NODEPOOL_INSTALL/bin/nodepool -c $NODEPOOL_CONFIG config-validate
run_process nodepool-launcher "$NODEPOOL_INSTALL/bin/nodepool-launcher -c $NODEPOOL_CONFIG -s $NODEPOOL_SECURE -l $NODEPOOL_LOGGING -d"
run_process nodepool-builder "$NODEPOOL_INSTALL/bin/nodepool-builder -c $NODEPOOL_CONFIG -l $NODEPOOL_LOGGING -d"
:
}
@ -545,7 +516,7 @@ function cleanup_nodepool {
}
# check for service enabled
if is_service_enabled nodepool; then
if is_service_enabled nodepool-launcher; then
if [[ "$1" == "stack" && "$2" == "install" ]]; then
# Perform installation of service source


@ -8,8 +8,9 @@ NODEPOOL_DIB_BASE_PATH=/opt/dib
# change the defaults.
NODEPOOL_PAUSE_CENTOS_7_DIB=${NODEPOOL_PAUSE_CENTOS_7_DIB:-true}
NODEPOOL_PAUSE_DEBIAN_JESSIE_DIB=${NODEPOOL_PAUSE_DEBIAN_JESSIE_DIB:-true}
NODEPOOL_PAUSE_FEDORA_25_DIB=${NODEPOOL_PAUSE_FEDORA_25_DIB:-true}
NODEPOOL_PAUSE_FEDORA_26_DIB=${NODEPOOL_PAUSE_FEDORA_26_DIB:-true}
NODEPOOL_PAUSE_OPENSUSE_423_DIB=${NODEPOOL_PAUSE_OPENSUSE_423_DIB:-true}
NODEPOOL_PAUSE_UBUNTU_PRECISE_DIB=${NODEPOOL_PAUSE_UBUNTU_PRECISE_DIB:-true}
NODEPOOL_PAUSE_UBUNTU_TRUSTY_DIB=${NODEPOOL_PAUSE_UBUNTU_TRUSTY_DIB:-false}
NODEPOOL_PAUSE_UBUNTU_XENIAL_DIB=${NODEPOOL_PAUSE_UBUNTU_XENIAL_DIB:-true}
@ -24,5 +25,5 @@ GLEAN_REPO_REF=${GLEAN_REPO_REF:-master}
enable_service geard
enable_service statsd
enable_service nodepool
enable_service nodepool-launcher
enable_service nodepool-builder


@ -3,62 +3,11 @@
Configuration
=============
Nodepool reads its secure configuration from ``/etc/nodepool/secure.conf``
by default. The secure file is a standard ini config file, with
one section for database, and another section for the jenkins
secrets for each target::
[database]
dburi={dburi}
[jenkins "{target_name}"]
user={user}
apikey={apikey}
credentials={credentials}
url={url}
Following settings are available::
**required**
``dburi``
Indicates the URI for the database connection. See the `SQLAlchemy
documentation
<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>`_
for the syntax. Example::
dburi='mysql+pymysql://nodepool@localhost/nodepool'
**optional**
While it is possible to run Nodepool without any Jenkins targets,
if Jenkins is used, the `target_name` and `url` are required. The
`user`, `apikey` and `credentials` also may be needed depending on
the Jenkins security settings.
``target_name``
Name of the jenkins target. It needs to match with a target
specified in nodepool.yaml, in order to retrieve its settings.
``url``
Url to the Jenkins REST API.
``user``
Jenkins username.
``apikey``
API key generated by Jenkins (not the user password).
``credentials``
If provided, Nodepool will configure the Jenkins slave to use the Jenkins
credential identified by that ID, otherwise it will use the username and
ssh keys configured in the image.
Nodepool reads its configuration from ``/etc/nodepool/nodepool.yaml``
by default. The configuration file follows the standard YAML syntax
with a number of sections defined with top level keys. For example, a
full configuration file may have the ``diskimages``, ``labels``,
``providers``, and ``targets`` sections::
and ``providers`` sections::
diskimages:
...
@ -66,12 +15,29 @@ full configuration file may have the ``diskimages``, ``labels``,
...
providers:
...
targets:
...
.. note:: The builder daemon creates a UUID to uniquely identify itself and
to mark image builds in ZooKeeper that it owns. The UUID is stored in a
file named ``builder_id.txt``, which lives in the directory named by the
:ref:`images-dir` option. If this file does not exist, it will be
created on builder startup and a new UUID will be generated automatically.
The following sections are available. All are required unless
otherwise indicated.
.. _webapp-conf:
webapp
------
Define the webapp endpoint port and listen address.
Example::
webapp:
port: 8005
listen_address: '0.0.0.0'
.. _elements-dir:
elements-dir
@ -86,6 +52,8 @@ Example::
elements-dir: /path/to/elements/dir
.. _images-dir:
images-dir
----------
@ -97,44 +65,6 @@ Example::
images-dir: /path/to/images/dir
cron
----
This section is optional.
Nodepool runs several periodic tasks. The ``cleanup`` task deletes
old images and servers which may have encountered errors during their
initial deletion. The ``check`` task attempts to log into each node
that is waiting to be used to make sure that it is still operational.
The following illustrates how to change the schedule for these tasks
and also indicates their default values::
cron:
cleanup: '27 */6 * * *'
check: '*/15 * * * *'
zmq-publishers
--------------
Lists the ZeroMQ endpoints for the Jenkins masters. Nodepool uses
this to receive real-time notification that jobs are running on nodes
or are complete and nodes may be deleted. Example::
zmq-publishers:
- tcp://jenkins1.example.com:8888
- tcp://jenkins2.example.com:8888
gearman-servers
---------------
Lists the Zuul Gearman servers that should be consulted for real-time
demand. Nodepool will use information from these servers to determine
if additional nodes should be created to satisfy current demand.
Example::
gearman-servers:
- host: zuul.example.com
port: 4730
The ``port`` key is optional (default: 4730).
zookeeper-servers
-----------------
Lists the ZooKeeper servers used for coordinating information between
@ -155,83 +85,54 @@ the supplied root path, is also optional and has no default.
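For orientation, a minimal ``zookeeper-servers`` entry might look like the
following sketch; the host and root path values are placeholders, and
``chroot`` is assumed here to be the name of the root-path option mentioned
above (check the full section text elided by this diff)::

  zookeeper-servers:
    - host: zk1.example.com
      port: 2181
      chroot: /nodepool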
labels
------
Defines the types of nodes that should be created. Maps node types to
the images that are used to back them and the providers that are used
to supply them. Jobs should be written to run on nodes of a certain
label (so targets such as Jenkins don't need to know about what
providers or images are used to create them). Example::
Defines the types of nodes that should be created. Jobs should be
written to run on nodes of a certain label. Example::
labels:
- name: my-precise
image: precise
max-ready-age: 3600
min-ready: 2
providers:
- name: provider1
- name: provider2
- name: multi-precise
image: precise
subnodes: 2
min-ready: 2
ready-script: setup_multinode.sh
providers:
- name: provider1
**required**
``name``
Unique name used to tie jobs to those instances.
``image``
Refers to providers images, see :ref:`images`.
``providers`` (list)
Required if any nodes should actually be created (e.g., the label is not
currently disabled, see ``min-ready`` below).
**optional**
``max-ready-age`` (int)
Maximum number of seconds the node shall be in ready state. If
this is exceeded the node will be deleted. A value of 0 disables this.
Defaults to 0.
``min-ready`` (default: 2)
Minimum instances that should be in a ready state. Set to -1 to have the
label considered disabled. ``min-ready`` is best-effort based on available
capacity and is not a guaranteed allocation.
``subnodes``
Used to configure multi-node support. If a `subnodes` key is supplied to
an image, it indicates that the specified number of additional nodes of the
same image type should be created and associated with each node for that
image.
Only one node from each such group will be added to the target, the
subnodes are expected to communicate directly with each other. In the
example above, for each Precise node added to the target system, two
additional nodes will be created and associated with it.
``ready-script``
A script to be used to perform any last minute changes to a node after it
has been launched but before it is put in the READY state to receive jobs.
For more information, see :ref:`scripts`.
.. _diskimages:
diskimages
----------
This section lists the images to be built using diskimage-builder. The
name of the diskimage is mapped to the :ref:`images` section of the
provider, to determine which providers should receive uploads of each
name of the diskimage is mapped to the :ref:`provider_diskimages` section
of the provider, to determine which providers should receive uploads of each
image. The diskimage will be built in every format required by the
providers with which it is associated. Because Nodepool needs to know
which formats to build, the diskimage will only be built if it
appears in at least one provider.
To remove a diskimage from the system entirely, remove all associated
entries in :ref:`images` and remove its entry from `diskimages`. All
uploads will be deleted as well as the files on disk.
entries in :ref:`provider_diskimages` and remove its entry from `diskimages`.
All uploads will be deleted as well as the files on disk.
Example configuration::
diskimages:
- name: precise
- name: ubuntu-precise
pause: False
rebuild-age: 86400
elements:
@ -245,6 +146,7 @@ Example configuration::
- growroot
- infra-package-needs
release: precise
username: zuul
env-vars:
TMPDIR: /opt/dib_tmp
DIB_CHECKSUM: '1'
@ -252,7 +154,7 @@ Example configuration::
DIB_APT_LOCAL_CACHE: '0'
DIB_DISABLE_APT_CLEANUP: '1'
FS_TYPE: ext3
- name: xenial
- name: ubuntu-xenial
pause: True
rebuild-age: 86400
formats:
@ -269,6 +171,7 @@ Example configuration::
- growroot
- infra-package-needs
release: precise
username: ubuntu
env-vars:
TMPDIR: /opt/dib_tmp
DIB_CHECKSUM: '1'
@ -281,7 +184,8 @@ Example configuration::
**required**
``name``
Identifier to reference the disk image in :ref:`images` and :ref:`labels`.
Identifier to reference the disk image in :ref:`provider_diskimages`
and :ref:`labels`.
**optional**
@ -312,124 +216,124 @@ Example configuration::
``pause`` (bool)
When set to True, nodepool-builder will not build the diskimage.
``username`` (string)
The username that a consumer should use when connecting onto the node. Defaults
to ``zuul``.
.. _provider:
provider
providers
---------
Lists the OpenStack cloud providers Nodepool should use. Within each
provider, the Nodepool image types are also defined (see
:ref:`images` for details). Example::
providers:
- name: provider1
cloud: example
region-name: 'region1'
max-servers: 96
rate: 1.0
availability-zones:
- az1
boot-timeout: 120
launch-timeout: 900
template-hostname: 'template-{image.name}-{timestamp}'
ipv6-preferred: False
networks:
- name: 'some-network-name'
images:
- name: trusty
min-ram: 8192
name-filter: 'something to match'
username: jenkins
user-home: '/home/jenkins'
private-key: /var/lib/jenkins/.ssh/id_rsa
meta:
key: value
key2: value
- name: precise
min-ram: 8192
username: jenkins
user-home: '/home/jenkins'
private-key: /var/lib/jenkins/.ssh/id_rsa
- name: devstack-trusty
min-ram: 30720
username: jenkins
private-key: /home/nodepool/.ssh/id_rsa
- name: provider2
username: 'username'
password: 'password'
auth-url: 'http://auth.provider2.example.com/'
project-name: 'project'
service-type: 'compute'
service-name: 'compute'
region-name: 'region1'
max-servers: 96
rate: 1.0
template-hostname: '{image.name}-{timestamp}-nodepool-template'
images:
- name: precise
min-ram: 8192
username: jenkins
user-home: '/home/jenkins'
private-key: /var/lib/jenkins/.ssh/id_rsa
meta:
key: value
key2: value
**cloud configuration***
**preferred**
``cloud``
There are two methods supported for configuring cloud entries. The preferred
method is to create an ``~/.config/openstack/clouds.yaml`` file containing
your cloud configuration information. Then, use ``cloud`` to refer to a
named entry in that file.
More information about the contents of `clouds.yaml` can be found in
`the os-client-config documentation <http://docs.openstack.org/developer/os-client-config/>`_.
**compatablity**
For backwards compatibility reasons, you can also include
portions of the cloud configuration directly in ``nodepool.yaml``. Not all
of the options settable via ``clouds.yaml`` are available.
``username``
``password``
``project-id`` OR ``project-name``
Some clouds may refer to the ``project-id`` as ``tenant-id``.
Some clouds may refer to the ``project-name`` as ``tenant-name``.
``auth-url``
Keystone URL.
``image-type``
Specifies the image type supported by this provider. The disk images built
by diskimage-builder will output an image for each ``image-type`` specified
by a provider using that particular diskimage.
By default, ``image-type`` is set to the value returned from
``os-client-config`` and can be omitted in most cases.
Lists the providers Nodepool should use. Each provider is associated with
a driver listed below.
**required**
``name``
``max-servers``
Maximum number of servers spawnable on this provider.
**optional**
``availability-zones`` (list)
Without it nodepool will rely on nova to schedule an availability zone.
``driver``
Defaults to *openstack*.
If it is provided the value should be a list of availability zone names.
Nodepool will select one at random and provide that to nova. This should
give a good distribution of availability zones being used. If you need more
control of the distribution you can use multiple logical providers each
providing a different list of availabiltiy zones.
``max-concurrency``
Maximum number of node requests that this provider is allowed to handle
concurrently. The default, if not specified, is to have no maximum. Since
each node request is handled by a separate thread, this can be useful for
limiting the number of threads used by the nodepool-launcher daemon.
OpenStack driver
^^^^^^^^^^^^^^^^
Within each OpenStack provider the available Nodepool image types are defined
(see :ref:`provider_diskimages`).
An OpenStack provider's resources are partitioned into groups called "pools"
(see :ref:`pools` for details), and within a pool, the node types which are
to be made available are listed (see :ref:`pool_labels` for
details).
Example::
providers:
- name: provider1
driver: openstack
cloud: example
region-name: 'region1'
rate: 1.0
boot-timeout: 120
launch-timeout: 900
launch-retries: 3
image-name-format: '{image_name}-{timestamp}'
hostname-format: '{label.name}-{provider.name}-{node.id}'
diskimages:
- name: trusty
meta:
key: value
key2: value
- name: precise
- name: devstack-trusty
pools:
- name: main
max-servers: 96
availability-zones:
- az1
networks:
- some-network-name
labels:
- name: trusty
min-ram: 8192
diskimage: trusty
console-log: True
- name: precise
min-ram: 8192
diskimage: precise
- name: devstack-trusty
min-ram: 8192
diskimage: devstack-trusty
- name: provider2
driver: openstack
cloud: example2
region-name: 'region1'
rate: 1.0
image-name-format: '{image_name}-{timestamp}'
hostname-format: '{label.name}-{provider.name}-{node.id}'
diskimages:
- name: precise
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
labels:
- name: trusty
min-ram: 8192
diskimage: trusty
- name: precise
min-ram: 8192
diskimage: precise
- name: devstack-trusty
min-ram: 8192
diskimage: devstack-trusty
**required**
``cloud``
Name of a cloud configured in ``clouds.yaml``.
The instances spawned by nodepool will inherit the default security group
of the project specified in the cloud definition in `clouds.yaml`. This means
that when working with Zuul, for example, SSH traffic (TCP/22) must be allowed
in the project's default security group for Zuul to be able to reach instances.
More information about the contents of `clouds.yaml` can be found in
`the os-client-config documentation <http://docs.openstack.org/developer/os-client-config/>`_.
**optional**
``boot-timeout``
Once an instance is active, how long to try connecting to the
@ -454,31 +358,22 @@ provider, the Nodepool image types are also defined (see
Default None
``networks`` (dict)
Specify custom Neutron networks that get attached to each
node. Specify the ``name`` of the network (a string).
``launch-retries``
``ipv6-preferred``
If it is set to True, nodepool will try to find ipv6 in public net first
as the ip address for ssh connection to build snapshot images and create
jenkins slave definition. If ipv6 is not found or the key is not
specified or set to False, ipv4 address will be used.
The number of times to retry launching a server before considering the job
failed.
``api-timeout`` (compatability)
Timeout for the OpenStack API calls client in seconds. Prefer setting
this in `clouds.yaml`
``service-type`` (compatability)
Prefer setting this in `clouds.yaml`.
``service-name`` (compatability)
Prefer setting this in `clouds.yaml`.
Default 3.
``region-name``
``template-hostname``
``hostname-format``
Hostname template to use for the spawned instance.
Default ``template-{image.name}-{timestamp}``
Default ``{label.name}-{provider.name}-{node.id}``
``image-name-format``
Format for image names that are uploaded to providers.
Default ``{image_name}-{timestamp}``
``rate``
In seconds, amount to wait between operations on the provider.
@ -489,12 +384,88 @@ provider, the Nodepool image types are also defined (see
OpenStack project and will attempt to clean unattached floating ips that
may have leaked around restarts.
.. _images:
.. _pools:
images
~~~~~~
pools
~~~~~
Each entry in a provider's `images` section must correspond to an
A pool defines a group of resources from an OpenStack provider. Each pool has a
maximum number of nodes which can be launched from it, along with a
number of cloud-related attributes used when launching nodes.
Example::
pools:
- name: main
max-servers: 96
availability-zones:
- az1
networks:
- some-network-name
auto-floating-ip: False
labels:
- name: trusty
min-ram: 8192
diskimage: trusty
console-log: True
- name: precise
min-ram: 8192
diskimage: precise
- name: devstack-trusty
min-ram: 8192
diskimage: devstack-trusty
**required**
``name``
**optional**
``max-cores``
Maximum number of cores usable from this pool. This can be used to limit
usage of the tenant. If not defined, nodepool can use all cores up to the
quota of the tenant.
``max-servers``
Maximum number of servers spawnable from this pool. This can be used to
limit the number of servers. If not defined, nodepool can create as many
servers as the tenant allows.
``max-ram``
Maximum amount of RAM usable from this pool. This can be used to limit the
amount of RAM allocated by nodepool. If not defined, nodepool can use as
much RAM as the tenant allows.
``availability-zones`` (list)
A list of availability zones to use.
If this setting is omitted, nodepool will fetch the list of all
availability zones from nova. To restrict nodepool to a subset
of availability zones, supply a list of availability zone names
in this setting.
Nodepool chooses an availability zone from the list at random
when creating nodes but ensures that all nodes for a given
request are placed in the same availability zone.
``networks`` (list)
Specify custom Neutron networks that get attached to each
node. Specify the name or id of the network as a string.
``auto-floating-ip`` (bool)
Specify whether a floating IP should be allocated for each node.
When set to False, nodepool-launcher will not attach a floating IP to
nodes. When Zuul and the nodes are deployed in the same
internal private network, set this option to False to conserve floating
IPs at the cloud provider. The default value is True.
.. _provider_diskimages:
diskimages
~~~~~~~~~~
Each entry in a provider's `diskimages` section must correspond to an
entry in :ref:`diskimages`. Such an entry indicates that the
corresponding diskimage should be uploaded for use in this provider.
Additionally, any nodes that are created using the uploaded image will
@ -505,16 +476,14 @@ images will be deleted from the provider.
Example configuration::
images:
diskimages:
- name: precise
pause: False
min-ram: 8192
name-filter: 'something to match'
username: jenkins
private-key: /var/lib/jenkins/.ssh/id_rsa
meta:
key: value
key2: value
- name: windows
connection-type: winrm
**required**
@ -522,86 +491,143 @@ Example configuration::
Identifier to refer this image from :ref:`labels` and :ref:`diskimages`
sections.
``min-ram``
Determine the flavor to use (e.g. ``m1.medium``, ``m1.large``,
etc). The smallest flavor that meets the ``min-ram`` requirements
will be chosen. To further filter by flavor name, see optional
``name-filter`` below.
**optional**
``name-filter``
Additional filter complementing ``min-ram``, will be required to match on
the flavor-name (e.g. Rackspace offer a "Performance" flavour; setting
`name-filter` to ``Performance`` will ensure the chosen flavor also
contains this string as well as meeting `min-ram` requirements).
``pause`` (bool)
When set to True, nodepool-builder will not upload the image to the
provider.
``username``
Nodepool expects that user to exist after running the script indicated by
``setup``. Default ``jenkins``
``key-name``
If provided, named keypair in nova that will be provided to server create.
``private-key``
Default ``/var/lib/jenkins/.ssh/id_rsa``
``config-drive`` (boolean)
Whether config drive should be used for the image. Default ``True``
Whether config drive should be used for the image. Defaults to unset which
will use the cloud's default behavior.
``meta`` (dict)
Arbitrary key/value metadata to store for this server using the Nova
metadata service. A maximum of five entries is allowed, and both keys and
values must be 255 characters or less.
.. _targets:
``connection-type`` (string)
The connection type that a consumer should use when connecting onto the
node. For most diskimages this is not necessary. However when creating
Windows images this could be 'winrm' to enable access via ansible.
targets
-------
Lists the Jenkins masters to which Nodepool should attach nodes after
they are created. Nodes of each label will be evenly distributed
across all of the targets which are on-line::
.. _provider_cloud_images:
targets:
- name: jenkins1
hostname: '{label.name}-{provider.name}-{node_id}'
subnode-hostname: '{label.name}-{provider.name}-{node_id}-{subnode_id}'
- name: jenkins2
hostname: '{label.name}-{provider.name}-{node_id}'
subnode-hostname: '{label.name}-{provider.name}-{node_id}-{subnode_id}'
cloud-images
~~~~~~~~~~~~
Each cloud-image entry in :ref:`labels` refers to an entry in this section.
This is a way to modify launch parameters of the nodes (currently only
config-drive).
Example configuration::
cloud-images:
- name: trusty-external
config-drive: False
- name: windows-external
connection-type: winrm
**required**
``name``
Identifier for the system an instance is attached to.
Identifier used to refer to this cloud-image from the :ref:`labels` section.
Since this name appears elsewhere in the nodepool configuration
file, you may want to use your own descriptive name here and use
one of ``image-id`` or ``image-name`` to specify the cloud image
so that if the image name or id changes on the cloud, the impact
to your Nodepool configuration will be minimal. However, if
neither of those attributes is provided, this is also assumed to
be the image name or ID in the cloud.
**optional**
``hostname``
Default ``{label.name}-{provider.name}-{node_id}``
``config-drive`` (boolean)
Whether config drive should be used for the cloud image. Defaults to
unset which will use the cloud's default behavior.
``subnode-hostname``
Default ``{label.name}-{provider.name}-{node_id}-{subnode_id}``
``image-id`` (str)
If this is provided, it is used to select the image from the cloud
provider by ID, rather than name. Mutually exclusive with ``image-name``.
``rate``
In seconds. Default 1.0
``image-name`` (str)
If this is provided, it is used to select the image from the cloud
provider by this name or ID. Mutually exclusive with ``image-id``.
``jenkins`` (dict)
``username`` (str)
The username that a consumer should use when connecting onto the node.
``test-job`` (optional)
Setting this would cause a newly created instance to be in a TEST state.
The job name given will then be executed with the node name as a
parameter.
``connection-type`` (str)
The connection type that a consumer should use when connecting onto the
node. For most diskimages this is not necessary. However when creating
Windows images this could be 'winrm' to enable access via ansible.
If the job succeeds, move the node into READY state and relabel it with
the appropriate label (from the image name).
.. _pool_labels:
If it fails, immediately delete the node.
labels
~~~~~~
If the job never runs, the node will eventually be cleaned up by the
periodic cleanup task.
Each entry in a pool's ``labels`` section indicates that the
corresponding label is available for use in this pool. When creating
nodes for a label, the flavor-related attributes in that label's
section will be used.
Example configuration::
labels:
- name: precise
min-ram: 8192
flavor-name: 'something to match'
console-log: True
**required**
``name``
Identifier to refer this image from :ref:`labels` and :ref:`diskimages`
sections.
**one of**
``diskimage``
Refers to provider's diskimages, see :ref:`provider_diskimages`.
``cloud-image``
Refers to the name of an externally managed image in the cloud that already
exists on the provider. The value of ``cloud-image`` should match the
``name`` of a previously configured entry from the ``cloud-images`` section
of the provider. See :ref:`provider_cloud_images`.
**at least one of**
``flavor-name``
Name or id of the flavor to use. If ``min-ram`` is omitted, it
must be an exact match. If ``min-ram`` is given, ``flavor-name`` will
be used to find flavor names that meet ``min-ram`` and also contain
``flavor-name``.
``min-ram``
Determine the flavor to use (e.g. ``m1.medium``, ``m1.large``,
etc). The smallest flavor that meets the ``min-ram`` requirements
will be chosen.
**optional**
``boot-from-volume`` (bool)
If given, the label for use in this pool will create a volume from the
image and boot the node from it.
Default: False
``key-name``
If given, is the name of a keypair that will be used when booting each
server.
``console-log`` (default: False)
On the failure of the ssh ready check, download the server console log to
aid in debugging the problem.
``volume-size``
When booting an image from volume, how big should the created volume be.
In gigabytes. Default 50.


@ -4,7 +4,7 @@ Nodepool
Nodepool is a system for launching single-use test nodes on demand
based on images built with cached data. It is designed to work with
any OpenStack based cloud, and is part of a suite of tools that form a
comprehensive test system including Jenkins and Zuul.
comprehensive test system, including Zuul.
Contents:
@ -13,7 +13,6 @@ Contents:
installation
configuration
scripts
operation
devguide
@ -21,5 +20,6 @@ Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`


@ -3,51 +3,12 @@
Installation
============
Nodepool consists of a set of long-running daemons which use an SQL
database, a ZooKeeper cluster, and communicates with Jenkins using
ZeroMQ.
Nodepool consists of a long-running daemon which uses ZooKeeper
for coordination with Zuul.
External Requirements
---------------------
Jenkins
~~~~~~~
You should have a Jenkins server running with the `ZMQ Event Publisher
<http://git.openstack.org/cgit/openstack-infra/zmq-event-publisher/tree/README>`_
plugin installed (it is available in the Jenkins Update Center). Be
sure that the machine where you plan to run Nodepool can connect to
the ZMQ port specified by the plugin on your Jenkins master(s).
Zuul
~~~~
If you plan to use Nodepool with Zuul (it is optional), you should
ensure that Nodepool can connect to the gearman port on your Zuul
server (TCP 4730 by default). This will allow Nodepool to respond to
current Zuul demand. If you elect not to connect Nodepool to Zuul, it
will still operate in a node-replacement mode.
Database
~~~~~~~~
Nodepool requires an SQL server. MySQL with the InnoDB storage engine
is tested and recommended. PostgreSQL should work fine. Due to the
high number of concurrent connections from Nodepool, SQLite is not
recommended. When adding or deleting nodes, Nodepool will hold open a
database connection for each node. Be sure to configure the database
server to support at least a number of connections equal to twice the
number of nodes you expect to be in use at once.
All that is necessary is that the database is created. Nodepool will
handle the schema by itself when it is run.
MySQL Example::
CREATE USER 'nodepool'@'localhost' IDENTIFIED BY '<password>';
CREATE DATABASE nodepooldb;
GRANT ALL ON nodepooldb.* TO 'nodepool'@'localhost';
ZooKeeper
~~~~~~~~~
@ -88,22 +49,28 @@ Or install directly from a git checkout with::
pip install .
Note that some distributions provide a libzmq1 which does not support
RCVTIMEO. Removing this libzmq1 from the system libraries will ensure
pip compiles a libzmq1 with appropriate options for the version of
pyzmq used by nodepool.
Configuration
-------------
Nodepool has two required configuration files: secure.conf and
nodepool.yaml, and an optional logging configuration file logging.conf.
The secure.conf file is used to store nodepool configurations that contain
sensitive data, such as the Nodepool database password and Jenkins
api key. The nodepool.yaml files is used to store all other
configurations.
The logging configuration file is in the standard python logging
`configuration file format
<http://docs.python.org/2/library/logging.config.html#configuration-file-format>`_.
Nodepool has one required configuration file, which defaults to
``/etc/nodepool/nodepool.yaml``. This can be changed with the ``-c`` option.
The Nodepool configuration file is described in :ref:`configuration`.
There is support for a secure file that is used to store nodepool
configurations that contain sensitive data. It currently only supports
specifying ZooKeeper credentials. If ZooKeeper credentials are defined in
both configuration files, the data in the secure file takes precedence.
The secure file location can be changed with the ``-s`` option and follows
the same file format as the Nodepool configuration file.
There is an optional logging configuration file, specified with the ``-l``
option. The logging configuration file can accept either:
* the traditional ini python logging `configuration file format
<https://docs.python.org/2/library/logging.config.html#configuration-file-format>`_.
* a `.yml` or `.yaml` suffixed file that will be parsed and loaded as the newer
`dictConfig format
<https://docs.python.org/2/library/logging.config.html#configuration-dictionary-schema>`_.
The Nodepool configuration file is described in :ref:`configuration`.
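For illustration, a minimal ``.yaml`` logging file in the dictConfig format
might look like the sketch below; the formatter, handler, and logger names
are only examples, not names Nodepool requires::

  version: 1
  formatters:
    simple:
      format: '%(asctime)s %(levelname)s %(name)s: %(message)s'
  handlers:
    console:
      class: logging.StreamHandler
      formatter: simple
      level: INFO
  loggers:
    nodepool:
      handlers:
        - console
      level: DEBUG
      propagate: false
  root:
    handlers:
      - console
    level: WARNING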


@ -5,13 +5,17 @@ Operation
Nodepool has two components which run as daemons. The
``nodepool-builder`` daemon is responsible for building diskimages and
uploading them to providers, and the ``nodepoold`` daemon is
uploading them to providers, and the ``nodepool-launcher`` daemon is
responsible for launching and deleting nodes.
Both daemons frequently re-read their configuration file after
starting to support adding or removing new images and providers, or
otherwise altering the configuration.
These daemons communicate with each other via a ZooKeeper database.
You must run ZooKeeper and at least one of each of these daemons to
have a functioning Nodepool installation.
Nodepool-builder
----------------
@ -31,14 +35,14 @@ safe, it is recommended to run a single instance of
only a single build thread (the default).
Nodepoold
---------
Nodepool-launcher
-----------------
The main nodepool daemon is named ``nodepoold`` and is responsible for
launching instances from the images created and uploaded by
``nodepool-builder``.
The main nodepool daemon is named ``nodepool-launcher`` and is
responsible for managing cloud instances launched from the images
created and uploaded by ``nodepool-builder``.
When a new image is created and uploaded, ``nodepoold`` will
When a new image is created and uploaded, ``nodepool-launcher`` will
immediately start using it when launching nodes (Nodepool always uses
the most recent image for a given provider in the ``ready`` state).
Nodepool will delete images if they are not the most recent or second
@ -51,9 +55,9 @@ using the previous image.
Daemon usage
------------
To start the main Nodepool daemon, run **nodepoold**:
To start the main Nodepool daemon, run **nodepool-launcher**:
.. program-output:: nodepoold --help
.. program-output:: nodepool-launcher --help
:nostderr:
To start the nodepool-builder daemon, run **nodepool-builder**:
@ -77,21 +81,73 @@ When Nodepool creates instances, it will assign the following nova
metadata:
groups
A json-encoded list containing the name of the image and the name
A comma-separated list containing the name of the image and the name
of the provider. This may be used by the Ansible OpenStack
inventory plugin.
nodepool
A json-encoded dictionary with the following entries:
nodepool_image_name
The name of the image as a string.
image_name
The name of the image as a string.
nodepool_provider_name
The name of the provider as a string.
provider_name
The name of the provider as a string.
nodepool_node_id
The nodepool id of the node as an integer.
node_id
The nodepool id of the node as an integer.
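As a rough illustration using the flat key names above (all values here are
invented), the metadata recorded on an instance might look like::

  groups: ubuntu-xenial,provider1
  nodepool_image_name: ubuntu-xenial
  nodepool_provider_name: provider1
  nodepool_node_id: 123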
Common Management Tasks
-----------------------
In the course of running a Nodepool service you will find that there are
some common operations that will be performed. Like the services
themselves, these are split into two groups: image management and
instance management.
Image Management
~~~~~~~~~~~~~~~~
Before Nodepool can launch any cloud instances it must have images to boot
from. ``nodepool dib-image-list`` will show you which images are available
locally on disk. These images on disk are then uploaded to clouds;
``nodepool image-list`` will show you what images are bootable in your
various clouds.
If you need to force a new image to be built to pick up a new feature more
quickly than the normal rebuild cycle (which defaults to 24 hours), you can
manually trigger a rebuild. Using ``nodepool image-build`` you can tell
Nodepool to begin a new image build now. Note that depending on work that
the nodepool-builder is already performing, this may queue the build. Check
``nodepool dib-image-list`` to see the current state of the builds. Once
the image is built, it is automatically uploaded to all of the clouds
configured to use that image.
At times you may need to stop using an existing image because it is broken.
Your two major options here are to build a new image to replace the existing
image, or to delete the existing image and have Nodepool fall back on using
the previous image. Rebuilding and uploading can be slow, so typically the
best option is to simply ``nodepool image-delete`` the most recent image,
which will cause Nodepool to fall back on using the previous image. However,
if you do this without "pausing" the image it will be immediately reuploaded.
You will want to pause the image if you need to further investigate why
the image is not being built correctly. If you know the image will be built
correctly, you can simply delete the built image and remove it from all
clouds using ``nodepool dib-image-delete``, which will cause it to be rebuilt.
Instance Management
~~~~~~~~~~~~~~~~~~~
With working images in providers you should see Nodepool launching instances
in these providers using the images it built. You may find that you need to
debug a particular job failure manually. An easy way to do this is to
``nodepool hold`` an instance, then log in to the instance and perform any
necessary debugging steps. Note that this doesn't stop the job running there;
what it does is prevent Nodepool from automatically deleting this instance
once the job is complete.
In some circumstances, like manually holding an instance as above or wanting
to force a job restart, you may want to delete a running instance. You can
issue a ``nodepool delete`` to force nodepool to do this.
Complete command help info is below.
Command Line Tools
------------------
@ -151,38 +207,11 @@ If Nodepool's database gets out of sync with reality, the following
commands can help identify compute instances or images that are
unknown to Nodepool:
alien-list
^^^^^^^^^^
.. program-output:: nodepool alien-list --help
:nostderr:
alien-image-list
^^^^^^^^^^^^^^^^
.. program-output:: nodepool alien-image-list --help
:nostderr:
In the case that a job is randomly failing for an unknown cause, it
may be necessary to instruct nodepool to automatically hold a node on
which that job has failed. To do so, use the ``job-create``
command to specify the job name and how many failed nodes should be
held. When debugging is complete, use ''job-delete'' to disable the
feature.
job-create
^^^^^^^^^^
.. program-output:: nodepool job-create --help
:nostderr:
job-list
^^^^^^^^
.. program-output:: nodepool job-list --help
:nostderr:
job-delete
^^^^^^^^^^
.. program-output:: nodepool job-delete --help
:nostderr:
Removing a Provider
-------------------


@ -1,45 +0,0 @@
.. _scripts:
Node Ready Scripts
==================
Each label can specify a ready script with `ready-script`. This script can be
used to perform any last minute changes to a node after it has been launched
but before it is put in the READY state to receive jobs. In particular, it
can read the files in /etc/nodepool to perform multi-node related setup.
Those files include:
**/etc/nodepool/role**
Either the string ``primary`` or ``sub`` indicating whether this
node is the primary (the node added to the target and which will run
the job), or a sub-node.
**/etc/nodepool/node**
The IP address of this node.
**/etc/nodepool/node_private**
The private IP address of this node.
**/etc/nodepool/primary_node**
The IP address of the primary node, usable for external access.
**/etc/nodepool/primary_node_private**
The Private IP address of the primary node, for internal communication.
**/etc/nodepool/sub_nodes**
The IP addresses of the sub nodes, one on each line,
usable for external access.
**/etc/nodepool/sub_nodes_private**
The Private IP addresses of the sub nodes, one on each line.
**/etc/nodepool/id_rsa**
An OpenSSH private key generated specifically for this node group.
**/etc/nodepool/id_rsa.pub**
The corresponding public key.
**/etc/nodepool/provider**
Information about the provider in a shell-usable form. This
includes the following information:
**NODEPOOL_PROVIDER**
The name of the provider
**NODEPOOL_CLOUD**
The name of the cloud
**NODEPOOL_REGION**
The name of the region
**NODEPOOL_AZ**
The name of the availability zone (if available)


@ -1,418 +0,0 @@
#!/usr/bin/env python
# Copyright (C) 2013 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module holds classes that represent concepts in nodepool's
allocation algorithm.
The algorithm is:
Setup:
* Establish the node providers with their current available
capacity.
* Establish requests that are to be made of each provider for a
certain label.
* Indicate which providers can supply nodes of that label.
* Indicate to which targets nodes of a certain label from a certain
provider may be distributed (and the weight that should be
given to each target when distributing).
Run:
* For each label, set the requested number of nodes from each
provider to be proportional to that providers overall capacity.
* Define the 'priority' of a request as the number of requests for
the same label from other providers.
* For each provider, sort the requests by the priority. This puts
requests that can be serviced by the fewest providers first.
* Grant each such request in proportion to that requests portion of
the total amount requested by requests of the same priority.
* The nodes allocated by a grant are then distributed to the targets
which are associated with the provider and label, in proportion to
that target's portion of the sum of the weights of each target for
that label.
"""
import functools
# History allocation tracking
# The goal of the history allocation tracking is to ensure forward
# progress by not starving any particular label when in over-quota
# situations. For example, if you have two labels, say 'fedora' and
# 'ubuntu', and 'ubuntu' is requesting many more nodes than 'fedora',
# it is quite possible that 'fedora' never gets any allocations. If
# 'fedora' is required for a gate-check job, older changes may wait
# in Zuul's pipelines longer than expected while jobs for newer
# changes continue to receive 'ubuntu' nodes and overall merge
# throughput decreases during such contention.
#
# We track the history of allocations by label. A persistent
# AllocationHistory object should be kept and passed along with each
# AllocationRequest, which records its initial request in the history
# via recordRequest().
#
# When a sub-allocation gets a grant, it records this via a call to
# AllocationHistory.recordGrant(). All the sub-allocations
# contribute to tracking the total grants for the parent
# AllocationRequest.
#
# When finished requesting grants from all providers,
# AllocationHistory.grantsDone() should be called to store the
# allocation state in the history.
#
# This history is used AllocationProvider.makeGrants() to prioritize
# requests that have not been granted in prior iterations.
# AllocationHistory.getWaitTime will return how many iterations
# each label has been waiting for an allocation.
class AllocationHistory(object):
'''A history of allocation requests and grants'''
def __init__(self, history=100):
# current allocations for this iteration
# keeps elements of type
# label -> (request, granted)
self.current_allocations = {}
self.history = history
# list of up to <history> previous current_allocation
# dictionaries
self.past_allocations = []
def recordRequest(self, label, amount):
try:
a = self.current_allocations[label]
a['requested'] += amount
except KeyError:
self.current_allocations[label] = dict(requested=amount,
allocated=0)
def recordGrant(self, label, amount):
try:
a = self.current_allocations[label]
a['allocated'] += amount
except KeyError:
# granted but not requested? shouldn't happen
raise
def grantsDone(self):
# save this round of allocations/grants up to our history
self.past_allocations.insert(0, self.current_allocations)
self.past_allocations = self.past_allocations[:self.history]
self.current_allocations = {}
def getWaitTime(self, label):
# go through the history of allocations and calculate how many
# previous iterations this label has received none of its
# requested allocations.
wait = 0
# We don't look at the current_alloctions here; only
# historical. With multiple providers, possibly the first
# provider has given nodes to the waiting label (which would
# be recorded in current_allocations), and a second provider
# should fall back to using the usual ratio-based mechanism?
for i, a in enumerate(self.past_allocations):
if (label in a) and (a[label]['allocated'] == 0):
wait = i + 1
continue
# only interested in consecutive failures to allocate.
break
return wait
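# Worked example (illustrative, not in the original source): if the two
# most recent entries in past_allocations show 'fedora' requested but
# allocated 0, and the entry before that shows it allocated 3, then
# getWaitTime('fedora') returns 2. The scan breaks at the first
# iteration in which the label received something, so only consecutive
# misses counting back from the most recent iteration are included.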
class AllocationProvider(object):
"""A node provider and its capacity."""
def __init__(self, name, available):
self.name = name
# if this is negative, many of the calculations go wrong and
# we start handing out nodes that don't exist.
self.available = available if available >= 0 else 0
self.sub_requests = []
self.grants = []
def __repr__(self):
return '<AllocationProvider %s>' % self.name
def makeGrants(self):
# build a list of (request,wait-time) tuples
all_reqs = [(x, x.getWaitTime()) for x in self.sub_requests]
# reqs with no wait time get processed via ratio mechanism
reqs = [x[0] for x in all_reqs if x[1] == 0]
# we prioritize whoever has been waiting the longest and give
# them whatever is available. If we run out, put them back in
# the ratio queue
waiters = [x for x in all_reqs if x[1] != 0]
waiters.sort(key=lambda x: x[1], reverse=True)
for w in waiters:
w = w[0]
if self.available > 0:
w.grant(min(int(w.amount), self.available))
else:
reqs.append(w)
# Sort the remaining requests by priority so we fill the most
# specific requests first (e.g., if this provider is the only
# one that can supply foo nodes, then it should focus on
# supplying them and leave bar nodes to other providers).
reqs.sort(key=lambda x: x.getPriority())
for req in reqs:
total_requested = 0.0
# Within a specific priority, limit the number of
# available nodes to a value proportionate to the request.
reqs_at_this_level = [r for r in reqs
if r.getPriority() == req.getPriority()]
for r in reqs_at_this_level:
total_requested += r.amount
if total_requested:
ratio = float(req.amount) / total_requested
else:
ratio = 0.0
grant = int(round(req.amount))
grant = min(grant, int(round(self.available * ratio)))
# This adjusts our availability as well as the values of
# other requests, so values will be correct the next time
# through the loop.
req.grant(grant)
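# Worked example (illustrative only): a provider with 10 nodes available
# and two same-priority sub-requests for 30 and 10 nodes. The first pass
# uses ratio 30/40 = 0.75, granting min(30, round(10 * 0.75)) = 8 nodes;
# that leaves 2 nodes available, so the second request is granted
# min(10, 2) = 2.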
class AllocationRequest(object):
"""A request for a number of labels."""
def __init__(self, name, amount, history=None):
self.name = name
self.amount = float(amount)
# Sub-requests of individual providers that make up this
# request. AllocationProvider -> AllocationSubRequest
self.sub_requests = {}
# Targets to which nodes from this request may be assigned.
# AllocationTarget -> AllocationRequestTarget
self.request_targets = {}
if history is not None:
self.history = history
else:
self.history = AllocationHistory()
self.history.recordRequest(name, amount)
# subrequests use these
self.recordGrant = functools.partial(self.history.recordGrant, name)
self.getWaitTime = functools.partial(self.history.getWaitTime, name)
def __repr__(self):
return '<AllocationRequest for %s of %s>' % (self.amount, self.name)
def addTarget(self, target, current):
art = AllocationRequestTarget(self, target, current)
self.request_targets[target] = art
def addProvider(self, provider, target, subnodes):
# Handle being called multiple times with different targets.
s = self.sub_requests.get(provider)
if not s:
s = AllocationSubRequest(self, provider, subnodes)
agt = s.addTarget(self.request_targets[target])
self.sub_requests[provider] = s
if s not in provider.sub_requests:
provider.sub_requests.append(s)
self.makeRequests()
return s, agt
def makeRequests(self):
# (Re-)distribute this request across all of its providers.
total_available = 0.0
for sub_request in self.sub_requests.values():
total_available += sub_request.provider.available
for sub_request in self.sub_requests.values():
if total_available:
ratio = float(sub_request.provider.available) / total_available
else:
ratio = 0.0
sub_request.setAmount(ratio * self.amount)
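# Worked example (illustrative only): a request for 10 nodes spread over
# two providers with 20 and 5 nodes available. total_available is 25, so
# the sub-request amounts become 10 * 20/25 = 8.0 and 10 * 5/25 = 2.0.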
class AllocationSubRequest(object):
"""A request for a number of images from a specific provider."""
def __init__(self, request, provider, subnodes):
self.request = request
self.provider = provider
self.amount = 0.0
self.subnodes = subnodes
self.targets = []
def __repr__(self):
return '<AllocationSubRequest for %s (out of %s) of %s from %s>' % (
self.amount, self.request.amount, self.request.name,
self.provider.name)
def addTarget(self, request_target):
agt = AllocationGrantTarget(self, request_target)
self.targets.append(agt)
return agt
def setAmount(self, amount):
self.amount = amount
def getPriority(self):
return len(self.request.sub_requests)
def getWaitTime(self):
return self.request.getWaitTime()
def grant(self, amount):
# Grant this request (with the supplied amount). Adjust this
# sub-request's value to the actual, as well as the values of
# any remaining sub-requests.
# fractional amounts don't make sense
assert int(amount) == amount
# Remove from the set of sub-requests so that this is not
# included in future calculations.
self.provider.sub_requests.remove(self)
del self.request.sub_requests[self.provider]
if amount > 0:
grant = AllocationGrant(self.request, self.provider,
amount, self.targets)
self.request.recordGrant(amount)
# This is now a grant instead of a request.
self.provider.grants.append(grant)
else:
grant = None
amount = 0
self.amount = amount
# Adjust provider and request values accordingly.
self.request.amount -= amount
subnode_factor = 1 + self.subnodes
self.provider.available -= (amount * subnode_factor)
# Adjust the requested values for related sub-requests.
self.request.makeRequests()
# Allocate these granted nodes to targets.
if grant:
grant.makeAllocations()
class AllocationGrant(object):
"""A grant of a certain number of nodes of an image from a
specific provider."""
def __init__(self, request, provider, amount, targets):
self.request = request
self.provider = provider
self.amount = amount
self.targets = targets
def __repr__(self):
return '<AllocationGrant of %s of %s from %s>' % (
self.amount, self.request.name, self.provider.name)
def makeAllocations(self):
# Allocate this grant to the linked targets.
total_current = 0
for agt in self.targets:
total_current += agt.request_target.current
amount = self.amount
# Add the nodes in this allocation to the total number of
# nodes for this image so that we're setting our target
# allocations based on a portion of the total future nodes.
total_current += amount
remaining_targets = len(self.targets)
for agt in self.targets:
# Evenly distribute the grants across all targets
ratio = 1.0 / remaining_targets
# Take the weight and apply it to the total number of
# nodes to this image to figure out how many of the total
# nodes should ideally be on this target.
desired_count = int(round(ratio * total_current))
# The number of nodes off from our calculated target.
delta = desired_count - agt.request_target.current
# Use the delta as the allocation for this target, but
# make sure it's bounded by 0 and the number of nodes we
# have available to allocate.
allocation = min(delta, amount)
allocation = max(allocation, 0)
# The next time through the loop, we have reduced our
# grant by this amount.
amount -= allocation
# Don't consider this target's count in the total number
# of nodes in the next iteration, nor the nodes we have
# just allocated.
total_current -= agt.request_target.current
total_current -= allocation
# Since we aren't considering this target's count, also
# don't consider this target itself when calculating the
# ratio.
remaining_targets -= 1
# Set the amount of this allocation.
agt.allocate(allocation)
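# Worked example (illustrative only): a grant of 6 nodes and two targets
# currently holding 4 and 0 nodes of this image. total_current becomes
# 4 + 0 + 6 = 10, so each target should ideally hold 5. The first target
# is 1 short (5 - 4) and is allocated 1 node; the remaining 5 nodes and
# the remaining target are then considered alone, and it receives the
# other 5. Allocations therefore tend to even out per-target node counts.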
class AllocationTarget(object):
"""A target to which nodes may be assigned."""
def __init__(self, name):
self.name = name
def __repr__(self):
return '<AllocationTarget %s>' % (self.name)
class AllocationRequestTarget(object):
"""A request associated with a target to which nodes may be assigned."""
def __init__(self, request, target, current):
self.target = target
self.request = request
self.current = current
class AllocationGrantTarget(object):
"""A target for a specific grant to which nodes may be assigned."""
def __init__(self, sub_request, request_target):
self.sub_request = sub_request
self.request_target = request_target
self.amount = 0
def __repr__(self):
return '<AllocationGrantTarget for %s of %s to %s>' % (
self.amount, self.sub_request.request.name,
self.request_target.target.name)
def allocate(self, amount):
# This is essentially the output of this system. This
# represents the number of nodes of a specific image from a
# specific provider that should be assigned to a specific
# target.
self.amount = amount
# Update the number of nodes of this image that are assigned
# to this target to assist in other allocation calculations
self.request_target.current += amount
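# ---------------------------------------------------------------------------
# Illustrative end-to-end sketch (not part of the original module or this
# change): the Setup/Run flow from the module docstring with one provider,
# one target and two made-up labels. The function name and the provider,
# target and label names are invented for illustration only.
def _example_allocation_round():
    history = AllocationHistory()
    provider = AllocationProvider('example-cloud', available=10)
    target = AllocationTarget('example-target')
    requests = [AllocationRequest('ubuntu', 30, history),
                AllocationRequest('fedora', 10, history)]
    for request in requests:
        # Register the target, then tie the request to the provider.
        request.addTarget(target, current=0)
        request.addProvider(provider, target, subnodes=0)
    # Grants are proportional to the requested amounts, so with 10 nodes
    # available this should hand out roughly 8 'ubuntu' and 2 'fedora'
    # nodes, record them in the history, and close out the iteration.
    provider.makeGrants()
    history.grantsDone()
    for grant in provider.grants:
        for agt in grant.targets:
            print('%s: %s nodes of %s' % (agt.request_target.target.name,
                                          agt.amount, grant.request.name))
# Calling _example_allocation_round() would print the per-target grants.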

View File

@ -21,20 +21,22 @@ import subprocess
import threading
import time
import shlex
import sys
import uuid
import config as nodepool_config
import exceptions
import provider_manager
import stats
import zk
from nodepool import config as nodepool_config
from nodepool import exceptions
from nodepool import provider_manager
from nodepool import stats
from nodepool import zk
MINS = 60
HOURS = 60 * MINS
IMAGE_TIMEOUT = 6 * HOURS # How long to wait for an image save
SUSPEND_WAIT_TIME = 30 # How long to wait between checks for
# ZooKeeper connectivity if it disappears.
# How long to wait for an image save
IMAGE_TIMEOUT = 6 * HOURS
# How long to wait between checks for ZooKeeper connectivity if it disappears.
SUSPEND_WAIT_TIME = 30
# HP Cloud requires qemu compat with 0.10. That version works elsewhere,
# so just hardcode it for all qcow2 building
@ -108,17 +110,19 @@ class DibImageFile(object):
class BaseWorker(threading.Thread):
def __init__(self, config_path, interval, zk):
def __init__(self, builder_id, config_path, secure_path, interval, zk):
super(BaseWorker, self).__init__()
self.log = logging.getLogger("nodepool.builder.BaseWorker")
self.daemon = True
self._running = False
self._config = None
self._config_path = config_path
self._secure_path = secure_path
self._zk = zk
self._hostname = socket.gethostname()
self._statsd = stats.get_client()
self._interval = interval
self._builder_id = builder_id
def _checkForZooKeeperChanges(self, new_config):
'''
@ -129,7 +133,7 @@ class BaseWorker(threading.Thread):
'''
if self._config.zookeeper_servers != new_config.zookeeper_servers:
self.log.debug("Detected ZooKeeper server changes")
self._zk.resetHosts(new_config.zookeeper_servers.values())
self._zk.resetHosts(list(new_config.zookeeper_servers.values()))
@property
def running(self):
@ -145,9 +149,12 @@ class CleanupWorker(BaseWorker):
and any local DIB builds.
'''
def __init__(self, name, config_path, interval, zk):
super(CleanupWorker, self).__init__(config_path, interval, zk)
self.log = logging.getLogger("nodepool.builder.CleanupWorker.%s" % name)
def __init__(self, name, builder_id, config_path, secure_path,
interval, zk):
super(CleanupWorker, self).__init__(builder_id, config_path,
secure_path, interval, zk)
self.log = logging.getLogger(
"nodepool.builder.CleanupWorker.%s" % name)
self.name = 'CleanupWorker.%s' % name
def _buildUploadRecencyTable(self):
@ -178,7 +185,7 @@ class CleanupWorker(BaseWorker):
)
# Sort uploads by state_time (upload time) and keep the 2 most recent
for i in self._rtable.keys():
for i in list(self._rtable.keys()):
for p in self._rtable[i].keys():
self._rtable[i][p].sort(key=lambda x: x[2], reverse=True)
self._rtable[i][p] = self._rtable[i][p][:2]
@ -222,27 +229,32 @@ class CleanupWorker(BaseWorker):
if e.errno != 2: # No such file or directory
raise e
def _deleteLocalBuild(self, image, build_id, builder):
def _deleteLocalBuild(self, image, build):
'''
Remove expired image build from local disk.
:param str image: Name of the image whose build we are deleting.
:param str build_id: ID of the build we want to delete.
:param str builder: hostname of the build.
:param ImageBuild build: The build we want to delete.
:returns: True if files were deleted, False if none were found.
'''
base = "-".join([image, build_id])
base = "-".join([image, build.id])
files = DibImageFile.from_image_id(self._config.imagesdir, base)
if not files:
# NOTE(pabelanger): It is possible we don't have any files because
# diskimage-builder failed. So, check to see if we have the correct
# builder so we can remove the data from zookeeper.
if builder == self._hostname:
# To maintain backward compatibility with builders that didn't
# use unique builder IDs before, but do now, always compare to
# hostname as well since some ZK data may still reference that.
if (build.builder_id == self._builder_id or
build.builder == self._hostname
):
return True
return False
self.log.info("Doing cleanup for %s:%s" % (image, build_id))
self.log.info("Doing cleanup for %s:%s" % (image, build.id))
manifest_dir = None
@ -251,7 +263,8 @@ class CleanupWorker(BaseWorker):
if not manifest_dir:
path, ext = filename.rsplit('.', 1)
manifest_dir = path + ".d"
map(self._removeDibItem, [filename, f.md5_file, f.sha256_file])
items = [filename, f.md5_file, f.sha256_file]
list(map(self._removeDibItem, items))
try:
shutil.rmtree(manifest_dir)
@ -271,8 +284,7 @@ class CleanupWorker(BaseWorker):
self._deleteUpload(upload)
def _cleanupObsoleteProviderUploads(self, provider, image, build_id):
image_names_for_provider = provider.images.keys()
if image in image_names_for_provider:
if image in provider.diskimages:
# This image is in use for this provider
return
@ -353,9 +365,7 @@ class CleanupWorker(BaseWorker):
for build in builds:
base = "-".join([image, build.id])
files = DibImageFile.from_image_id(self._config.imagesdir, base)
# If we have local dib files OR if our hostname matches the
# recorded owner hostname, consider this our build.
if files or (self._hostname == build.builder):
if files:
ret.append(build)
return ret
@ -388,7 +398,8 @@ class CleanupWorker(BaseWorker):
self.log.info("Removing failed upload record: %s" % upload)
self._zk.deleteUpload(image, build_id, provider, upload.id)
elif upload.state == zk.DELETING:
self.log.info("Removing deleted upload and record: %s" % upload)
self.log.info(
"Removing deleted upload and record: %s" % upload)
self._deleteUpload(upload)
elif upload.state == zk.FAILED:
self.log.info("Removing failed upload and record: %s" % upload)
@ -403,7 +414,7 @@ class CleanupWorker(BaseWorker):
all_builds = self._zk.getBuilds(image)
builds_to_keep = set([b for b in sorted(all_builds, reverse=True,
key=lambda y: y.state_time)
if b.state==zk.READY][:2])
if b.state == zk.READY][:2])
local_builds = set(self._filterLocalBuilds(image, all_builds))
diskimage = self._config.diskimages.get(image)
if not diskimage and not local_builds:
@ -471,7 +482,7 @@ class CleanupWorker(BaseWorker):
self._zk.storeBuild(image, build, build.id)
# Release the lock here so we can delete the build znode
if self._deleteLocalBuild(image, build.id, build.builder):
if self._deleteLocalBuild(image, build):
if not self._zk.deleteBuild(image, build.id):
self.log.error("Unable to delete build %s because"
" uploads still remain.", build)
@ -483,9 +494,13 @@ class CleanupWorker(BaseWorker):
self._running = True
while self._running:
# Don't do work if we've lost communication with the ZK cluster
did_suspend = False
while self._zk and (self._zk.suspended or self._zk.lost):
did_suspend = True
self.log.info("ZooKeeper suspended. Waiting")
time.sleep(SUSPEND_WAIT_TIME)
if did_suspend:
self.log.info("ZooKeeper available. Resuming")
try:
self._run()
@ -502,6 +517,8 @@ class CleanupWorker(BaseWorker):
Body of run method for exception handling purposes.
'''
new_config = nodepool_config.loadConfig(self._config_path)
if self._secure_path:
nodepool_config.loadSecureConfig(new_config, self._secure_path)
if not self._config:
self._config = new_config
@ -514,38 +531,14 @@ class CleanupWorker(BaseWorker):
class BuildWorker(BaseWorker):
def __init__(self, name, config_path, interval, zk, dib_cmd):
super(BuildWorker, self).__init__(config_path, interval, zk)
def __init__(self, name, builder_id, config_path, secure_path,
interval, zk, dib_cmd):
super(BuildWorker, self).__init__(builder_id, config_path, secure_path,
interval, zk)
self.log = logging.getLogger("nodepool.builder.BuildWorker.%s" % name)
self.name = 'BuildWorker.%s' % name
self.dib_cmd = dib_cmd
def _running_under_virtualenv(self):
# NOTE: borrowed from pip:locations.py
if hasattr(sys, 'real_prefix'):
return True
elif sys.prefix != getattr(sys, "base_prefix", sys.prefix):
return True
return False
def _activate_virtualenv(self):
"""Run as a pre-exec function to activate current virtualenv
If we are invoked directly as /path/ENV/nodepool-builder (as
done by an init script, for example) then /path/ENV/bin will
not be in our $PATH, meaning we can't find disk-image-create.
Apart from that, dib also needs to run in an activated
virtualenv so it can find utils like dib-run-parts. Run this
before exec of dib to ensure the current virtualenv (if any)
is activated.
"""
if self._running_under_virtualenv():
activate_this = os.path.join(sys.prefix, "bin", "activate_this.py")
if not os.path.exists(activate_this):
raise exceptions.BuilderError("Running in a virtualenv, but "
"cannot find: %s" % activate_this)
execfile(activate_this, dict(__file__=activate_this))
def _checkForScheduledImageUpdates(self):
'''
Check every DIB image to see if it has aged out and needs to be rebuilt.
@ -553,7 +546,7 @@ class BuildWorker(BaseWorker):
for diskimage in self._config.diskimages.values():
# Check if we've been told to shutdown
# or if ZK connection is suspended
if not self.running or self._zk.suspended or self._zk.lost:
if not self._running or self._zk.suspended or self._zk.lost:
return
try:
self._checkImageForScheduledImageUpdates(diskimage)
@ -586,7 +579,8 @@ class BuildWorker(BaseWorker):
if (not builds
or (now - builds[0].state_time) >= diskimage.rebuild_age
or not set(builds[0].formats).issuperset(diskimage.image_types)
):
):
try:
with self._zk.imageBuildLock(diskimage.name, blocking=False):
# To avoid locking each image repeatedly, we have an
@ -595,7 +589,8 @@ class BuildWorker(BaseWorker):
# lock acquisition. If it's not the same build as
# identified in the first check above, assume another
# BuildWorker created the build for us and continue.
builds2 = self._zk.getMostRecentBuilds(1, diskimage.name, zk.READY)
builds2 = self._zk.getMostRecentBuilds(
1, diskimage.name, zk.READY)
if builds2 and builds[0].id != builds2[0].id:
return
@ -603,6 +598,7 @@ class BuildWorker(BaseWorker):
data = zk.ImageBuild()
data.state = zk.BUILDING
data.builder_id = self._builder_id
data.builder = self._hostname
data.formats = list(diskimage.image_types)
@ -620,7 +616,7 @@ class BuildWorker(BaseWorker):
for diskimage in self._config.diskimages.values():
# Check if we've been told to shutdown
# or if ZK connection is suspended
if not self.running or self._zk.suspended or self._zk.lost:
if not self._running or self._zk.suspended or self._zk.lost:
return
try:
self._checkImageForManualBuildRequest(diskimage)
@ -653,6 +649,7 @@ class BuildWorker(BaseWorker):
data = zk.ImageBuild()
data.state = zk.BUILDING
data.builder_id = self._builder_id
data.builder = self._hostname
data.formats = list(diskimage.image_types)
@ -719,7 +716,6 @@ class BuildWorker(BaseWorker):
shlex.split(cmd),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
preexec_fn=self._activate_virtualenv,
env=env)
except OSError as e:
raise exceptions.BuilderError(
@ -738,19 +734,26 @@ class BuildWorker(BaseWorker):
# interrupted during the build. If so, wait for it to return.
# It could transition directly from SUSPENDED to CONNECTED, or go
# through the LOST state before CONNECTED.
did_suspend = False
while self._zk.suspended or self._zk.lost:
did_suspend = True
self.log.info("ZooKeeper suspended during build. Waiting")
time.sleep(SUSPEND_WAIT_TIME)
if did_suspend:
self.log.info("ZooKeeper available. Resuming")
build_data = zk.ImageBuild()
build_data.builder_id = self._builder_id
build_data.builder = self._hostname
build_data.username = diskimage.username
if self._zk.didLoseConnection:
self.log.info("ZooKeeper lost while building %s" % diskimage.name)
self._zk.resetLostFlag()
build_data.state = zk.FAILED
elif p.returncode:
self.log.info("DIB failed creating %s" % diskimage.name)
self.log.info(
"DIB failed creating %s (%s)" % (diskimage.name, p.returncode))
build_data.state = zk.FAILED
else:
self.log.info("DIB image %s is built" % diskimage.name)
@ -760,7 +763,8 @@ class BuildWorker(BaseWorker):
if self._statsd:
# record stats on the size of each image we create
for ext in img_types.split(','):
key = 'nodepool.dib_image_build.%s.%s.size' % (diskimage.name, ext)
key = 'nodepool.dib_image_build.%s.%s.size' % (
diskimage.name, ext)
# A bit tricky because these image files may be sparse
# files; we only want the true size of the file for
# purposes of watching if we've added too much stuff
@ -780,9 +784,13 @@ class BuildWorker(BaseWorker):
self._running = True
while self._running:
# Don't do work if we've lost communication with the ZK cluster
did_suspend = False
while self._zk and (self._zk.suspended or self._zk.lost):
did_suspend = True
self.log.info("ZooKeeper suspended. Waiting")
time.sleep(SUSPEND_WAIT_TIME)
if did_suspend:
self.log.info("ZooKeeper available. Resuming")
try:
self._run()
@ -798,6 +806,8 @@ class BuildWorker(BaseWorker):
'''
# NOTE: For the first iteration, we expect self._config to be None
new_config = nodepool_config.loadConfig(self._config_path)
if self._secure_path:
nodepool_config.loadSecureConfig(new_config, self._secure_path)
if not self._config:
self._config = new_config
@ -809,8 +819,10 @@ class BuildWorker(BaseWorker):
class UploadWorker(BaseWorker):
def __init__(self, name, config_path, interval, zk):
super(UploadWorker, self).__init__(config_path, interval, zk)
def __init__(self, name, builder_id, config_path, secure_path,
interval, zk):
super(UploadWorker, self).__init__(builder_id, config_path,
secure_path, interval, zk)
self.log = logging.getLogger("nodepool.builder.UploadWorker.%s" % name)
self.name = 'UploadWorker.%s' % name
@ -819,6 +831,8 @@ class UploadWorker(BaseWorker):
Reload the nodepool configuration file.
'''
new_config = nodepool_config.loadConfig(self._config_path)
if self._secure_path:
nodepool_config.loadSecureConfig(new_config, self._secure_path)
if not self._config:
self._config = new_config
@ -827,7 +841,8 @@ class UploadWorker(BaseWorker):
use_taskmanager=False)
self._config = new_config
def _uploadImage(self, build_id, upload_id, image_name, images, provider):
def _uploadImage(self, build_id, upload_id, image_name, images, provider,
username):
'''
Upload a local DIB image build to a provider.
@ -837,6 +852,7 @@ class UploadWorker(BaseWorker):
:param list images: A list of DibImageFile objects from this build
that are available for uploading.
:param provider: The provider from the parsed config file.
:param str username: Username to store with the upload record.
'''
start_time = time.time()
timestamp = int(start_time)
@ -858,19 +874,15 @@ class UploadWorker(BaseWorker):
filename = image.to_path(self._config.imagesdir, with_extension=True)
dummy_image = type('obj', (object,),
{'name': image_name, 'id': image.image_id})
ext_image_name = provider.template_hostname.format(
provider=provider, image=dummy_image,
timestamp=str(timestamp)
ext_image_name = provider.image_name_format.format(
image_name=image_name, timestamp=str(timestamp)
)
self.log.info("Uploading DIB image build %s from %s to %s" %
(build_id, filename, provider.name))
manager = self._config.provider_managers[provider.name]
provider_image = provider.images.get(image_name)
provider_image = provider.diskimages.get(image_name)
if provider_image is None:
raise exceptions.BuilderInvalidCommandError(
"Could not find matching provider image for %s" % image_name
@ -910,6 +922,9 @@ class UploadWorker(BaseWorker):
data.state = zk.READY
data.external_id = external_id
data.external_name = ext_image_name
data.format = image.extension
data.username = username
return data
def _checkForProviderUploads(self):
@ -920,12 +935,12 @@ class UploadWorker(BaseWorker):
to providers, do the upload if they are available on the local disk.
'''
for provider in self._config.providers.values():
for image in provider.images.values():
for image in provider.diskimages.values():
uploaded = False
# Check if we've been told to shutdown
# or if ZK connection is suspended
if not self.running or self._zk.suspended or self._zk.lost:
if not self._running or self._zk.suspended or self._zk.lost:
return
try:
uploaded = self._checkProviderImageUpload(provider, image)
@ -952,7 +967,7 @@ class UploadWorker(BaseWorker):
:returns: True if an upload was attempted, False otherwise.
'''
# Check if image uploads are paused.
if provider.images.get(image.name).pause:
if provider.diskimages.get(image.name).pause:
return False
# Search for the most recent 'ready' image build
@ -1003,11 +1018,14 @@ class UploadWorker(BaseWorker):
# New upload number with initial state 'uploading'
data = zk.ImageUpload()
data.state = zk.UPLOADING
data.username = build.username
upnum = self._zk.storeImageUpload(
image.name, build.id, provider.name, data)
data = self._uploadImage(build.id, upnum, image.name,
local_images, provider)
local_images, provider,
build.username)
# Set final state
self._zk.storeImageUpload(image.name, build.id,
@ -1025,9 +1043,13 @@ class UploadWorker(BaseWorker):
self._running = True
while self._running:
# Don't do work if we've lost communication with the ZK cluster
did_suspend = False
while self._zk and (self._zk.suspended or self._zk.lost):
did_suspend = True
self.log.info("ZooKeeper suspended. Waiting")
time.sleep(SUSPEND_WAIT_TIME)
if did_suspend:
self.log.info("ZooKeeper available. Resuming")
try:
self._reloadConfig()
@ -1051,15 +1073,19 @@ class NodePoolBuilder(object):
'''
log = logging.getLogger("nodepool.builder.NodePoolBuilder")
def __init__(self, config_path, num_builders=1, num_uploaders=4):
def __init__(self, config_path, secure_path=None,
num_builders=1, num_uploaders=4, fake=False):
'''
Initialize the NodePoolBuilder object.
:param str config_path: Path to configuration file.
:param str secure_path: Path to secure configuration file.
:param int num_builders: Number of build workers to start.
:param int num_uploaders: Number of upload workers to start.
:param bool fake: Whether to fake the image builds.
'''
self._config_path = config_path
self._secure_path = secure_path
self._config = None
self._num_builders = num_builders
self._build_workers = []
@ -1070,7 +1096,11 @@ class NodePoolBuilder(object):
self.cleanup_interval = 60
self.build_interval = 10
self.upload_interval = 10
self.dib_cmd = 'disk-image-create'
if fake:
self.dib_cmd = os.path.join(os.path.dirname(__file__), '..',
'nodepool/tests/fake-image-create')
else:
self.dib_cmd = 'disk-image-create'
self.zk = None
# This lock is needed because the run() method is started in a
@ -1079,21 +1109,34 @@ class NodePoolBuilder(object):
# startup process has completed.
self._start_lock = threading.Lock()
#=======================================================================
# ======================================================================
# Private methods
#=======================================================================
# ======================================================================
def _getBuilderID(self, id_file):
if not os.path.exists(id_file):
with open(id_file, "w") as f:
builder_id = str(uuid.uuid4())
f.write(builder_id)
return builder_id
with open(id_file, "r") as f:
builder_id = f.read()
return builder_id
def _getAndValidateConfig(self):
config = nodepool_config.loadConfig(self._config_path)
if self._secure_path:
nodepool_config.loadSecureConfig(config, self._secure_path)
if not config.zookeeper_servers.values():
raise RuntimeError('No ZooKeeper servers specified in config.')
if not config.imagesdir:
raise RuntimeError('No images-dir specified in config.')
return config
#=======================================================================
# ======================================================================
# Public methods
#=======================================================================
# ======================================================================
def start(self):
'''
@ -1110,28 +1153,36 @@ class NodePoolBuilder(object):
self._config = self._getAndValidateConfig()
self._running = True
builder_id_file = os.path.join(self._config.imagesdir,
"builder_id.txt")
builder_id = self._getBuilderID(builder_id_file)
# All worker threads share a single ZooKeeper instance/connection.
self.zk = zk.ZooKeeper()
self.zk.connect(self._config.zookeeper_servers.values())
self.zk.connect(list(self._config.zookeeper_servers.values()))
self.log.debug('Starting listener for build jobs')
# Create build and upload worker objects
for i in range(self._num_builders):
w = BuildWorker(i, self._config_path, self.build_interval,
self.zk, self.dib_cmd)
w = BuildWorker(i, builder_id,
self._config_path, self._secure_path,
self.build_interval, self.zk, self.dib_cmd)
w.start()
self._build_workers.append(w)
for i in range(self._num_uploaders):
w = UploadWorker(i, self._config_path, self.upload_interval,
self.zk)
w = UploadWorker(i, builder_id,
self._config_path, self._secure_path,
self.upload_interval, self.zk)
w.start()
self._upload_workers.append(w)
if self.cleanup_interval > 0:
self._janitor = CleanupWorker(
0, self._config_path, self.cleanup_interval, self.zk)
0, builder_id,
self._config_path, self._secure_path,
self.cleanup_interval, self.zk)
self._janitor.start()
# Wait until all threads are running. Otherwise, we have a race
@ -1154,7 +1205,14 @@ class NodePoolBuilder(object):
'''
with self._start_lock:
self.log.debug("Stopping. NodePoolBuilder shutting down workers")
workers = self._build_workers + self._upload_workers
# Note we intentionally do not add the upload workers to this list.
# Uploads can take many hours and there is no good way to stop the
# blocking writes they perform in order to join() below in a
# reasonable amount of time. Killing the process will stop the
# upload; the record in zk and the image in the cloud will then be
# deleted by any other running builders or when this builder starts
# again.
workers = self._build_workers
if self._janitor:
workers += [self._janitor]
for worker in (workers):

View File

@ -14,6 +14,10 @@
# License for the specific language governing permissions and limitations
# under the License.
import argparse
import daemon
import errno
import extras
import logging
import logging.config
import os
@ -22,6 +26,37 @@ import sys
import threading
import traceback
import yaml
from nodepool.version import version_info as npd_version_info
# as of python-daemon 1.6 it doesn't bundle pidlockfile anymore
# instead it depends on lockfile-0.9.1 which uses pidfile.
pid_file_module = extras.try_imports(['daemon.pidlockfile', 'daemon.pidfile'])
def is_pidfile_stale(pidfile):
""" Determine whether a PID file is stale.
Return 'True' ("stale") if the contents of the PID file are
valid but do not match the PID of a currently-running process;
otherwise return 'False'.
"""
result = False
pidfile_pid = pidfile.read_pid()
if pidfile_pid is not None:
try:
os.kill(pidfile_pid, 0)
except OSError as exc:
if exc.errno == errno.ESRCH:
# The specified PID does not exist
result = True
return result
def stack_dump_handler(signum, frame):
signal.signal(signal.SIGUSR2, signal.SIG_IGN)
@ -45,17 +80,99 @@ def stack_dump_handler(signum, frame):
class NodepoolApp(object):
app_name = None
app_description = 'Node pool.'
def __init__(self):
self.parser = None
self.args = None
def create_parser(self):
parser = argparse.ArgumentParser(description=self.app_description)
parser.add_argument('-l',
dest='logconfig',
help='path to log config file')
parser.add_argument('--version',
action='version',
version=npd_version_info.version_string())
return parser
def setup_logging(self):
if self.args.logconfig:
fp = os.path.expanduser(self.args.logconfig)
if not os.path.exists(fp):
raise Exception("Unable to read logging config file at %s" %
fp)
logging.config.fileConfig(fp)
m = "Unable to read logging config file at %s" % fp
raise Exception(m)
if os.path.splitext(fp)[1] in ('.yml', '.yaml'):
with open(fp, 'r') as f:
logging.config.dictConfig(yaml.safe_load(f))
else:
logging.config.fileConfig(fp)
else:
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)s %(name)s: '
'%(message)s')
m = '%(asctime)s %(levelname)s %(name)s: %(message)s'
logging.basicConfig(level=logging.DEBUG, format=m)
def _main(self, argv=None):
if argv is None:
argv = sys.argv[1:]
self.parser = self.create_parser()
self.args = self.parser.parse_args(argv)
return self._do_run()
def _do_run(self):
# NOTE(jamielennox): set up logging a bit late so it's not done until
# after a DaemonContext is created.
self.setup_logging()
return self.run()
@classmethod
def main(cls, argv=None):
return cls()._main(argv=argv)
def run(self):
"""The app's primary function, override it with your logic."""
raise NotImplementedError()
class NodepoolDaemonApp(NodepoolApp):
def create_parser(self):
parser = super(NodepoolDaemonApp, self).create_parser()
parser.add_argument('-p',
dest='pidfile',
help='path to pid file',
default='/var/run/nodepool/%s.pid' % self.app_name)
parser.add_argument('-d',
dest='nodaemon',
action='store_true',
help='do not run as a daemon')
return parser
def _do_run(self):
if self.args.nodaemon:
return super(NodepoolDaemonApp, self)._do_run()
else:
pid = pid_file_module.TimeoutPIDLockFile(self.args.pidfile, 10)
if is_pidfile_stale(pid):
pid.break_lock()
with daemon.DaemonContext(pidfile=pid):
return super(NodepoolDaemonApp, self)._do_run()
@classmethod
def main(cls, argv=None):
signal.signal(signal.SIGUSR2, stack_dump_handler)
return super(NodepoolDaemonApp, cls).main(argv)

View File

@ -12,56 +12,51 @@
# License for the specific language governing permissions and limitations
# under the License.
import argparse
import extras
import signal
import sys
import daemon
from nodepool import builder
import nodepool.cmd
# as of python-daemon 1.6 it doesn't bundle pidlockfile anymore
# instead it depends on lockfile-0.9.1 which uses pidfile.
pid_file_module = extras.try_imports(['daemon.pidlockfile', 'daemon.pidfile'])
class NodePoolBuilderApp(nodepool.cmd.NodepoolDaemonApp):
class NodePoolBuilderApp(nodepool.cmd.NodepoolApp):
app_name = 'nodepool-builder'
app_description = 'NodePool Image Builder.'
def sigint_handler(self, signal, frame):
self.nb.stop()
sys.exit(0)
def parse_arguments(self):
parser = argparse.ArgumentParser(description='NodePool Image Builder.')
def create_parser(self):
parser = super(NodePoolBuilderApp, self).create_parser()
parser.add_argument('-c', dest='config',
default='/etc/nodepool/nodepool.yaml',
help='path to config file')
parser.add_argument('-l', dest='logconfig',
help='path to log config file')
parser.add_argument('-p', dest='pidfile',
help='path to pid file',
default='/var/run/nodepool-builder/'
'nodepool-builder.pid')
parser.add_argument('-d', dest='nodaemon', action='store_true',
help='do not run as a daemon')
parser.add_argument('-s', dest='secure',
help='path to secure config file')
parser.add_argument('--build-workers', dest='build_workers',
default=1, help='number of build workers',
type=int)
parser.add_argument('--upload-workers', dest='upload_workers',
default=4, help='number of upload workers',
type=int)
self.args = parser.parse_args()
parser.add_argument('--fake', action='store_true',
help='Do not actually run diskimage-builder '
'(used for testing)')
return parser
def main(self):
self.setup_logging()
def run(self):
self.nb = builder.NodePoolBuilder(
self.args.config, self.args.build_workers,
self.args.upload_workers)
self.args.config,
secure_path=self.args.secure,
num_builders=self.args.build_workers,
num_uploaders=self.args.upload_workers,
fake=self.args.fake)
signal.signal(signal.SIGINT, self.sigint_handler)
signal.signal(signal.SIGUSR2, nodepool.cmd.stack_dump_handler)
self.nb.start()
while True:
@ -69,15 +64,7 @@ class NodePoolBuilderApp(nodepool.cmd.NodepoolApp):
def main():
app = NodePoolBuilderApp()
app.parse_arguments()
if app.args.nodaemon:
app.main()
else:
pid = pid_file_module.TimeoutPIDLockFile(app.args.pidfile, 10)
with daemon.DaemonContext(pidfile=pid):
app.main()
return NodePoolBuilderApp.main()
if __name__ == "__main__":

View File

@ -14,6 +14,8 @@ import logging
import voluptuous as v
import yaml
from nodepool.config import get_provider_config
log = logging.getLogger(__name__)
@ -24,88 +26,19 @@ class ConfigValidator:
self.config_file = config_file
def validate(self):
cron = {
'check': str,
'cleanup': str,
}
images = {
'name': str,
'pause': bool,
'min-ram': int,
'name-filter': str,
'key-name': str,
'diskimage': str,
'meta': dict,
'username': str,
'user-home': str,
'private-key': str,
'config-drive': bool,
}
old_network = {
'net-id': str,
'net-label': str,
}
network = {
provider = {
'name': v.Required(str),
'public': bool, # Ignored, but kept for backwards compat
'driver': str,
'max-concurrency': int,
}
providers = {
label = {
'name': str,
'region-name': str,
'service-type': str,
'service-name': str,
'availability-zones': [str],
'cloud': str,
'username': str,
'password': str,
'auth-url': str,
'project-id': str,
'project-name': str,
'max-servers': int,
'pool': str, # Ignored, but kept for backwards compat
'image-type': str,
'networks': [v.Any(old_network, network)],
'ipv6-preferred': bool,
'boot-timeout': int,
'api-timeout': int,
'launch-timeout': int,
'nodepool-id': str,
'rate': float,
'images': [images],
'template-hostname': str,
'clean-floating-ips': bool,
}
labels = {
'name': str,
'image': str,
'min-ready': int,
'ready-script': str,
'subnodes': int,
'providers': [{
'name': str,
}],
'max-ready-age': int,
}
targets = {
'name': str,
'hostname': str,
'subnode-hostname': str,
'assign-via-gearman': bool,
'jenkins': {
'url': str,
'user': str,
'apikey': str,
'credentials-id': str,
'test-job': str
}
}
diskimages = {
diskimage = {
'name': str,
'pause': bool,
'elements': [str],
@ -113,27 +46,26 @@ class ConfigValidator:
'release': v.Any(str, int),
'rebuild-age': int,
'env-vars': {str: str},
'username': str,
}
webapp = {
'port': int,
'listen_address': str,
}
top_level = {
'webapp': webapp,
'elements-dir': str,
'images-dir': str,
'dburi': str,
'zmq-publishers': [str],
'gearman-servers': [{
'host': str,
'port': int,
}],
'zookeeper-servers': [{
'host': str,
'port': int,
'chroot': str,
}],
'cron': cron,
'providers': [providers],
'labels': [labels],
'targets': [targets],
'diskimages': [diskimages],
'providers': list,
'labels': [label],
'diskimages': [diskimage],
}
log.info("validating %s" % self.config_file)
@ -142,12 +74,6 @@ class ConfigValidator:
# validate the overall schema
schema = v.Schema(top_level)
schema(config)
# labels must list valid providers
all_providers = [p['name'] for p in config['providers']]
for label in config['labels']:
for provider in label['providers']:
if not provider['name'] in all_providers:
raise AssertionError('label %s requests '
'non-existent provider %s'
% (label['name'], provider['name']))
for provider_dict in config.get('providers', []):
provider_schema = get_provider_config(provider_dict).get_schema()
provider_schema.extend(provider)(provider_dict)
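# Illustrative sketch (not part of this change): a minimal configuration
# dict that the top-level schema above should accept; all names and
# values are invented for illustration.
#
#   example = {
#       'zookeeper-servers': [{'host': 'zk1.example.com', 'port': 2181}],
#       'labels': [{'name': 'ubuntu-xenial'}],
#       'diskimages': [{'name': 'ubuntu-xenial',
#                       'elements': ['ubuntu-minimal']}],
#       'providers': [],
#   }
#   v.Schema(top_level)(example)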

81
nodepool/cmd/launcher.py Executable file
View File

@ -0,0 +1,81 @@
#!/usr/bin/env python
# Copyright 2012 Hewlett-Packard Development Company, L.P.
# Copyright 2013 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import os
import sys
import signal
import nodepool.cmd
import nodepool.launcher
import nodepool.webapp
log = logging.getLogger(__name__)
class NodePoolLauncherApp(nodepool.cmd.NodepoolDaemonApp):
app_name = 'nodepool'
def create_parser(self):
parser = super(NodePoolLauncherApp, self).create_parser()
parser.add_argument('-c', dest='config',
default='/etc/nodepool/nodepool.yaml',
help='path to config file')
parser.add_argument('-s', dest='secure',
help='path to secure file')
parser.add_argument('--no-webapp', action='store_true')
return parser
def exit_handler(self, signum, frame):
self.pool.stop()
if not self.args.no_webapp:
self.webapp.stop()
sys.exit(0)
def term_handler(self, signum, frame):
os._exit(0)
def run(self):
self.pool = nodepool.launcher.NodePool(self.args.secure,
self.args.config)
if not self.args.no_webapp:
config = self.pool.loadConfig()
self.webapp = nodepool.webapp.WebApp(self.pool,
**config.webapp)
signal.signal(signal.SIGINT, self.exit_handler)
# For back compatibility:
signal.signal(signal.SIGUSR1, self.exit_handler)
signal.signal(signal.SIGTERM, self.term_handler)
self.pool.start()
if not self.args.no_webapp:
self.webapp.start()
while True:
signal.pause()
def main():
return NodePoolLauncherApp.main()
if __name__ == "__main__":
sys.exit(main())

218
nodepool/cmd/nodepoolcmd.py Normal file → Executable file
View File

@ -14,37 +14,31 @@
# License for the specific language governing permissions and limitations
# under the License.
import argparse
import logging.config
import sys
from nodepool import nodedb
from nodepool import nodepool
from prettytable import PrettyTable
from nodepool import launcher
from nodepool import provider_manager
from nodepool import status
from nodepool import zk
from nodepool.cmd import NodepoolApp
from nodepool.version import version_info as npc_version_info
from config_validator import ConfigValidator
from prettytable import PrettyTable
from nodepool.cmd.config_validator import ConfigValidator
log = logging.getLogger(__name__)
class NodePoolCmd(NodepoolApp):
def parse_arguments(self):
parser = argparse.ArgumentParser(description='Node pool.')
def create_parser(self):
parser = super(NodePoolCmd, self).create_parser()
parser.add_argument('-c', dest='config',
default='/etc/nodepool/nodepool.yaml',
help='path to config file')
parser.add_argument('-s', dest='secure',
default='/etc/nodepool/secure.conf',
help='path to secure file')
parser.add_argument('-l', dest='logconfig',
help='path to log config file')
parser.add_argument('--version', action='version',
version=npc_version_info.version_string(),
help='show version')
parser.add_argument('--debug', dest='debug', action='store_true',
help='show DEBUG level logging')
@ -55,6 +49,9 @@ class NodePoolCmd(NodepoolApp):
cmd_list = subparsers.add_parser('list', help='list nodes')
cmd_list.set_defaults(func=self.list)
cmd_list.add_argument('--detail', action='store_true',
help='Output detailed node info')
cmd_image_list = subparsers.add_parser(
'image-list', help='list images from providers')
cmd_image_list.set_defaults(func=self.image_list)
@ -70,13 +67,6 @@ class NodePoolCmd(NodepoolApp):
cmd_image_build.add_argument('image', help='image name')
cmd_image_build.set_defaults(func=self.image_build)
cmd_alien_list = subparsers.add_parser(
'alien-list',
help='list nodes not accounted for by nodepool')
cmd_alien_list.set_defaults(func=self.alien_list)
cmd_alien_list.add_argument('provider', help='provider name',
nargs='?')
cmd_alien_image_list = subparsers.add_parser(
'alien-image-list',
help='list images not accounted for by nodepool')
@ -90,7 +80,8 @@ class NodePoolCmd(NodepoolApp):
cmd_hold.set_defaults(func=self.hold)
cmd_hold.add_argument('id', help='node id')
cmd_hold.add_argument('--reason',
help='Optional reason this node is held')
help='Reason this node is held',
required=True)
cmd_delete = subparsers.add_parser(
'delete',
@ -116,7 +107,8 @@ class NodePoolCmd(NodepoolApp):
cmd_dib_image_delete = subparsers.add_parser(
'dib-image-delete',
help='delete image built with diskimage-builder')
help='Delete a dib built image from disk along with all cloud '
'uploads of this image')
cmd_dib_image_delete.set_defaults(func=self.dib_image_delete)
cmd_dib_image_delete.add_argument('id', help='dib image id')
@ -125,47 +117,39 @@ class NodePoolCmd(NodepoolApp):
help='Validate configuration file')
cmd_config_validate.set_defaults(func=self.config_validate)
cmd_job_list = subparsers.add_parser('job-list', help='list jobs')
cmd_job_list.set_defaults(func=self.job_list)
cmd_request_list = subparsers.add_parser(
'request-list',
help='list the current node requests')
cmd_request_list.set_defaults(func=self.request_list)
cmd_job_create = subparsers.add_parser('job-create', help='create job')
cmd_job_create.add_argument(
'name',
help='job name')
cmd_job_create.add_argument('--hold-on-failure',
help='number of nodes to hold when this job fails')
cmd_job_create.set_defaults(func=self.job_create)
cmd_job_delete = subparsers.add_parser(
'job-delete',
help='delete job')
cmd_job_delete.set_defaults(func=self.job_delete)
cmd_job_delete.add_argument('id', help='job id')
self.args = parser.parse_args()
return parser
def setup_logging(self):
# NOTE(jamielennox): This should just be the same as other apps
if self.args.debug:
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)s %(name)s: '
'%(message)s')
m = '%(asctime)s %(levelname)s %(name)s: %(message)s'
logging.basicConfig(level=logging.DEBUG, format=m)
elif self.args.logconfig:
NodepoolApp.setup_logging(self)
super(NodePoolCmd, self).setup_logging()
else:
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(levelname)s %(name)s: '
'%(message)s')
m = '%(asctime)s %(levelname)s %(name)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=m)
l = logging.getLogger('kazoo')
l.setLevel(logging.WARNING)
def list(self, node_id=None):
print status.node_list(self.pool.getDB(), node_id)
def list(self, node_id=None, detail=False):
if hasattr(self.args, 'detail'):
detail = self.args.detail
print(status.node_list(self.zk, node_id, detail))
def dib_image_list(self):
print status.dib_image_list(self.zk)
print(status.dib_image_list(self.zk))
def image_list(self):
print status.image_list(self.zk)
print(status.image_list(self.zk))
def image_build(self, diskimage=None):
diskimage = diskimage or self.args.image
@ -180,31 +164,8 @@ class NodePoolCmd(NodepoolApp):
self.zk.submitBuildRequest(diskimage)
def alien_list(self):
self.pool.reconfigureManagers(self.pool.config, False)
t = PrettyTable(["Provider", "Hostname", "Server ID", "IP"])
t.align = 'l'
with self.pool.getDB().getSession() as session:
for provider in self.pool.config.providers.values():
if (self.args.provider and
provider.name != self.args.provider):
continue
manager = self.pool.getProviderManager(provider)
try:
for server in manager.listServers():
if not session.getNodeByExternalID(
provider.name, server['id']):
t.add_row([provider.name, server['name'],
server['id'], server['public_v4']])
except Exception as e:
log.warning("Exception listing aliens for %s: %s"
% (provider.name, str(e.message)))
print t
def alien_image_list(self):
self.pool.reconfigureManagers(self.pool.config, False)
self.pool.updateConfig()
t = PrettyTable(["Provider", "Name", "Image ID"])
t.align = 'l'
@ -213,7 +174,7 @@ class NodePoolCmd(NodepoolApp):
if (self.args.provider and
provider.name != self.args.provider):
continue
manager = self.pool.getProviderManager(provider)
manager = self.pool.getProviderManager(provider.name)
# Build list of provider images as known by the provider
provider_images = []
@ -227,11 +188,11 @@ class NodePoolCmd(NodepoolApp):
if 'nodepool_build_id' in image['properties']]
except Exception as e:
log.warning("Exception listing alien images for %s: %s"
% (provider.name, str(e.message)))
% (provider.name, str(e)))
alien_ids = []
uploads = []
for image in provider.images:
for image in provider.diskimages:
# Build list of provider images as recorded in ZK
for bnum in self.zk.getBuildNumbers(image):
uploads.extend(
@ -249,30 +210,46 @@ class NodePoolCmd(NodepoolApp):
if image['id'] in alien_ids:
t.add_row([provider.name, image['name'], image['id']])
print t
print(t)
def hold(self):
node_id = None
with self.pool.getDB().getSession() as session:
node = session.getNode(self.args.id)
node.state = nodedb.HOLD
if self.args.reason:
node.comment = self.args.reason
node_id = node.id
self.list(node_id=node_id)
node = self.zk.getNode(self.args.id)
if not node:
print("Node id %s not found" % self.args.id)
return
node.state = zk.HOLD
node.comment = self.args.reason
print("Waiting for lock...")
self.zk.lockNode(node, blocking=True)
self.zk.storeNode(node)
self.zk.unlockNode(node)
self.list(node_id=self.args.id)
def delete(self):
node = self.zk.getNode(self.args.id)
if not node:
print("Node id %s not found" % self.args.id)
return
self.zk.lockNode(node, blocking=True, timeout=5)
if self.args.now:
self.pool.reconfigureManagers(self.pool.config)
with self.pool.getDB().getSession() as session:
node = session.getNode(self.args.id)
if not node:
print "Node %s not found." % self.args.id
elif self.args.now:
self.pool._deleteNode(session, node)
else:
node.state = nodedb.DELETE
self.list(node_id=node.id)
if node.provider not in self.pool.config.providers:
print("Provider %s for node %s not defined on this launcher" %
(node.provider, node.id))
return
provider = self.pool.config.providers[node.provider]
manager = provider_manager.get_provider(provider, True)
manager.start()
launcher.NodeDeleter.delete(self.zk, manager, node)
manager.stop()
else:
node.state = zk.DELETING
self.zk.storeNode(node)
self.zk.unlockNode(node)
self.list(node_id=node.id)
def dib_image_delete(self):
(image, build_num) = self.args.id.rsplit('-', 1)
@ -312,53 +289,38 @@ class NodePoolCmd(NodepoolApp):
validator = ConfigValidator(self.args.config)
validator.validate()
log.info("Configuration validation complete")
#TODO(asselin,yolanda): add validation of secure.conf
# TODO(asselin,yolanda): add validation of secure.conf
def job_list(self):
t = PrettyTable(["ID", "Name", "Hold on Failure"])
t.align = 'l'
with self.pool.getDB().getSession() as session:
for job in session.getJobs():
t.add_row([job.id, job.name, job.hold_on_failure])
print t
def job_create(self):
with self.pool.getDB().getSession() as session:
session.createJob(self.args.name,
hold_on_failure=self.args.hold_on_failure)
self.job_list()
def job_delete(self):
with self.pool.getDB().getSession() as session:
job = session.getJob(self.args.id)
if not job:
print "Job %s not found." % self.args.id
else:
job.delete()
def request_list(self):
print(status.request_list(self.zk))
def _wait_for_threads(self, threads):
for t in threads:
if t:
t.join()
def main(self):
def run(self):
self.zk = None
# no arguments, print help messaging, then exit with error(1)
if not self.args.command:
self.parser.print_help()
return 1
# commands which do not need to start up or parse config
if self.args.command in ('config-validate',):
return self.args.func()
self.pool = nodepool.NodePool(self.args.secure, self.args.config)
self.pool = launcher.NodePool(self.args.secure, self.args.config)
config = self.pool.loadConfig()
# commands needing ZooKeeper
if self.args.command in ('image-build', 'dib-image-list',
'image-list', 'dib-image-delete',
'image-delete', 'alien-image-list'):
'image-delete', 'alien-image-list',
'list', 'hold', 'delete',
'request-list'):
self.zk = zk.ZooKeeper()
self.zk.connect(config.zookeeper_servers.values())
else:
self.pool.reconfigureDatabase(config)
self.zk.connect(list(config.zookeeper_servers.values()))
self.pool.setConfig(config)
self.args.func()
@ -366,11 +328,9 @@ class NodePoolCmd(NodepoolApp):
if self.zk:
self.zk.disconnect()
def main():
npc = NodePoolCmd()
npc.parse_arguments()
npc.setup_logging()
return npc.main()
return NodePoolCmd.main()
if __name__ == "__main__":

View File

@ -1,160 +0,0 @@
#!/usr/bin/env python
# Copyright 2012 Hewlett-Packard Development Company, L.P.
# Copyright 2013 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import argparse
import daemon
import errno
import extras
# as of python-daemon 1.6 it doesn't bundle pidlockfile anymore
# instead it depends on lockfile-0.9.1 which uses pidfile.
pid_file_module = extras.try_imports(['daemon.pidlockfile', 'daemon.pidfile'])
import logging
import os
import sys
import signal
import nodepool.cmd
import nodepool.nodepool
import nodepool.webapp
log = logging.getLogger(__name__)
def is_pidfile_stale(pidfile):
""" Determine whether a PID file is stale.
Return 'True' ("stale") if the contents of the PID file are
valid but do not match the PID of a currently-running process;
otherwise return 'False'.
"""
result = False
pidfile_pid = pidfile.read_pid()
if pidfile_pid is not None:
try:
os.kill(pidfile_pid, 0)
except OSError as exc:
if exc.errno == errno.ESRCH:
# The specified PID does not exist
result = True
return result
class NodePoolDaemon(nodepool.cmd.NodepoolApp):
def parse_arguments(self):
parser = argparse.ArgumentParser(description='Node pool.')
parser.add_argument('-c', dest='config',
default='/etc/nodepool/nodepool.yaml',
help='path to config file')
parser.add_argument('-s', dest='secure',
default='/etc/nodepool/secure.conf',
help='path to secure file')
parser.add_argument('-d', dest='nodaemon', action='store_true',
help='do not run as a daemon')
parser.add_argument('-l', dest='logconfig',
help='path to log config file')
parser.add_argument('-p', dest='pidfile',
help='path to pid file',
default='/var/run/nodepool/nodepool.pid')
# TODO(pabelanger): Deprecated flag, remove in the future.
parser.add_argument('--no-builder', dest='builder',
action='store_false')
# TODO(pabelanger): Deprecated flag, remove in the future.
parser.add_argument('--build-workers', dest='build_workers',
default=1, help='number of build workers',
type=int)
# TODO(pabelanger): Deprecated flag, remove in the future.
parser.add_argument('--upload-workers', dest='upload_workers',
default=4, help='number of upload workers',
type=int)
parser.add_argument('--no-deletes', action='store_true')
parser.add_argument('--no-launches', action='store_true')
parser.add_argument('--no-webapp', action='store_true')
parser.add_argument('--version', dest='version', action='store_true',
help='show version')
self.args = parser.parse_args()
def exit_handler(self, signum, frame):
self.pool.stop()
if not self.args.no_webapp:
self.webapp.stop()
sys.exit(0)
def term_handler(self, signum, frame):
os._exit(0)
def main(self):
self.setup_logging()
self.pool = nodepool.nodepool.NodePool(self.args.secure,
self.args.config,
self.args.no_deletes,
self.args.no_launches)
if self.args.builder:
log.warning(
"Note: nodepool no longer automatically builds images, "
"please ensure the separate nodepool-builder process is "
"running if you haven't already")
else:
log.warning(
"--no-builder is deprecated and will be removed in the near "
"future. Update your service scripts to avoid a breakage.")
if not self.args.no_webapp:
self.webapp = nodepool.webapp.WebApp(self.pool)
signal.signal(signal.SIGINT, self.exit_handler)
# For back compatibility:
signal.signal(signal.SIGUSR1, self.exit_handler)
signal.signal(signal.SIGUSR2, nodepool.cmd.stack_dump_handler)
signal.signal(signal.SIGTERM, self.term_handler)
self.pool.start()
if not self.args.no_webapp:
self.webapp.start()
while True:
signal.pause()
def main():
npd = NodePoolDaemon()
npd.parse_arguments()
if npd.args.version:
from nodepool.version import version_info as npd_version_info
print "Nodepool version: %s" % npd_version_info.version_string()
return(0)
pid = pid_file_module.TimeoutPIDLockFile(npd.args.pidfile, 10)
if is_pidfile_stale(pid):
pid.break_lock()
if npd.args.nodaemon:
npd.main()
else:
with daemon.DaemonContext(pidfile=pid):
npd.main()
if __name__ == "__main__":
sys.exit(main())

368
nodepool/config.py Normal file → Executable file
View File

@ -16,114 +16,56 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os_client_config
from six.moves import configparser as ConfigParser
import time
import yaml
import fakeprovider
import zk
class ConfigValue(object):
def __eq__(self, other):
if isinstance(other, ConfigValue):
if other.__dict__ == self.__dict__:
return True
return False
from nodepool import zk
from nodepool.driver import ConfigValue
from nodepool.driver.fake.config import FakeProviderConfig
from nodepool.driver.openstack.config import OpenStackProviderConfig
class Config(ConfigValue):
pass
class Provider(ConfigValue):
def __eq__(self, other):
if (other.cloud_config != self.cloud_config or
other.nodepool_id != self.nodepool_id or
other.max_servers != self.max_servers or
other.pool != self.pool or
other.image_type != self.image_type or
other.rate != self.rate or
other.api_timeout != self.api_timeout or
other.boot_timeout != self.boot_timeout or
other.launch_timeout != self.launch_timeout or
other.networks != self.networks or
other.ipv6_preferred != self.ipv6_preferred or
other.clean_floating_ips != self.clean_floating_ips or
other.azs != self.azs):
return False
new_images = other.images
old_images = self.images
# Check if images have been added or removed
if set(new_images.keys()) != set(old_images.keys()):
return False
# check if existing images have been updated
for k in new_images:
if (new_images[k].min_ram != old_images[k].min_ram or
new_images[k].name_filter != old_images[k].name_filter or
new_images[k].key_name != old_images[k].key_name or
new_images[k].username != old_images[k].username or
new_images[k].user_home != old_images[k].user_home or
new_images[k].private_key != old_images[k].private_key or
new_images[k].meta != old_images[k].meta or
new_images[k].config_drive != old_images[k].config_drive):
return False
return True
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "<Provider %s>" % self.name
class ProviderImage(ConfigValue):
def __repr__(self):
return "<ProviderImage %s>" % self.name
class Target(ConfigValue):
def __repr__(self):
return "<Target %s>" % self.name
class Label(ConfigValue):
def __repr__(self):
return "<Label %s>" % self.name
class LabelProvider(ConfigValue):
def __repr__(self):
return "<LabelProvider %s>" % self.name
class Cron(ConfigValue):
def __repr__(self):
return "<Cron %s>" % self.name
class ZMQPublisher(ConfigValue):
def __repr__(self):
return "<ZMQPublisher %s>" % self.name
class GearmanServer(ConfigValue):
def __repr__(self):
return "<GearmanServer %s>" % self.name
class DiskImage(ConfigValue):
def __eq__(self, other):
if (other.name != self.name or
other.elements != self.elements or
other.release != self.release or
other.rebuild_age != self.rebuild_age or
other.env_vars != self.env_vars or
other.image_types != self.image_types or
other.pause != self.pause or
other.username != self.username):
return False
return True
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return "<DiskImage %s>" % self.name
class Network(ConfigValue):
def __repr__(self):
return "<Network name:%s id:%s>" % (self.name, self.id)
def get_provider_config(provider):
provider.setdefault('driver', 'openstack')
# Ensure legacy configuration still works when using fake cloud
if provider.get('name', '').startswith('fake'):
provider['driver'] = 'fake'
if provider['driver'] == 'fake':
return FakeProviderConfig(provider)
elif provider['driver'] == 'openstack':
return OpenStackProviderConfig(provider)
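# Hedged illustration (not part of config.py): how get_provider_config()
# above selects a driver. The provider names are made up; the config classes
# come from the imports at the top of this module.
fake = get_provider_config({'name': 'fake-cloud'})
real = get_provider_config({'name': 'mycloud', 'cloud': 'mycloud'})
assert isinstance(fake, FakeProviderConfig)        # name starts with "fake"
assert isinstance(real, OpenStackProviderConfig)   # default driver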
def loadConfig(config_path):
def openConfig(path):
retry = 3
# Since some nodepool code attempts to dynamically re-read its config
@@ -132,7 +74,7 @@ def loadConfig(config_path):
# attempt to reload it.
while True:
try:
config = yaml.load(open(config_path))
config = yaml.load(open(path))
break
except IOError as e:
if e.errno == 2:
@@ -142,48 +84,29 @@ def loadConfig(config_path):
raise e
if retry == 0:
raise e
return config
cloud_config = os_client_config.OpenStackConfig()
def loadConfig(config_path):
config = openConfig(config_path)
# Reset the shared os_client_config instance
OpenStackProviderConfig.os_client_config = None
newconfig = Config()
newconfig.db = None
newconfig.dburi = None
newconfig.webapp = {
'port': config.get('webapp', {}).get('port', 8005),
'listen_address': config.get('webapp', {}).get('listen_address',
'0.0.0.0')
}
newconfig.providers = {}
newconfig.targets = {}
newconfig.labels = {}
newconfig.elementsdir = config.get('elements-dir')
newconfig.imagesdir = config.get('images-dir')
newconfig.dburi = None
newconfig.provider_managers = {}
newconfig.jenkins_managers = {}
newconfig.zmq_publishers = {}
newconfig.gearman_servers = {}
newconfig.zookeeper_servers = {}
newconfig.diskimages = {}
newconfig.crons = {}
for name, default in [
('cleanup', '* * * * *'),
('check', '*/15 * * * *'),
]:
c = Cron()
c.name = name
newconfig.crons[c.name] = c
c.job = None
c.timespec = config.get('cron', {}).get(name, default)
for addr in config.get('zmq-publishers', []):
z = ZMQPublisher()
z.name = addr
z.listener = None
newconfig.zmq_publishers[z.name] = z
for server in config.get('gearman-servers', []):
g = GearmanServer()
g.host = server['host']
g.port = server.get('port', 4730)
g.name = g.host + '_' + str(g.port)
newconfig.gearman_servers[g.name] = g
for server in config.get('zookeeper-servers', []):
z = zk.ZooKeeperConnectionConfig(server['host'],
@@ -192,185 +115,54 @@ def loadConfig(config_path):
name = z.host + '_' + str(z.port)
newconfig.zookeeper_servers[name] = z
for provider in config.get('providers', []):
p = Provider()
p.name = provider['name']
newconfig.providers[p.name] = p
cloud_kwargs = _cloudKwargsFromProvider(provider)
p.cloud_config = _get_one_cloud(cloud_config, cloud_kwargs)
p.nodepool_id = provider.get('nodepool-id', None)
p.region_name = provider.get('region-name')
p.max_servers = provider['max-servers']
p.pool = provider.get('pool', None)
p.rate = provider.get('rate', 1.0)
p.api_timeout = provider.get('api-timeout')
p.boot_timeout = provider.get('boot-timeout', 60)
p.launch_timeout = provider.get('launch-timeout', 3600)
p.networks = []
for network in provider.get('networks', []):
n = Network()
p.networks.append(n)
if 'net-id' in network:
n.id = network['net-id']
n.name = None
elif 'net-label' in network:
n.name = network['net-label']
n.id = None
else:
n.name = network.get('name')
n.id = None
p.ipv6_preferred = provider.get('ipv6-preferred')
p.clean_floating_ips = provider.get('clean-floating-ips')
p.azs = provider.get('availability-zones')
p.template_hostname = provider.get(
'template-hostname',
'template-{image.name}-{timestamp}'
)
p.image_type = provider.get(
'image-type', p.cloud_config.config['image_format'])
p.images = {}
for image in provider['images']:
i = ProviderImage()
i.name = image['name']
p.images[i.name] = i
i.min_ram = image['min-ram']
i.name_filter = image.get('name-filter', None)
i.key_name = image.get('key-name', None)
i.username = image.get('username', 'jenkins')
i.user_home = image.get('user-home', '/home/jenkins')
i.pause = bool(image.get('pause', False))
i.private_key = image.get('private-key',
'/var/lib/jenkins/.ssh/id_rsa')
i.config_drive = image.get('config-drive', True)
# This dict is expanded and used as custom properties when
# the image is uploaded.
i.meta = image.get('meta', {})
# 5 elements, and no key or value can be > 255 chars
# per Nova API rules
if i.meta:
if len(i.meta) > 5 or \
any([len(k) > 255 or len(v) > 255
for k, v in i.meta.iteritems()]):
# soft-fail
#self.log.error("Invalid metadata for %s; ignored"
# % i.name)
i.meta = {}
if 'diskimages' in config:
for diskimage in config['diskimages']:
d = DiskImage()
d.name = diskimage['name']
newconfig.diskimages[d.name] = d
if 'elements' in diskimage:
d.elements = u' '.join(diskimage['elements'])
else:
d.elements = ''
# must be a string, as it's passed as env-var to
# d-i-b, but might be untyped in the yaml and
# interpreted as a number (e.g. "21" for fedora)
d.release = str(diskimage.get('release', ''))
d.rebuild_age = int(diskimage.get('rebuild-age', 86400))
d.env_vars = diskimage.get('env-vars', {})
if not isinstance(d.env_vars, dict):
#self.log.error("%s: ignoring env-vars; "
# "should be a dict" % d.name)
d.env_vars = {}
d.image_types = set(diskimage.get('formats', []))
d.pause = bool(diskimage.get('pause', False))
# Do this after providers to build the image-types
for provider in newconfig.providers.values():
for image in provider.images.values():
diskimage = newconfig.diskimages[image.name]
diskimage.image_types.add(provider.image_type)
for diskimage in config.get('diskimages', []):
d = DiskImage()
d.name = diskimage['name']
newconfig.diskimages[d.name] = d
if 'elements' in diskimage:
d.elements = u' '.join(diskimage['elements'])
else:
d.elements = ''
# must be a string, as it's passed as env-var to
# d-i-b, but might be untyped in the yaml and
# interpreted as a number (e.g. "21" for fedora)
d.release = str(diskimage.get('release', ''))
d.rebuild_age = int(diskimage.get('rebuild-age', 86400))
d.env_vars = diskimage.get('env-vars', {})
if not isinstance(d.env_vars, dict):
d.env_vars = {}
d.image_types = set(diskimage.get('formats', []))
d.pause = bool(diskimage.get('pause', False))
d.username = diskimage.get('username', 'zuul')
for label in config.get('labels', []):
l = Label()
l.name = label['name']
newconfig.labels[l.name] = l
l.image = label['image']
l.max_ready_age = label.get('max-ready-age', 0)
l.min_ready = label.get('min-ready', 2)
l.subnodes = label.get('subnodes', 0)
l.ready_script = label.get('ready-script')
l.providers = {}
for provider in label['providers']:
p = LabelProvider()
p.name = provider['name']
l.providers[p.name] = p
for target in config.get('targets', []):
t = Target()
t.name = target['name']
newconfig.targets[t.name] = t
jenkins = target.get('jenkins', {})
t.online = True
t.rate = target.get('rate', 1.0)
t.jenkins_test_job = jenkins.get('test-job')
t.jenkins_url = None
t.jenkins_user = None
t.jenkins_apikey = None
t.jenkins_credentials_id = None
t.assign_via_gearman = target.get('assign-via-gearman', False)
t.hostname = target.get(
'hostname',
'{label.name}-{provider.name}-{node_id}'
)
t.subnode_hostname = target.get(
'subnode-hostname',
'{label.name}-{provider.name}-{node_id}-{subnode_id}'
)
l.pools = []
for provider in config.get('providers', []):
p = get_provider_config(provider)
p.load(newconfig)
newconfig.providers[p.name] = p
return newconfig
def loadSecureConfig(config, secure_config_path):
secure = ConfigParser.ConfigParser()
secure.readfp(open(secure_config_path))
secure = openConfig(secure_config_path)
if not secure: # empty file
return
config.dburi = secure.get('database', 'dburi')
# Eliminate any servers defined in the normal config
if secure.get('zookeeper-servers', []):
config.zookeeper_servers = {}
for target in config.targets.values():
section_name = 'jenkins "%s"' % target.name
if secure.has_section(section_name):
target.jenkins_url = secure.get(section_name, 'url')
target.jenkins_user = secure.get(section_name, 'user')
target.jenkins_apikey = secure.get(section_name, 'apikey')
try:
target.jenkins_credentials_id = secure.get(
section_name, 'credentials')
except:
pass
def _cloudKwargsFromProvider(provider):
cloud_kwargs = {}
for arg in ['region-name', 'api-timeout', 'cloud']:
if arg in provider:
cloud_kwargs[arg] = provider[arg]
# These are named from back when we only talked to Nova. They're
# actually compute service related
if 'service-type' in provider:
cloud_kwargs['compute-service-type'] = provider['service-type']
if 'service-name' in provider:
cloud_kwargs['compute-service-name'] = provider['service-name']
auth_kwargs = {}
for auth_key in (
'username', 'password', 'auth-url', 'project-id', 'project-name'):
if auth_key in provider:
auth_kwargs[auth_key] = provider[auth_key]
cloud_kwargs['auth'] = auth_kwargs
return cloud_kwargs
def _get_one_cloud(cloud_config, cloud_kwargs):
'''This is a function to allow for overriding it in tests.'''
if cloud_kwargs.get('auth', {}).get('auth-url', '') == 'fake':
return fakeprovider.fake_get_one_cloud(cloud_config, cloud_kwargs)
return cloud_config.get_one_cloud(**cloud_kwargs)
# TODO(Shrews): Support ZooKeeper auth
for server in secure.get('zookeeper-servers', []):
z = zk.ZooKeeperConnectionConfig(server['host'],
server.get('port', 2181),
server.get('chroot', None))
name = z.host + '_' + str(z.port)
config.zookeeper_servers[name] = z
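# Hedged sketch (not part of config.py): the secure file is now plain YAML,
# so a minimal document accepted by loadSecureConfig() looks like the string
# below. The host name and chroot are hypothetical.
import yaml

example_secure = """
zookeeper-servers:
  - host: zk1.example.com
    port: 2181
    chroot: /nodepool
"""
print(yaml.safe_load(example_secure))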

360
nodepool/driver/__init__.py Normal file

@@ -0,0 +1,360 @@
# Copyright (C) 2011-2014 OpenStack Foundation
# Copyright (C) 2017 Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import six
from nodepool import zk
from nodepool import exceptions
@six.add_metaclass(abc.ABCMeta)
class Provider(object):
"""The Provider interface
The class or instance attribute **name** must be provided as a string.
"""
@abc.abstractmethod
def start(self):
pass
@abc.abstractmethod
def stop(self):
pass
@abc.abstractmethod
def join(self):
pass
@abc.abstractmethod
def labelReady(self, name):
pass
@abc.abstractmethod
def cleanupNode(self, node_id):
pass
@abc.abstractmethod
def waitForNodeCleanup(self, node_id):
pass
@abc.abstractmethod
def cleanupLeakedResources(self):
pass
@abc.abstractmethod
def listNodes(self):
pass
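# Hedged sketch (not part of this module): the smallest Provider
# implementation that satisfies the interface above. "NullProvider" is
# hypothetical; a real driver would talk to an actual backend.
class NullProvider(Provider):
    name = 'null'

    def start(self):
        pass

    def stop(self):
        pass

    def join(self):
        pass

    def labelReady(self, name):
        return True

    def cleanupNode(self, node_id):
        pass

    def waitForNodeCleanup(self, node_id):
        pass

    def cleanupLeakedResources(self):
        pass

    def listNodes(self):
        return []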
@six.add_metaclass(abc.ABCMeta)
class NodeRequestHandler(object):
'''
Class to process a single node request.
The PoolWorker thread will instantiate a class of this type for each
node request that it pulls from ZooKeeper.
Subclasses are required to implement the run_handler method and the
NodeLaunchManager to kick off any threads needed to satisfy the request.
'''
def __init__(self, pw, request):
'''
:param PoolWorker pw: The parent PoolWorker object.
:param NodeRequest request: The request to handle.
'''
self.pw = pw
self.request = request
self.launch_manager = None
self.nodeset = []
self.done = False
self.paused = False
self.launcher_id = self.pw.launcher_id
def _setFromPoolWorker(self):
'''
Set values that we pull from the parent PoolWorker.
We don't do this in __init__ because this class is re-entrant and we
want the updated values.
'''
self.provider = self.pw.getProviderConfig()
self.pool = self.pw.getPoolConfig()
self.zk = self.pw.getZK()
self.manager = self.pw.getProviderManager()
@property
def alive_thread_count(self):
if not self.launch_manager:
return 0
return self.launch_manager.alive_thread_count
# ---------------------------------------------------------------
# Public methods
# ---------------------------------------------------------------
def unlockNodeSet(self, clear_allocation=False):
'''
Attempt unlocking all Nodes in the node set.
:param bool clear_allocation: If true, clears the node allocated_to
attribute.
'''
for node in self.nodeset:
if not node.lock:
continue
if clear_allocation:
node.allocated_to = None
self.zk.storeNode(node)
try:
self.zk.unlockNode(node)
except Exception:
self.log.exception("Error unlocking node:")
self.log.debug("Unlocked node %s for request %s",
node.id, self.request.id)
self.nodeset = []
def decline_request(self):
self.request.declined_by.append(self.launcher_id)
launchers = set(self.zk.getRegisteredLaunchers())
if launchers.issubset(set(self.request.declined_by)):
# All launchers have declined it
self.log.debug("Failing declined node request %s",
self.request.id)
self.request.state = zk.FAILED
else:
self.request.state = zk.REQUESTED
def run(self):
'''
Execute node request handling.
This code is designed to be re-entrant. Because we can't always
satisfy a request immediately (due to lack of provider resources), we
need to be able to call run() repeatedly until the request can be
fulfilled. The node set is saved and added to between calls.
'''
try:
self.run_handler()
except Exception:
self.log.exception(
"Declining node request %s due to exception in "
"NodeRequestHandler:", self.request.id)
self.decline_request()
self.unlockNodeSet(clear_allocation=True)
self.zk.storeNodeRequest(self.request)
self.zk.unlockNodeRequest(self.request)
self.done = True
def poll(self):
'''
Check if the request has been handled.
Once the request has been handled, the 'nodeset' attribute will be
filled with the list of nodes assigned to the request, or it will be
empty if the request could not be fulfilled.
:returns: True if we are done with the request, False otherwise.
'''
if self.paused:
return False
if self.done:
return True
if not self.launch_manager.poll():
return False
# If the request has been pulled, unallocate the node set so other
# requests can use them.
if not self.zk.getNodeRequest(self.request.id):
self.log.info("Node request %s disappeared", self.request.id)
for node in self.nodeset:
node.allocated_to = None
self.zk.storeNode(node)
self.unlockNodeSet()
try:
self.zk.unlockNodeRequest(self.request)
except exceptions.ZKLockException:
# If the lock object is invalid that is "ok" since we no
# longer have a request either. Just do our best, log and
# move on.
self.log.debug("Request lock invalid for node request %s "
"when attempting to clean up the lock",
self.request.id)
return True
if self.launch_manager.failed_nodes:
self.log.debug("Declining node request %s because nodes failed",
self.request.id)
self.decline_request()
else:
# The assigned nodes must be added to the request in the order
# in which they were requested.
assigned = []
for requested_type in self.request.node_types:
for node in self.nodeset:
if node.id in assigned:
continue
if node.type == requested_type:
# Record node ID in the request
self.request.nodes.append(node.id)
assigned.append(node.id)
self.log.debug("Fulfilled node request %s",
self.request.id)
self.request.state = zk.FULFILLED
self.unlockNodeSet()
self.zk.storeNodeRequest(self.request)
self.zk.unlockNodeRequest(self.request)
return True
@abc.abstractmethod
def run_handler(self):
pass
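# Hedged sketch (not part of this module): how a caller (in nodepool, the
# PoolWorker thread mentioned in the docstring above) is expected to drive a
# handler. The helper name and poll interval are made up.
import time

def drive_request(handler, interval=1.0):
    handler.run()                 # may pause itself, e.g. on quota pressure
    while not handler.poll():     # poll until fulfilled, failed or vanished
        time.sleep(interval)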
@six.add_metaclass(abc.ABCMeta)
class NodeLaunchManager(object):
'''
Handle launching multiple nodes in parallel.
Subclasses are required to implement the launch method.
'''
def __init__(self, zk, pool, provider_manager,
requestor, retries):
'''
Initialize the launch manager.
:param ZooKeeper zk: A ZooKeeper object.
:param ProviderPool pool: A config ProviderPool object.
:param ProviderManager provider_manager: The manager object used to
interact with the selected provider.
:param str requestor: Identifier for the request originator.
:param int retries: Number of times to retry failed launches.
'''
self._retries = retries
self._nodes = []
self._failed_nodes = []
self._ready_nodes = []
self._threads = []
self._zk = zk
self._pool = pool
self._provider_manager = provider_manager
self._requestor = requestor
@property
def alive_thread_count(self):
count = 0
for t in self._threads:
if t.isAlive():
count += 1
return count
@property
def failed_nodes(self):
return self._failed_nodes
@property
def ready_nodes(self):
return self._ready_nodes
def poll(self):
'''
Check if all launch requests have completed.
When all of the Node objects have reached a final state (READY or
FAILED), we'll know all threads have finished the launch process.
'''
if not self._threads:
return True
# Give the NodeLaunch threads time to finish.
if self.alive_thread_count:
return False
node_states = [node.state for node in self._nodes]
        # NOTE: It is very important that NodeLauncher always sets one of
        # these states, no matter what.
if not all(s in (zk.READY, zk.FAILED) for s in node_states):
return False
for node in self._nodes:
if node.state == zk.READY:
self._ready_nodes.append(node)
else:
self._failed_nodes.append(node)
return True
@abc.abstractmethod
def launch(self, node):
pass
class ConfigValue(object):
def __eq__(self, other):
if isinstance(other, ConfigValue):
if other.__dict__ == self.__dict__:
return True
return False
def __ne__(self, other):
return not self.__eq__(other)
class Driver(ConfigValue):
pass
@six.add_metaclass(abc.ABCMeta)
class ProviderConfig(ConfigValue):
"""The Provider config interface
The class or instance attribute **name** must be provided as a string.
"""
def __init__(self, provider):
self.name = provider['name']
self.provider = provider
self.driver = Driver()
self.driver.name = provider.get('driver', 'openstack')
self.max_concurrency = provider.get('max-concurrency', -1)
self.driver.manage_images = False
def __repr__(self):
return "<Provider %s>" % self.name
@abc.abstractmethod
def __eq__(self, other):
pass
@abc.abstractmethod
def load(self, newconfig):
pass
@abc.abstractmethod
def get_schema(self):
pass
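# Hedged sketch (not part of this module): a minimal ProviderConfig subclass,
# only to show which members a driver config must supply. The class name and
# schema are hypothetical.
import voluptuous as v

class NullProviderConfig(ProviderConfig):
    def __eq__(self, other):
        return (isinstance(other, NullProviderConfig) and
                other.name == self.name)

    def load(self, newconfig):
        self.pools = {}

    def get_schema(self):
        return v.Schema({v.Required('name'): str}, extra=True)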


@@ -0,0 +1,22 @@
# Copyright 2017 Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from nodepool.driver.openstack.config import OpenStackProviderConfig
class FakeProviderConfig(OpenStackProviderConfig):
def _cloudKwargs(self):
cloud_kwargs = super(FakeProviderConfig, self)._cloudKwargs()
cloud_kwargs['validate'] = False
return cloud_kwargs


@@ -0,0 +1,19 @@
# Copyright 2017 Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from nodepool.driver.openstack.handler import OpenStackNodeRequestHandler
class FakeNodeRequestHandler(OpenStackNodeRequestHandler):
launcher_id = "Fake"


@@ -1,35 +1,35 @@
#!/usr/bin/env python
# Copyright (C) 2011-2013 OpenStack Foundation
#
# Copyright 2013 OpenStack Foundation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import StringIO
import logging
import threading
import time
import uuid
from jenkins import JenkinsException
import shade
import exceptions
from nodepool import exceptions
from nodepool.driver.openstack.provider import OpenStackProvider
class Dummy(object):
IMAGE = 'Image'
INSTANCE = 'Instance'
FLAVOR = 'Flavor'
LOCATION = 'Server.Location'
def __init__(self, kind, **kw):
self.__kind = kind
@@ -40,6 +40,9 @@ class Dummy(object):
if self.should_fail:
raise shade.OpenStackCloudException('This image has '
'SHOULD_FAIL set to True.')
if self.over_quota:
raise shade.exc.OpenStackCloudHTTPError(
'Quota exceeded for something', 403)
except AttributeError:
pass
@@ -63,16 +66,15 @@ class Dummy(object):
setattr(self, key, value)
def fake_get_one_cloud(cloud_config, cloud_kwargs):
cloud_kwargs['validate'] = False
cloud_kwargs['image_format'] = 'qcow2'
return cloud_config.get_one_cloud(**cloud_kwargs)
def get_fake_quota():
return 100, 20, 1000000
class FakeOpenStackCloud(object):
log = logging.getLogger("nodepool.FakeOpenStackCloud")
def __init__(self, images=None, networks=None):
self.pause_creates = False
self._image_list = images
if self._image_list is None:
self._image_list = [
@@ -87,13 +89,18 @@ class FakeOpenStackCloud(object):
networks = [dict(id='fake-public-network-uuid',
name='fake-public-network-name'),
dict(id='fake-private-network-uuid',
name='fake-private-network-name')]
name='fake-private-network-name'),
dict(id='fake-ipv6-network-uuid',
name='fake-ipv6-network-name')]
self.networks = networks
self._flavor_list = [
Dummy(Dummy.FLAVOR, id='f1', ram=8192, name='Fake Flavor'),
Dummy(Dummy.FLAVOR, id='f2', ram=8192, name='Unreal Flavor'),
Dummy(Dummy.FLAVOR, id='f1', ram=8192, name='Fake Flavor',
vcpus=4),
Dummy(Dummy.FLAVOR, id='f2', ram=8192, name='Unreal Flavor',
vcpus=4),
]
self._server_list = []
self.max_cores, self.max_instances, self.max_ram = get_fake_quota()
def _get(self, name_or_id, instance_list):
self.log.debug("Get %s in %s" % (name_or_id, repr(instance_list)))
@@ -103,19 +110,20 @@ class FakeOpenStackCloud(object):
return None
def get_network(self, name_or_id, filters=None):
return dict(id='fake-network-uuid',
name='fake-network-name')
for net in self.networks:
if net['id'] == name_or_id or net['name'] == name_or_id:
return net
return self.networks[0]
def _create(
self, instance_list, instance_type=Dummy.INSTANCE,
done_status='ACTIVE', **kw):
def _create(self, instance_list, instance_type=Dummy.INSTANCE,
done_status='ACTIVE', max_quota=-1, **kw):
should_fail = kw.get('SHOULD_FAIL', '').lower() == 'true'
nics = kw.get('nics', [])
addresses = None
# if keyword 'ipv6-uuid' is found in provider config,
# ipv6 address will be available in public addr dict.
for nic in nics:
if 'ipv6-uuid' not in nic['net-id']:
if nic['net-id'] != 'fake-ipv6-network-uuid':
continue
addresses = dict(
public=[dict(version=4, addr='fake'),
@@ -125,6 +133,7 @@ class FakeOpenStackCloud(object):
public_v6 = 'fake_v6'
public_v4 = 'fake'
private_v4 = 'fake'
interface_ip = 'fake_v6'
break
if not addresses:
addresses = dict(
@@ -134,6 +143,12 @@ class FakeOpenStackCloud(object):
public_v6 = ''
public_v4 = 'fake'
private_v4 = 'fake'
interface_ip = 'fake'
over_quota = False
if (instance_type == Dummy.INSTANCE and
self.max_instances > -1 and
len(instance_list) >= self.max_instances):
over_quota = True
s = Dummy(instance_type,
id=uuid.uuid4().hex,
@@ -144,10 +159,14 @@ class FakeOpenStackCloud(object):
public_v4=public_v4,
public_v6=public_v6,
private_v4=private_v4,
interface_ip=interface_ip,
location=Dummy(Dummy.LOCATION, zone=kw.get('az')),
metadata=kw.get('meta', {}),
manager=self,
key_name=kw.get('key_name', None),
should_fail=should_fail)
should_fail=should_fail,
over_quota=over_quota,
event=threading.Event())
instance_list.append(s)
t = threading.Thread(target=self._finish,
name='FakeProvider create',
@@ -166,7 +185,13 @@ class FakeOpenStackCloud(object):
self.log.debug("Deleted from %s" % (repr(instance_list),))
def _finish(self, obj, delay, status):
time.sleep(delay)
self.log.debug("Pause creates %s", self.pause_creates)
if self.pause_creates:
self.log.debug("Pausing")
obj.event.wait()
self.log.debug("Continuing")
else:
time.sleep(delay)
obj.status = status
def create_image(self, **kwargs):
@@ -198,6 +223,7 @@ class FakeOpenStackCloud(object):
server.public_v4 = 'fake'
server.public_v6 = 'fake'
server.private_v4 = 'fake'
server.interface_ip = 'fake'
return server
def create_server(self, **kw):
@@ -207,8 +233,18 @@ class FakeOpenStackCloud(object):
result = self._get(name_or_id, self._server_list)
return result
def _clean_floating_ip(self, server):
server.public_v4 = ''
server.public_v6 = ''
server.interface_ip = server.private_v4
return server
def wait_for_server(self, server, **kwargs):
server.status = 'ACTIVE'
while server.status == 'BUILD':
time.sleep(0.1)
auto_ip = kwargs.get('auto_ip')
if not auto_ip:
server = self._clean_floating_ip(server)
return server
def list_servers(self):
@@ -217,8 +253,19 @@ class FakeOpenStackCloud(object):
def delete_server(self, name_or_id, delete_ips=True):
self._delete(name_or_id, self._server_list)
def list_networks(self):
return dict(networks=self.networks)
def list_availability_zone_names(self):
return ['fake-az1', 'fake-az2']
def get_compute_limits(self):
return Dummy(
'limits',
max_total_cores=self.max_cores,
max_total_instances=self.max_instances,
max_total_ram_size=self.max_ram,
total_cores_used=4 * len(self._server_list),
total_instances_used=len(self._server_list),
total_ram_used=8192 * len(self._server_list)
)
class FakeUploadFailCloud(FakeOpenStackCloud):
@@ -239,79 +286,17 @@ class FakeUploadFailCloud(FakeOpenStackCloud):
return super(FakeUploadFailCloud, self).create_image(**kwargs)
class FakeFile(StringIO.StringIO):
def __init__(self, path):
StringIO.StringIO.__init__(self)
self.__path = path
class FakeProvider(OpenStackProvider):
def __init__(self, provider, use_taskmanager):
self.createServer_fails = 0
self.__client = FakeOpenStackCloud()
super(FakeProvider, self).__init__(provider, use_taskmanager)
def close(self):
print "Wrote to %s:" % self.__path
print self.getvalue()
StringIO.StringIO.close(self)
def _getClient(self):
return self.__client
class FakeSFTPClient(object):
def open(self, path, mode):
return FakeFile(path)
def close(self):
pass
class FakeSSHClient(object):
def __init__(self):
self.client = self
def ssh(self, description, cmd, output=False):
return True
def scp(self, src, dest):
return True
def open_sftp(self):
return FakeSFTPClient()
class FakeJenkins(object):
def __init__(self, user):
self._nodes = {}
self.quiet = False
self.down = False
if user == 'quiet':
self.quiet = True
if user == 'down':
self.down = True
def node_exists(self, name):
return name in self._nodes
def create_node(self, name, **kw):
self._nodes[name] = kw
def delete_node(self, name):
del self._nodes[name]
def get_info(self):
if self.down:
raise JenkinsException("Jenkins is down")
d = {u'assignedLabels': [{}],
u'description': None,
u'jobs': [{u'color': u'red',
u'name': u'test-job',
u'url': u'https://jenkins.example.com/job/test-job/'}],
u'mode': u'NORMAL',
u'nodeDescription': u'the master Jenkins node',
u'nodeName': u'',
u'numExecutors': 1,
u'overallLoad': {},
u'primaryView': {u'name': u'Overview',
u'url': u'https://jenkins.example.com/'},
u'quietingDown': self.quiet,
u'slaveAgentPort': 8090,
u'unlabeledLoad': {},
u'useCrumbs': False,
u'useSecurity': True,
u'views': [
{u'name': u'test-view',
u'url': u'https://jenkins.example.com/view/test-view/'}]}
return d
def createServer(self, *args, **kwargs):
while self.createServer_fails:
self.createServer_fails -= 1
raise Exception("Expected createServer exception")
return super(FakeProvider, self).createServer(*args, **kwargs)
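# Hedged usage note (not part of this file): tests can make FakeProvider
# simulate launch failures; the count and variable names are illustrative.
#   provider = FakeProvider(provider_config, use_taskmanager=False)
#   provider.createServer_fails = 2  # the first two createServer() calls raise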


@@ -0,0 +1,272 @@
# Copyright (C) 2011-2013 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import os_client_config
import voluptuous as v
from nodepool.driver import ProviderConfig
from nodepool.driver import ConfigValue
class ProviderDiskImage(ConfigValue):
def __repr__(self):
return "<ProviderDiskImage %s>" % self.name
class ProviderCloudImage(ConfigValue):
def __repr__(self):
return "<ProviderCloudImage %s>" % self.name
@property
def external(self):
'''External identifier to pass to the cloud.'''
if self.image_id:
return dict(id=self.image_id)
else:
return self.image_name or self.name
@property
def external_name(self):
'''Human readable version of external.'''
return self.image_id or self.image_name or self.name
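# Hedged illustration (not part of this module): how "external" resolves for
# a cloud image entry. The identifiers are invented.
#   image-id set        -> external == {'id': '<that id>'}
#   only image-name set -> external == '<that name>'
#   neither set         -> external == the cloud-image entry's own name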
class ProviderLabel(ConfigValue):
def __eq__(self, other):
if (other.diskimage != self.diskimage or
other.cloud_image != self.cloud_image or
other.min_ram != self.min_ram or
other.flavor_name != self.flavor_name or
other.key_name != self.key_name):
return False
return True
def __repr__(self):
return "<ProviderLabel %s>" % self.name
class ProviderPool(ConfigValue):
def __eq__(self, other):
if (other.labels != self.labels or
other.max_cores != self.max_cores or
other.max_servers != self.max_servers or
other.max_ram != self.max_ram or
other.azs != self.azs or
other.networks != self.networks):
return False
return True
def __repr__(self):
return "<ProviderPool %s>" % self.name
class OpenStackProviderConfig(ProviderConfig):
os_client_config = None
def __eq__(self, other):
if (other.cloud_config != self.cloud_config or
other.pools != self.pools or
other.image_type != self.image_type or
other.rate != self.rate or
other.boot_timeout != self.boot_timeout or
other.launch_timeout != self.launch_timeout or
other.clean_floating_ips != self.clean_floating_ips or
other.max_concurrency != self.max_concurrency or
other.diskimages != self.diskimages):
return False
return True
def _cloudKwargs(self):
cloud_kwargs = {}
for arg in ['region-name', 'cloud']:
if arg in self.provider:
cloud_kwargs[arg] = self.provider[arg]
return cloud_kwargs
def load(self, config):
if OpenStackProviderConfig.os_client_config is None:
OpenStackProviderConfig.os_client_config = \
os_client_config.OpenStackConfig()
cloud_kwargs = self._cloudKwargs()
self.cloud_config = self.os_client_config.get_one_cloud(**cloud_kwargs)
self.image_type = self.cloud_config.config['image_format']
self.driver.manage_images = True
self.region_name = self.provider.get('region-name')
self.rate = self.provider.get('rate', 1.0)
self.boot_timeout = self.provider.get('boot-timeout', 60)
self.launch_timeout = self.provider.get('launch-timeout', 3600)
self.launch_retries = self.provider.get('launch-retries', 3)
self.clean_floating_ips = self.provider.get('clean-floating-ips')
self.hostname_format = self.provider.get(
'hostname-format',
'{label.name}-{provider.name}-{node.id}'
)
self.image_name_format = self.provider.get(
'image-name-format',
'{image_name}-{timestamp}'
)
self.diskimages = {}
for image in self.provider.get('diskimages', []):
i = ProviderDiskImage()
i.name = image['name']
self.diskimages[i.name] = i
diskimage = config.diskimages[i.name]
diskimage.image_types.add(self.image_type)
i.pause = bool(image.get('pause', False))
i.config_drive = image.get('config-drive', None)
i.connection_type = image.get('connection-type', 'ssh')
# This dict is expanded and used as custom properties when
# the image is uploaded.
i.meta = image.get('meta', {})
# 5 elements, and no key or value can be > 255 chars
# per Nova API rules
if i.meta:
if len(i.meta) > 5 or \
any([len(k) > 255 or len(v) > 255
for k, v in i.meta.items()]):
# soft-fail
# self.log.error("Invalid metadata for %s; ignored"
# % i.name)
i.meta = {}
self.cloud_images = {}
for image in self.provider.get('cloud-images', []):
i = ProviderCloudImage()
i.name = image['name']
i.config_drive = image.get('config-drive', None)
i.image_id = image.get('image-id', None)
i.image_name = image.get('image-name', None)
i.username = image.get('username', None)
i.connection_type = image.get('connection-type', 'ssh')
self.cloud_images[i.name] = i
self.pools = {}
for pool in self.provider.get('pools', []):
pp = ProviderPool()
pp.name = pool['name']
pp.provider = self
self.pools[pp.name] = pp
pp.max_cores = pool.get('max-cores', None)
pp.max_servers = pool.get('max-servers', None)
pp.max_ram = pool.get('max-ram', None)
pp.azs = pool.get('availability-zones')
pp.networks = pool.get('networks', [])
pp.auto_floating_ip = bool(pool.get('auto-floating-ip', True))
pp.labels = {}
for label in pool.get('labels', []):
pl = ProviderLabel()
pl.name = label['name']
pl.pool = pp
pp.labels[pl.name] = pl
diskimage = label.get('diskimage', None)
if diskimage:
pl.diskimage = config.diskimages[diskimage]
else:
pl.diskimage = None
cloud_image_name = label.get('cloud-image', None)
if cloud_image_name:
cloud_image = self.cloud_images.get(cloud_image_name, None)
if not cloud_image:
raise ValueError(
"cloud-image %s does not exist in provider %s"
" but is referenced in label %s" %
(cloud_image_name, self.name, pl.name))
else:
cloud_image = None
pl.cloud_image = cloud_image
pl.min_ram = label.get('min-ram', 0)
pl.flavor_name = label.get('flavor-name', None)
pl.key_name = label.get('key-name')
pl.console_log = label.get('console-log', False)
pl.boot_from_volume = bool(label.get('boot-from-volume',
False))
pl.volume_size = label.get('volume-size', 50)
top_label = config.labels[pl.name]
top_label.pools.append(pp)
def get_schema(self):
provider_diskimage = {
'name': str,
'pause': bool,
'meta': dict,
'config-drive': bool,
'connection-type': str,
}
provider_cloud_images = {
'name': str,
'config-drive': bool,
'connection-type': str,
v.Exclusive('image-id', 'cloud-image-name-or-id'): str,
v.Exclusive('image-name', 'cloud-image-name-or-id'): str,
'username': str,
}
pool_label_main = {
v.Required('name'): str,
v.Exclusive('diskimage', 'label-image'): str,
v.Exclusive('cloud-image', 'label-image'): str,
'min-ram': int,
'flavor-name': str,
'key-name': str,
'console-log': bool,
'boot-from-volume': bool,
'volume-size': int,
}
label_min_ram = v.Schema({v.Required('min-ram'): int}, extra=True)
label_flavor_name = v.Schema({v.Required('flavor-name'): str},
extra=True)
label_diskimage = v.Schema({v.Required('diskimage'): str}, extra=True)
label_cloud_image = v.Schema({v.Required('cloud-image'): str},
extra=True)
pool_label = v.All(pool_label_main,
v.Any(label_min_ram, label_flavor_name),
v.Any(label_diskimage, label_cloud_image))
pool = {
'name': str,
'networks': [str],
'auto-floating-ip': bool,
'max-cores': int,
'max-servers': int,
'max-ram': int,
'labels': [pool_label],
'availability-zones': [str],
}
return v.Schema({
'region-name': str,
v.Required('cloud'): str,
'boot-timeout': int,
'launch-timeout': int,
'launch-retries': int,
'nodepool-id': str,
'rate': float,
'hostname-format': str,
'image-name-format': str,
'clean-floating-ips': bool,
'pools': [pool],
'diskimages': [provider_diskimage],
'cloud-images': [provider_cloud_images],
})
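# Hedged example (not part of this module): a provider body accepted by the
# schema returned by get_schema() above. All names and sizes are invented.
example_provider = {
    'cloud': 'mycloud',
    'boot-timeout': 120,
    'diskimages': [{'name': 'centos-7'}],
    'pools': [{
        'name': 'main',
        'max-servers': 5,
        'labels': [{
            'name': 'centos-7',
            'min-ram': 8192,
            'diskimage': 'centos-7',
        }],
    }],
}
OpenStackProviderConfig({'name': 'example'}).get_schema()(example_provider)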


@@ -0,0 +1,586 @@
# Copyright (C) 2011-2014 OpenStack Foundation
# Copyright 2017 Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
import logging
import math
import pprint
import random
import threading
import time
from nodepool import exceptions
from nodepool import nodeutils as utils
from nodepool import stats
from nodepool import zk
from nodepool.driver import NodeLaunchManager
from nodepool.driver import NodeRequestHandler
from nodepool.driver.openstack.provider import QuotaInformation
class NodeLauncher(threading.Thread, stats.StatsReporter):
log = logging.getLogger("nodepool.driver.openstack."
"NodeLauncher")
def __init__(self, zk, provider_label, provider_manager, requestor,
node, retries):
'''
Initialize the launcher.
:param ZooKeeper zk: A ZooKeeper object.
:param ProviderLabel provider: A config ProviderLabel object.
:param ProviderManager provider_manager: The manager object used to
interact with the selected provider.
:param str requestor: Identifier for the request originator.
:param Node node: The node object.
:param int retries: Number of times to retry failed launches.
'''
threading.Thread.__init__(self, name="NodeLauncher-%s" % node.id)
stats.StatsReporter.__init__(self)
self.log = logging.getLogger("nodepool.NodeLauncher-%s" % node.id)
self._zk = zk
self._label = provider_label
self._provider_manager = provider_manager
self._node = node
self._retries = retries
self._image_name = None
self._requestor = requestor
self._pool = self._label.pool
self._provider_config = self._pool.provider
if self._label.diskimage:
self._diskimage = self._provider_config.diskimages[
self._label.diskimage.name]
else:
self._diskimage = None
def logConsole(self, server_id, hostname):
if not self._label.console_log:
return
console = self._provider_manager.getServerConsole(server_id)
if console:
self.log.debug('Console log from hostname %s:' % hostname)
for line in console.splitlines():
self.log.debug(line.rstrip())
def _launchNode(self):
if self._label.diskimage:
# launch using diskimage
cloud_image = self._zk.getMostRecentImageUpload(
self._diskimage.name, self._provider_config.name)
if not cloud_image:
raise exceptions.LaunchNodepoolException(
"Unable to find current cloud image %s in %s" %
(self._diskimage.name, self._provider_config.name)
)
config_drive = self._diskimage.config_drive
image_external = dict(id=cloud_image.external_id)
image_id = "{path}/{upload_id}".format(
path=self._zk._imageUploadPath(cloud_image.image_name,
cloud_image.build_id,
cloud_image.provider_name),
upload_id=cloud_image.id)
image_name = self._diskimage.name
username = cloud_image.username
connection_type = self._diskimage.connection_type
else:
# launch using unmanaged cloud image
config_drive = self._label.cloud_image.config_drive
image_external = self._label.cloud_image.external
image_id = self._label.cloud_image.name
image_name = self._label.cloud_image.name
username = self._label.cloud_image.username
connection_type = self._label.cloud_image.connection_type
hostname = self._provider_config.hostname_format.format(
label=self._label, provider=self._provider_config, node=self._node
)
self.log.info("Creating server with hostname %s in %s from image %s "
"for node id: %s" % (hostname,
self._provider_config.name,
image_name,
self._node.id))
# NOTE: We store the node ID in the server metadata to use for leaked
# instance detection. We cannot use the external server ID for this
# because that isn't available in ZooKeeper until after the server is
# active, which could cause a race in leak detection.
server = self._provider_manager.createServer(
hostname,
image=image_external,
min_ram=self._label.min_ram,
flavor_name=self._label.flavor_name,
key_name=self._label.key_name,
az=self._node.az,
config_drive=config_drive,
nodepool_node_id=self._node.id,
nodepool_node_label=self._node.type,
nodepool_image_name=image_name,
networks=self._pool.networks,
boot_from_volume=self._label.boot_from_volume,
volume_size=self._label.volume_size)
self._node.external_id = server.id
self._node.hostname = hostname
self._node.image_id = image_id
if username:
self._node.username = username
self._node.connection_type = connection_type
# Checkpoint save the updated node info
self._zk.storeNode(self._node)
self.log.debug("Waiting for server %s for node id: %s" %
(server.id, self._node.id))
server = self._provider_manager.waitForServer(
server, self._provider_config.launch_timeout,
auto_ip=self._pool.auto_floating_ip)
if server.status != 'ACTIVE':
raise exceptions.LaunchStatusException("Server %s for node id: %s "
"status: %s" %
(server.id, self._node.id,
server.status))
# If we didn't specify an AZ, set it to the one chosen by Nova.
# Do this after we are done waiting since AZ may not be available
# immediately after the create request.
if not self._node.az:
self._node.az = server.location.zone
interface_ip = server.interface_ip
if not interface_ip:
self.log.debug(
"Server data for failed IP: %s" % pprint.pformat(
server))
raise exceptions.LaunchNetworkException(
"Unable to find public IP of server")
self._node.interface_ip = interface_ip
self._node.public_ipv4 = server.public_v4
self._node.public_ipv6 = server.public_v6
self._node.private_ipv4 = server.private_v4
# devstack-gate multi-node depends on private_v4 being populated
# with something. On clouds that don't have a private address, use
# the public.
if not self._node.private_ipv4:
self._node.private_ipv4 = server.public_v4
# Checkpoint save the updated node info
self._zk.storeNode(self._node)
self.log.debug(
"Node %s is running [region: %s, az: %s, ip: %s ipv4: %s, "
"ipv6: %s]" %
(self._node.id, self._node.region, self._node.az,
self._node.interface_ip, self._node.public_ipv4,
self._node.public_ipv6))
# Get the SSH public keys for the new node and record in ZooKeeper
try:
self.log.debug("Gathering host keys for node %s", self._node.id)
host_keys = utils.keyscan(
interface_ip, timeout=self._provider_config.boot_timeout)
if not host_keys:
raise exceptions.LaunchKeyscanException(
"Unable to gather host keys")
except exceptions.SSHTimeoutException:
self.logConsole(self._node.external_id, self._node.hostname)
raise
self._node.host_keys = host_keys
self._zk.storeNode(self._node)
def _run(self):
attempts = 1
while attempts <= self._retries:
try:
self._launchNode()
break
except Exception as e:
if attempts <= self._retries:
self.log.exception(
"Launch attempt %d/%d failed for node %s:",
attempts, self._retries, self._node.id)
# If we created an instance, delete it.
if self._node.external_id:
self._provider_manager.cleanupNode(self._node.external_id)
self._provider_manager.waitForNodeCleanup(
self._node.external_id
)
self._node.external_id = None
self._node.public_ipv4 = None
self._node.public_ipv6 = None
self._node.interface_ip = None
self._zk.storeNode(self._node)
if attempts == self._retries:
raise
# Invalidate the quota cache if we encountered a quota error.
if 'quota exceeded' in str(e).lower():
self.log.info("Quota exceeded, invalidating quota cache")
self._provider_manager.invalidateQuotaCache()
attempts += 1
self._node.state = zk.READY
self._zk.storeNode(self._node)
self.log.info("Node id %s is ready", self._node.id)
def run(self):
start_time = time.time()
statsd_key = 'ready'
try:
self._run()
except Exception as e:
self.log.exception("Launch failed for node %s:",
self._node.id)
self._node.state = zk.FAILED
self._zk.storeNode(self._node)
if hasattr(e, 'statsd_key'):
statsd_key = e.statsd_key
else:
statsd_key = 'error.unknown'
try:
dt = int((time.time() - start_time) * 1000)
self.recordLaunchStats(statsd_key, dt, self._image_name,
self._node.provider, self._node.az,
self._requestor)
self.updateNodeStats(self._zk, self._provider_config)
except Exception:
self.log.exception("Exception while reporting stats:")
class OpenStackNodeLaunchManager(NodeLaunchManager):
def launch(self, node):
'''
Launch a new node as described by the supplied Node.
We expect each NodeLauncher thread to directly modify the node that
is passed to it. The poll() method will expect to see the node.state
attribute to change as the node is processed.
:param Node node: The node object.
'''
self._nodes.append(node)
provider_label = self._pool.labels[node.type]
t = NodeLauncher(self._zk, provider_label, self._provider_manager,
self._requestor, node, self._retries)
t.start()
self._threads.append(t)
class OpenStackNodeRequestHandler(NodeRequestHandler):
def __init__(self, pw, request):
super(OpenStackNodeRequestHandler, self).__init__(pw, request)
self.chosen_az = None
self.log = logging.getLogger(
"nodepool.driver.openstack.OpenStackNodeRequestHandler[%s]" %
self.launcher_id)
def _imagesAvailable(self):
'''
Determines if the requested images are available for this provider.
ZooKeeper is queried for an image uploaded to the provider that is
in the READY state.
:returns: True if it is available, False otherwise.
'''
for label in self.request.node_types:
if self.pool.labels[label].cloud_image:
if not self.manager.labelReady(self.pool.labels[label]):
return False
else:
if not self.zk.getMostRecentImageUpload(
self.pool.labels[label].diskimage.name,
self.provider.name):
return False
return True
def _invalidNodeTypes(self):
'''
Return any node types that are invalid for this provider.
:returns: A list of node type names that are invalid, or an empty
list if all are valid.
'''
invalid = []
for ntype in self.request.node_types:
if ntype not in self.pool.labels:
invalid.append(ntype)
return invalid
def _hasRemainingQuota(self, ntype):
"""
Checks if the predicted quota is enough for an additional node of type
ntype.
:param ntype: node type for the quota check
:return: True if there is enough quota, False otherwise
"""
needed_quota = self.manager.quotaNeededByNodeType(ntype, self.pool)
# Calculate remaining quota which is calculated as:
# quota = <total nodepool quota> - <used quota> - <quota for node>
cloud_quota = self.manager.estimatedNodepoolQuota()
cloud_quota.subtract(self.manager.estimatedNodepoolQuotaUsed(self.zk))
cloud_quota.subtract(needed_quota)
self.log.debug("Predicted remaining tenant quota: %s", cloud_quota)
if not cloud_quota.non_negative():
return False
# Now calculate pool specific quota. Values indicating no quota default
# to math.inf representing infinity that can be calculated with.
pool_quota = QuotaInformation(cores=self.pool.max_cores,
instances=self.pool.max_servers,
ram=self.pool.max_ram,
default=math.inf)
pool_quota.subtract(
self.manager.estimatedNodepoolQuotaUsed(self.zk, self.pool))
pool_quota.subtract(needed_quota)
self.log.debug("Predicted remaining pool quota: %s", pool_quota)
return pool_quota.non_negative()
def _hasProviderQuota(self, node_types):
"""
Checks if a provider has enough quota to handle a list of nodes.
This does not take our currently existing nodes into account.
:param node_types: list of node types to check
:return: True if the node list fits into the provider, False otherwise
"""
needed_quota = QuotaInformation()
for ntype in node_types:
needed_quota.add(
self.manager.quotaNeededByNodeType(ntype, self.pool))
cloud_quota = self.manager.estimatedNodepoolQuota()
cloud_quota.subtract(needed_quota)
if not cloud_quota.non_negative():
return False
# Now calculate pool specific quota. Values indicating no quota default
# to math.inf representing infinity that can be calculated with.
pool_quota = QuotaInformation(cores=self.pool.max_cores,
instances=self.pool.max_servers,
ram=self.pool.max_ram,
default=math.inf)
pool_quota.subtract(needed_quota)
return pool_quota.non_negative()
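    # Hedged worked example (numbers invented): with a tenant limit of 100
    # instances, 90 in use and a node needing 1, the predicted remaining
    # tenant quota is 100 - 90 - 1 = 9, so _hasRemainingQuota() passes; if
    # the pool also sets max-servers to 90, the pool-level calculation
    # 90 - 90 - 1 = -1 goes negative and the handler pauses instead.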
def _waitForNodeSet(self):
'''
Fill node set for the request.
Obtain nodes for the request, pausing all new request handling for
this provider until the node set can be filled.
We attempt to group the node set within the same provider availability
zone. For this to work properly, the provider entry in the nodepool
config must list the availability zones. Otherwise, new nodes will be
put in random AZs at nova's whim. The exception being if there is an
existing node in the READY state that we can select for this node set.
Its AZ will then be used for new nodes, as well as any other READY
nodes.
note:: This code is a bit racey in its calculation of the number of
nodes in use for quota purposes. It is possible for multiple
launchers to be doing this calculation at the same time. Since we
currently have no locking mechanism around the "in use"
calculation, if we are at the edge of the quota, one of the
launchers could attempt to launch a new node after the other
launcher has already started doing so. This would cause an
expected failure from the underlying library, which is ok for now.
'''
if not self.launch_manager:
self.launch_manager = OpenStackNodeLaunchManager(
self.zk, self.pool, self.manager,
self.request.requestor, retries=self.provider.launch_retries)
# Since this code can be called more than once for the same request,
# we need to calculate the difference between our current node set
# and what was requested. We cannot use set operations here since a
# node type can appear more than once in the requested types.
saved_types = collections.Counter([n.type for n in self.nodeset])
requested_types = collections.Counter(self.request.node_types)
diff = requested_types - saved_types
needed_types = list(diff.elements())
ready_nodes = self.zk.getReadyNodesOfTypes(needed_types)
for ntype in needed_types:
# First try to grab from the list of already available nodes.
got_a_node = False
if self.request.reuse and ntype in ready_nodes:
for node in ready_nodes[ntype]:
# Only interested in nodes from this provider and
# pool, and within the selected AZ.
if node.provider != self.provider.name:
continue
if node.pool != self.pool.name:
continue
if self.chosen_az and node.az != self.chosen_az:
continue
try:
self.zk.lockNode(node, blocking=False)
except exceptions.ZKLockException:
# It's already locked so skip it.
continue
else:
if self.paused:
self.log.debug("Unpaused request %s", self.request)
self.paused = False
self.log.debug(
"Locked existing node %s for request %s",
node.id, self.request.id)
got_a_node = True
node.allocated_to = self.request.id
self.zk.storeNode(node)
self.nodeset.append(node)
# If we haven't already chosen an AZ, select the
# AZ from this ready node. This will cause new nodes
# to share this AZ, as well.
if not self.chosen_az and node.az:
self.chosen_az = node.az
break
# Could not grab an existing node, so launch a new one.
if not got_a_node:
# Select grouping AZ if we didn't set AZ from a selected,
# pre-existing node
if not self.chosen_az:
self.chosen_az = random.choice(
self.pool.azs or self.manager.getAZs())
# If we calculate that we're at capacity, pause until nodes
# are released by Zuul and removed by the DeletedNodeWorker.
if not self._hasRemainingQuota(ntype):
if not self.paused:
self.log.debug(
"Pausing request handling to satisfy request %s",
self.request)
self.paused = True
return
if self.paused:
self.log.debug("Unpaused request %s", self.request)
self.paused = False
node = zk.Node()
node.state = zk.INIT
node.type = ntype
node.provider = self.provider.name
node.pool = self.pool.name
node.az = self.chosen_az
node.cloud = self.provider.cloud_config.name
node.region = self.provider.region_name
node.launcher = self.launcher_id
node.allocated_to = self.request.id
# Note: It should be safe (i.e., no race) to lock the node
# *after* it is stored since nodes in INIT state are not
# locked anywhere.
self.zk.storeNode(node)
self.zk.lockNode(node, blocking=False)
self.log.debug("Locked building node %s for request %s",
node.id, self.request.id)
# Set state AFTER lock so that it isn't accidentally cleaned
# up (unlocked BUILDING nodes will be deleted).
node.state = zk.BUILDING
self.zk.storeNode(node)
self.nodeset.append(node)
self.launch_manager.launch(node)
def run_handler(self):
'''
Main body for the OpenStackNodeRequestHandler.
'''
self._setFromPoolWorker()
if self.provider is None or self.pool is None:
# If the config changed out from underneath us, we could now be
# an invalid provider and should stop handling this request.
raise Exception("Provider configuration missing")
declined_reasons = []
invalid_types = self._invalidNodeTypes()
if invalid_types:
declined_reasons.append('node type(s) [%s] not available' %
','.join(invalid_types))
elif not self._imagesAvailable():
declined_reasons.append('images are not available')
elif (self.pool.max_servers == 0 or
not self._hasProviderQuota(self.request.node_types)):
declined_reasons.append('it would exceed quota')
# TODO(tobiash): Maybe also calculate the quota prediction here and
# backoff for some seconds if the used quota would be exceeded?
# This way we could give another (free) provider the chance to take
# this request earlier.
# For min-ready requests, which do not re-use READY nodes, let's
# decline if this provider is already at capacity. Otherwise, we
# could end up wedged until another request frees up a node.
if self.request.requestor == "NodePool:min-ready":
current_count = self.zk.countPoolNodes(self.provider.name,
self.pool.name)
# Use >= because dynamic config changes to max-servers can leave
# us with more than max-servers.
if current_count >= self.pool.max_servers:
declined_reasons.append("provider cannot satisify min-ready")
if declined_reasons:
self.log.debug("Declining node request %s because %s",
self.request.id, ', '.join(declined_reasons))
self.decline_request()
self.unlockNodeSet(clear_allocation=True)
# If conditions have changed for a paused request to now cause us
# to decline it, we need to unpause so we don't keep trying it
if self.paused:
self.paused = False
self.zk.storeNodeRequest(self.request)
self.zk.unlockNodeRequest(self.request)
self.done = True
return
if self.paused:
self.log.debug("Retrying node request %s", self.request.id)
else:
self.log.debug("Accepting node request %s", self.request.id)
self.request.state = zk.PENDING
self.zk.storeNodeRequest(self.request)
self._waitForNodeSet()
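For context, a hedged sketch of the requester side this handler serves: store a NodeRequest and poll it until it settles. This mirrors the min-ready requests created in launcher.py further below; zk_conn is a stand-in for a connected nodepool.zk.ZooKeeper object, the requestor and label names are made up, and real requesters such as Zuul watch the znode rather than polling.
import time
req = zk.NodeRequest()
req.state = zk.REQUESTED
req.requestor = "example-requestor"      # hypothetical requestor name
req.node_types.append('ubuntu-xenial')   # hypothetical label
zk_conn.storeNodeRequest(req)
# Poll until a launcher fulfills or fails the request (simplified sketch).
while zk_conn.getNodeRequest(req.id).state not in (zk.FULFILLED, zk.FAILED):
    time.sleep(1)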

540
nodepool/driver/openstack/provider.py

@ -0,0 +1,540 @@
# Copyright (C) 2011-2013 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import logging
from contextlib import contextmanager
import math
import operator
import time
import shade
from nodepool import exceptions
from nodepool.driver import Provider
from nodepool.nodeutils import iterate_timeout
from nodepool.task_manager import ManagerStoppedException
from nodepool.task_manager import TaskManager
IPS_LIST_AGE = 5 # How long to keep a cached copy of the ip list
MAX_QUOTA_AGE = 5 * 60 # How long to keep the quota information cached
@contextmanager
def shade_inner_exceptions():
try:
yield
except shade.OpenStackCloudException as e:
e.log_error()
raise
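The context manager above wraps every shade call made by this driver. A minimal sketch of a call site, assuming client is an already-constructed shade.OpenStackCloud; the helper name is hypothetical and not part of this change.
def list_servers_safely(client):
    # On failure, shade's e.log_error() runs inside shade_inner_exceptions()
    # before the exception propagates to the caller.
    with shade_inner_exceptions():
        return client.list_servers()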
class QuotaInformation:
def __init__(self, cores=None, instances=None, ram=None, default=0):
'''
Initializes the quota information with some values. None values will
be initialized with default, which will typically be 0 or math.inf,
indicating an infinite limit.
:param cores:
:param instances:
:param ram:
:param default:
'''
self.quota = {
'compute': {
'cores': self._get_default(cores, default),
'instances': self._get_default(instances, default),
'ram': self._get_default(ram, default),
}
}
@staticmethod
def construct_from_flavor(flavor):
return QuotaInformation(instances=1,
cores=flavor.vcpus,
ram=flavor.ram)
@staticmethod
def construct_from_limits(limits):
def bound_value(value):
if value == -1:
return math.inf
return value
return QuotaInformation(
instances=bound_value(limits.max_total_instances),
cores=bound_value(limits.max_total_cores),
ram=bound_value(limits.max_total_ram_size))
def _get_default(self, value, default):
return value if value is not None else default
def _add_subtract(self, other, add=True):
for category in self.quota.keys():
for resource in self.quota[category].keys():
second_value = other.quota.get(category, {}).get(resource, 0)
if add:
self.quota[category][resource] += second_value
else:
self.quota[category][resource] -= second_value
def subtract(self, other):
self._add_subtract(other, add=False)
def add(self, other):
self._add_subtract(other, True)
def non_negative(self):
for key_i, category in self.quota.items():
for resource, value in category.items():
if value < 0:
return False
return True
def __str__(self):
return str(self.quota)
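QuotaInformation is plain dictionary arithmetic over cores, instances and ram. A short sketch with made-up numbers shows how the checks below combine tenant limits, current usage and the cost of one more node.
import copy

tenant = QuotaInformation(cores=80, instances=20, ram=204800)   # from limits
used = QuotaInformation(cores=72, instances=18, ram=184320)     # already booted
needed = QuotaInformation(cores=8, instances=1, ram=16384)      # one more node

remaining = copy.deepcopy(tenant)
remaining.subtract(used)
remaining.subtract(needed)
print(remaining.non_negative())  # True: 80 - 72 - 8 cores >= 0, the node fits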
class OpenStackProvider(Provider):
log = logging.getLogger("nodepool.driver.openstack.OpenStackProvider")
def __init__(self, provider, use_taskmanager):
self.provider = provider
self._images = {}
self._networks = {}
self.__flavors = {}
self.__azs = None
self._use_taskmanager = use_taskmanager
self._taskmanager = None
self._current_nodepool_quota = None
def start(self):
if self._use_taskmanager:
self._taskmanager = TaskManager(None, self.provider.name,
self.provider.rate)
self._taskmanager.start()
self.resetClient()
def stop(self):
if self._taskmanager:
self._taskmanager.stop()
def join(self):
if self._taskmanager:
self._taskmanager.join()
@property
def _flavors(self):
if not self.__flavors:
self.__flavors = self._getFlavors()
return self.__flavors
def _getClient(self):
if self._use_taskmanager:
manager = self._taskmanager
else:
manager = None
return shade.OpenStackCloud(
cloud_config=self.provider.cloud_config,
manager=manager,
**self.provider.cloud_config.config)
def quotaNeededByNodeType(self, ntype, pool):
provider_label = pool.labels[ntype]
flavor = self.findFlavor(provider_label.flavor_name,
provider_label.min_ram)
return QuotaInformation.construct_from_flavor(flavor)
def estimatedNodepoolQuota(self):
'''
Determine how much quota is available for nodepool managed resources.
This needs to take into account the tenant's quota, the resources used
outside of nodepool, the resources currently used by nodepool, and the
max settings in the nodepool config. This is cached for MAX_QUOTA_AGE
seconds.
:return: Total amount of resources currently available to nodepool,
including currently existing nodes.
'''
if self._current_nodepool_quota:
now = time.time()
if now < self._current_nodepool_quota['timestamp'] + MAX_QUOTA_AGE:
return copy.deepcopy(self._current_nodepool_quota['quota'])
with shade_inner_exceptions():
limits = self._client.get_compute_limits()
# This is initialized with the full tenant quota and later becomes
# the quota available for nodepool.
nodepool_quota = QuotaInformation.construct_from_limits(limits)
self.log.debug("Provider quota for %s: %s",
self.provider.name, nodepool_quota)
# Subtract the unmanaged quota usage from nodepool_max
# to get the quota available for us.
nodepool_quota.subtract(self.unmanagedQuotaUsed())
self._current_nodepool_quota = {
'quota': nodepool_quota,
'timestamp': time.time()
}
self.log.debug("Available quota for %s: %s",
self.provider.name, nodepool_quota)
return copy.deepcopy(nodepool_quota)
def invalidateQuotaCache(self):
self._current_nodepool_quota['timestamp'] = 0
def estimatedNodepoolQuotaUsed(self, zk, pool=None):
'''
Sums up the quota used (or planned) currently by nodepool. If pool is
given it is filtered by the pool.
:param zk: the object to access zookeeper
:param pool: If given, filtered by the pool.
:return: Calculated quota in use by nodepool
'''
used_quota = QuotaInformation()
for node in zk.nodeIterator():
if node.provider == self.provider.name:
if pool and not node.pool == pool.name:
continue
provider_pool = self.provider.pools.get(node.pool)
if not provider_pool:
self.log.warning(
"Cannot find provider pool for node %s" % node)
# This node is in a funny state. Log it for debugging, but
# move on and don't account for it, since we can't properly
# calculate its cost without pool info.
continue
node_resources = self.quotaNeededByNodeType(
node.type, provider_pool)
used_quota.add(node_resources)
return used_quota
def unmanagedQuotaUsed(self):
'''
Sums up the quota used by servers unmanaged by nodepool.
:return: Calculated quota in use by unmanaged servers
'''
flavors = self.listFlavorsById()
used_quota = QuotaInformation()
for server in self.listNodes():
meta = server.get('metadata', {})
nodepool_provider_name = meta.get('nodepool_provider_name')
if nodepool_provider_name and \
nodepool_provider_name == self.provider.name:
# This provider (regardless of the launcher) owns this server
# so it must not be counted against the unmanaged quota.
continue
flavor = flavors.get(server.flavor.id)
used_quota.add(QuotaInformation.construct_from_flavor(flavor))
return used_quota
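Taken together, the three quota methods above give a launcher-side headroom check. A hedged sketch follows; the helper and its arguments are stand-ins, not part of this change.
def has_room_for(provider, zk, pool, ntype):
    # Tenant limit minus servers nodepool does not manage...
    remaining = provider.estimatedNodepoolQuota()
    # ...minus what nodepool has already booted or planned...
    remaining.subtract(provider.estimatedNodepoolQuotaUsed(zk))
    # ...minus the flavor cost of one more node of this label.
    remaining.subtract(provider.quotaNeededByNodeType(ntype, pool))
    return remaining.non_negative()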
def resetClient(self):
self._client = self._getClient()
if self._use_taskmanager:
self._taskmanager.setClient(self._client)
def _getFlavors(self):
flavors = self.listFlavors()
flavors.sort(key=operator.itemgetter('ram'))
return flavors
# TODO(mordred): These next three methods duplicate logic that is in
# shade, but we can't defer to shade until we're happy
# with using shade's resource caching facility. We have
# not yet proven that to our satisfaction, but if/when
# we do, these should be able to go away.
def _findFlavorByName(self, flavor_name):
for f in self._flavors:
if flavor_name in (f['name'], f['id']):
return f
raise Exception("Unable to find flavor: %s" % flavor_name)
def _findFlavorByRam(self, min_ram, flavor_name):
for f in self._flavors:
if (f['ram'] >= min_ram
and (not flavor_name or flavor_name in f['name'])):
return f
raise Exception("Unable to find flavor with min ram: %s" % min_ram)
def findFlavor(self, flavor_name, min_ram):
# Note: this will throw an error if the provider is offline
# but all the callers are in threads (they call in via CreateServer) so
# the mainloop won't be affected.
if min_ram:
return self._findFlavorByRam(min_ram, flavor_name)
else:
return self._findFlavorByName(flavor_name)
def findImage(self, name):
if name in self._images:
return self._images[name]
with shade_inner_exceptions():
image = self._client.get_image(name)
self._images[name] = image
return image
def findNetwork(self, name):
if name in self._networks:
return self._networks[name]
with shade_inner_exceptions():
network = self._client.get_network(name)
self._networks[name] = network
return network
def deleteImage(self, name):
if name in self._images:
del self._images[name]
with shade_inner_exceptions():
return self._client.delete_image(name)
def createServer(self, name, image,
flavor_name=None, min_ram=None,
az=None, key_name=None, config_drive=True,
nodepool_node_id=None, nodepool_node_label=None,
nodepool_image_name=None,
networks=None, boot_from_volume=False, volume_size=50):
if not networks:
networks = []
if not isinstance(image, dict):
# if it's a dict, we already have the cloud id. If it's not,
# we don't know if it's name or ID so need to look it up
image = self.findImage(image)
flavor = self.findFlavor(flavor_name=flavor_name, min_ram=min_ram)
create_args = dict(name=name,
image=image,
flavor=flavor,
config_drive=config_drive)
if boot_from_volume:
create_args['boot_from_volume'] = boot_from_volume
create_args['volume_size'] = volume_size
# NOTE(pabelanger): Always cleanup volumes when we delete a server.
create_args['terminate_volume'] = True
if key_name:
create_args['key_name'] = key_name
if az:
create_args['availability_zone'] = az
nics = []
for network in networks:
net_id = self.findNetwork(network)['id']
nics.append({'net-id': net_id})
if nics:
create_args['nics'] = nics
# Put provider.name and image_name in as groups so that ansible
# inventory can auto-create groups for us based on each of those
# qualities
# Also list each of those values directly so that non-ansible
# consumption programs don't need to play a game of knowing that
# groups[0] is the image name or anything silly like that.
groups_list = [self.provider.name]
if nodepool_image_name:
groups_list.append(nodepool_image_name)
if nodepool_node_label:
groups_list.append(nodepool_node_label)
meta = dict(
groups=",".join(groups_list),
nodepool_provider_name=self.provider.name,
)
if nodepool_node_id:
meta['nodepool_node_id'] = nodepool_node_id
if nodepool_image_name:
meta['nodepool_image_name'] = nodepool_image_name
if nodepool_node_label:
meta['nodepool_node_label'] = nodepool_node_label
create_args['meta'] = meta
with shade_inner_exceptions():
return self._client.create_server(wait=False, **create_args)
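A minimal usage sketch of createServer; provider, node, and the label, flavor and network names are all hypothetical. The metadata written above is what the leak detection and unmanaged-quota code elsewhere in this change keys on.
server = provider.createServer(
    'np0000000001',                      # hypothetical server name
    'ubuntu-xenial',                     # image name, or a dict with the cloud id
    flavor_name='m1.large',
    az=node.az,
    key_name='nodepool',
    nodepool_node_id=node.id,
    nodepool_node_label=node.type,
    nodepool_image_name='ubuntu-xenial',
    networks=['private'])
server = provider.waitForServer(server, timeout=3600)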
def getServer(self, server_id):
with shade_inner_exceptions():
return self._client.get_server(server_id)
def getServerConsole(self, server_id):
try:
with shade_inner_exceptions():
return self._client.get_server_console(server_id)
except shade.OpenStackCloudException:
return None
def waitForServer(self, server, timeout=3600, auto_ip=True):
with shade_inner_exceptions():
return self._client.wait_for_server(
server=server, auto_ip=auto_ip,
reuse=False, timeout=timeout)
def waitForNodeCleanup(self, server_id, timeout=600):
for count in iterate_timeout(
timeout, exceptions.ServerDeleteException,
"server %s deletion" % server_id):
if not self.getServer(server_id):
return
def waitForImage(self, image_id, timeout=3600):
last_status = None
for count in iterate_timeout(
timeout, exceptions.ImageCreateException, "image creation"):
try:
image = self.getImage(image_id)
except exceptions.NotFound:
continue
except ManagerStoppedException:
raise
except Exception:
self.log.exception('Unable to list images while waiting for '
'%s; will retry' % (image_id))
continue
# shade returns None when not found
if not image:
continue
status = image['status']
if (last_status != status):
self.log.debug(
'Status of image in {provider} {id}: {status}'.format(
provider=self.provider.name,
id=image_id,
status=status))
if status == 'ERROR' and 'fault' in image:
self.log.debug(
'ERROR in {provider} on {id}: {reason}'.format(
provider=self.provider.name,
id=image_id,
reason=image['fault']['message']))
last_status = status
# Glance client returns lower case statuses - but let's be sure
if status.lower() in ['active', 'error']:
return image
def createImage(self, server, image_name, meta):
with shade_inner_exceptions():
return self._client.create_image_snapshot(
image_name, server, **meta)
def getImage(self, image_id):
with shade_inner_exceptions():
return self._client.get_image(image_id)
def labelReady(self, label):
if not label.cloud_image:
return False
image = self.getImage(label.cloud_image.external)
if not image:
self.log.warning(
"Provider %s is configured to use %s as the"
" cloud-image for label %s and that"
" cloud-image could not be found in the"
" cloud." % (self.provider.name,
label.cloud_image.external_name,
label.name))
return False
return True
def uploadImage(self, image_name, filename, image_type=None, meta=None,
md5=None, sha256=None):
# configure glance and upload image. Note the meta flags
# are provided as custom glance properties
# NOTE: we have wait=True set here. This is not how we normally
# do things in nodepool, preferring to poll ourselves thankyouverymuch.
# However - two things to note:
# - PUT has no async mechanism, so we have to handle it anyway
# - v2 w/task waiting is very strange and complex - but we have to
# block for our v1 clouds anyway, so we might as well
# have the interface be the same and treat faking-out
# a shade-level fake-async interface later
if not meta:
meta = {}
if image_type:
meta['disk_format'] = image_type
with shade_inner_exceptions():
image = self._client.create_image(
name=image_name,
filename=filename,
is_public=False,
wait=True,
md5=md5,
sha256=sha256,
**meta)
return image.id
def listImages(self):
with shade_inner_exceptions():
return self._client.list_images()
def listFlavors(self):
with shade_inner_exceptions():
return self._client.list_flavors(get_extra=False)
def listFlavorsById(self):
with shade_inner_exceptions():
flavors = {}
for flavor in self._client.list_flavors(get_extra=False):
flavors[flavor.id] = flavor
return flavors
def listNodes(self):
# shade list_servers carries the nodepool server list caching logic
with shade_inner_exceptions():
return self._client.list_servers()
def deleteServer(self, server_id):
with shade_inner_exceptions():
return self._client.delete_server(server_id, delete_ips=True)
def cleanupNode(self, server_id):
server = self.getServer(server_id)
if not server:
raise exceptions.NotFound()
self.log.debug('Deleting server %s' % server_id)
self.deleteServer(server_id)
def cleanupLeakedResources(self):
if self.provider.clean_floating_ips:
with shade_inner_exceptions():
self._client.delete_unattached_floating_ips()
def getAZs(self):
if self.__azs is None:
self.__azs = self._client.list_availability_zone_names()
if not self.__azs:
# If there are no zones, return a list containing None so that
# random.choice can pick None and pass that to Nova. If this
# feels dirty, please direct your ire to policy.json and the
# ability to turn off random portions of the OpenStack API.
self.__azs = [None]
return self.__azs
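A tiny sketch of why the [None] fallback above is convenient for the AZ selection in the handler earlier in this change; provider is a stand-in for an OpenStackProvider instance.
import random

az = random.choice(provider.getAZs())   # may be None if the cloud hides AZs
# createServer only sets availability_zone when az is truthy, so the None
# case simply lets Nova pick a zone itself.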

22
nodepool/exceptions.py Normal file → Executable file

@ -13,6 +13,26 @@
# under the License.
class NotFound(Exception):
pass
class LaunchNodepoolException(Exception):
statsd_key = 'error.nodepool'
class LaunchStatusException(Exception):
statsd_key = 'error.status'
class LaunchNetworkException(Exception):
statsd_key = 'error.network'
class LaunchKeyscanException(Exception):
statsd_key = 'error.keyscan'
class BuilderError(RuntimeError):
pass
@ -44,8 +64,10 @@ class ServerDeleteException(TimeoutException):
class ImageCreateException(TimeoutException):
statsd_key = 'error.imagetimeout'
class ZKException(Exception):
pass
class ZKLockException(ZKException):
pass
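The new launch exception classes carry a statsd_key attribute. A hedged sketch of how a stats reporter might consume it; only stats.get_client() appears elsewhere in this change, and the counter name below is an assumption.
from nodepool import stats

def report_launch_failure(exc, provider_name):
    # Assumes a statsd-style client with incr(); the key layout is made up.
    client = stats.get_client()
    key = getattr(exc, 'statsd_key', 'error.unknown')
    if client:
        client.incr('nodepool.launch.provider.%s.%s' % (provider_name, key))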


@ -1,145 +0,0 @@
#!/usr/bin/env python
# Copyright (C) 2011-2013 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import re
import jenkins
import fakeprovider
from task_manager import Task, TaskManager
class CreateNodeTask(Task):
def main(self, jenkins):
if 'credentials_id' in self.args:
launcher_params = {'port': 22,
'credentialsId': self.args['credentials_id'],
'sshHostKeyVerificationStrategy':
{'stapler-class':
('hudson.plugins.sshslaves.verifiers.'
'NonVerifyingKeyVerificationStrategy')},
'host': self.args['host']}
else:
launcher_params = {'port': 22,
'username': self.args['username'],
'privatekey': self.args['private_key'],
'sshHostKeyVerificationStrategy':
{'stapler-class':
('hudson.plugins.sshslaves.verifiers.'
'NonVerifyingKeyVerificationStrategy')},
'host': self.args['host']}
args = dict(
name=self.args['name'],
numExecutors=self.args['executors'],
nodeDescription=self.args['description'],
remoteFS=self.args['root'],
exclusive=True,
launcher='hudson.plugins.sshslaves.SSHLauncher',
launcher_params=launcher_params)
if self.args['labels']:
args['labels'] = self.args['labels']
try:
jenkins.create_node(**args)
except jenkins.JenkinsException as e:
if 'already exists' in str(e):
pass
else:
raise
class NodeExistsTask(Task):
def main(self, jenkins):
return jenkins.node_exists(self.args['name'])
class DeleteNodeTask(Task):
def main(self, jenkins):
return jenkins.delete_node(self.args['name'])
class GetNodeConfigTask(Task):
def main(self, jenkins):
return jenkins.get_node_config(self.args['name'])
class SetNodeConfigTask(Task):
def main(self, jenkins):
jenkins.reconfig_node(self.args['name'], self.args['config'])
class StartBuildTask(Task):
def main(self, jenkins):
jenkins.build_job(self.args['name'],
parameters=self.args['params'])
class GetInfoTask(Task):
def main(self, jenkins):
return jenkins.get_info()
class JenkinsManager(TaskManager):
log = logging.getLogger("nodepool.JenkinsManager")
def __init__(self, target):
super(JenkinsManager, self).__init__(None, target.name, target.rate)
self.target = target
self._client = self._getClient()
def _getClient(self):
if self.target.jenkins_apikey == 'fake':
return fakeprovider.FakeJenkins(self.target.jenkins_user)
return jenkins.Jenkins(self.target.jenkins_url,
self.target.jenkins_user,
self.target.jenkins_apikey)
def createNode(self, name, host, description, executors, root, labels=[],
credentials_id=None, username=None, private_key=None):
args = dict(name=name, host=host, description=description,
labels=labels, executors=executors, root=root)
if credentials_id:
args['credentials_id'] = credentials_id
else:
args['username'] = username
args['private_key'] = private_key
return self.submitTask(CreateNodeTask(**args))
def nodeExists(self, name):
return self.submitTask(NodeExistsTask(name=name))
def deleteNode(self, name):
return self.submitTask(DeleteNodeTask(name=name))
LABEL_RE = re.compile(r'<label>(.*)</label>')
def relabelNode(self, name, labels):
config = self.submitTask(GetNodeConfigTask(name=name))
old = None
m = self.LABEL_RE.search(config)
if m:
old = m.group(1)
config = self.LABEL_RE.sub('<label>%s</label>' % ' '.join(labels),
config)
self.submitTask(SetNodeConfigTask(name=name, config=config))
return old
def startBuild(self, name, params):
self.submitTask(StartBuildTask(name=name, params=params))
def getInfo(self):
return self._client.get_info()


@ -1,78 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
import uuid
import threading
import gear
class WatchableJob(gear.Job):
def __init__(self, *args, **kwargs):
super(WatchableJob, self).__init__(*args, **kwargs)
self._completion_handlers = []
self._event = threading.Event()
def _handleCompletion(self, mode=None):
self._event.set()
for handler in self._completion_handlers:
handler(self)
def addCompletionHandler(self, handler):
self._completion_handlers.append(handler)
def onCompleted(self):
self._handleCompletion()
def onFailed(self):
self._handleCompletion()
def onDisconnect(self):
self._handleCompletion()
def onWorkStatus(self):
pass
def waitForCompletion(self, timeout=None):
return self._event.wait(timeout)
class NodepoolJob(WatchableJob):
def __init__(self, job_name, job_data_obj, nodepool):
job_uuid = str(uuid.uuid4().hex)
job_data = json.dumps(job_data_obj)
super(NodepoolJob, self).__init__(job_name, job_data, job_uuid)
self.nodepool = nodepool
def getDbSession(self):
return self.nodepool.getDB().getSession()
class NodeAssignmentJob(NodepoolJob):
log = logging.getLogger("jobs.NodeAssignmentJob")
def __init__(self, node_id, target_name, data, nodepool):
self.node_id = node_id
job_name = 'node_assign:%s' % target_name
super(NodeAssignmentJob, self).__init__(job_name, data, nodepool)
class NodeRevokeJob(NodepoolJob):
log = logging.getLogger("jobs.NodeRevokeJob")
def __init__(self, node_id, manager_name, data, nodepool):
self.node_id = node_id
job_name = 'node_revoke:%s' % manager_name
super(NodeRevokeJob, self).__init__(job_name, data, nodepool)

955
nodepool/launcher.py Executable file

@ -0,0 +1,955 @@
#!/usr/bin/env python
# Copyright (C) 2011-2014 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import os.path
import socket
import threading
import time
from nodepool import exceptions
from nodepool import provider_manager
from nodepool import stats
from nodepool import config as nodepool_config
from nodepool import zk
from nodepool.driver.fake.handler import FakeNodeRequestHandler
from nodepool.driver.openstack.handler import OpenStackNodeRequestHandler
MINS = 60
HOURS = 60 * MINS
# Interval between checking if new servers needed
WATERMARK_SLEEP = 10
# When to delete node request lock znodes
LOCK_CLEANUP = 8 * HOURS
# How long to wait between checks for ZooKeeper connectivity if it disappears.
SUSPEND_WAIT_TIME = 30
class NodeDeleter(threading.Thread, stats.StatsReporter):
log = logging.getLogger("nodepool.NodeDeleter")
def __init__(self, zk, provider_manager, node):
threading.Thread.__init__(self, name='NodeDeleter for %s %s' %
(node.provider, node.external_id))
stats.StatsReporter.__init__(self)
self._zk = zk
self._provider_manager = provider_manager
self._node = node
@staticmethod
def delete(zk_conn, manager, node, node_exists=True):
'''
Delete a server instance and ZooKeeper node.
This is a static method so we can support instantaneous deletes.
:param ZooKeeper zk_conn: A ZooKeeper object to use.
:param ProviderManager provider_manager: ProviderManager object to
use for deleting the server.
:param Node node: A locked Node object that describes the server to
delete.
:param bool node_exists: True if the node actually exists in ZooKeeper.
An artificial Node object can be passed that can be used to delete
a leaked instance.
'''
try:
node.state = zk.DELETING
zk_conn.storeNode(node)
if node.external_id:
manager.cleanupNode(node.external_id)
manager.waitForNodeCleanup(node.external_id)
except exceptions.NotFound:
NodeDeleter.log.info("Instance %s not found in provider %s",
node.external_id, node.provider)
except Exception:
NodeDeleter.log.exception(
"Exception deleting instance %s from %s:",
node.external_id, node.provider)
# Don't delete the ZK node in this case, but do unlock it
if node_exists:
zk_conn.unlockNode(node)
return
if node_exists:
NodeDeleter.log.info(
"Deleting ZK node id=%s, state=%s, external_id=%s",
node.id, node.state, node.external_id)
# This also effectively releases the lock
zk_conn.deleteNode(node)
def run(self):
# Since leaked instances won't have an actual node in ZooKeeper,
# we need to check 'id' to see if this is an artificial Node.
if self._node.id is None:
node_exists = False
else:
node_exists = True
self.delete(self._zk, self._provider_manager, self._node, node_exists)
try:
self.updateNodeStats(self._zk, self._provider_manager.provider)
except Exception:
self.log.exception("Exception while reporting stats:")
class PoolWorker(threading.Thread):
'''
Class that manages node requests for a single provider pool.
The NodePool thread will instantiate a class of this type for each
provider pool found in the nodepool configuration file. If the
pool or provider to which this thread is assigned is removed from
the configuration file, then that will be recognized and this
thread will shut itself down.
'''
def __init__(self, nodepool, provider_name, pool_name):
threading.Thread.__init__(
self, name='PoolWorker.%s-%s' % (provider_name, pool_name)
)
self.log = logging.getLogger("nodepool.%s" % self.name)
self.nodepool = nodepool
self.provider_name = provider_name
self.pool_name = pool_name
self.running = False
self.paused_handler = None
self.request_handlers = []
self.watermark_sleep = nodepool.watermark_sleep
self.zk = self.getZK()
self.launcher_id = "%s-%s-%s" % (socket.gethostname(),
os.getpid(),
self.name)
# ---------------------------------------------------------------
# Private methods
# ---------------------------------------------------------------
def _get_node_request_handler(self, provider, request):
if provider.driver.name == 'fake':
return FakeNodeRequestHandler(self, request)
elif provider.driver.name == 'openstack':
return OpenStackNodeRequestHandler(self, request)
else:
raise RuntimeError("Unknown provider driver %s" % provider.driver)
def _assignHandlers(self):
'''
For each request we can grab, create a NodeRequestHandler for it.
The NodeRequestHandler object will kick off any threads needed to
satisfy the request, then return. We will need to periodically poll
the handler for completion.
'''
provider = self.getProviderConfig()
if not provider:
self.log.info("Missing config. Deleted provider?")
return
if provider.max_concurrency == 0:
return
for req_id in self.zk.getNodeRequests():
if self.paused_handler:
return
# Get active threads for all pools for this provider
active_threads = sum([
w.activeThreads() for
w in self.nodepool.getPoolWorkers(self.provider_name)
])
# Short-circuit for limited request handling
if (provider.max_concurrency > 0 and
active_threads >= provider.max_concurrency):
self.log.debug("Request handling limited: %s active threads ",
"with max concurrency of %s",
active_threads, provider.max_concurrency)
return
req = self.zk.getNodeRequest(req_id)
if not req:
continue
# Only interested in unhandled requests
if req.state != zk.REQUESTED:
continue
# Skip it if we've already declined
if self.launcher_id in req.declined_by:
continue
try:
self.zk.lockNodeRequest(req, blocking=False)
except exceptions.ZKLockException:
continue
# Make sure the state didn't change on us after getting the lock
req2 = self.zk.getNodeRequest(req_id)
if req2 and req2.state != zk.REQUESTED:
self.zk.unlockNodeRequest(req)
continue
# Got a lock, so assign it
self.log.info("Assigning node request %s" % req)
rh = self._get_node_request_handler(provider, req)
rh.run()
if rh.paused:
self.paused_handler = rh
self.request_handlers.append(rh)
def _removeCompletedHandlers(self):
'''
Poll handlers to see which have completed.
'''
active_handlers = []
for r in self.request_handlers:
try:
if not r.poll():
active_handlers.append(r)
else:
self.log.debug("Removing handler for request %s",
r.request.id)
except Exception:
# If we fail to poll a request handler log it but move on
# and process the other handlers. We keep this handler around
# and will try again later.
self.log.exception("Error polling request handler for "
"request %s", r.request.id)
active_handlers.append(r)
self.request_handlers = active_handlers
active_reqs = [r.request.id for r in self.request_handlers]
self.log.debug("Active requests: %s", active_reqs)
# ---------------------------------------------------------------
# Public methods
# ---------------------------------------------------------------
def activeThreads(self):
'''
Return the number of alive threads in use by this provider.
This is an approximate, top-end number for alive threads, since some
threads obviously may have finished by the time we finish the
calculation.
'''
total = 0
for r in self.request_handlers:
total += r.alive_thread_count
return total
def getZK(self):
return self.nodepool.getZK()
def getProviderConfig(self):
return self.nodepool.config.providers.get(self.provider_name)
def getPoolConfig(self):
provider = self.getProviderConfig()
if provider:
return provider.pools[self.pool_name]
else:
return None
def getProviderManager(self):
return self.nodepool.getProviderManager(self.provider_name)
def run(self):
self.running = True
while self.running:
# Don't do work if we've lost communication with the ZK cluster
did_suspend = False
while self.zk and (self.zk.suspended or self.zk.lost):
did_suspend = True
self.log.info("ZooKeeper suspended. Waiting")
time.sleep(SUSPEND_WAIT_TIME)
if did_suspend:
self.log.info("ZooKeeper available. Resuming")
# Make sure we're always registered with ZK
self.zk.registerLauncher(self.launcher_id)
try:
if not self.paused_handler:
self._assignHandlers()
else:
# If we are paused, one request handler could not
# satisfy its assigned request, so give it
# another shot. Unpause ourselves if it completed.
self.paused_handler.run()
if not self.paused_handler.paused:
self.paused_handler = None
self._removeCompletedHandlers()
except Exception:
self.log.exception("Error in PoolWorker:")
time.sleep(self.watermark_sleep)
# Cleanup on exit
if self.paused_handler:
self.paused_handler.unlockNodeSet(clear_allocation=True)
def stop(self):
'''
Shutdown the PoolWorker thread.
Do not wait for the request handlers to finish. Any nodes
that are in the process of launching will be cleaned up on a
restart. They will be unlocked and BUILDING in ZooKeeper.
'''
self.log.info("%s received stop" % self.name)
self.running = False
class BaseCleanupWorker(threading.Thread):
def __init__(self, nodepool, interval, name):
threading.Thread.__init__(self, name=name)
self._nodepool = nodepool
self._interval = interval
self._running = False
def _deleteInstance(self, node):
'''
Delete an instance from a provider.
A thread will be spawned to delete the actual instance from the
provider.
:param Node node: A Node object representing the instance to delete.
'''
self.log.info("Deleting %s instance %s from %s",
node.state, node.external_id, node.provider)
try:
t = NodeDeleter(
self._nodepool.getZK(),
self._nodepool.getProviderManager(node.provider),
node)
t.start()
except Exception:
self.log.exception("Could not delete instance %s on provider %s",
node.external_id, node.provider)
def run(self):
self.log.info("Starting")
self._running = True
while self._running:
# Don't do work if we've lost communication with the ZK cluster
did_suspend = False
zk_conn = self._nodepool.getZK()
while zk_conn and (zk_conn.suspended or zk_conn.lost):
did_suspend = True
self.log.info("ZooKeeper suspended. Waiting")
time.sleep(SUSPEND_WAIT_TIME)
if did_suspend:
self.log.info("ZooKeeper available. Resuming")
self._run()
time.sleep(self._interval)
self.log.info("Stopped")
def stop(self):
self._running = False
self.join()
class CleanupWorker(BaseCleanupWorker):
def __init__(self, nodepool, interval):
super(CleanupWorker, self).__init__(
nodepool, interval, name='CleanupWorker')
self.log = logging.getLogger("nodepool.CleanupWorker")
def _resetLostRequest(self, zk_conn, req):
'''
Reset the request state and deallocate nodes.
:param ZooKeeper zk_conn: A ZooKeeper connection object.
:param NodeRequest req: The lost NodeRequest object.
'''
# Double check the state after the lock
req = zk_conn.getNodeRequest(req.id)
if req.state != zk.PENDING:
return
for node in zk_conn.nodeIterator():
if node.allocated_to == req.id:
try:
zk_conn.lockNode(node)
except exceptions.ZKLockException:
self.log.warning(
"Unable to grab lock to deallocate node %s from "
"request %s", node.id, req.id)
return
node.allocated_to = None
try:
zk_conn.storeNode(node)
self.log.debug("Deallocated node %s for lost request %s",
node.id, req.id)
except Exception:
self.log.exception(
"Unable to deallocate node %s from request %s:",
node.id, req.id)
zk_conn.unlockNode(node)
req.state = zk.REQUESTED
req.nodes = []
zk_conn.storeNodeRequest(req)
self.log.info("Reset lost request %s", req.id)
def _cleanupLostRequests(self):
'''
Look for lost requests and reset them.
A lost request is a node request that was left in the PENDING state
when nodepool exited. We need to look for these (they'll be unlocked)
and disassociate any nodes we've allocated to the request and reset
the request state to REQUESTED so it will be processed again.
'''
zk_conn = self._nodepool.getZK()
for req in zk_conn.nodeRequestIterator():
if req.state == zk.PENDING:
try:
zk_conn.lockNodeRequest(req, blocking=False)
except exceptions.ZKLockException:
continue
try:
self._resetLostRequest(zk_conn, req)
except Exception:
self.log.exception("Error resetting lost request %s:",
req.id)
zk_conn.unlockNodeRequest(req)
def _cleanupNodeRequestLocks(self):
'''
Remove request locks where the request no longer exists.
Because the node request locks are not direct children of the request
znode, we need to remove the locks separately after the request has
been processed. Only remove them after LOCK_CLEANUP seconds have
passed. The delay reduces the chance of the scenario where a request
goes away _while_ its lock is still held for processing and the
cleanup thread deletes a lock that is currently in use.
'''
zk = self._nodepool.getZK()
requests = zk.getNodeRequests()
now = time.time()
for lock_stat in zk.nodeRequestLockStatsIterator():
if lock_stat.lock_id in requests:
continue
if (now - lock_stat.stat.mtime / 1000) > LOCK_CLEANUP:
zk.deleteNodeRequestLock(lock_stat.lock_id)
def _cleanupLeakedInstances(self):
'''
Delete any leaked server instances.
Remove any servers we find in providers we know about that are not
recorded in the ZooKeeper data.
'''
zk_conn = self._nodepool.getZK()
for provider in self._nodepool.config.providers.values():
manager = self._nodepool.getProviderManager(provider.name)
for server in manager.listNodes():
meta = server.get('metadata', {})
if 'nodepool_provider_name' not in meta:
continue
if meta['nodepool_provider_name'] != provider.name:
# Another launcher, sharing this provider but configured
# with a different name, owns this.
continue
if not zk_conn.getNode(meta['nodepool_node_id']):
self.log.warning(
"Deleting leaked instance %s (%s) in %s "
"(unknown node id %s)",
server.name, server.id, provider.name,
meta['nodepool_node_id']
)
# Create an artificial node to use for deleting the server.
node = zk.Node()
node.external_id = server.id
node.provider = provider.name
self._deleteInstance(node)
manager.cleanupLeakedResources()
def _cleanupMaxReadyAge(self):
'''
Delete any server past their max-ready-age.
Remove any servers which are longer than max-ready-age in ready state.
'''
# first get all labels with max_ready_age > 0
label_names = []
for label_name in self._nodepool.config.labels:
if self._nodepool.config.labels[label_name].max_ready_age > 0:
label_names.append(label_name)
zk_conn = self._nodepool.getZK()
ready_nodes = zk_conn.getReadyNodesOfTypes(label_names)
for label_name in ready_nodes:
# get label from node
label = self._nodepool.config.labels[label_name]
for node in ready_nodes[label_name]:
# Can't do anything if we aren't configured for this provider.
if node.provider not in self._nodepool.config.providers:
continue
# check state time against now
now = int(time.time())
if (now - node.state_time) < label.max_ready_age:
continue
try:
zk_conn.lockNode(node, blocking=False)
except exceptions.ZKLockException:
continue
# Double check the state now that we have a lock since it
# may have changed on us.
if node.state != zk.READY:
zk_conn.unlockNode(node)
continue
self.log.debug("Node %s exceeds max ready age: %s >= %s",
node.id, now - node.state_time,
label.max_ready_age)
# The NodeDeleter thread will unlock and remove the
# node from ZooKeeper if it succeeds.
try:
self._deleteInstance(node)
except Exception:
self.log.exception("Failure deleting aged node %s:",
node.id)
zk_conn.unlockNode(node)
def _run(self):
'''
Catch exceptions individually so that other cleanup routines may
have a chance.
'''
try:
self._cleanupNodeRequestLocks()
except Exception:
self.log.exception(
"Exception in CleanupWorker (node request lock cleanup):")
try:
self._cleanupLeakedInstances()
except Exception:
self.log.exception(
"Exception in CleanupWorker (leaked instance cleanup):")
try:
self._cleanupLostRequests()
except Exception:
self.log.exception(
"Exception in CleanupWorker (lost request cleanup):")
try:
self._cleanupMaxReadyAge()
except Exception:
self.log.exception(
"Exception in CleanupWorker (max ready age cleanup):")
class DeletedNodeWorker(BaseCleanupWorker):
def __init__(self, nodepool, interval):
super(DeletedNodeWorker, self).__init__(
nodepool, interval, name='DeletedNodeWorker')
self.log = logging.getLogger("nodepool.DeletedNodeWorker")
def _cleanupNodes(self):
'''
Delete instances from providers and nodes entries from ZooKeeper.
'''
cleanup_states = (zk.USED, zk.IN_USE, zk.BUILDING, zk.FAILED,
zk.DELETING)
zk_conn = self._nodepool.getZK()
for node in zk_conn.nodeIterator():
# If a ready node has been allocated to a request, but that
# request is now missing, deallocate it.
if (node.state == zk.READY and node.allocated_to
and not zk_conn.getNodeRequest(node.allocated_to)):
try:
zk_conn.lockNode(node, blocking=False)
except exceptions.ZKLockException:
pass
else:
# Double check node conditions after lock
if node.state == zk.READY and node.allocated_to:
# Remember the missing request id for logging before
# clearing the allocation.
req_id = node.allocated_to
node.allocated_to = None
try:
zk_conn.storeNode(node)
self.log.debug(
"Deallocated node %s with missing request %s",
node.id, req_id)
except Exception:
self.log.exception(
"Failed to deallocate node %s for missing "
"request %s:", node.id, req_id)
zk_conn.unlockNode(node)
# Can't do anything if we aren't configured for this provider.
if node.provider not in self._nodepool.config.providers:
continue
# Any nodes in these states that are unlocked can be deleted.
if node.state in cleanup_states:
try:
zk_conn.lockNode(node, blocking=False)
except exceptions.ZKLockException:
continue
# Double check the state now that we have a lock since it
# may have changed on us.
if node.state not in cleanup_states:
zk_conn.unlockNode(node)
continue
self.log.debug(
"Marking for deletion unlocked node %s "
"(state: %s, allocated_to: %s)",
node.id, node.state, node.allocated_to)
# The NodeDeleter thread will unlock and remove the
# node from ZooKeeper if it succeeds.
try:
self._deleteInstance(node)
except Exception:
self.log.exception(
"Failure deleting node %s in cleanup state %s:",
node.id, node.state)
zk_conn.unlockNode(node)
def _run(self):
try:
self._cleanupNodes()
except Exception:
self.log.exception("Exception in DeletedNodeWorker:")
class NodePool(threading.Thread):
log = logging.getLogger("nodepool.NodePool")
def __init__(self, securefile, configfile,
watermark_sleep=WATERMARK_SLEEP):
threading.Thread.__init__(self, name='NodePool')
self.securefile = securefile
self.configfile = configfile
self.watermark_sleep = watermark_sleep
self.cleanup_interval = 60
self.delete_interval = 5
self._stopped = False
self.config = None
self.zk = None
self.statsd = stats.get_client()
self._pool_threads = {}
self._cleanup_thread = None
self._delete_thread = None
self._wake_condition = threading.Condition()
self._submittedRequests = {}
def stop(self):
self._stopped = True
self._wake_condition.acquire()
self._wake_condition.notify()
self._wake_condition.release()
if self.config:
provider_manager.ProviderManager.stopProviders(self.config)
if self._cleanup_thread:
self._cleanup_thread.stop()
self._cleanup_thread.join()
if self._delete_thread:
self._delete_thread.stop()
self._delete_thread.join()
# Don't let stop() return until all pool threads have been
# terminated.
self.log.debug("Stopping pool threads")
for thd in self._pool_threads.values():
if thd.isAlive():
thd.stop()
self.log.debug("Waiting for %s" % thd.name)
thd.join()
if self.isAlive():
self.join()
if self.zk:
self.zk.disconnect()
self.log.debug("Finished stopping")
def loadConfig(self):
config = nodepool_config.loadConfig(self.configfile)
if self.securefile:
nodepool_config.loadSecureConfig(config, self.securefile)
return config
def reconfigureZooKeeper(self, config):
if self.config:
running = list(self.config.zookeeper_servers.values())
else:
running = None
configured = list(config.zookeeper_servers.values())
if running == configured:
return
if not self.zk and configured:
self.log.debug("Connecting to ZooKeeper servers")
self.zk = zk.ZooKeeper()
self.zk.connect(configured)
else:
self.log.debug("Detected ZooKeeper server changes")
self.zk.resetHosts(configured)
def setConfig(self, config):
self.config = config
def getZK(self):
return self.zk
def getProviderManager(self, provider_name):
return self.config.provider_managers[provider_name]
def getPoolWorkers(self, provider_name):
return [t for t in self._pool_threads.values() if
t.provider_name == provider_name]
def updateConfig(self):
config = self.loadConfig()
provider_manager.ProviderManager.reconfigure(self.config, config)
self.reconfigureZooKeeper(config)
self.setConfig(config)
def removeCompletedRequests(self):
'''
Remove (locally and in ZK) fulfilled node requests.
We also must reset the allocated_to attribute for each Node assigned
to our request, since we are deleting the request.
'''
# Use a copy of the labels because we modify _submittedRequests
# within the loop below. Note that keys() returns an iterator in
# py3, so we need to explicitly make a new list.
requested_labels = list(self._submittedRequests.keys())
for label in requested_labels:
label_requests = self._submittedRequests[label]
active_requests = []
for req in label_requests:
req = self.zk.getNodeRequest(req.id)
if not req:
continue
if req.state == zk.FULFILLED:
# Reset node allocated_to
for node_id in req.nodes:
node = self.zk.getNode(node_id)
node.allocated_to = None
# NOTE: locking shouldn't be necessary since a node
# with allocated_to set should not be locked except
# by the creator of the request (us).
self.zk.storeNode(node)
self.zk.deleteNodeRequest(req)
elif req.state == zk.FAILED:
self.log.debug("min-ready node request failed: %s", req)
self.zk.deleteNodeRequest(req)
else:
active_requests.append(req)
if active_requests:
self._submittedRequests[label] = active_requests
else:
self.log.debug(
"No more active min-ready requests for label %s", label)
del self._submittedRequests[label]
def labelImageIsAvailable(self, label):
'''
Check if the image associated with a label is ready in any provider.
:param Label label: The label config object.
:returns: True if image associated with the label is uploaded and
ready in at least one provider. False otherwise.
'''
for pool in label.pools:
if not pool.provider.driver.manage_images:
# Provider doesn't manage images, assuming label is ready
return True
for pool_label in pool.labels.values():
if pool_label.diskimage:
if self.zk.getMostRecentImageUpload(
pool_label.diskimage.name, pool.provider.name):
return True
else:
manager = self.getProviderManager(pool.provider.name)
if manager.labelReady(pool_label):
return True
return False
def createMinReady(self):
'''
Create node requests to maintain the minimum number of ready nodes.
Since this method will be called repeatedly, we need to take care to
note when we have already submitted node requests to satisfy min-ready.
Requests we've already submitted are stored in the _submittedRequests
dict, keyed by label.
'''
def createRequest(label_name):
req = zk.NodeRequest()
req.state = zk.REQUESTED
req.requestor = "NodePool:min-ready"
req.node_types.append(label_name)
req.reuse = False # force new node launches
self.zk.storeNodeRequest(req, priority="100")
if label_name not in self._submittedRequests:
self._submittedRequests[label_name] = []
self._submittedRequests[label_name].append(req)
# Since we could have already submitted node requests, do not
# resubmit a request for a type if a request for that type is
# still in progress.
self.removeCompletedRequests()
label_names = list(self.config.labels.keys())
requested_labels = list(self._submittedRequests.keys())
needed_labels = list(set(label_names) - set(requested_labels))
ready_nodes = self.zk.getReadyNodesOfTypes(needed_labels)
for label in self.config.labels.values():
if label.name not in needed_labels:
continue
min_ready = label.min_ready
if min_ready == -1:
continue # disabled
# Calculate how many nodes of this type we need created
need = 0
if label.name not in ready_nodes:
need = label.min_ready
elif len(ready_nodes[label.name]) < min_ready:
need = min_ready - len(ready_nodes[label.name])
if need and self.labelImageIsAvailable(label):
# Create requests for 1 node at a time. This helps to split
# up requests across providers, and avoids the scenario where a
# single provider might fail the entire request because of
# quota (e.g., min-ready=2, but max-servers=1).
self.log.info("Creating requests for %d %s nodes",
need, label.name)
for i in range(0, need):
createRequest(label.name)
def run(self):
'''
Start point for the NodePool thread.
'''
while not self._stopped:
try:
self.updateConfig()
# Don't do work if we've lost communication with the ZK cluster
did_suspend = False
while self.zk and (self.zk.suspended or self.zk.lost):
did_suspend = True
self.log.info("ZooKeeper suspended. Waiting")
time.sleep(SUSPEND_WAIT_TIME)
if did_suspend:
self.log.info("ZooKeeper available. Resuming")
self.createMinReady()
if not self._cleanup_thread:
self._cleanup_thread = CleanupWorker(
self, self.cleanup_interval)
self._cleanup_thread.start()
if not self._delete_thread:
self._delete_thread = DeletedNodeWorker(
self, self.delete_interval)
self._delete_thread.start()
# Stop any PoolWorker threads if the pool was removed
# from the config.
pool_keys = set()
for provider in self.config.providers.values():
for pool in provider.pools.values():
pool_keys.add(provider.name + '-' + pool.name)
new_pool_threads = {}
for key in self._pool_threads.keys():
if key not in pool_keys:
self._pool_threads[key].stop()
else:
new_pool_threads[key] = self._pool_threads[key]
self._pool_threads = new_pool_threads
# Start (or restart) provider threads for each provider in
# the config. Removing a provider from the config and then
# adding it back would cause a restart.
for provider in self.config.providers.values():
for pool in provider.pools.values():
key = provider.name + '-' + pool.name
if key not in self._pool_threads:
t = PoolWorker(self, provider.name, pool.name)
self.log.info("Starting %s" % t.name)
t.start()
self._pool_threads[key] = t
elif not self._pool_threads[key].isAlive():
self._pool_threads[key].join()
t = PoolWorker(self, provider.name, pool.name)
self.log.info("Restarting %s" % t.name)
t.start()
self._pool_threads[key] = t
except Exception:
self.log.exception("Exception in main loop:")
self._wake_condition.acquire()
self._wake_condition.wait(self.watermark_sleep)
self._wake_condition.release()


@ -1,319 +0,0 @@
# Copyright (C) 2011-2014 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import time
# States:
# The cloud provider is building this machine. We have an ID, but it's
# not ready for use.
BUILDING = 1
# The machine is ready for use.
READY = 2
# This can mean in-use, or used but complete.
USED = 3
# Delete this machine immediately.
DELETE = 4
# Keep this machine indefinitely.
HOLD = 5
# Acceptance testing (pre-ready)
TEST = 6
STATE_NAMES = {
BUILDING: 'building',
READY: 'ready',
USED: 'used',
DELETE: 'delete',
HOLD: 'hold',
TEST: 'test',
}
from sqlalchemy import Table, Column, Integer, String, \
MetaData, create_engine
from sqlalchemy.orm import scoped_session, mapper, relationship, foreign
from sqlalchemy.orm.session import Session, sessionmaker
metadata = MetaData()
node_table = Table(
'node', metadata,
Column('id', Integer, primary_key=True),
Column('provider_name', String(255), index=True, nullable=False),
Column('label_name', String(255), index=True, nullable=False),
Column('target_name', String(255), index=True, nullable=False),
Column('manager_name', String(255)),
# Machine name
Column('hostname', String(255), index=True),
# Eg, jenkins node name
Column('nodename', String(255), index=True),
# Provider assigned id for this machine
Column('external_id', String(255)),
# Provider availability zone for this machine
Column('az', String(255)),
# Primary IP address
Column('ip', String(255)),
# Internal/fixed IP address
Column('ip_private', String(255)),
# One of the above values
Column('state', Integer),
# Time of last state change
Column('state_time', Integer),
# Comment about the state of the node - used to annotate held nodes
Column('comment', String(255)),
mysql_engine='InnoDB',
)
subnode_table = Table(
'subnode', metadata,
Column('id', Integer, primary_key=True),
Column('node_id', Integer, index=True, nullable=False),
# Machine name
Column('hostname', String(255), index=True),
# Provider assigned id for this machine
Column('external_id', String(255)),
# Primary IP address
Column('ip', String(255)),
# Internal/fixed IP address
Column('ip_private', String(255)),
# One of the above values
Column('state', Integer),
# Time of last state change
Column('state_time', Integer),
mysql_engine='InnoDB',
)
job_table = Table(
'job', metadata,
Column('id', Integer, primary_key=True),
# The name of the job
Column('name', String(255), index=True),
# Automatically hold up to this number of nodes that fail this job
Column('hold_on_failure', Integer),
mysql_engine='InnoDB',
)
class Node(object):
def __init__(self, provider_name, label_name, target_name, az,
hostname=None, external_id=None, ip=None, ip_private=None,
manager_name=None, state=BUILDING, comment=None):
self.provider_name = provider_name
self.label_name = label_name
self.target_name = target_name
self.manager_name = manager_name
self.external_id = external_id
self.az = az
self.ip = ip
self.ip_private = ip_private
self.hostname = hostname
self.state = state
self.comment = comment
def delete(self):
session = Session.object_session(self)
session.delete(self)
session.commit()
@property
def state(self):
return self._state
@state.setter
def state(self, state):
self._state = state
self.state_time = int(time.time())
session = Session.object_session(self)
if session:
session.commit()
class SubNode(object):
def __init__(self, node,
hostname=None, external_id=None, ip=None, ip_private=None,
state=BUILDING):
self.node_id = node.id
self.provider_name = node.provider_name
self.label_name = node.label_name
self.target_name = node.target_name
self.external_id = external_id
self.ip = ip
self.ip_private = ip_private
self.hostname = hostname
self.state = state
def delete(self):
session = Session.object_session(self)
session.delete(self)
session.commit()
@property
def state(self):
return self._state
@state.setter
def state(self, state):
self._state = state
self.state_time = int(time.time())
session = Session.object_session(self)
if session:
session.commit()
class Job(object):
def __init__(self, name=None, hold_on_failure=0):
self.name = name
self.hold_on_failure = hold_on_failure
def delete(self):
session = Session.object_session(self)
session.delete(self)
session.commit()
mapper(Job, job_table)
mapper(SubNode, subnode_table,
properties=dict(_state=subnode_table.c.state))
mapper(Node, node_table,
properties=dict(
_state=node_table.c.state,
subnodes=relationship(
SubNode,
cascade='all, delete-orphan',
uselist=True,
primaryjoin=foreign(subnode_table.c.node_id) == node_table.c.id,
backref='node')))
class NodeDatabase(object):
def __init__(self, dburi):
engine_kwargs = dict(echo=False, pool_recycle=3600)
if 'sqlite:' not in dburi:
engine_kwargs['max_overflow'] = -1
self.engine = create_engine(dburi, **engine_kwargs)
metadata.create_all(self.engine)
self.session_factory = sessionmaker(bind=self.engine)
self.session = scoped_session(self.session_factory)
def getSession(self):
return NodeDatabaseSession(self.session)
class NodeDatabaseSession(object):
def __init__(self, session):
self.session = session
def __enter__(self):
return self
def __exit__(self, etype, value, tb):
if etype:
self.session().rollback()
else:
self.session().commit()
self.session().close()
self.session = None
def abort(self):
self.session().rollback()
def commit(self):
self.session().commit()
def delete(self, obj):
self.session().delete(obj)
def getNodes(self, provider_name=None, label_name=None, target_name=None,
state=None):
exp = self.session().query(Node).order_by(
node_table.c.provider_name,
node_table.c.label_name)
if provider_name:
exp = exp.filter_by(provider_name=provider_name)
if label_name:
exp = exp.filter_by(label_name=label_name)
if target_name:
exp = exp.filter_by(target_name=target_name)
if state:
exp = exp.filter(node_table.c.state == state)
return exp.all()
def createNode(self, *args, **kwargs):
new = Node(*args, **kwargs)
self.session().add(new)
self.commit()
return new
def createSubNode(self, *args, **kwargs):
new = SubNode(*args, **kwargs)
self.session().add(new)
self.commit()
return new
def getNode(self, id):
nodes = self.session().query(Node).filter_by(id=id).all()
if not nodes:
return None
return nodes[0]
def getSubNode(self, id):
nodes = self.session().query(SubNode).filter_by(id=id).all()
if not nodes:
return None
return nodes[0]
def getNodeByHostname(self, hostname):
nodes = self.session().query(Node).filter_by(hostname=hostname).all()
if not nodes:
return None
return nodes[0]
def getNodeByNodename(self, nodename):
nodes = self.session().query(Node).filter_by(nodename=nodename).all()
if not nodes:
return None
return nodes[0]
def getNodeByExternalID(self, provider_name, external_id):
nodes = self.session().query(Node).filter_by(
provider_name=provider_name,
external_id=external_id).all()
if not nodes:
return None
return nodes[0]
def getJob(self, id):
jobs = self.session().query(Job).filter_by(id=id).all()
if not jobs:
return None
return jobs[0]
def getJobByName(self, name):
jobs = self.session().query(Job).filter_by(name=name).all()
if not jobs:
return None
return jobs[0]
def getJobs(self):
return self.session().query(Job).all()
def createJob(self, *args, **kwargs):
new = Job(*args, **kwargs)
self.session().add(new)
self.commit()
return new

File diff suppressed because it is too large

78
nodepool/nodeutils.py Normal file → Executable file

@ -17,21 +17,20 @@
# limitations under the License.
import errno
import ipaddress
import time
import six
import socket
import logging
from sshclient import SSHClient
import fakeprovider
import paramiko
import exceptions
from nodepool import exceptions
log = logging.getLogger("nodepool.utils")
ITERATE_INTERVAL = 2 # How long to sleep while waiting for something
# in a loop
# How long to sleep while waiting for something in a loop
ITERATE_INTERVAL = 2
def iterate_timeout(max_seconds, exc, purpose):
@ -44,32 +43,57 @@ def iterate_timeout(max_seconds, exc, purpose):
raise exc("Timeout waiting for %s" % purpose)
def ssh_connect(ip, username, connect_kwargs={}, timeout=60):
def keyscan(ip, port=22, timeout=60):
'''
Scan the IP address for public SSH keys.
Keys are returned formatted as: "<type> <base64_string>"
'''
if 'fake' in ip:
return fakeprovider.FakeSSHClient()
# HPcloud may return ECONNREFUSED or EHOSTUNREACH
# for about 30 seconds after adding the IP
return ['ssh-rsa FAKEKEY']
if ipaddress.ip_address(six.text_type(ip)).version < 6:
family = socket.AF_INET
sockaddr = (ip, port)
else:
family = socket.AF_INET6
sockaddr = (ip, port, 0, 0)
keys = []
key = None
for count in iterate_timeout(
timeout, exceptions.SSHTimeoutException, "ssh access"):
sock = None
t = None
try:
client = SSHClient(ip, username, **connect_kwargs)
sock = socket.socket(family, socket.SOCK_STREAM)
sock.settimeout(timeout)
sock.connect(sockaddr)
t = paramiko.transport.Transport(sock)
t.start_client(timeout=timeout)
key = t.get_remote_server_key()
break
except paramiko.SSHException as e:
# NOTE(pabelanger): Currently paramiko only returns a string with
# error code. If we want finer granularity we'll need to regex the
# string.
log.exception('Failed to negotiate SSH: %s' % (e))
except paramiko.AuthenticationException as e:
# This covers the case where the cloud user is created
# after sshd is up (Fedora for example)
log.info('Auth exception for %s@%s. Try number %i...' %
(username, ip, count))
except socket.error as e:
if e[0] not in [errno.ECONNREFUSED, errno.EHOSTUNREACH, None]:
if e.errno not in [errno.ECONNREFUSED, errno.EHOSTUNREACH, None]:
log.exception(
'Exception while testing ssh access to %s:' % ip)
'Exception with ssh access to %s:' % ip)
except Exception as e:
log.exception("ssh-keyscan failure: %s", e)
finally:
try:
if t:
t.close()
except Exception as e:
log.exception('Exception closing paramiko: %s', e)
try:
if sock:
sock.close()
except Exception as e:
log.exception('Exception closing socket: %s', e)
out = client.ssh("test ssh access", "echo access okay", output=True)
if "access okay" in out:
return client
return None
# Paramiko, at this time, seems to return only the ssh-rsa key, so
# only the single key is placed into the list.
if key:
keys.append("%s %s" % (key.get_name(), key.get_base64()))
return keys
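
The keyscan() helper above replaces the old ssh_connect() probe: it only negotiates an SSH transport and records the remote host key instead of logging in. A minimal usage sketch, assuming a reachable host; the address 203.0.113.10 is a placeholder and not taken from the source:

    from nodepool.nodeutils import keyscan

    # Entries come back formatted as "<type> <base64_string>",
    # e.g. "ssh-rsa AAAAB3Nza...".
    host_keys = keyscan('203.0.113.10', port=22, timeout=60)
    for entry in host_keys:
        key_type, key_data = entry.split(' ', 1)
        print(key_type, key_data[:16] + '...')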

304 nodepool/provider_manager.py Normal file → Executable file View File

@ -16,39 +16,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
from contextlib import contextmanager
import shade
import exceptions
import fakeprovider
from nodeutils import iterate_timeout
from task_manager import TaskManager, ManagerStoppedException
from nodepool.driver.fake.provider import FakeProvider
from nodepool.driver.openstack.provider import OpenStackProvider
IPS_LIST_AGE = 5 # How long to keep a cached copy of the ip list
@contextmanager
def shade_inner_exceptions():
try:
yield
except shade.OpenStackCloudException as e:
e.log_error()
raise
class NotFound(Exception):
pass
def get_provider_manager(provider, use_taskmanager):
if (provider.cloud_config.get_auth_args().get('auth_url') == 'fake'):
return FakeProviderManager(provider, use_taskmanager)
def get_provider(provider, use_taskmanager):
if provider.driver.name == 'fake':
return FakeProvider(provider, use_taskmanager)
elif provider.driver.name == 'openstack':
return OpenStackProvider(provider, use_taskmanager)
else:
return ProviderManager(provider, use_taskmanager)
raise RuntimeError("Unknown provider driver %s" % provider.driver)
class ProviderManager(object):
@ -70,7 +50,7 @@ class ProviderManager(object):
ProviderManager.log.debug("Creating new ProviderManager object"
" for %s" % p.name)
new_config.provider_managers[p.name] = \
get_provider_manager(p, use_taskmanager)
get_provider(p, use_taskmanager)
new_config.provider_managers[p.name].start()
for stop_manager in stop_managers:
@ -81,269 +61,3 @@ class ProviderManager(object):
for m in config.provider_managers.values():
m.stop()
m.join()
def __init__(self, provider, use_taskmanager):
self.provider = provider
self._images = {}
self._networks = {}
self.__flavors = {}
self._use_taskmanager = use_taskmanager
self._taskmanager = None
def start(self):
if self._use_taskmanager:
self._taskmanager = TaskManager(None, self.provider.name,
self.provider.rate)
self._taskmanager.start()
self.resetClient()
def stop(self):
if self._taskmanager:
self._taskmanager.stop()
def join(self):
if self._taskmanager:
self._taskmanager.join()
@property
def _flavors(self):
if not self.__flavors:
self.__flavors = self._getFlavors()
return self.__flavors
def _getClient(self):
if self._use_taskmanager:
manager = self._taskmanager
else:
manager = None
return shade.OpenStackCloud(
cloud_config=self.provider.cloud_config,
manager=manager,
**self.provider.cloud_config.config)
def resetClient(self):
self._client = self._getClient()
if self._use_taskmanager:
self._taskmanager.setClient(self._client)
def _getFlavors(self):
flavors = self.listFlavors()
flavors.sort(lambda a, b: cmp(a['ram'], b['ram']))
return flavors
def findFlavor(self, min_ram, name_filter=None):
# Note: this will throw an error if the provider is offline
# but all the callers are in threads (they call in via CreateServer) so
# the mainloop won't be affected.
for f in self._flavors:
if (f['ram'] >= min_ram
and (not name_filter or name_filter in f['name'])):
return f
raise Exception("Unable to find flavor with min ram: %s" % min_ram)
def findImage(self, name):
if name in self._images:
return self._images[name]
with shade_inner_exceptions():
image = self._client.get_image(name)
self._images[name] = image
return image
def findNetwork(self, name):
if name in self._networks:
return self._networks[name]
with shade_inner_exceptions():
network = self._client.get_network(name)
self._networks[name] = network
return network
def deleteImage(self, name):
if name in self._images:
del self._images[name]
with shade_inner_exceptions():
return self._client.delete_image(name)
def createServer(self, name, min_ram, image_id=None, image_name=None,
az=None, key_name=None, name_filter=None,
config_drive=True, nodepool_node_id=None,
nodepool_image_name=None,
nodepool_snapshot_image_id=None):
if image_name:
image = self.findImage(image_name)
else:
image = {'id': image_id}
flavor = self.findFlavor(min_ram, name_filter)
create_args = dict(name=name,
image=image,
flavor=flavor,
config_drive=config_drive)
if key_name:
create_args['key_name'] = key_name
if az:
create_args['availability_zone'] = az
nics = []
for network in self.provider.networks:
if network.id:
nics.append({'net-id': network.id})
elif network.name:
net_id = self.findNetwork(network.name)['id']
nics.append({'net-id': net_id})
else:
raise Exception("Invalid 'networks' configuration.")
if nics:
create_args['nics'] = nics
# Put provider.name and image_name in as groups so that ansible
# inventory can auto-create groups for us based on each of those
# qualities
# Also list each of those values directly so that non-ansible
# consumption programs don't need to play a game of knowing that
# groups[0] is the image name or anything silly like that.
nodepool_meta = dict(provider_name=self.provider.name)
groups_meta = [self.provider.name]
if self.provider.nodepool_id:
nodepool_meta['nodepool_id'] = self.provider.nodepool_id
if nodepool_node_id:
nodepool_meta['node_id'] = nodepool_node_id
if nodepool_snapshot_image_id:
nodepool_meta['snapshot_image_id'] = nodepool_snapshot_image_id
if nodepool_image_name:
nodepool_meta['image_name'] = nodepool_image_name
groups_meta.append(nodepool_image_name)
create_args['meta'] = dict(
groups=json.dumps(groups_meta),
nodepool=json.dumps(nodepool_meta)
)
with shade_inner_exceptions():
return self._client.create_server(wait=False, **create_args)
def getServer(self, server_id):
with shade_inner_exceptions():
return self._client.get_server(server_id)
def waitForServer(self, server, timeout=3600):
with shade_inner_exceptions():
return self._client.wait_for_server(
server=server, auto_ip=True, reuse=False,
timeout=timeout)
def waitForServerDeletion(self, server_id, timeout=600):
for count in iterate_timeout(
timeout, exceptions.ServerDeleteException,
"server %s deletion" % server_id):
if not self.getServer(server_id):
return
def waitForImage(self, image_id, timeout=3600):
last_status = None
for count in iterate_timeout(
timeout, exceptions.ImageCreateException, "image creation"):
try:
image = self.getImage(image_id)
except NotFound:
continue
except ManagerStoppedException:
raise
except Exception:
self.log.exception('Unable to list images while waiting for '
'%s; will retry' % (image_id))
continue
# shade returns None when not found
if not image:
continue
status = image['status']
if (last_status != status):
self.log.debug(
'Status of image in {provider} {id}: {status}'.format(
provider=self.provider.name,
id=image_id,
status=status))
if status == 'ERROR' and 'fault' in image:
self.log.debug(
'ERROR in {provider} on {id}: {reason}'.format(
provider=self.provider.name,
id=image_id,
reason=image['fault']['message']))
last_status = status
# Glance client returns lower case statuses - but let's be sure
if status.lower() in ['active', 'error']:
return image
def createImage(self, server, image_name, meta):
with shade_inner_exceptions():
return self._client.create_image_snapshot(
image_name, server, **meta)
def getImage(self, image_id):
with shade_inner_exceptions():
return self._client.get_image(image_id)
def uploadImage(self, image_name, filename, image_type=None, meta=None,
md5=None, sha256=None):
# configure glance and upload image. Note the meta flags
# are provided as custom glance properties
# NOTE: we have wait=True set here. This is not how we normally
# do things in nodepool, preferring to poll ourselves thankyouverymuch.
# However - two things to note:
# - PUT has no async mechanism, so we have to handle it anyway
# - v2 w/task waiting is very strange and complex - but we have to
# block for our v1 clouds anyway, so we might as well
# have the interface be the same and treat faking-out
# a shade-level fake-async interface later
if not meta:
meta = {}
if image_type:
meta['disk_format'] = image_type
with shade_inner_exceptions():
image = self._client.create_image(
name=image_name,
filename=filename,
is_public=False,
wait=True,
md5=md5,
sha256=sha256,
**meta)
return image.id
def listImages(self):
with shade_inner_exceptions():
return self._client.list_images()
def listFlavors(self):
with shade_inner_exceptions():
return self._client.list_flavors(get_extra=False)
def listServers(self):
# shade list_servers carries the nodepool server list caching logic
with shade_inner_exceptions():
return self._client.list_servers()
def deleteServer(self, server_id):
with shade_inner_exceptions():
return self._client.delete_server(server_id, delete_ips=True)
def cleanupServer(self, server_id):
server = self.getServer(server_id)
if not server:
raise NotFound()
self.log.debug('Deleting server %s' % server_id)
self.deleteServer(server_id)
def cleanupLeakedFloaters(self):
with shade_inner_exceptions():
self._client.delete_unattached_floating_ips()
class FakeProviderManager(ProviderManager):
def __init__(self, provider, use_taskmanager):
self.__client = fakeprovider.FakeOpenStackCloud()
super(FakeProviderManager, self).__init__(provider, use_taskmanager)
def _getClient(self):
return self.__client

View File

@ -1,73 +0,0 @@
#!/usr/bin/env python
# Update the base image that is used for devstack VMs.
# Copyright (C) 2011-2012 OpenStack LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import paramiko
class SSHClient(object):
def __init__(self, ip, username, password=None, pkey=None,
key_filename=None, log=None, look_for_keys=False,
allow_agent=False):
self.client = paramiko.SSHClient()
self.client.set_missing_host_key_policy(paramiko.WarningPolicy())
self.client.connect(ip, username=username, password=password,
pkey=pkey, key_filename=key_filename,
look_for_keys=look_for_keys,
allow_agent=allow_agent)
self.log = log
def __del__(self):
self.client.close()
def ssh(self, action, command, get_pty=True, output=False):
if self.log:
self.log.debug("*** START to %s" % action)
self.log.debug("executing: %s" % command)
stdin, stdout, stderr = self.client.exec_command(
command, get_pty=get_pty)
out = ''
err = ''
for line in stdout:
if output:
out += line
if self.log:
self.log.info(line.rstrip())
for line in stderr:
if output:
err += line
if self.log:
self.log.error(line.rstrip())
ret = stdout.channel.recv_exit_status()
if ret:
if self.log:
self.log.debug("*** FAILED to %s (%s)" % (action, ret))
raise Exception(
"Unable to %s\ncommand: %s\nstdout: %s\nstderr: %s"
% (action, command, out, err))
if self.log:
self.log.debug("*** SUCCESSFULLY %s" % action)
return out
def scp(self, source, dest):
if self.log:
self.log.info("Copy %s -> %s" % (source, dest))
ftp = self.client.open_sftp()
ftp.put(source, dest)
ftp.close()

98 nodepool/stats.py Normal file → Executable file View File

@ -20,8 +20,11 @@ import os
import logging
import statsd
from nodepool import zk
log = logging.getLogger("nodepool.stats")
def get_client():
"""Return a statsd client object setup from environment variables; or
None if they are not set
@ -38,3 +41,98 @@ def get_client():
return statsd.StatsClient(**statsd_args)
else:
return None
class StatsReporter(object):
'''
Class adding statsd reporting functionality.
'''
def __init__(self):
super(StatsReporter, self).__init__()
self._statsd = get_client()
def recordLaunchStats(self, subkey, dt, image_name,
provider_name, node_az, requestor):
'''
Record node launch statistics.
:param str subkey: statsd key
:param int dt: Time delta in milliseconds
:param str image_name: Name of the image used
:param str provider_name: Name of the provider
:param str node_az: AZ of the launched node
:param str requestor: Identifier for the request originator
'''
if not self._statsd:
return
keys = [
'nodepool.launch.provider.%s.%s' % (provider_name, subkey),
'nodepool.launch.image.%s.%s' % (image_name, subkey),
'nodepool.launch.%s' % (subkey,),
]
if node_az:
keys.append('nodepool.launch.provider.%s.%s.%s' %
(provider_name, node_az, subkey))
if requestor:
# Replace '.', which is a graphite hierarchy separator, and ':',
# which is a statsd delimiter.
requestor = requestor.replace('.', '_')
requestor = requestor.replace(':', '_')
keys.append('nodepool.launch.requestor.%s.%s' %
(requestor, subkey))
for key in keys:
self._statsd.timing(key, dt)
self._statsd.incr(key)
def updateNodeStats(self, zk_conn, provider):
'''
Refresh statistics for all known nodes.
:param ZooKeeper zk_conn: A ZooKeeper connection object.
:param Provider provider: A config Provider object.
'''
if not self._statsd:
return
states = {}
# Initialize things we know about to zero
for state in zk.Node.VALID_STATES:
key = 'nodepool.nodes.%s' % state
states[key] = 0
key = 'nodepool.provider.%s.nodes.%s' % (provider.name, state)
states[key] = 0
for node in zk_conn.nodeIterator():
# nodepool.nodes.STATE
key = 'nodepool.nodes.%s' % node.state
states[key] += 1
# nodepool.label.LABEL.nodes.STATE
key = 'nodepool.label.%s.nodes.%s' % (node.type, node.state)
# It's possible we could see node types that aren't in our config
if key in states:
states[key] += 1
else:
states[key] = 1
# nodepool.provider.PROVIDER.nodes.STATE
key = 'nodepool.provider.%s.nodes.%s' % (node.provider, node.state)
# It's possible we could see providers that aren't in our config
if key in states:
states[key] += 1
else:
states[key] = 1
for key, count in states.items():
self._statsd.gauge(key, count)
# nodepool.provider.PROVIDER.max_servers
key = 'nodepool.provider.%s.max_servers' % provider.name
max_servers = sum([p.max_servers for p in provider.pools.values()
if p.max_servers])
self._statsd.gauge(key, max_servers)
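
A hedged illustration of the statsd keys the StatsReporter methods above produce. The sample values (provider 'cloud1', image 'trusty', az 'az1', requestor 'zuul:executor', subkey 'ready') are invented for the example; only the key templates mirror recordLaunchStats():

    subkey, provider, image, az = 'ready', 'cloud1', 'trusty', 'az1'
    # '.' and ':' are replaced, as in recordLaunchStats().
    requestor = 'zuul:executor'.replace('.', '_').replace(':', '_')
    keys = [
        'nodepool.launch.provider.%s.%s' % (provider, subkey),
        'nodepool.launch.image.%s.%s' % (image, subkey),
        'nodepool.launch.%s' % subkey,
        'nodepool.launch.provider.%s.%s.%s' % (provider, az, subkey),
        'nodepool.launch.requestor.%s.%s' % (requestor, subkey),
    ]
    print(keys)

Each of these keys receives both a timing sample and a counter increment.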

127 nodepool/status.py Normal file → Executable file View File

@ -17,8 +17,6 @@
import json
import time
from nodepool import nodedb
from prettytable import PrettyTable
@ -31,21 +29,101 @@ def age(timestamp):
return '%02d:%02d:%02d:%02d' % (d, h, m, s)
def node_list(db, node_id=None):
t = PrettyTable(["ID", "Provider", "AZ", "Label", "Target",
"Manager", "Hostname", "NodeName", "Server ID",
"IP", "State", "Age", "Comment"])
def node_list(zk, node_id=None, detail=False):
headers = [
"ID",
"Provider",
"Label",
"Server ID",
"Public IPv4",
"IPv6",
"State",
"Age",
"Locked"
]
detail_headers = [
"Hostname",
"Private IPv4",
"AZ",
"Port",
"Launcher",
"Allocated To",
"Hold Job",
"Comment"
]
if detail:
headers += detail_headers
t = PrettyTable(headers)
t.align = 'l'
with db.getSession() as session:
for node in session.getNodes():
if node_id and node.id != node_id:
continue
t.add_row([node.id, node.provider_name, node.az,
node.label_name, node.target_name,
node.manager_name, node.hostname,
node.nodename, node.external_id, node.ip,
nodedb.STATE_NAMES[node.state],
age(node.state_time), node.comment])
if node_id:
node = zk.getNode(node_id)
if node:
locked = "unlocked"
try:
zk.lockNode(node, blocking=False)
except Exception:
locked = "locked"
else:
zk.unlockNode(node)
values = [
node.id,
node.provider,
node.type,
node.external_id,
node.public_ipv4,
node.public_ipv6,
node.state,
age(node.state_time),
locked
]
if detail:
values += [
node.hostname,
node.private_ipv4,
node.az,
node.connection_port,
node.launcher,
node.allocated_to,
node.hold_job,
node.comment
]
t.add_row(values)
else:
for node in zk.nodeIterator():
locked = "unlocked"
try:
zk.lockNode(node, blocking=False)
except Exception:
locked = "locked"
else:
zk.unlockNode(node)
values = [
node.id,
node.provider,
node.type,
node.external_id,
node.public_ipv4,
node.public_ipv6,
node.state,
age(node.state_time),
locked
]
if detail:
values += [
node.hostname,
node.private_ipv4,
node.az,
node.connection_port,
node.launcher,
node.allocated_to,
node.hold_job,
node.comment
]
t.add_row(values)
return str(t)
@ -67,15 +145,16 @@ def dib_image_list_json(zk):
for image_name in zk.getImageNames():
for build_no in zk.getBuildNumbers(image_name):
build = zk.getBuild(image_name, build_no)
objs.append({'id' : '-'.join([image_name, build_no]),
objs.append({'id': '-'.join([image_name, build_no]),
'image': image_name,
'builder': build.builder,
'formats': build.formats,
'state': build.state,
'age': int(build.state_time)
})
})
return json.dumps(objs)
def image_list(zk):
t = PrettyTable(["Build ID", "Upload ID", "Provider", "Image",
"Provider Image Name", "Provider Image ID", "State",
@ -94,3 +173,15 @@ def image_list(zk):
upload.state,
age(upload.state_time)])
return str(t)
def request_list(zk):
t = PrettyTable(["Request ID", "State", "Requestor", "Node Types", "Nodes",
"Declined By"])
t.align = 'l'
for req in zk.nodeRequestIterator():
t.add_row([req.id, req.state, req.requestor,
','.join(req.node_types),
','.join(req.nodes),
','.join(req.declined_by)])
return str(t)

View File

@ -18,12 +18,14 @@
import sys
import threading
import six
from six.moves import queue as Queue
import logging
import time
import requests.exceptions
import stats
from nodepool import stats
class ManagerStoppedException(Exception):
pass
@ -49,7 +51,7 @@ class Task(object):
def wait(self):
self._wait_event.wait()
if self._exception:
raise self._exception, None, self._traceback
six.reraise(self._exception, None, self._traceback)
return self._result
def run(self, client):
@ -105,7 +107,7 @@ class TaskManager(threading.Thread):
self.log.debug("Manager %s ran task %s in %ss" %
(self.name, type(task).__name__, dt))
if self.statsd:
#nodepool.task.PROVIDER.subkey
# nodepool.task.PROVIDER.subkey
subkey = type(task).__name__
key = 'nodepool.task.%s.%s' % (self.name, subkey)
self.statsd.timing(key, int(dt * 1000))

View File

@ -15,27 +15,25 @@
"""Common utilities used in testing"""
import errno
import glob
import logging
import os
import pymysql
import random
import re
import select
import string
import socket
import subprocess
import threading
import tempfile
import time
import uuid
import fixtures
import gear
import lockfile
import kazoo.client
import testtools
from nodepool import allocation, builder, fakeprovider, nodepool, nodedb, webapp
from nodepool import builder
from nodepool import launcher
from nodepool import webapp
from nodepool import zk
from nodepool.cmd.config_validator import ConfigValidator
@ -46,74 +44,6 @@ class LoggingPopen(subprocess.Popen):
pass
class FakeGearmanServer(gear.Server):
def __init__(self, port=0):
self.hold_jobs_in_queue = False
super(FakeGearmanServer, self).__init__(port)
def getJobForConnection(self, connection, peek=False):
for queue in [self.high_queue, self.normal_queue, self.low_queue]:
for job in queue:
if not hasattr(job, 'waiting'):
if job.name.startswith('build:'):
job.waiting = self.hold_jobs_in_queue
else:
job.waiting = False
if job.waiting:
continue
if job.name in connection.functions:
if not peek:
queue.remove(job)
connection.related_jobs[job.handle] = job
job.worker_connection = connection
job.running = True
return job
return None
def release(self, regex=None):
released = False
qlen = (len(self.high_queue) + len(self.normal_queue) +
len(self.low_queue))
self.log.debug("releasing queued job %s (%s)" % (regex, qlen))
for job in self.getQueue():
cmd, name = job.name.split(':')
if cmd != 'build':
continue
if not regex or re.match(regex, name):
self.log.debug("releasing queued job %s" %
job.unique)
job.waiting = False
released = True
else:
self.log.debug("not releasing queued job %s" %
job.unique)
if released:
self.wakeConnections()
qlen = (len(self.high_queue) + len(self.normal_queue) +
len(self.low_queue))
self.log.debug("done releasing queued jobs %s (%s)" % (regex, qlen))
class GearmanServerFixture(fixtures.Fixture):
def __init__(self, port=0):
self._port = port
def setUp(self):
super(GearmanServerFixture, self).setUp()
self.gearman_server = FakeGearmanServer(self._port)
self.addCleanup(self.shutdownGearman)
def shutdownGearman(self):
#TODO:greghaynes remove try once gear client protects against this
try:
self.gearman_server.shutdown()
except OSError as e:
if e.errno == errno.EBADF:
pass
else:
raise
class ZookeeperServerFixture(fixtures.Fixture):
def _setUp(self):
zk_host = os.environ.get('NODEPOOL_ZK_HOST', 'localhost')
@ -171,35 +101,38 @@ class ChrootedKazooFixture(fixtures.Fixture):
_tmp_client.close()
class GearmanClient(gear.Client):
def __init__(self):
super(GearmanClient, self).__init__(client_id='test_client')
self.__log = logging.getLogger("tests.GearmanClient")
class StatsdFixture(fixtures.Fixture):
def _setUp(self):
self.running = True
self.thread = threading.Thread(target=self.run)
self.thread.daemon = True
self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
self.sock.bind(('', 0))
self.port = self.sock.getsockname()[1]
self.wake_read, self.wake_write = os.pipe()
self.stats = []
self.thread.start()
self.addCleanup(self._cleanup)
def get_queued_image_jobs(self):
'Count the number of image-build and upload jobs queued.'
queued = 0
for connection in self.active_connections:
try:
req = gear.StatusAdminRequest()
connection.sendAdminRequest(req)
except Exception:
self.__log.exception("Exception while listing functions")
self._lostConnection(connection)
continue
for line in req.response.split('\n'):
parts = [x.strip() for x in line.split('\t')]
# parts[0] - function name
# parts[1] - total jobs queued (including building)
# parts[2] - jobs building
# parts[3] - workers registered
if not parts or parts[0] == '.':
continue
if (not parts[0].startswith('image-build:') and
not parts[0].startswith('image-upload:')):
continue
queued += int(parts[1])
return queued
def run(self):
while self.running:
poll = select.poll()
poll.register(self.sock, select.POLLIN)
poll.register(self.wake_read, select.POLLIN)
ret = poll.poll()
for (fd, event) in ret:
if fd == self.sock.fileno():
data = self.sock.recvfrom(1024)
if not data:
return
self.stats.append(data[0])
if fd == self.wake_read:
return
def _cleanup(self):
self.running = False
os.write(self.wake_write, b'1\n')
self.thread.join()
class BaseTestCase(testtools.TestCase):
@ -230,7 +163,10 @@ class BaseTestCase(testtools.TestCase):
logging.basicConfig(level=logging.DEBUG)
l = logging.getLogger('kazoo')
l.setLevel(logging.INFO)
l.propagate=False
l.propagate = False
l = logging.getLogger('stevedore')
l.setLevel(logging.INFO)
l.propagate = False
self.useFixture(fixtures.NestedTempfile())
self.subprocesses = []
@ -240,48 +176,46 @@ class BaseTestCase(testtools.TestCase):
self.subprocesses.append(p)
return p
self.statsd = StatsdFixture()
self.useFixture(self.statsd)
# note, use 127.0.0.1 rather than localhost to avoid getting ipv6
# see: https://github.com/jsocol/pystatsd/issues/61
os.environ['STATSD_HOST'] = '127.0.0.1'
os.environ['STATSD_PORT'] = str(self.statsd.port)
self.useFixture(fixtures.MonkeyPatch('subprocess.Popen',
LoggingPopenFactory))
self.setUpFakes()
def setUpFakes(self):
log = logging.getLogger("nodepool.test")
log.debug("set up fakes")
fake_client = fakeprovider.FakeOpenStackCloud()
def get_fake_client(*args, **kwargs):
return fake_client
clouds_path = os.path.join(os.path.dirname(__file__),
'fixtures', 'clouds.yaml')
self.useFixture(fixtures.MonkeyPatch(
'nodepool.provider_manager.ProviderManager._getClient',
get_fake_client))
self.useFixture(fixtures.MonkeyPatch(
'nodepool.nodepool._get_one_cloud',
fakeprovider.fake_get_one_cloud))
'os_client_config.config.CONFIG_FILES', [clouds_path]))
def wait_for_threads(self):
whitelist = ['APScheduler',
'MainThread',
# Wait until all transient threads (node launches, deletions,
# etc.) are all complete. Whitelist any long-running threads.
whitelist = ['MainThread',
'NodePool',
'NodePool Builder',
'NodeUpdateListener',
'Gearman client connect',
'Gearman client poll',
'fake-provider',
'fake-provider1',
'fake-provider2',
'fake-provider3',
'fake-dib-provider',
'fake-jenkins',
'fake-target',
'DiskImageBuilder queue',
'CleanupWorker',
'DeletedNodeWorker',
'pydevd.CommandThread',
'pydevd.Reader',
'pydevd.Writer',
]
while True:
done = True
for t in threading.enumerate():
if t.name.startswith("Thread-"):
# apscheduler thread pool
# Kazoo
continue
if t.name.startswith("worker "):
# paste web server
@ -292,93 +226,45 @@ class BaseTestCase(testtools.TestCase):
continue
if t.name.startswith("CleanupWorker"):
continue
if t.name.startswith("PoolWorker"):
continue
if t.name not in whitelist:
done = False
if done:
return
time.sleep(0.1)
def assertReportedStat(self, key, value=None, kind=None):
start = time.time()
while time.time() < (start + 5):
for stat in self.statsd.stats:
k, v = stat.decode('utf8').split(':')
if key == k:
if value is None and kind is None:
return
elif value:
if value == v:
return
elif kind:
if v.endswith('|' + kind):
return
time.sleep(0.1)
class AllocatorTestCase(object):
def setUp(self):
super(AllocatorTestCase, self).setUp()
self.agt = []
def test_allocator(self):
for i, amount in enumerate(self.results):
print self.agt[i]
for i, amount in enumerate(self.results):
self.assertEqual(self.agt[i].amount, amount,
'Error at pos %d, '
'expected %s and got %s' % (i, self.results,
[x.amount
for x in self.agt]))
class RoundRobinTestCase(object):
def setUp(self):
super(RoundRobinTestCase, self).setUp()
self.allocations = []
def test_allocator(self):
for i, label in enumerate(self.results):
self.assertEqual(self.results[i], self.allocations[i],
'Error at pos %d, '
'expected %s and got %s' % (i, self.results,
self.allocations))
class MySQLSchemaFixture(fixtures.Fixture):
def setUp(self):
super(MySQLSchemaFixture, self).setUp()
random_bits = ''.join(random.choice(string.ascii_lowercase +
string.ascii_uppercase)
for x in range(8))
self.name = '%s_%s' % (random_bits, os.getpid())
self.passwd = uuid.uuid4().hex
lock = lockfile.LockFile('/tmp/nodepool-db-schema-lockfile')
with lock:
db = pymysql.connect(host="localhost",
user="openstack_citest",
passwd="openstack_citest",
db="openstack_citest")
cur = db.cursor()
cur.execute("create database %s" % self.name)
cur.execute(
"grant all on %s.* to '%s'@'localhost' identified by '%s'" %
(self.name, self.name, self.passwd))
cur.execute("flush privileges")
self.dburi = 'mysql+pymysql://%s:%s@localhost/%s' % (self.name,
self.passwd,
self.name)
self.addDetail('dburi', testtools.content.text_content(self.dburi))
self.addCleanup(self.cleanup)
def cleanup(self):
lock = lockfile.LockFile('/tmp/nodepool-db-schema-lockfile')
with lock:
db = pymysql.connect(host="localhost",
user="openstack_citest",
passwd="openstack_citest",
db="openstack_citest")
cur = db.cursor()
cur.execute("drop database %s" % self.name)
cur.execute("drop user '%s'@'localhost'" % self.name)
cur.execute("flush privileges")
raise Exception("Key %s not found in reported stats" % key)
class BuilderFixture(fixtures.Fixture):
def __init__(self, configfile, cleanup_interval):
def __init__(self, configfile, cleanup_interval, securefile=None):
super(BuilderFixture, self).__init__()
self.configfile = configfile
self.securefile = securefile
self.cleanup_interval = cleanup_interval
self.builder = None
def setUp(self):
super(BuilderFixture, self).setUp()
self.builder = builder.NodePoolBuilder(self.configfile)
self.builder = builder.NodePoolBuilder(
self.configfile, secure_path=self.securefile)
self.builder.cleanup_interval = self.cleanup_interval
self.builder.build_interval = .1
self.builder.upload_interval = .1
@ -394,15 +280,6 @@ class DBTestCase(BaseTestCase):
def setUp(self):
super(DBTestCase, self).setUp()
self.log = logging.getLogger("tests")
f = MySQLSchemaFixture()
self.useFixture(f)
self.dburi = f.dburi
self.secure_conf = self._setup_secure()
gearman_fixture = GearmanServerFixture()
self.useFixture(gearman_fixture)
self.gearman_server = gearman_fixture.gearman_server
self.setupZK()
def setup_config(self, filename, images_dir=None):
@ -412,13 +289,13 @@ class DBTestCase(BaseTestCase):
configfile = os.path.join(os.path.dirname(__file__),
'fixtures', filename)
(fd, path) = tempfile.mkstemp()
with open(configfile) as conf_fd:
config = conf_fd.read()
os.write(fd, config.format(images_dir=images_dir.path,
gearman_port=self.gearman_server.port,
zookeeper_host=self.zookeeper_host,
zookeeper_port=self.zookeeper_port,
zookeeper_chroot=self.zookeeper_chroot))
with open(configfile, 'rb') as conf_fd:
config = conf_fd.read().decode('utf8')
data = config.format(images_dir=images_dir.path,
zookeeper_host=self.zookeeper_host,
zookeeper_port=self.zookeeper_port,
zookeeper_chroot=self.zookeeper_chroot)
os.write(fd, data.encode('utf8'))
os.close(fd)
self._config_images_dir = images_dir
validator = ConfigValidator(path)
@ -430,14 +307,18 @@ class DBTestCase(BaseTestCase):
new_configfile = self.setup_config(filename, self._config_images_dir)
os.rename(new_configfile, configfile)
def _setup_secure(self):
def setup_secure(self, filename):
# replace entries in secure.conf
configfile = os.path.join(os.path.dirname(__file__),
'fixtures', 'secure.conf')
'fixtures', filename)
(fd, path) = tempfile.mkstemp()
with open(configfile) as conf_fd:
config = conf_fd.read()
os.write(fd, config.format(dburi=self.dburi))
with open(configfile, 'rb') as conf_fd:
config = conf_fd.read().decode('utf8')
data = config.format(
zookeeper_host=self.zookeeper_host,
zookeeper_port=self.zookeeper_port,
zookeeper_chroot=self.zookeeper_chroot)
os.write(fd, data.encode('utf8'))
os.close(fd)
return path
@ -527,35 +408,65 @@ class DBTestCase(BaseTestCase):
self.wait_for_threads()
def waitForNodes(self, pool):
self.wait_for_config(pool)
allocation_history = allocation.AllocationHistory()
def waitForNodeDeletion(self, node):
while True:
exists = False
for n in self.zk.nodeIterator():
if node.id == n.id:
exists = True
break
if not exists:
break
time.sleep(1)
def waitForInstanceDeletion(self, manager, instance_id):
while True:
servers = manager.listNodes()
if not (instance_id in [s.id for s in servers]):
break
time.sleep(1)
def waitForNodeRequestLockDeletion(self, request_id):
while True:
exists = False
for lock_id in self.zk.getNodeRequestLockIDs():
if request_id == lock_id:
exists = True
break
if not exists:
break
time.sleep(1)
def waitForNodes(self, label, count=1):
while True:
self.wait_for_threads()
with pool.getDB().getSession() as session:
needed = pool.getNeededNodes(session, allocation_history)
if not needed:
nodes = session.getNodes(state=nodedb.BUILDING)
if not nodes:
break
ready_nodes = self.zk.getReadyNodesOfTypes([label])
if label in ready_nodes and len(ready_nodes[label]) == count:
break
time.sleep(1)
self.wait_for_threads()
return ready_nodes[label]
def waitForJobs(self):
# XXX:greghaynes - There is a very narrow race here where nodepool
# is who actually updates the database so this may return before the
# image rows are updated.
client = GearmanClient()
client.addServer('localhost', self.gearman_server.port)
client.waitForServer()
def waitForNodeRequest(self, req, states=None):
'''
Wait for a node request to transition to a final state.
'''
if states is None:
states = (zk.FULFILLED, zk.FAILED)
while True:
req = self.zk.getNodeRequest(req.id)
if req.state in states:
break
time.sleep(1)
while client.get_queued_image_jobs() > 0:
time.sleep(.2)
client.shutdown()
return req
def useNodepool(self, *args, **kwargs):
args = (self.secure_conf,) + args
pool = nodepool.NodePool(*args, **kwargs)
secure_conf = kwargs.pop('secure_conf', None)
args = (secure_conf,) + args
pool = launcher.NodePool(*args, **kwargs)
pool.cleanup_interval = .5
pool.delete_interval = .5
self.addCleanup(pool.stop)
return pool
@ -564,8 +475,10 @@ class DBTestCase(BaseTestCase):
self.addCleanup(app.stop)
return app
def _useBuilder(self, configfile, cleanup_interval=.5):
self.useFixture(BuilderFixture(configfile, cleanup_interval))
def useBuilder(self, configfile, securefile=None, cleanup_interval=.5):
self.useFixture(
BuilderFixture(configfile, cleanup_interval, securefile)
)
def setupZK(self):
f = ZookeeperServerFixture()
@ -587,8 +500,8 @@ class DBTestCase(BaseTestCase):
def printZKTree(self, node):
def join(a, b):
if a.endswith('/'):
return a+b
return a+'/'+b
return a + b
return a + '/' + b
data, stat = self.zk.client.get(node)
self.log.debug("Node: %s" % (node,))

15 nodepool/tests/fixtures/clouds.yaml vendored Normal file View File

@ -0,0 +1,15 @@
clouds:
fake:
auth:
username: 'fake'
password: 'fake'
project_id: 'fake'
auth_url: 'fake'
fake-vhd:
auth:
username: 'fake'
password: 'fake'
project_id: 'fake'
auth_url: 'fake'
image_format: 'vhd'
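
A sketch of how the 'fake' cloud defined in this fixture can be resolved once os_client_config.config.CONFIG_FILES points at it, as the MonkeyPatch in the test base class does; passing config_files explicitly here is only for illustration:

    import os_client_config

    # Load the fixture directly instead of relying on the monkeypatch.
    config = os_client_config.OpenStackConfig(config_files=['clouds.yaml'])
    cloud = config.get_one_cloud(cloud='fake')
    print(cloud.config['auth']['auth_url'])  # 'fake'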

View File

@ -1,21 +1,9 @@
elements-dir: /etc/nodepool/elements
images-dir: /opt/nodepool_dib
cron:
cleanup: '*/1 * * * *'
check: '*/15 * * * *'
zmq-publishers:
- tcp://jenkins01.openstack.org:8888
- tcp://jenkins02.openstack.org:8888
- tcp://jenkins03.openstack.org:8888
- tcp://jenkins04.openstack.org:8888
- tcp://jenkins05.openstack.org:8888
- tcp://jenkins06.openstack.org:8888
- tcp://jenkins07.openstack.org:8888
gearman-servers:
- host: zuul.openstack.org
webapp:
port: 8005
listen_address: '0.0.0.0'
zookeeper-servers:
- host: zk1.openstack.org
@ -24,60 +12,69 @@ zookeeper-servers:
labels:
- name: trusty
image: trusty
ready-script: configure_mirror.sh
max-ready-age: 3600
min-ready: 1
providers:
- name: cloud1
- name: cloud2
- name: trusty-2-node
image: trusty
ready-script: multinode_setup.sh
subnodes: 1
min-ready: 0
providers:
- name: cloud1
- name: cloud2
- name: trusty-external
min-ready: 1
providers:
- name: cloud1
driver: openstack
cloud: vanilla-cloud
region-name: 'vanilla'
service-type: 'compute'
service-name: 'cloudServersOpenStack'
username: '<%= username %>'
password: '<%= password %>'
project-id: '<%= project %>'
auth-url: 'https://identity.example.com/v2.0/'
boot-timeout: 120
max-servers: 184
max-concurrency: 10
launch-retries: 3
rate: 0.001
images:
diskimages:
- name: trusty
min-ram: 8192
username: jenkins
user-home: /home/jenkins
private-key: /home/nodepool/.ssh/id_rsa
pools:
- name: main
max-servers: 184
auto-floating-ip: True
labels:
- name: trusty
diskimage: trusty
min-ram: 8192
console-log: True
- name: trusty-2-node
diskimage: trusty
min-ram: 8192
boot-from-volume: True
volume-size: 100
- name: cloud2
driver: openstack
cloud: chocolate-cloud
region-name: 'chocolate'
service-type: 'compute'
service-name: 'cloudServersOpenStack'
username: '<%= username %>'
password: '<%= password %>'
project-id: '<%= project %>'
auth-url: 'https://identity.example.com/v2.0/'
boot-timeout: 120
max-servers: 184
rate: 0.001
images:
diskimages:
- name: trusty
pause: False
min-ram: 8192
username: jenkins
user-home: /home/jenkins
private-key: /home/nodepool/.ssh/id_rsa
targets:
- name: zuul
connection-type: ssh
cloud-images:
- name: trusty-unmanaged
config-drive: true
- name: windows-unmanaged
username: winzuul
connection-type: winrm
pools:
- name: main
max-servers: 184
auto-floating-ip: False
labels:
- name: trusty
diskimage: trusty
min-ram: 8192
- name: trusty-2-node
diskimage: trusty
min-ram: 8192
- name: trusty-external
cloud-image: trusty-unmanaged
min-ram: 8192
diskimages:
- name: trusty

View File

@ -1,22 +1,6 @@
elements-dir: /etc/nodepool/elements
images-dir: /opt/nodepool_dib
cron:
cleanup: '*/1 * * * *'
check: '*/15 * * * *'
zmq-publishers:
- tcp://jenkins01.openstack.org:8888
- tcp://jenkins02.openstack.org:8888
- tcp://jenkins03.openstack.org:8888
- tcp://jenkins04.openstack.org:8888
- tcp://jenkins05.openstack.org:8888
- tcp://jenkins06.openstack.org:8888
- tcp://jenkins07.openstack.org:8888
gearman-servers:
- host: zuul.openstack.org
zookeeper-servers:
- host: zk1.openstack.org
port: 2181
@ -25,15 +9,12 @@ zookeeper-servers:
labels:
- name: trusty
image: trusty
ready-script: configure_mirror.sh
min-ready: 1
providers:
- name: cloud1
- name: cloud2
- name: trusty-2-node
image: trusty
ready-script: multinode_setup.sh
subnodes: 1
min-ready: 0
providers:
- name: cloud1
@ -42,39 +23,20 @@ labels:
providers:
- name: cloud1
region-name: 'vanilla'
service-type: 'compute'
service-name: 'cloudServersOpenStack'
username: '<%= username %>'
password: '<%= password %>'
project-id: '<%= project %>'
auth-url: 'https://identity.example.com/v2.0/'
boot-timeout: 120
max-servers: 184
rate: 0.001
images:
- name: trusty
min-ram: 8192
username: jenkins
private-key: /home/nodepool/.ssh/id_rsa
- name: cloud2
region-name: 'chocolate'
service-type: 'compute'
service-name: 'cloudServersOpenStack'
username: '<%= username %>'
password: '<%= password %>'
project-id: '<%= project %>'
auth-url: 'https://identity.example.com/v2.0/'
boot-timeout: 120
max-servers: 184
rate: 0.001
images:
- name: trusty
min-ram: 8192
username: jenkins
private-key: /home/nodepool/.ssh/id_rsa
targets:
- name: zuul
diskimages:
- name: trusty

View File

@ -1,52 +0,0 @@
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: localhost
labels:
- name: real-label
image: fake-image
min-ready: 1
providers:
- name: real-provider
providers:
- name: real-provider
region-name: real-region
username: 'real'
password: 'real'
auth-url: 'real'
project-id: 'real'
max-servers: 96
pool: 'real'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
- name: fake-image
min-ram: 8192
name-filter: 'Real'
meta:
key: value
key2: value
targets:
- name: fake-target
jenkins:
url: https://jenkins.example.org/
user: fake
apikey: fake
diskimages:
- name: fake-image

View File

@ -0,0 +1,29 @@
images-dir: '{images_dir}'
zookeeper-servers:
- host: localhost
labels:
- name: real-label
min-ready: 1
providers:
- name: real-provider
region-name: real-region
rate: 0.0001
diskimages:
- name: fake-image
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
labels:
- name: real-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Real'
diskimages:
- name: fake-image

View File

@ -0,0 +1,29 @@
images-dir: '{images_dir}'
zookeeper-servers:
- host: localhost
labels:
- name: fake-label
min-ready: 1
providers:
- name: real-provider
cloud: real-cloud
rate: 0.0001
diskimages:
- name: fake-image
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Real'
diskimages:
- name: fake-image

View File

@ -1,44 +0,0 @@
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: localhost
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: real-provider
providers:
- name: real-provider
cloud: real-cloud
max-servers: 96
pool: 'real'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
- name: fake-image
min-ram: 8192
name-filter: 'Real'
meta:
key: value
key2: value
targets:
- name: fake-target
diskimages:
- name: fake-image

View File

@ -0,0 +1,53 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
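
Fixtures in this new pools/labels format can be checked with the ConfigValidator the test base class already imports; a minimal sketch, assuming validate() raises on schema errors and using a hypothetical path:

    from nodepool.cmd.config_validator import ConfigValidator

    # Path is illustrative only; point it at any rendered config file.
    validator = ConfigValidator('/etc/nodepool/nodepool.yaml')
    validator.validate()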

View File

@ -0,0 +1,63 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
- name: fake-label2
min-ready: 0
- name: fake-label3
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 1
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
- name: fake-label2
diskimage: fake-image
min-ram: 8192
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 1
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
- name: fake-label3
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,39 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '* * * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,33 +8,23 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '* * * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,34 +8,24 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
nodepool-id: foo
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -0,0 +1,57 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label1
min-ready: 1
- name: fake-label2
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
meta:
key: value
key2: value
pools:
- name: pool1
max-servers: 1
availability-zones:
- az1
labels:
- name: fake-label1
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
- name: pool2
max-servers: 1
availability-zones:
- az2
labels:
- name: fake-label2
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,33 +8,31 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image

View File

@ -0,0 +1,84 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label1
min-ready: 1
- name: fake-label2
min-ready: 1
- name: fake-label3
min-ready: 1
providers:
- name: fake-provider1
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
networks:
- 'some-name'
auto-floating-ip: False
labels:
- name: fake-label1
diskimage: fake-image
min-ram: 8192
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
networks:
- 'some-name'
auto-floating-ip: True
labels:
- name: fake-label2
diskimage: fake-image
min-ram: 8192
- name: fake-provider3
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
networks:
- 'some-name'
# Test default value of auto-floating-ip is True
labels:
- name: fake-label3
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,35 +8,29 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
availability-zones:
- az1
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
availability-zones:
- az1
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,40 +8,32 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 2
providers:
- name: fake-provider
- name: multi-fake
image: fake-image
ready-script: multinode_setup.sh
subnodes: 2
min-ready: 2
providers:
- name: fake-provider
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
boot-from-volume: True
diskimages:
- name: fake-image

View File

@ -1,16 +1,5 @@
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -18,54 +7,46 @@ zookeeper-servers:
labels:
- name: fake-label1
image: fake-image1
min-ready: 1
providers:
- name: fake-provider1
- name: fake-label2
image: fake-image2
min-ready: 1
providers:
- name: fake-provider2
providers:
- name: fake-provider1
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
cloud: fake
driver: fake
rate: 0.0001
images:
diskimages:
- name: fake-image1
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
- name: fake-provider2
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
- name: fake-image2
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
labels:
- name: fake-label1
diskimage: fake-image1
min-ram: 8192
flavor-name: 'fake'
targets:
- name: fake-target
- name: fake-provider2
cloud: fake
driver: fake
rate: 0.0001
diskimages:
- name: fake-image2
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
labels:
- name: fake-label2
diskimage: fake-image2
min-ram: 8192
flavor-name: 'fake'
diskimages:
- name: fake-image1

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,33 +8,27 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 0
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'fake'
diskimages:
- name: fake-image

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,33 +8,27 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'fake'
diskimages:
- name: fake-image

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -21,8 +10,6 @@ labels: []
providers: []
targets: []
diskimages:
- name: fake-image
formats:

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,40 +8,32 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
- name: fake-label2
image: fake-image2
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
- name: fake-image2
min-ram: 8192
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
- name: fake-label2
diskimage: fake-image2
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -0,0 +1,39 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
flavor-name: Fake Flavor
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,41 +8,33 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
- name: fake-label2
image: fake-image2
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
pause: True
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
- name: fake-image2
min-ram: 8192
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
min-ram: 8192
diskimage: fake-image
- name: fake-label2
diskimage: fake-image2
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,85 +8,47 @@ zookeeper-servers:
labels:
- name: fake-label1
image: fake-image
min-ready: 1
providers:
- name: fake-provider1
- name: fake-label2
image: fake-image
min-ready: 1
providers:
- name: fake-provider2
- name: fake-label3
image: fake-image
min-ready: 1
providers:
- name: fake-provider3
providers:
- name: fake-provider1
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'ipv6-uuid'
ipv6-preferred: True
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
networks:
# This activates a flag in fakeprovider to give us an ipv6
# network
- 'fake-ipv6-network-name'
labels:
- name: fake-label1
diskimage: fake-image
min-ram: 8192
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'ipv6-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
- name: fake-provider3
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
ipv6-preferred: True
rate: 0.0001
images:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
networks:
- 'some-name'
labels:
- name: fake-label2
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -0,0 +1,49 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,40 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
launch-retries: 2
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,41 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
availability-zones:
- az1
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,54 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label1
min-ready: 1
- name: fake-label2
min-ready: 1
- name: fake-label3
min-ready: 1
- name: fake-label4
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label1
diskimage: fake-image
min-ram: 8192
- name: fake-label2
diskimage: fake-image
min-ram: 8192
- name: fake-label3
diskimage: fake-image
min-ram: 8192
- name: fake-label4
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,48 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
max-ready-age: 2
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,47 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
meta:
key: value
key2: value
pools:
- name: main
max-servers: 1
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,35 +8,26 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- name: 'fake-public-network-name'
public: true
- name: 'fake-private-network-name'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
networks:
- 'fake-public-network-name'
- 'fake-private-network-name'
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -0,0 +1,47 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,39 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 20
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,39 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-cores: 8
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,39 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 2
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,39 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-ram: 16384
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,40 +8,29 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
- name: fake-label2
image: fake-image2
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
- name: fake-image2
min-ram: 8192
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
- name: fake-label2
diskimage: fake-image2
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,33 +8,23 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,52 +8,37 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
- name: fake-provider2
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,45 +8,29 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
- name: fake-provider2
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images: []
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
targets:
- name: fake-target
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image

View File

@ -0,0 +1,35 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 1
- name: fake-label-windows
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
cloud-images:
- name: fake-image
- name: fake-image-windows
username: zuul
connection-type: winrm
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
cloud-image: fake-image
min-ram: 8192
- name: fake-label-windows
cloud-image: fake-image-windows
min-ram: 8192

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,53 +8,40 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 2
providers:
- name: fake-provider1
- name: fake-provider2
providers:
- name: fake-provider1
cloud: fake
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 1
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
SHOULD_FAIL: 'true'
- name: fake-provider2
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 2
pool: 'fake'
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
pools:
- name: main
max-servers: 2
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
targets:
- name: fake-target
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 2
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,34 +8,23 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 1
providers:
- name: fake-provider
providers:
- name: fake-provider
cloud: fake-vhd
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 96
pool: 'fake'
image-type: vhd
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
targets:
- name: fake-target
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -1,17 +1,6 @@
elements-dir: .
images-dir: '{images_dir}'
cron:
check: '*/15 * * * *'
cleanup: '*/1 * * * *'
zmq-publishers:
- tcp://localhost:8881
gearman-servers:
- host: localhost
port: {gearman_port}
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
@ -19,54 +8,38 @@ zookeeper-servers:
labels:
- name: fake-label
image: fake-image
min-ready: 2
providers:
- name: fake-provider1
- name: fake-provider2
providers:
- name: fake-provider1
cloud: fake-vhd
driver: fake
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 1
pool: 'fake'
image-type: vhd
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
diskimages:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
- name: fake-provider2
region-name: fake-region
username: 'fake'
password: 'fake'
auth-url: 'fake'
project-id: 'fake'
max-servers: 1
pool: 'fake'
image-type: qcow2
networks:
- net-id: 'some-uuid'
rate: 0.0001
images:
- name: fake-image
min-ram: 8192
name-filter: 'Fake'
meta:
key: value
key2: value
pools:
- name: main
max-servers: 2
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
targets:
- name: fake-target
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 2
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image

View File

@ -0,0 +1,47 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
meta:
key: value
key2: value
pools:
- name: main
max-servers: 2
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,47 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 0
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
meta:
key: value
key2: value
pools:
- name: main
max-servers: 1
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -1,8 +0,0 @@
[database]
dburi={dburi}
[jenkins "fake-target"]
user=fake
apikey=fake
credentials=fake
url=http://fake-url

View File

@ -0,0 +1,47 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: invalid_host
port: 1
chroot: invalid_chroot
labels:
- name: fake-label
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
meta:
key: value
key2: value
pools:
- name: main
max-servers: 96
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,4 @@
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}

View File

@ -0,0 +1,28 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
cloud-images:
- name: fake-image
image-name: provider-named-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
cloud-image: fake-image
min-ram: 8192

nodepool/tests/fixtures/webapp.yaml
View File

@ -0,0 +1,3 @@
webapp:
port: 8080
listen_address: '127.0.0.1'

View File

@ -1,444 +0,0 @@
# Copyright (C) 2014 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import testscenarios
from nodepool import tests
from nodepool import allocation
class OneLabel(tests.AllocatorTestCase, tests.BaseTestCase):
"""The simplest case: one each of providers, labels, and
targets.
Result AGT is:
* label1 from provider1
"""
scenarios = [
('one_node',
dict(provider1=10, label1=1, results=[1])),
('two_nodes',
dict(provider1=10, label1=2, results=[2])),
]
def setUp(self):
super(OneLabel, self).setUp()
ap1 = allocation.AllocationProvider('provider1', self.provider1)
at1 = allocation.AllocationTarget('target1')
ar1 = allocation.AllocationRequest('label1', self.label1)
ar1.addTarget(at1, 0)
self.agt.append(ar1.addProvider(ap1, at1, 0)[1])
ap1.makeGrants()
class TwoLabels(tests.AllocatorTestCase, tests.BaseTestCase):
"""Two labels from one provider.
Result AGTs are:
* label1 from provider1
* label2 from provider1
"""
scenarios = [
('one_node',
dict(provider1=10, label1=1, label2=1, results=[1, 1])),
('two_nodes',
dict(provider1=10, label1=2, label2=2, results=[2, 2])),
]
def setUp(self):
super(TwoLabels, self).setUp()
ap1 = allocation.AllocationProvider('provider1', self.provider1)
at1 = allocation.AllocationTarget('target1')
ar1 = allocation.AllocationRequest('label1', self.label1)
ar2 = allocation.AllocationRequest('label2', self.label2)
ar1.addTarget(at1, 0)
ar2.addTarget(at1, 0)
self.agt.append(ar1.addProvider(ap1, at1, 0)[1])
self.agt.append(ar2.addProvider(ap1, at1, 0)[1])
ap1.makeGrants()
class TwoProvidersTwoLabels(tests.AllocatorTestCase, tests.BaseTestCase):
"""Two labels, each of which is supplied by both providers.
Result AGTs are:
* label1 from provider1
* label2 from provider1
* label1 from provider2
* label2 from provider2
"""
scenarios = [
('one_node',
dict(provider1=10, provider2=10, label1=1, label2=1,
results=[1, 1, 0, 0])),
('two_nodes',
dict(provider1=10, provider2=10, label1=2, label2=2,
results=[1, 1, 1, 1])),
('three_nodes',
dict(provider1=10, provider2=10, label1=3, label2=3,
results=[2, 2, 1, 1])),
('four_nodes',
dict(provider1=10, provider2=10, label1=4, label2=4,
results=[2, 2, 2, 2])),
('four_nodes_at_quota',
dict(provider1=4, provider2=4, label1=4, label2=4,
results=[2, 2, 2, 2])),
('four_nodes_over_quota',
dict(provider1=2, provider2=2, label1=4, label2=4,
results=[1, 1, 1, 1])),
('negative_provider',
dict(provider1=-5, provider2=20, label1=5, label2=5,
results=[0, 0, 5, 5])),
]
def setUp(self):
super(TwoProvidersTwoLabels, self).setUp()
ap1 = allocation.AllocationProvider('provider1', self.provider1)
ap2 = allocation.AllocationProvider('provider2', self.provider2)
at1 = allocation.AllocationTarget('target1')
ar1 = allocation.AllocationRequest('label1', self.label1)
ar2 = allocation.AllocationRequest('label2', self.label2)
ar1.addTarget(at1, 0)
ar2.addTarget(at1, 0)
self.agt.append(ar1.addProvider(ap1, at1, 0)[1])
self.agt.append(ar2.addProvider(ap1, at1, 0)[1])
self.agt.append(ar1.addProvider(ap2, at1, 0)[1])
self.agt.append(ar2.addProvider(ap2, at1, 0)[1])
ap1.makeGrants()
ap2.makeGrants()
class TwoProvidersTwoLabelsOneShared(tests.AllocatorTestCase,
tests.BaseTestCase):
"""One label is served by both providers, the other can only come
from one. This tests that the allocator uses the diverse provider
to supply the label that can come from either while reserving
nodes from the more restricted provider for the label that can
only be supplied by it.
label1 is supplied only by provider1.
label2 is supplied by provider1 and provider2.
Result AGTs are:
* label1 from provider1
* label2 from provider1
* label2 from provider2
"""
scenarios = [
('one_node',
dict(provider1=10, provider2=10, label1=1, label2=1,
results=[1, 1, 0])),
('two_nodes',
dict(provider1=10, provider2=10, label1=2, label2=2,
results=[2, 1, 1])),
('three_nodes',
dict(provider1=10, provider2=10, label1=3, label2=3,
results=[3, 2, 1])),
('four_nodes',
dict(provider1=10, provider2=10, label1=4, label2=4,
results=[4, 2, 2])),
('four_nodes_at_quota',
dict(provider1=4, provider2=4, label1=4, label2=4,
results=[4, 0, 4])),
('four_nodes_over_quota',
dict(provider1=2, provider2=2, label1=4, label2=4,
results=[2, 0, 2])),
]
def setUp(self):
super(TwoProvidersTwoLabelsOneShared, self).setUp()
ap1 = allocation.AllocationProvider('provider1', self.provider1)
ap2 = allocation.AllocationProvider('provider2', self.provider2)
at1 = allocation.AllocationTarget('target1')
ar1 = allocation.AllocationRequest('label1', self.label1)
ar2 = allocation.AllocationRequest('label2', self.label2)
ar1.addTarget(at1, 0)
ar2.addTarget(at1, 0)
self.agt.append(ar1.addProvider(ap1, at1, 0)[1])
self.agt.append(ar2.addProvider(ap1, at1, 0)[1])
self.agt.append(ar2.addProvider(ap2, at1, 0)[1])
ap1.makeGrants()
ap2.makeGrants()
class RoundRobinAllocation(tests.RoundRobinTestCase, tests.BaseTestCase):
"""Test the round-robin behaviour of the AllocationHistory object to
ensure fairness of distribution
"""
scenarios = [
# * one_to_one
#
# test that with only one node available we cycle through the
# available labels.
#
# There's a slight trick with the ordering here; the makeGrants()
# algorithm allocates proportionally from the available nodes
# (i.e. if there's allocations for 100 and 50, then the first
# gets twice as many of the available nodes than the second).
# The algorithm is
#
# 1) add up all your peer requests
# 2) calculate your ratio = (your_request / all_peers)
# 3) multiply that ratio by the available nodes
# 4) take the floor() (you can only allocate a whole node)
#
# So we've got 8 total requests, each requesting one node:
#
# label1 = 1/7 other requests = 0.142 * 1 available node = 0
# label2 = 1/6 other requests = 0.166 * 1 available node = 0
# label3 = 1/4 other requests = 0.25 * 1 available node = 0
# ...
# label7 = 1/1 other requests = 1 * 1 available node = 1
#
# ergo label7 is the first to be granted its request. Thus we
# start the round-robin from there
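# (A standalone sketch of this arithmetic follows the scenarios list below.)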
('one_to_one',
dict(provider1=1, provider2=0,
label1=1, label2=1, label3=1, label4=1,
label5=1, label6=1, label7=1, label8=1,
results=['label7',
'label1',
'label2',
'label3',
'label4',
'label5',
'label6',
'label8',
'label7',
'label1',
'label2'])),
# * at_quota
#
# Test that when at quota, every node gets allocated on every
# round; i.e. nobody ever misses out.  Odd-numbered labels go to
# ap1, even-numbered labels to ap2.
('at_quota',
dict(provider1=4, provider2=4,
label1=1, label2=1, label3=1, label4=1,
label5=1, label6=1, label7=1, label8=1,
results=[
'label1', 'label3', 'label5', 'label7',
'label2', 'label4', 'label6', 'label8'] * 11
)),
# * big_fish_little_pond
#
# In this test we have one label that far outweighs the other.
# From the description of the ratio allocation above, it can
# swamp the allocation pool and not allow other nodes to come
# online.
#
# Here with two nodes, we check that one node is dedicated to
# the larger label request, but the second node cycles through
# the smaller requests.
('big_fish_little_pond',
dict(provider1=1, provider2=1,
label1=100, label2=1, label3=1, label4=1,
label5=1, label6=1, label7=1, label8=1,
# provider1 provider2
results=['label1', 'label1', # round 1
'label1', 'label2', # round 2
'label1', 'label3', # ...
'label1', 'label4',
'label1', 'label5',
'label1', 'label6',
'label1', 'label7',
'label1', 'label8',
'label1', 'label2',
'label1', 'label3',
'label1', 'label4'])),
]
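# The ratio/floor arithmetic walked through in the comment above can be
# illustrated with a small standalone sketch.  This is only an
# illustration under simplified assumptions (requests are processed in
# order and each is compared against the peers not yet processed); it
# is not the real AllocationProvider.makeGrants() implementation, which
# also consults AllocationHistory for its round-robin fallback.
def _sketch_grants(requests, available):
    # requests: ordered list of (label, amount) tuples; available: the
    # provider's free node count.
    grants = []
    remaining = available
    amounts = [amount for _, amount in requests]
    for i, (label, amount) in enumerate(requests):
        peers = sum(amounts[i + 1:])   # requests still waiting their turn
        # the final request sees no peers, so it takes whatever remains
        ratio = float(amount) / peers if peers else 1.0
        granted = min(amount, remaining, int(ratio * remaining))
        if granted:
            grants.append((label, granted))
            remaining -= granted
    return grants
# Eight one-node requests against a single free node: only the request
# whose ratio reaches 1.0 receives a whole node, matching the
# walk-through above in which label7 wins the first round.
# _sketch_grants([('label%d' % i, 1) for i in range(1, 9)], 1)
#   -> [('label7', 1)]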
def setUp(self):
super(RoundRobinAllocation, self).setUp()
ah = allocation.AllocationHistory()
def do_it():
ap1 = allocation.AllocationProvider('provider1', self.provider1)
ap2 = allocation.AllocationProvider('provider2', self.provider2)
at1 = allocation.AllocationTarget('target1')
ars = []
ars.append(allocation.AllocationRequest('label1', self.label1, ah))
ars.append(allocation.AllocationRequest('label2', self.label2, ah))
ars.append(allocation.AllocationRequest('label3', self.label3, ah))
ars.append(allocation.AllocationRequest('label4', self.label4, ah))
ars.append(allocation.AllocationRequest('label5', self.label5, ah))
ars.append(allocation.AllocationRequest('label6', self.label6, ah))
ars.append(allocation.AllocationRequest('label7', self.label7, ah))
ars.append(allocation.AllocationRequest('label8', self.label8, ah))
# each request to one target, and can be satisfied by both
# providers
for ar in ars:
ar.addTarget(at1, 0)
ar.addProvider(ap1, at1, 0)
ar.addProvider(ap2, at1, 0)
ap1.makeGrants()
for g in ap1.grants:
self.allocations.append(g.request.name)
ap2.makeGrants()
for g in ap2.grants:
self.allocations.append(g.request.name)
ah.grantsDone()
# run the test several times to make sure we bounce around
# enough
for i in range(0, 11):
do_it()
class RoundRobinFixedProvider(tests.RoundRobinTestCase, tests.BaseTestCase):
"""Test that round-robin behaviour exists when we have a more complex
situation where some nodes can only be provided by some providers
* label1 is only able to be allocated from provider1
* label8 is only able to be allocated from provider2
"""
scenarios = [
# * fixed_even
#
# What we see below is an edge case:
#
# Below, label1 always gets chosen for provider1.
# This is because label1 is requesting 1.0 nodes (it can only
# run on provider1) and all the other labels are requesting
# only 0.5 of a node (they can run on either and no
# allocations have been made yet). We do actually grant in a
# round-robin fashion, but int(0.5) == 0 so no node gets
# allocated. We fall back to the ratio calculation and label1
# wins.
#
# However, after provider1.makeGrants(), the other labels
# increase their request on the remaining provider2 to their
# full 1.0 nodes. Now the "fight" starts and we allocate in
# the round-robin fashion.
('fixed_even',
dict(provider1=1, provider2=1,
label1=1, label2=1, label3=1, label4=1,
label5=1, label6=1, label7=1, label8=1,
# provider1 provider2
results=['label1', 'label6', # round 1
'label1', 'label8', # round 2
'label1', 'label2', # ...
'label1', 'label3',
'label1', 'label4',
'label1', 'label5',
'label1', 'label7',
'label1', 'label6',
'label1', 'label8',
'label1', 'label2',
'label1', 'label3'])),
# * over_subscribed
#
# In contrast to above, any grant made will be satisfied. We
# see that the fixed labels label1 and label8 do not get as full
# a share as the non-fixed labels -- but they do round-robin
# with the other requests. Fixing this is left as an exercise
# for the reader :)
('over_subscribed',
dict(provider1=1, provider2=1,
label1=20, label2=20, label3=20, label4=20,
label5=20, label6=20, label7=20, label8=20,
results=['label1', 'label6',
'label2', 'label8',
'label3', 'label3',
'label4', 'label4',
'label5', 'label5',
'label7', 'label7',
'label1', 'label6',
'label2', 'label8',
'label3', 'label3',
'label4', 'label4',
'label5', 'label5'])),
# * even
#
# When there's enough nodes to go around, we expect everyone
# to be fully satisfied with label1 on provider1 and label8
# on provider2 as required
('even',
dict(provider1=4, provider2=4,
label1=1, label2=1, label3=1, label4=1,
label5=1, label6=1, label7=1, label8=1,
results=[
'label1', 'label2', 'label4', 'label6',
'label8', 'label3', 'label5', 'label7'] * 11))]
def setUp(self):
super(RoundRobinFixedProvider, self).setUp()
ah = allocation.AllocationHistory()
def do_it():
ap1 = allocation.AllocationProvider('provider1', self.provider1)
ap2 = allocation.AllocationProvider('provider2', self.provider2)
at1 = allocation.AllocationTarget('target1')
ars = []
ars.append(allocation.AllocationRequest('label1', self.label1, ah))
ars.append(allocation.AllocationRequest('label2', self.label2, ah))
ars.append(allocation.AllocationRequest('label3', self.label3, ah))
ars.append(allocation.AllocationRequest('label4', self.label4, ah))
ars.append(allocation.AllocationRequest('label5', self.label5, ah))
ars.append(allocation.AllocationRequest('label6', self.label6, ah))
ars.append(allocation.AllocationRequest('label7', self.label7, ah))
ars.append(allocation.AllocationRequest('label8', self.label8, ah))
# first ar can only go to provider1, the last only to
# provider2
ars[0].addTarget(at1, 0)
ars[0].addProvider(ap1, at1, 0)
ars[-1].addTarget(at1, 0)
ars[-1].addProvider(ap2, at1, 0)
# the rest can go anywhere
for ar in ars[1:-1]:
ar.addTarget(at1, 0)
ar.addProvider(ap1, at1, 0)
ar.addProvider(ap2, at1, 0)
ap1.makeGrants()
for g in ap1.grants:
self.allocations.append(g.request.name)
ap2.makeGrants()
for g in ap2.grants:
self.allocations.append(g.request.name)
ah.grantsDone()
# run the test several times to make sure we bounce around
# enough
for i in range(0, 11):
do_it()
def load_tests(loader, in_tests, pattern):
return testscenarios.load_tests_apply_scenarios(loader, in_tests, pattern)

View File

@ -14,9 +14,11 @@
# limitations under the License.
import os
import uuid
import fixtures
from nodepool import builder, exceptions, fakeprovider, tests
from nodepool import builder, exceptions, tests
from nodepool.driver.fake import provider as fakeprovider
from nodepool import zk
@ -84,7 +86,9 @@ class TestNodepoolBuilderDibImage(tests.BaseTestCase):
image = builder.DibImageFile('myid1234')
self.assertRaises(exceptions.BuilderError, image.to_path, '/imagedir/')
class TestNodePoolBuilder(tests.DBTestCase):
def test_start_stop(self):
config = self.setup_config('node.yaml')
nb = builder.NodePoolBuilder(config)
@ -94,6 +98,18 @@ class TestNodePoolBuilder(tests.DBTestCase):
nb.start()
nb.stop()
def test_builder_id_file(self):
configfile = self.setup_config('node.yaml')
self.useBuilder(configfile)
path = os.path.join(self._config_images_dir.path, 'builder_id.txt')
# Validate the unique ID file exists and contents are what we expect
self.assertTrue(os.path.exists(path))
with open(path, "r") as f:
the_id = f.read()
obj = uuid.UUID(the_id, version=4)
self.assertEqual(the_id, str(obj))
def test_image_upload_fail(self):
"""Test that image upload fails are handled properly."""
@ -104,20 +120,18 @@ class TestNodePoolBuilder(tests.DBTestCase):
return fake_client
self.useFixture(fixtures.MonkeyPatch(
'nodepool.provider_manager.FakeProviderManager._getClient',
'nodepool.driver.fake.provider.FakeProvider._getClient',
get_fake_client))
self.useFixture(fixtures.MonkeyPatch(
'nodepool.nodepool._get_one_cloud',
fakeprovider.fake_get_one_cloud))
configfile = self.setup_config('node.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
# NOTE(pabelanger): Disable CleanupWorker thread for nodepool-builder
# as we currently race it to validate our failed uploads.
self._useBuilder(configfile, cleanup_interval=0)
self.useBuilder(configfile, cleanup_interval=0)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
self.waitForNodes(pool)
nodes = self.waitForNodes('fake-label')
self.assertEqual(len(nodes), 1)
newest_builds = self.zk.getMostRecentBuilds(1, 'fake-image',
state=zk.READY)
@ -129,32 +143,33 @@ class TestNodePoolBuilder(tests.DBTestCase):
def test_provider_addition(self):
configfile = self.setup_config('node.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
self.waitForImage('fake-provider', 'fake-image')
self.replace_config(configfile, 'node_two_provider.yaml')
self.waitForImage('fake-provider2', 'fake-image')
def test_provider_removal(self):
configfile = self.setup_config('node_two_provider.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
self.waitForImage('fake-provider', 'fake-image')
self.waitForImage('fake-provider2', 'fake-image')
image = self.zk.getMostRecentImageUpload('fake-provider', 'fake-image')
self.replace_config(configfile, 'node_two_provider_remove.yaml')
self.waitForImageDeletion('fake-provider2', 'fake-image')
image2 = self.zk.getMostRecentImageUpload('fake-provider', 'fake-image')
image2 = self.zk.getMostRecentImageUpload('fake-provider',
'fake-image')
self.assertEqual(image, image2)
def test_image_addition(self):
configfile = self.setup_config('node.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
self.waitForImage('fake-provider', 'fake-image')
self.replace_config(configfile, 'node_two_image.yaml')
self.waitForImage('fake-provider', 'fake-image2')
def test_image_removal(self):
configfile = self.setup_config('node_two_image.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
self.waitForImage('fake-provider', 'fake-image')
self.waitForImage('fake-provider', 'fake-image2')
self.replace_config(configfile, 'node_two_image_remove.yaml')
@ -166,7 +181,7 @@ class TestNodePoolBuilder(tests.DBTestCase):
def _test_image_rebuild_age(self, expire=86400):
configfile = self.setup_config('node.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
build = self.waitForBuild('fake-image', '0000000001')
image = self.waitForImage('fake-provider', 'fake-image')
# Expire rebuild-age (default: 1day) to force a new build.
@ -244,7 +259,7 @@ class TestNodePoolBuilder(tests.DBTestCase):
def test_cleanup_hard_upload_fails(self):
configfile = self.setup_config('node.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
self.waitForImage('fake-provider', 'fake-image')
upload = self.zk.getUploads('fake-image', '0000000001',
@ -269,7 +284,7 @@ class TestNodePoolBuilder(tests.DBTestCase):
def test_cleanup_failed_image_build(self):
configfile = self.setup_config('node_diskimage_fail.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
# NOTE(pabelanger): We are racing here, but don't really care. We just
# need our first image build to fail.
self.replace_config(configfile, 'node.yaml')
@ -279,5 +294,5 @@ class TestNodePoolBuilder(tests.DBTestCase):
def test_diskimage_build_only(self):
configfile = self.setup_config('node_diskimage_only.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
self.waitForBuild('fake-image', '0000000001')

View File

@ -27,12 +27,15 @@ from nodepool import zk
class TestNodepoolCMD(tests.DBTestCase):
def setUp(self):
super(TestNodepoolCMD, self).setUp()
def patch_argv(self, *args):
argv = ["nodepool", "-s", self.secure_conf]
argv = ["nodepool"]
argv.extend(args)
self.useFixture(fixtures.MonkeyPatch('sys.argv', argv))
def assert_listed(self, configfile, cmd, col, val, count):
def assert_listed(self, configfile, cmd, col, val, count, col_count=0):
log = logging.getLogger("tests.PrettyTableMock")
self.patch_argv("-c", configfile, *cmd)
with mock.patch('prettytable.PrettyTable.add_row') as m_add_row:
@ -41,13 +44,16 @@ class TestNodepoolCMD(tests.DBTestCase):
# Find add_rows with the status we're looking for
for args, kwargs in m_add_row.call_args_list:
row = args[0]
if col_count:
self.assertEquals(len(row), col_count)
log.debug(row)
if row[col] == val:
rows_with_val += 1
self.assertEquals(rows_with_val, count)
def assert_alien_images_listed(self, configfile, image_cnt, image_id):
self.assert_listed(configfile, ['alien-image-list'], 2, image_id, image_cnt)
self.assert_listed(configfile, ['alien-image-list'], 2, image_id,
image_cnt)
def assert_alien_images_empty(self, configfile):
self.assert_alien_images_listed(configfile, 0, 0)
@ -55,8 +61,16 @@ class TestNodepoolCMD(tests.DBTestCase):
def assert_images_listed(self, configfile, image_cnt, status="ready"):
self.assert_listed(configfile, ['image-list'], 6, status, image_cnt)
def assert_nodes_listed(self, configfile, node_cnt, status="ready"):
self.assert_listed(configfile, ['list'], 10, status, node_cnt)
def assert_nodes_listed(self, configfile, node_cnt, status="ready",
detail=False, validate_col_count=False):
cmd = ['list']
col_count = 9
if detail:
cmd += ['--detail']
col_count = 17
if not validate_col_count:
col_count = 0
self.assert_listed(configfile, cmd, 6, status, node_cnt, col_count)
def test_image_list_empty(self):
self.assert_images_listed(self.setup_config("node_cmd.yaml"), 0)
@ -72,7 +86,7 @@ class TestNodepoolCMD(tests.DBTestCase):
def test_image_delete(self):
configfile = self.setup_config("node.yaml")
self._useBuilder(configfile)
self.useBuilder(configfile)
self.waitForImage('fake-provider', 'fake-image')
image = self.zk.getMostRecentImageUpload('fake-image', 'fake-provider')
self.patch_argv("-c", configfile, "image-delete",
@ -84,20 +98,9 @@ class TestNodepoolCMD(tests.DBTestCase):
self.waitForUploadRecordDeletion('fake-provider', 'fake-image',
image.build_id, image.id)
def test_alien_list_fail(self):
def fail_list(self):
raise RuntimeError('Fake list error')
self.useFixture(fixtures.MonkeyPatch(
'nodepool.fakeprovider.FakeOpenStackCloud.list_servers',
fail_list))
configfile = self.setup_config("node_cmd.yaml")
self.patch_argv("-c", configfile, "alien-list")
nodepoolcmd.main()
def test_alien_image_list_empty(self):
configfile = self.setup_config("node.yaml")
self._useBuilder(configfile)
self.useBuilder(configfile)
self.waitForImage('fake-provider', 'fake-image')
self.patch_argv("-c", configfile, "alien-image-list")
nodepoolcmd.main()
@ -107,7 +110,7 @@ class TestNodepoolCMD(tests.DBTestCase):
def fail_list(self):
raise RuntimeError('Fake list error')
self.useFixture(fixtures.MonkeyPatch(
'nodepool.fakeprovider.FakeOpenStackCloud.list_servers',
'nodepool.driver.fake.provider.FakeOpenStackCloud.list_servers',
fail_list))
configfile = self.setup_config("node_cmd.yaml")
@ -116,12 +119,23 @@ class TestNodepoolCMD(tests.DBTestCase):
def test_list_nodes(self):
configfile = self.setup_config('node.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
pool = self.useNodepool(configfile, watermark_sleep=1)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
self.waitForNodes(pool)
self.assert_nodes_listed(configfile, 1)
self.waitForNodes('fake-label')
self.assert_nodes_listed(configfile, 1, detail=False,
validate_col_count=True)
def test_list_nodes_detail(self):
configfile = self.setup_config('node.yaml')
self.useBuilder(configfile)
pool = self.useNodepool(configfile, watermark_sleep=1)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
self.waitForNodes('fake-label')
self.assert_nodes_listed(configfile, 1, detail=True,
validate_col_count=True)
def test_config_validate(self):
config = os.path.join(os.path.dirname(tests.__file__),
@ -131,13 +145,13 @@ class TestNodepoolCMD(tests.DBTestCase):
def test_dib_image_list(self):
configfile = self.setup_config('node.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
self.waitForImage('fake-provider', 'fake-image')
self.assert_listed(configfile, ['dib-image-list'], 4, zk.READY, 1)
def test_dib_image_build_pause(self):
configfile = self.setup_config('node_diskimage_pause.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
self.patch_argv("-c", configfile, "image-build", "fake-image")
with testtools.ExpectedException(Exception):
nodepoolcmd.main()
@ -145,19 +159,21 @@ class TestNodepoolCMD(tests.DBTestCase):
def test_dib_image_pause(self):
configfile = self.setup_config('node_diskimage_pause.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
pool = self.useNodepool(configfile, watermark_sleep=1)
pool.start()
self.waitForNodes(pool)
nodes = self.waitForNodes('fake-label2')
self.assertEqual(len(nodes), 1)
self.assert_listed(configfile, ['dib-image-list'], 1, 'fake-image', 0)
self.assert_listed(configfile, ['dib-image-list'], 1, 'fake-image2', 1)
def test_dib_image_upload_pause(self):
configfile = self.setup_config('node_image_upload_pause.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
pool = self.useNodepool(configfile, watermark_sleep=1)
pool.start()
self.waitForNodes(pool)
nodes = self.waitForNodes('fake-label2')
self.assertEqual(len(nodes), 1)
# Make sure diskimages were built.
self.assert_listed(configfile, ['dib-image-list'], 1, 'fake-image', 1)
self.assert_listed(configfile, ['dib-image-list'], 1, 'fake-image2', 1)
@ -168,10 +184,11 @@ class TestNodepoolCMD(tests.DBTestCase):
def test_dib_image_delete(self):
configfile = self.setup_config('node.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
self._useBuilder(configfile)
self.useBuilder(configfile)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
self.waitForNodes(pool)
nodes = self.waitForNodes('fake-label')
self.assertEqual(len(nodes), 1)
# Check the image exists
self.assert_listed(configfile, ['dib-image-list'], 4, zk.READY, 1)
builds = self.zk.getMostRecentBuilds(1, 'fake-image', zk.READY)
@ -187,52 +204,67 @@ class TestNodepoolCMD(tests.DBTestCase):
def test_hold(self):
configfile = self.setup_config('node.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
self._useBuilder(configfile)
self.useBuilder(configfile)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
self.waitForNodes(pool)
nodes = self.waitForNodes('fake-label')
node_id = nodes[0].id
# Assert one node exists and it is node 1 in a ready state.
self.assert_listed(configfile, ['list'], 0, 1, 1)
self.assert_listed(configfile, ['list'], 0, node_id, 1)
self.assert_nodes_listed(configfile, 1, zk.READY)
# Hold node 1
self.patch_argv('-c', configfile, 'hold', '1')
# Hold node 0000000000
self.patch_argv(
'-c', configfile, 'hold', node_id, '--reason', 'testing')
nodepoolcmd.main()
# Assert the state changed to HOLD
self.assert_listed(configfile, ['list'], 0, 1, 1)
self.assert_listed(configfile, ['list'], 0, node_id, 1)
self.assert_nodes_listed(configfile, 1, 'hold')
def test_delete(self):
configfile = self.setup_config('node.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
self._useBuilder(configfile)
self.useBuilder(configfile)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
self.waitForNodes(pool)
# Assert one node exists and it is node 1 in a ready state.
self.assert_listed(configfile, ['list'], 0, 1, 1)
nodes = self.waitForNodes('fake-label')
self.assertEqual(len(nodes), 1)
# Assert one node exists and it is nodes[0].id in a ready state.
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.READY)
# Delete node 1
self.assert_listed(configfile, ['delete', '1'], 10, 'delete', 1)
# Delete node
self.patch_argv('-c', configfile, 'delete', nodes[0].id)
nodepoolcmd.main()
self.waitForNodeDeletion(nodes[0])
# Assert the node is gone
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 0)
def test_delete_now(self):
configfile = self.setup_config('node.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
self._useBuilder(configfile)
self.useBuilder(configfile)
pool.start()
self.waitForImage( 'fake-provider', 'fake-image')
self.waitForNodes(pool)
self.waitForImage('fake-provider', 'fake-image')
nodes = self.waitForNodes('fake-label')
self.assertEqual(len(nodes), 1)
# Assert one node exists and it is node 1 in a ready state.
self.assert_listed(configfile, ['list'], 0, 1, 1)
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.READY)
# Delete node 1
self.patch_argv('-c', configfile, 'delete', '--now', '1')
# Delete node
self.patch_argv('-c', configfile, 'delete', '--now', nodes[0].id)
nodepoolcmd.main()
self.waitForNodeDeletion(nodes[0])
# Assert the node is gone
self.assert_listed(configfile, ['list'], 0, 1, 0)
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 0)
def test_image_build(self):
configfile = self.setup_config('node.yaml')
self._useBuilder(configfile)
self.useBuilder(configfile)
# wait for the scheduled build to arrive
self.waitForImage('fake-provider', 'fake-image')
@ -246,19 +278,25 @@ class TestNodepoolCMD(tests.DBTestCase):
self.waitForImage('fake-provider', 'fake-image', [image])
self.assert_listed(configfile, ['dib-image-list'], 4, zk.READY, 2)
def test_job_create(self):
def test_request_list(self):
configfile = self.setup_config('node.yaml')
self.patch_argv("-c", configfile, "job-create", "fake-job",
"--hold-on-failure", "1")
nodepoolcmd.main()
self.assert_listed(configfile, ['job-list'], 2, 1, 1)
pool = self.useNodepool(configfile, watermark_sleep=1)
self.useBuilder(configfile)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
nodes = self.waitForNodes('fake-label')
self.assertEqual(len(nodes), 1)
def test_job_delete(self):
configfile = self.setup_config('node.yaml')
self.patch_argv("-c", configfile, "job-create", "fake-job",
"--hold-on-failure", "1")
nodepoolcmd.main()
self.assert_listed(configfile, ['job-list'], 2, 1, 1)
self.patch_argv("-c", configfile, "job-delete", "1")
nodepoolcmd.main()
self.assert_listed(configfile, ['job-list'], 0, 1, 0)
req = zk.NodeRequest()
req.state = zk.PENDING # so it will be ignored
req.node_types = ['fake-label']
req.requestor = 'test_request_list'
self.zk.storeNodeRequest(req)
self.assert_listed(configfile, ['request-list'], 0, req.id, 1)
def test_without_argument(self):
configfile = self.setup_config("node_cmd.yaml")
self.patch_argv("-c", configfile)
result = nodepoolcmd.main()
self.assertEqual(1, result)

File diff suppressed because it is too large.

Some files were not shown because too many files have changed in this diff.