Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion doc/source/admin/pci-passthrough.rst
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,12 @@ be added to the resource provider representing the matching PCI devices.
(Zed) the nova-compute service will refuse to start with such configuration.
It is suggested to use the PCI address of the device instead.

.. important::
Although nova has supported configuring :oslo.config:option:`pci.alias` where an
alias name is repeated and therefore associated with multiple alias
specifications, such a configuration is not supported when PCI tracking in
Placement is enabled.

The nova-compute service makes sure that existing instances with PCI
allocations in the nova DB will have a corresponding PCI allocation in
placement. This allocation healing also acts on any new instances regardless of
Expand Down Expand Up @@ -494,7 +500,9 @@ configuration option supports requesting devices by Placement resource class
name via the ``resource_class`` field and also supports requesting traits to
be present on the selected devices via the ``traits`` field in the alias. If
the ``resource_class`` field is not specified in the alias then it is defaulted
by nova to ``CUSTOM_PCI_<vendor_id>_<product_id>``.
by nova to ``CUSTOM_PCI_<vendor_id>_<product_id>``. Either the ``product_id``
and ``vendor_id`` or the ``resource_class`` field must be provided in each
alias.

For deeper technical details please read the `nova specification <https://specs.openstack.org/openstack/nova-specs/specs/zed/approved/pci-device-tracking-in-placement.html>`_.

Expand Down
6 changes: 6 additions & 0 deletions nova/api/openstack/wsgi_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from nova import context
from nova import exception
from nova import objects
from nova.pci import request
from nova import service
from nova import utils
from nova import version
Expand All @@ -51,6 +52,11 @@ def _get_config_files(env=None):


def _setup_service(host, name):

# NOTE(gibi): validate the [pci]alias config early to avoid late failures
# at instance creation due to config errors.
request.get_alias_from_config()

try:
utils.raise_if_old_compute()
except exception.TooOldComputeService as e:
Expand Down
4 changes: 4 additions & 0 deletions nova/compute/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1615,6 +1615,10 @@ def init_host(self, service_ref):
# if the configuration is wrong.
whitelist.Whitelist(CONF.pci.device_spec)

# NOTE(gibi): validate the [pci]alias config early to avoid late
# failures at instance lifecycle operations due to config errors.
pci_req_module.get_alias_from_config()

nova.conf.neutron.register_dynamic_opts(CONF)
# Even if only libvirt uses them, make it available for all drivers
nova.conf.devices.register_dynamic_opts(CONF)
Expand Down
5 changes: 4 additions & 1 deletion nova/conf/pci.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@
alias = {
"name": "A16_16A",
"device_type": "type-VF",
resource_class: "CUSTOM_A16_16A",
"resource_class": "GPU_VF",
"traits": "blue, big"
}

Valid key values are :
Expand Down Expand Up @@ -108,6 +109,8 @@
in the alias is matched against the ``resource_class`` defined in the
``[pci]device_spec``. This field can be used only if
``[filter_scheduler]pci_in_placement`` is enabled.
Either the product_id and vendor_id or the resource_class field must be
provided in each alias.

``traits``
An optional comma separated list of Placement trait names requested to be
Expand Down
60 changes: 57 additions & 3 deletions nova/pci/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
These two aliases define a device request meaning: vendor_id is "8086" and
product_id is "0442" or "0443".
"""

import functools
import typing as ty

import jsonschema
Expand Down Expand Up @@ -121,7 +121,60 @@
}


def _get_alias_from_config() -> Alias:
def _validate_multispec(aliases):
    """Reject aliases that carry more than one spec with PCI in Placement.

    Nothing is checked unless ``[filter_scheduler]pci_in_placement`` is set.

    :param aliases: mapping of alias name to a sequence whose second item is
        the collection of specs registered under that name.
    :raises: exception.PciInvalidAlias listing every alias name that has
        more than one spec.
    """
    if not CONF.filter_scheduler.pci_in_placement:
        return

    offenders = []
    for alias_name, parsed in aliases.items():
        # parsed[1] holds the specs collected for this alias name
        if len(parsed[1]) > 1:
            offenders.append(alias_name)

    if not offenders:
        return

    raise exception.PciInvalidAlias(
        "The PCI alias(es) %s have multiple specs but "
        "[filter_scheduler]pci_in_placement is True. The PCI in "
        "Placement feature only supports one spec per alias. You can "
        "assign the same resource_class to multiple [pci]device_spec "
        "matchers to allow using different devices for the same alias."
        % ",".join(offenders))


def _validate_required_ids(aliases):
    """Ensure every alias spec identifies the devices it is asking for.

    A spec is accepted when it has both ``vendor_id`` and ``product_id``.
    When ``[filter_scheduler]pci_in_placement`` is enabled a spec with a
    ``resource_class`` is accepted as well.

    :param aliases: mapping of alias name to a sequence whose second item is
        the collection of spec dicts registered under that name.
    :raises: exception.PciInvalidAlias listing, in sorted order, every alias
        name that has at least one spec failing the rule above.
    """
    rc_is_enough = CONF.filter_scheduler.pci_in_placement

    def _is_identified(spec):
        if "vendor_id" in spec and "product_id" in spec:
            return True
        # resource_class only counts when PCI in Placement is enabled
        return rc_is_enough and "resource_class" in spec

    offenders = {
        alias_name
        for alias_name, parsed in aliases.items()
        if not all(_is_identified(spec) for spec in parsed[1])
    }
    if not offenders:
        return

    if rc_is_enough:
        msg = (
            "The PCI alias(es) %s does not have vendor_id and product_id "
            "fields set or resource_class field set.")
    else:
        msg = (
            "The PCI alias(es) %s does not have vendor_id and product_id "
            "fields set.")
    raise exception.PciInvalidAlias(msg % ",".join(sorted(offenders)))


def _validate_aliases(aliases):
    """Run the sanity checks on the parsed aliases.

    Each check raises an error with an easy to parse message when it finds
    a common configuration mistake.

    :param aliases: the parsed aliases, handed unchanged to every check.
    """
    # Order matters: the multi-spec check is reported first.
    for check in (_validate_multispec, _validate_required_ids):
        check(aliases)


@functools.cache
def get_alias_from_config() -> Alias:
"""Parse and validate PCI aliases from the nova config.

:returns: A dictionary where the keys are alias names and the values are
Expand Down Expand Up @@ -177,14 +230,15 @@ def _get_alias_from_config() -> Alias:
except Exception as exc:
raise exception.PciInvalidAlias(reason=str(exc))

_validate_aliases(aliases)
return aliases


def _translate_alias_to_requests(
alias_spec: str, affinity_policy: ty.Optional[str] = None,
) -> ty.List['objects.InstancePCIRequest']:
"""Generate complete pci requests from pci aliases in extra_spec."""
pci_aliases = _get_alias_from_config()
pci_aliases = get_alias_from_config()

pci_requests: ty.List[objects.InstancePCIRequest] = []
for name, count in [spec.split(':') for spec in alias_spec.split(',')]:
Expand Down
9 changes: 9 additions & 0 deletions nova/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
from nova import exception
from nova import objects
from nova.objects import base as objects_base
from nova.pci import request
from nova import quota
from nova.scheduler.client import report
from nova.scheduler import utils as scheduler_utils
Expand Down Expand Up @@ -189,6 +190,10 @@ def setUp(self):
self.useFixture(
nova_fixtures.PropagateTestCaseIdToChildEventlets(self.id()))

# Ensure that the pci alias is reset between test cases running in
# the same process
request.get_alias_from_config.cache_clear()

# How many of which service we've started. {$service-name: $count}
self._service_fixture_count = collections.defaultdict(int)

Expand Down Expand Up @@ -425,6 +430,10 @@ def flags(self, **kw):
group = kw.pop('group', None)
for k, v in kw.items():
CONF.set_override(k, v, group)
# loading and validating alias is cached so if it is reconfigured
# we need to reset the cache
if k == 'alias' and group == 'pci':
request.get_alias_from_config.cache_clear()

def reset_flags(self, *k, **kw):
"""Reset flag variables for a test."""
Expand Down
59 changes: 59 additions & 0 deletions nova/tests/functional/regressions/test_bug_2102038.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from nova.tests.fixtures import libvirt as fakelibvirt
from nova.tests.functional.api import client
from nova.tests.functional.libvirt import test_pci_in_placement as base


class MultipleSpecPerAliasWithPCIInPlacementTest(
    base.PlacementPCIReportingTests
):
    """Regression test: one alias name with several specs is rejected when
    PCI in Placement is enabled.
    """

    def test_alias_with_multiple_specs_not_supported(self):
        self.flags(group='filter_scheduler', pci_in_placement=True)

        # Two specs sharing the alias name "a-vf", differing in product_id
        vf_specs = [
            {
                "device_type": "type-VF",
                "vendor_id": fakelibvirt.PCI_VEND_ID,
                "product_id": product_id,
                "name": "a-vf",
            }
            for product_id in ("f000", "f001")
        ]
        self.flags(
            group="pci",
            alias=self._to_list_of_json_str(vf_specs),
        )
        flavor_id = self._create_flavor(
            extra_spec={"pci_passthrough:alias": "a-vf:1"})

        # The server create request must be refused by the API
        api_error = self.assertRaises(
            client.OpenStackApiException,
            self._create_server,
            flavor_id=flavor_id,
            networks=[],
        )
        self.assertEqual(400, api_error.response.status_code)
        self.assertIn(
            "The PCI alias(es) a-vf have multiple specs but "
            "[filter_scheduler]pci_in_placement is True. The PCI in Placement "
            "feature only supports one spec per alias. You can assign the "
            "same resource_class to multiple [pci]device_spec matchers to "
            "allow using different devices for the same alias.",
            api_error.response.text)
48 changes: 48 additions & 0 deletions nova/tests/functional/regressions/test_bug_2111440.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from nova.tests.functional.api import client
from nova.tests.functional.libvirt import test_pci_in_placement as base


class MissingRCAndIdAliasWithPCIInPlacementTest(
    base.PlacementPCIReportingTests
):
    """Regression test: an alias with neither vendor_id/product_id nor
    resource_class is rejected when PCI in Placement is enabled.
    """

    def test_alias_without_rc_or_vendor_product_id_is_not_supported(self):
        self.flags(group='filter_scheduler', pci_in_placement=True)

        # The only spec has no vendor_id, product_id or resource_class
        incomplete_spec = {
            "device_type": "type-VF",
            "name": "a-vf",
            "traits": "foo"
        }
        self.flags(
            group="pci",
            alias=self._to_list_of_json_str([incomplete_spec]),
        )
        flavor_id = self._create_flavor(
            extra_spec={"pci_passthrough:alias": "a-vf:1"})

        # The server create request must be refused by the API
        api_error = self.assertRaises(
            client.OpenStackApiException,
            self._create_server,
            flavor_id=flavor_id,
            networks=[],
        )
        self.assertEqual(400, api_error.response.status_code)
        self.assertIn(
            "The PCI alias(es) a-vf does not have vendor_id and product_id "
            "fields set or resource_class field set.",
            api_error.response.text)
9 changes: 9 additions & 0 deletions nova/tests/unit/api/openstack/test_wsgi_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import fixtures
from oslo_config import fixture as config_fixture
from oslo_serialization import jsonutils
from oslotest import base

from nova.api.openstack import wsgi_app
Expand Down Expand Up @@ -127,6 +128,14 @@ def test_setup_service_version_workaround(self, mock_check_old, mock_get):
group='workarounds')
wsgi_app._setup_service('myhost', 'api')

def test_setup_service_pci_alias_validation(self):
    """_setup_service must raise when [pci]alias lacks the required ids."""
    # The alias loader called by _setup_service() is memoized. A result
    # cached by an earlier call in this process would be returned without
    # re-validating the override below, so start from an empty cache.
    wsgi_app.request.get_alias_from_config.cache_clear()
    wsgi_app.CONF.set_override(
        'alias', jsonutils.dumps({'name': 'foo'}),
        group='pci')
    self.assertRaises(
        exception.PciInvalidAlias,
        wsgi_app._setup_service, 'myhost', 'api')

def test__get_config_files_empty_env(self):
env = {}
result = wsgi_app._get_config_files(env)
Expand Down
12 changes: 12 additions & 0 deletions nova/tests/unit/compute/test_compute_mgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -7025,6 +7025,18 @@ def test_init_host_pci_device_spec_validation_failure(self):
self.assertRaises(exception.PciDeviceInvalidDeviceName,
self.compute.init_host, None)

def test_init_host_pci_alias_validation_failure(self):
    """init_host must fail if the pci.alias is configured incorrectly."""
    # An alias carrying only a name
    incomplete_alias = jsonutils.dumps({'name': 'foo'})
    self.flags(alias=[incomplete_alias], group='pci')
    self.assertRaises(
        exception.PciInvalidAlias, self.compute.init_host, None)

@mock.patch('nova.compute.manager.ComputeManager._instance_update')
def test_error_out_instance_on_exception_not_implemented_err(self,
inst_update_mock):
Expand Down
Loading