stackhpc
diff --git a/‎doc/source/admin/pci-passthrough.rst
Lines changed: 15 additions & 0 deletions b/‎doc/source/admin/pci-passthrough.rst
Lines changed: 15 additions & 0 deletions
diff --git a/‎nova/compute/pci_placement_translator.py
Lines changed: 38 additions & 23 deletions b/‎nova/compute/pci_placement_translator.py
Lines changed: 38 additions & 23 deletions
diff --git a/‎nova/conf/pci.py
Lines changed: 26 additions & 0 deletions b/‎nova/conf/pci.py
Lines changed: 26 additions & 0 deletions
diff --git a/‎nova/objects/request_spec.py
Lines changed: 52 additions & 13 deletions b/‎nova/objects/request_spec.py
Lines changed: 52 additions & 13 deletions
diff --git a/‎nova/pci/request.py
Lines changed: 7 additions & 1 deletion b/‎nova/pci/request.py
Lines changed: 7 additions & 1 deletion
diff --git a/‎nova/pci/stats.py
Lines changed: 15 additions & 2 deletions b/‎nova/pci/stats.py
Lines changed: 15 additions & 2 deletions
@@ -442,6 +442,21 @@ removed and VFs from the same PF is configured (or vice versa) then
 nova-compute will refuse to start as it would create a situation where both
 the PF and its VFs are made available for consumption.
 
+If a flavor requests multiple ``type-VF`` devices via
+:nova:extra-spec:`pci_passthrough:alias` then it is important to consider the
+value of :nova:extra-spec:`group_policy` as well. The value ``none``
+allows nova to select VFs from the same parent PF to fulfill the request. The
+value ``isolate`` restricts nova to select each VF from a different parent PF
+to fulfill the request. If :nova:extra-spec:`group_policy` is not provided in
+such a flavor then it defaults to ``none``.
+
+Symmetrically with the ``resource_class`` and ``traits`` fields of
+:oslo.config:option:`pci.device_spec` the :oslo.config:option:`pci.alias`
+configuration option supports requesting devices by Placement resource class
+name via the ``resource_class`` field and also supports requesting traits to
+be present on the selected devices via the ``traits`` field in the alias. If
+the ``resource_class`` field is not specified in the alias then it is defaulted
+by nova to ``CUSTOM_PCI_<vendor_id>_<product_id>``.
 
 For deeper technical details please read the `nova specification. <https://specs.openstack.org/openstack/nova-specs/specs/zed/approved/pci-device-tracking-in-placement.html>`_
 
 
@@ -65,20 +65,50 @@ def _normalize_traits(traits: ty.List[str]) -> ty.List[str]:
     return list(standard_traits) + custom_traits
 
 
-def _get_traits_for_dev(
-    dev_spec_tags: ty.Dict[str, str],
-) -> ty.Set[str]:
+def get_traits(traits_str: str) -> ty.Set[str]:
+    """Return a normalized set of placement standard and custom traits from
+    a string of comma separated trait names.
+    """
     # traits is a comma separated list of placement trait names
-    traits_str = dev_spec_tags.get("traits")
     if not traits_str:
-        return {os_traits.COMPUTE_MANAGED_PCI_DEVICE}
+        return set()
+    return set(_normalize_traits(traits_str.split(',')))
 
-    traits = traits_str.split(',')
-    return set(_normalize_traits(traits)) | {
+
+def _get_traits_for_dev(
+    dev_spec_tags: ty.Dict[str, str],
+) -> ty.Set[str]:
+    return get_traits(dev_spec_tags.get("traits", "")) | {
         os_traits.COMPUTE_MANAGED_PCI_DEVICE
     }
 
 
+def _normalize_resource_class(rc: str) -> str:
+    rc = rc.upper()
+    if (
+            rc not in os_resource_classes.STANDARDS and
+            not os_resource_classes.is_custom(rc)
+    ):
+        rc = os_resource_classes.normalize_name(rc)
+        # mypy: normalize_name will return non None for non None input
+        assert rc
+
+    return rc
+
+
+def get_resource_class(
+    requested_name: ty.Optional[str], vendor_id: str, product_id: str
+) -> str:
+    """Return the normalized resource class name based on what is requested
+    or if nothing is requested then generated from the vendor_id and product_id
+    """
+    if requested_name:
+        rc = _normalize_resource_class(requested_name)
+    else:
+        rc = f"CUSTOM_PCI_{vendor_id}_{product_id}".upper()
+    return rc
+
+
 def _get_rc_for_dev(
     dev: pci_device.PciDevice,
     dev_spec_tags: ty.Dict[str, str],
@@ -91,23 +121,8 @@ def _get_rc_for_dev(
     The user specified resource class is normalized if it is not already an
     acceptable standard or custom resource class.
     """
-    # Either use the resource class from the config or the vendor_id and
-    # product_id of the device to generate the RC
     rc = dev_spec_tags.get("resource_class")
-    if rc:
-        rc = rc.upper()
-        if (
-            rc not in os_resource_classes.STANDARDS and
-            not os_resource_classes.is_custom(rc)
-        ):
-            rc = os_resource_classes.normalize_name(rc)
-            # mypy: normalize_name will return non None for non None input
-            assert rc
-
-    else:
-        rc = f"CUSTOM_PCI_{dev.vendor_id}_{dev.product_id}".upper()
-
-    return rc
+    return get_resource_class(rc, dev.vendor_id, dev.product_id)
 
 
 class PciResourceProvider:
 
@@ -67,6 +67,32 @@
     Required NUMA affinity of device. Valid values are: ``legacy``,
     ``preferred`` and ``required``.
 
+  ``resource_class``
+    The optional Placement resource class name that is used
+    to track the requested PCI devices in Placement. It can be a standard
+    resource class from the ``os-resource-classes`` lib. Or can be any string.
+    In that case Nova will normalize it to a proper Placement resource class by
+    making it upper case, replacing any consecutive characters outside of
+    ``[A-Z0-9_]`` with a single '_', and prefixing the name with ``CUSTOM_`` if
+    not yet prefixed. The maximum allowed length is 255 characters including
+    the prefix. If ``resource_class`` is not provided Nova will generate it
+    from the ``vendor_id`` and ``product_id`` values of the alias in the form
+    of ``CUSTOM_PCI_{vendor_id}_{product_id}``. The ``resource_class``
+    requested in the alias is matched against the ``resource_class`` defined
+    in the ``[pci]device_spec``.
+
+  ``traits``
+    An optional comma separated list of Placement trait names requested to be
+    present on the resource provider that fulfills this alias. Each trait can
+    be a standard trait from ``os-traits`` lib or can be any string. If it is
+    not a standard trait then Nova will normalize the trait name by making it
+    upper case, replacing any consecutive characters outside of ``[A-Z0-9_]``
+    with a single '_', and prefixing the name with ``CUSTOM_`` if not yet
+    prefixed. The maximum allowed length of a trait name is 255 characters
+    including the prefix. Every trait in ``traits`` requested in the alias is
+    ensured to be in the list of traits provided in the ``traits`` field of
+    the ``[pci]device_spec`` when scheduling the request.
+
 * Supports multiple aliases by repeating the option (not by specifying
   a list value)::
 
 
@@ -14,12 +14,14 @@
 
 import copy
 import itertools
+import typing as ty
 
 import os_resource_classes as orc
 from oslo_log import log as logging
 from oslo_serialization import jsonutils
 from oslo_utils import versionutils
 
+from nova.compute import pci_placement_translator
 from nova.db.api import api as api_db_api
 from nova.db.api import models as api_models
 from nova import exception
@@ -474,14 +476,16 @@ def to_legacy_filter_properties_dict(self):
         return filt_props
 
     @staticmethod
-    def _rc_from_request(pci_request: 'objects.InstancePCIRequest') -> str:
-        # FIXME(gibi): refactor this and the copy of the logic from the
-        #  translator to a common function
-        # FIXME(gibi): handle directly requested resource_class
-        # ??? can there be more than one spec???
-        spec = pci_request.spec[0]
-        rc = f"CUSTOM_PCI_{spec['vendor_id']}_{spec['product_id']}".upper()
-        return rc
+    def _rc_from_request(spec: ty.Dict[str, ty.Any]) -> str:
+        return pci_placement_translator.get_resource_class(
+            spec.get("resource_class"),
+            spec.get("vendor_id"),
+            spec.get("product_id"),
+        )
+
+    @staticmethod
+    def _traits_from_request(spec: ty.Dict[str, ty.Any]) -> ty.Set[str]:
+        return pci_placement_translator.get_traits(spec.get("traits", ""))
 
     # This is here temporarily until the PCI placement scheduling is under
     # implementation. When that is done there will be a config option
@@ -501,6 +505,34 @@ def _generate_request_groups_from_pci_requests(self):
                 # cycle.
                 continue
 
+            if len(pci_request.spec) != 1:
+                # We are instantiating InstancePCIRequest objects with spec in
+                # two cases:
+                # 1) when a neutron port is translated to InstancePCIRequest
+                #    object in
+                #    nova.network.neutron.API.create_resource_requests
+                # 2) when the pci_passthrough:alias flavor extra_spec is
+                #    translated to InstancePCIRequest objects in
+                #    nova.pci.request._get_alias_from_config which enforces the
+                #    json schema defined in nova.pci.request.
+                #
+                # In both cases only a single dict is added to the spec list.
+                # If we ever want to add support for multiple specs per request
+                # then we have to solve the issue that each spec can request a
+                # different resource class from placement. The only place in
+                # nova that currently handles multiple specs per request is
+                # nova.pci.utils.pci_device_prop_match() and it considers them
+                # as alternatives. So specs with different resource classes
+                # would mean alternative resource_class requests. This cannot
+                # be expressed today in the allocation_candidate query towards
+                # placement.
+                raise ValueError(
+                    "PCI tracking in placement does not support multiple "
+                    "specs per PCI request"
+                )
+
+            spec = pci_request.spec[0]
+
             # The goal is to translate InstancePCIRequest to RequestGroup. Each
             # InstancePCIRequest can be fulfilled from the whole RP tree. And
             # a flavor based InstancePCIRequest might request more than one
@@ -533,9 +565,13 @@ def _generate_request_groups_from_pci_requests(self):
             # per requested device. So for InstancePCIRequest(count=2) we need
             # to generate two separate RequestGroup(RC:1) objects.
 
-            # FIXME(gibi): make sure that if we have count=2 requests then
-            #  group_policy=none is in the request as group_policy=isolate
-            #  would prevent allocating two VFs from the same PF.
+            # NOTE(gibi): If we have count=2 requests then the multiple
+            # RequestGroup split below only works if group_policy is set to
+            # none as group_policy=isolate would prevent allocating two VFs
+            # from the same PF. Fortunately
+            # nova.scheduler.utils.resources_from_request_spec() already
+            # defaults group_policy to none if it is not specified in the
+            # flavor and there are multiple RequestGroups in the RequestSpec.
 
             for i in range(pci_request.count):
                 rg = objects.RequestGroup(
@@ -546,8 +582,11 @@ def _generate_request_groups_from_pci_requests(self):
                     # as we split count >= 2 requests to independent groups
                     # each group will have a resource request of one
                     resources={
-                        self._rc_from_request(pci_request): 1}
-                    # FIXME(gibi): handle traits requested from alias
+                        self._rc_from_request(spec): 1
+                    },
+                    required_traits=self._traits_from_request(spec),
+                    # TODO(gibi): later we can add support for complex trait
+                    # queries here including forbidden_traits.
                 )
                 self.requested_resources.append(rg)
 
 
@@ -106,6 +106,12 @@
             "type": "string",
             "enum": list(obj_fields.PCINUMAAffinityPolicy.ALL),
         },
+        "resource_class": {
+            "type": "string",
+        },
+        "traits": {
+            "type": "string",
+        },
     },
     "required": ["name"],
 }
@@ -114,7 +120,7 @@
 def _get_alias_from_config() -> Alias:
     """Parse and validate PCI aliases from the nova config.
 
-    :returns: A dictionary where the keys are device names and the values are
+    :returns: A dictionary where the keys are alias names and the values are
         tuples of form ``(numa_policy, specs)``. ``numa_policy`` describes the
         required NUMA affinity of the device(s), while ``specs`` is a list of
         PCI device specs.
 
@@ -64,6 +64,11 @@ class PciDeviceStats(object):
     """
 
     pool_keys = ['product_id', 'vendor_id', 'numa_node', 'dev_type']
+    # these can be specified in the [pci]device_spec and can be requested via
+    # the PCI alias, but they are matched by the placement
+    # allocation_candidates query, so we can ignore them during pool creation
+    # and during filtering here
+    ignored_tags = ['resource_class', 'traits']
 
     def __init__(
         self,
@@ -135,7 +140,9 @@ def _create_pool_keys_from_dev(
         tags = devspec.get_tags()
         pool = {k: getattr(dev, k) for k in self.pool_keys}
         if tags:
-            pool.update(tags)
+            pool.update(
+                {k: v for k, v in tags.items() if k not in self.ignored_tags}
+            )
         # NOTE(gibi): parent_ifname acts like a tag during pci claim but
         # not provided as part of the whitelist spec as it is auto detected
         # by the virt driver.
@@ -313,7 +320,13 @@ def _filter_pools_for_spec(
         :returns: A list of pools that can be used to support the request if
             this is possible.
         """
-        request_specs = request.spec
+
+        def ignore_keys(spec):
+            return {
+                k: v for k, v in spec.items() if k not in self.ignored_tags
+            }
+
+        request_specs = [ignore_keys(spec) for spec in request.spec]
         return [
             pool for pool in pools
             if utils.pci_device_prop_match(pool, request_specs)