QEMU源码全解析 —— PCI设备模拟(2)

发布时间:2024年01月11日

接前一篇文章:

2. PCI设备的模拟

QEMU模拟的设备很多都是PCI设备,本节介绍PCI设备的模拟。与所有设备类似,PCI设备类型(TYPE_PCI_DEVICE)的父类型也是TYPE_DEVICE,其TypeInfo定义在QEMU源码根目录下的hw/pci/pci.c中,代码如下:

/*
 * QOM type description of the PCI device base type.
 *
 * The type is abstract and derives from TYPE_DEVICE; instances are
 * PCIDevice-sized and the class structure is PCIDeviceClass-sized.
 */
static const TypeInfo pci_device_type_info = {
    /* identity */
    .name            = TYPE_PCI_DEVICE,
    .parent          = TYPE_DEVICE,
    .abstract        = true,

    /* object / class sizes */
    .instance_size   = sizeof(PCIDevice),
    .class_size      = sizeof(PCIDeviceClass),

    /* class construction hooks */
    .class_base_init = pci_device_class_base_init,
    .class_init      = pci_device_class_init,
};

static void pci_register_types(void)
{
    type_register_static(&pci_bus_info);
    type_register_static(&pcie_bus_info);
    type_register_static(&cxl_bus_info);
    type_register_static(&conventional_pci_interface_info);
    type_register_static(&cxl_interface_info);
    type_register_static(&pcie_interface_info);
    type_register_static(&pci_device_type_info);
}

/* Hand pci_register_types to the type_init() registration macro. */
type_init(pci_register_types)

其中,TypeInfo的定义在include/qom/object.h中,如下:

typedef struct TypeInfo TypeInfo;

而struct TypeInfo的定义同样在include/qom/object.h中,代码如下:

/**
 * struct TypeInfo:
 * @name: The name of the type.
 * @parent: The name of the parent type.
 * @instance_size: The size of the object (derivative of #Object).  If
 *   @instance_size is 0, then the size of the object will be the size of the
 *   parent object.
 * @instance_align: The required alignment of the object.  If @instance_align
 *   is 0, then normal malloc alignment is sufficient; if non-zero, then we
 *   must use qemu_memalign for allocation.
 * @instance_init: This function is called to initialize an object.  The parent
 *   class will have already been initialized so the type is only responsible
 *   for initializing its own members.
 * @instance_post_init: This function is called to finish initialization of
 *   an object, after all @instance_init functions were called.
 * @instance_finalize: This function is called during object destruction.  This
 *   is called before the parent @instance_finalize function has been called.
 *   An object should only free the members that are unique to its type in this
 *   function.
 * @abstract: If this field is true, then the class is considered abstract and
 *   cannot be directly instantiated.
 * @class_size: The size of the class object (derivative of #ObjectClass)
 *   for this object.  If @class_size is 0, then the size of the class will be
 *   assumed to be the size of the parent class.  This allows a type to avoid
 *   implementing an explicit class type if they are not adding additional
 *   virtual functions.
 * @class_init: This function is called after all parent class initialization
 *   has occurred to allow a class to set its default virtual method pointers.
 *   This is also the function to use to override virtual methods from a parent
 *   class.
 * @class_base_init: This function is called for all base classes after all
 *   parent class initialization has occurred, but before the class itself
 *   is initialized.  This is the function to use to undo the effects of
 *   memcpy from the parent class to the descendants.
 * @class_data: Data to pass to the @class_init,
 *   @class_base_init. This can be useful when building dynamic
 *   classes.
 * @interfaces: The list of interfaces associated with this type.  This
 *   should point to a static array that's terminated with a zero filled
 *   element.
 */
struct TypeInfo
{
    /* Type identity: name of this type and name of its parent type. */
    const char *name;
    const char *parent;

    /* Instance (object) size/alignment and per-instance lifecycle hooks. */
    size_t instance_size;
    size_t instance_align;
    void (*instance_init)(Object *obj);
    void (*instance_post_init)(Object *obj);
    void (*instance_finalize)(Object *obj);

    /* Whether the type is abstract, and the size of its class object. */
    bool abstract;
    size_t class_size;

    /* Class initialization hooks; class_data is passed to both as @data. */
    void (*class_init)(ObjectClass *klass, void *data);
    void (*class_base_init)(ObjectClass *klass, void *data);
    void *class_data;

    /* Interfaces associated with the type (zero-terminated static array). */
    InterfaceInfo *interfaces;
};

这里,对于TypeInfo即struct TypeInfo的对象pci_device_type_info来说,其class_init(函数指针)成员指向了pci_device_class_init函数。该函数也在hw/pci/pci.c中,代码如下:

/*
 * Class initializer of the PCI device base type.
 *
 * Fills in the DeviceClass part of the class: the generic PCI
 * realize/unrealize handlers, the bus type the device plugs into and the
 * common PCI property list.
 *
 * @klass: class being initialized.
 * @data: unused.
 */
static void pci_device_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* PCI devices sit on a PCI bus. */
    dc->bus_type = TYPE_PCI_BUS;

    /* Generic handlers shared by every PCI device. */
    dc->unrealize = pci_qdev_unrealize;
    dc->realize = pci_qdev_realize;

    device_class_set_props(dc, pci_props);
}

PCI类初始化函数中设置了PCIDeviceClass基类对象DeviceClass的realize和unrealize函数;bus_type表示设备挂接到的总线类型;props表示PCI设备有哪些属性,这些属性都可以在命令行指定。同样的,不存在单独的PCI设备,PCI设备也是一个抽象类。

PCI设备的具现化函数为pci_qdev_realize。该函数同样在hw/pci/pci.c中,代码如下:

/*
 * Generic realize handler for PCI devices (installed as the DeviceClass
 * realize callback by pci_device_class_init()).
 *
 * In order, it:
 *   - validates acpi-index (upper bound and uniqueness) and the ROM size;
 *   - derives the QEMU_PCI_CAP_EXPRESS / QEMU_PCIE_CAP_CXL bits of
 *     cap_present from the interfaces the class implements;
 *   - registers the device on its bus via do_pci_register_device();
 *   - calls the PCIDeviceClass realize hook, if the class sets one;
 *   - warns about a non-zero slot for an ARI-less PCIe device that has an
 *     upstream port;
 *   - applies the failover-primary checks when failover_pair_id is set;
 *   - adds the option ROM, calls pci_set_power(pci_dev, true) and installs
 *     pci_msi_trigger as the device's msi_trigger.
 *
 * @qdev: the device being realized; it is cast directly to PCIDevice.
 * @errp: set when realization fails.
 */
static void pci_qdev_realize(DeviceState *qdev, Error **errp)
{
    PCIDevice *pci_dev = (PCIDevice *)qdev;
    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
    ObjectClass *klass = OBJECT_CLASS(pc);
    Error *local_err = NULL;
    bool is_default_rom;
    uint16_t class_id;

    /*
     * capped by systemd (see: udev-builtin-net_id.c)
     * as it's the only known user honor it to avoid users
     * misconfigure QEMU and then wonder why acpi-index doesn't work
     */
    if (pci_dev->acpi_index > ONBOARD_INDEX_MAX) {
        error_setg(errp, "acpi-index should be less or equal to %u",
                   ONBOARD_INDEX_MAX);
        return;
    }

    /*
     * make sure that acpi-index is unique across all present PCI devices
     */
    if (pci_dev->acpi_index) {
        GSequence *used_indexes = pci_acpi_index_list();

        if (g_sequence_lookup(used_indexes,
                              GINT_TO_POINTER(pci_dev->acpi_index),
                              g_cmp_uint32, NULL)) {
            error_setg(errp, "a PCI device with acpi-index = %" PRIu32
                       " already exist", pci_dev->acpi_index);
            return;
        }
        g_sequence_insert_sorted(used_indexes,
                                 GINT_TO_POINTER(pci_dev->acpi_index),
                                 g_cmp_uint32, NULL);
    }

    /*
     * A romsize of -1 is exempt from the power-of-two check.
     *
     * NOTE(review): the return below happens after acpi_index may already
     * have been inserted into used_indexes above, and no removal is visible
     * on this path -- confirm the index is released elsewhere.
     */
    if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) {
        error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize);
        return;
    }

    /* initialize cap_present for pci_is_express() and pci_config_size(),
     * Note that hybrid PCIs are not set automatically and need to manage
     * QEMU_PCI_CAP_EXPRESS manually */
    if (object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE) &&
       !object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE)) {
        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
    }

    if (object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE)) {
        pci_dev->cap_present |= QEMU_PCIE_CAP_CXL;
    }

    /*
     * Pick/validate the devfn, set up config space and attach the device to
     * its bus. On failure NULL is returned and errp has already been set.
     */
    pci_dev = do_pci_register_device(pci_dev,
                                     object_get_typename(OBJECT(qdev)),
                                     pci_dev->devfn, errp);
    if (pci_dev == NULL)
        return;

    /* Device-specific realize hook; on failure undo the registration. */
    if (pc->realize) {
        pc->realize(pci_dev, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            do_pci_unregister_device(pci_dev);
            return;
        }
    }

    /*
     * A PCIe Downstream Port that do not have ARI Forwarding enabled must
     * associate only Device 0 with the device attached to the bus
     * representing the Link from the Port (PCIe base spec rev 4.0 ver 0.3,
     * sec 7.3.1).
     * With ARI, PCI_SLOT() can return non-zero value as the traditional
     * 5-bit Device Number and 3-bit Function Number fields in its associated
     * Routing IDs, Requester IDs and Completer IDs are interpreted as a
     * single 8-bit Function Number. Hence, ignore ARI capable devices.
     */
    if (pci_is_express(pci_dev) &&
        !pcie_find_capability(pci_dev, PCI_EXT_CAP_ID_ARI) &&
        pcie_has_upstream_port(pci_dev) &&
        PCI_SLOT(pci_dev->devfn)) {
        warn_report("PCI: slot %d is not valid for %s,"
                    " parent device only allows plugging into slot 0.",
                    PCI_SLOT(pci_dev->devfn), pci_dev->name);
    }

    /*
     * Failover primary device: it must be on a PCI Express bus, have the
     * Ethernet class code and be alone in its slot (function 0 and not
     * multifunction). Each violation unrealizes the device and returns.
     */
    if (pci_dev->failover_pair_id) {
        if (!pci_bus_is_express(pci_get_bus(pci_dev))) {
            error_setg(errp, "failover primary device must be on "
                             "PCIExpress bus");
            pci_qdev_unrealize(DEVICE(pci_dev));
            return;
        }
        class_id = pci_get_word(pci_dev->config + PCI_CLASS_DEVICE);
        if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
            error_setg(errp, "failover primary device is not an "
                             "Ethernet device");
            pci_qdev_unrealize(DEVICE(pci_dev));
            return;
        }
        if ((pci_dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)
            || (PCI_FUNC(pci_dev->devfn) != 0)) {
            error_setg(errp, "failover: primary device must be in its own "
                              "PCI slot");
            pci_qdev_unrealize(DEVICE(pci_dev));
            return;
        }
        qdev->allow_unplug_during_migration = true;
    }

    /* rom loading */
    /* Use the class's default romfile when the device has none of its own. */
    is_default_rom = false;
    if (pci_dev->romfile == NULL && pc->romfile != NULL) {
        pci_dev->romfile = g_strdup(pc->romfile);
        is_default_rom = true;
    }

    pci_add_option_rom(pci_dev, is_default_rom, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        pci_qdev_unrealize(DEVICE(pci_dev));
        return;
    }

    pci_set_power(pci_dev, true);

    pci_dev->msi_trigger = pci_msi_trigger;
}

pci_qdev_realize函数主要包括三个方面的工作:

(1)首先调用do_pci_register_device函数进行注册。

代码片段如下:

    pci_dev = do_pci_register_device(pci_dev,
                                     object_get_typename(OBJECT(qdev)),
                                     pci_dev->devfn, errp);
    if (pci_dev == NULL)
        return;

do_pci_register_device函数同样在hw/pci/pci.c中,代码如下:

/*
 * Attach @pci_dev to its parent PCI bus and initialise its config space.
 *
 * @pci_dev: device to register; its bus is obtained with pci_get_bus().
 * @name: copied into pci_dev->name and used in error messages.
 * @devfn: requested devfn. -1 for devfn means auto assign (any negative
 *         value takes that path): the first devfn, starting at
 *         bus->devfn_min and stepping by PCI_FUNC_MAX, that is both
 *         available and not reserved is used.
 * @errp: set on every failure path.
 *
 * Returns @pci_dev on success, NULL on failure.
 */
static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
                                         const char *name, int devfn,
                                         Error **errp)
{
    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
    PCIConfigReadFunc *config_read = pc->config_read;
    PCIConfigWriteFunc *config_write = pc->config_write;
    Error *local_err = NULL;
    DeviceState *dev = DEVICE(pci_dev);
    PCIBus *bus = pci_get_bus(pci_dev);
    bool is_bridge = IS_PCI_BRIDGE(pci_dev);

    /* Only pci bridges can be attached to extra PCI root buses */
    if (pci_bus_is_root(bus) && bus->parent_dev && !is_bridge) {
        error_setg(errp,
                   "PCI: Only PCI/PCIe bridges can be plugged into %s",
                    bus->parent_dev->name);
        return NULL;
    }

    /*
     * Choose or validate the devfn. Exactly one branch of this chain runs:
     * auto-assignment, "reserved", "in use", or the hotplug check.
     */
    if (devfn < 0) {
        for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
            devfn += PCI_FUNC_MAX) {
            if (pci_bus_devfn_available(bus, devfn) &&
                   !pci_bus_devfn_reserved(bus, devfn)) {
                goto found;
            }
        }
        error_setg(errp, "PCI: no slot/function available for %s, all in use "
                   "or reserved", name);
        return NULL;
    found: ;
    } else if (pci_bus_devfn_reserved(bus, devfn)) {
        error_setg(errp, "PCI: slot %d function %d not available for %s,"
                   " reserved",
                   PCI_SLOT(devfn), PCI_FUNC(devfn), name);
        return NULL;
    } else if (!pci_bus_devfn_available(bus, devfn)) {
        error_setg(errp, "PCI: slot %d function %d not available for %s,"
                   " in use by %s,id=%s",
                   PCI_SLOT(devfn), PCI_FUNC(devfn), name,
                   bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
        return NULL;
    } /*
       * Populating function 0 triggers a scan from the guest that
       * exposes other non-zero functions. Hence we need to ensure that
       * function 0 wasn't added yet.
       */
    else if (dev->hotplugged &&
             !pci_is_vf(pci_dev) &&
             pci_get_function_0(pci_dev)) {
        error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
                   " new func %s cannot be exposed to guest.",
                   PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
                   pci_get_function_0(pci_dev)->name,
                   name);

       return NULL;
    }

    /* The devfn is settled: record it and the device name on the device. */
    pci_dev->devfn = devfn;
    pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
    pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);

    /*
     * Set up the bus-master container region and the address space
     * (bus_master_as) rooted at it.
     */
    memory_region_init(&pci_dev->bus_master_container_region, OBJECT(pci_dev),
                       "bus master container", UINT64_MAX);
    address_space_init(&pci_dev->bus_master_as,
                       &pci_dev->bus_master_container_region, pci_dev->name);

    /*
     * pci_init_bus_master() is only called from here when
     * phase_check(PHASE_MACHINE_READY) holds -- presumably it is run from
     * elsewhere for devices registered earlier; TODO confirm.
     */
    if (phase_check(PHASE_MACHINE_READY)) {
        pci_init_bus_master(pci_dev);
    }
    pci_dev->irq_state = 0;
    pci_config_alloc(pci_dev);

    /* Seed the identification fields of config space from the class. */
    pci_config_set_vendor_id(pci_dev->config, pc->vendor_id);
    pci_config_set_device_id(pci_dev->config, pc->device_id);
    pci_config_set_revision(pci_dev->config, pc->revision);
    pci_config_set_class(pci_dev->config, pc->class_id);

    if (!is_bridge) {
        /* Class-provided subsystem IDs win; otherwise use the defaults. */
        if (pc->subsystem_vendor_id || pc->subsystem_id) {
            pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
                         pc->subsystem_vendor_id);
            pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
                         pc->subsystem_id);
        } else {
            pci_set_default_subsystem_id(pci_dev);
        }
    } else {
        /* subsystem_vendor_id/subsystem_id are only for header type 0 */
        assert(!pc->subsystem_vendor_id);
        assert(!pc->subsystem_id);
    }
    /* Initialise the cmask / wmask / w1cmask config-space masks. */
    pci_init_cmask(pci_dev);
    pci_init_wmask(pci_dev);
    pci_init_w1cmask(pci_dev);
    if (is_bridge) {
        pci_init_mask_bridge(pci_dev);
    }
    pci_init_multifunction(bus, pci_dev, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        do_pci_unregister_device(pci_dev);
        return NULL;
    }

    /* Fall back to the default config accessors if the class sets none. */
    if (!config_read)
        config_read = pci_default_read_config;
    if (!config_write)
        config_write = pci_default_write_config;
    pci_dev->config_read = config_read;
    pci_dev->config_write = config_write;
    /* Store the device in the bus's table, indexed by devfn. */
    bus->devices[devfn] = pci_dev;
    pci_dev->version_id = 2; /* Current pci device vmstate version */
    return pci_dev;
}

do_pci_register_device函数完成设备及其对应PCI总线上的一些初始化工作。

1)如果指定的devfn为-1,表示由总线自己选择插槽,得到插槽之后保存在PCIDevice的devfn(即pci_dev->devfn)中;如果在设备命令行中指定了addr,则addr会作为设备的devfn。代码片段如下:

    if (devfn < 0) {
        for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
            devfn += PCI_FUNC_MAX) {
            if (pci_bus_devfn_available(bus, devfn) &&
                   !pci_bus_devfn_reserved(bus, devfn)) {
                goto found;
            }
        }
        error_setg(errp, "PCI: no slot/function available for %s, all in use "
                   "or reserved", name);
        return NULL;
    found: ;
    } else if (pci_bus_devfn_reserved(bus, devfn)) {
        error_setg(errp, "PCI: slot %d function %d not available for %s,"
                   " reserved",
                   PCI_SLOT(devfn), PCI_FUNC(devfn), name);
        return NULL;
    } else if (!pci_bus_devfn_available(bus, devfn)) {
        error_setg(errp, "PCI: slot %d function %d not available for %s,"
                   " in use by %s,id=%s",
                   PCI_SLOT(devfn), PCI_FUNC(devfn), name,
                   bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
        return NULL;
    } /*
       * Populating function 0 triggers a scan from the guest that
       * exposes other non-zero functions. Hence we need to ensure that
       * function 0 wasn't added yet.
       */
    else if (dev->hotplugged &&
             !pci_is_vf(pci_dev) &&
             pci_get_function_0(pci_dev)) {
        error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
                   " new func %s cannot be exposed to guest.",
                   PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
                   pci_get_function_0(pci_dev)->name,
                   name);

       return NULL;
    }

    pci_dev->devfn = devfn;
    pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
    pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);

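2)接下来设置PCIDevice结构体中的各个域,包括调用memory_region_init和address_space_init函数初始化PCIDevice中的AddressSpace成员bus_master_as及其对应的MR(bus_master_container_region);如果phase_check(PHASE_MACHINE_READY)成立,还会调用pci_init_bus_master函数。代码片段如下: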

    memory_region_init(&pci_dev->bus_master_container_region, OBJECT(pci_dev),
                       "bus master container", UINT64_MAX);
    address_space_init(&pci_dev->bus_master_as,
                       &pci_dev->bus_master_container_region, pci_dev->name);

    if (phase_check(PHASE_MACHINE_READY)) {
        pci_init_bus_master(pci_dev);
    }

3)之后,调用pci_config_alloc函数分配PCI设备的配置空间,并完成一些初始化的设置,如vendor_id等。其中,cmask用来检测相关的能力,wmask用来控制读写,w1cmask用来实现RW1C(写1清零)。代码片段如下:

    pci_config_alloc(pci_dev);

    pci_config_set_vendor_id(pci_dev->config, pc->vendor_id);
    pci_config_set_device_id(pci_dev->config, pc->device_id);
    pci_config_set_revision(pci_dev->config, pc->revision);
    pci_config_set_class(pci_dev->config, pc->class_id);

    if (!is_bridge) {
        if (pc->subsystem_vendor_id || pc->subsystem_id) {
            pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
                         pc->subsystem_vendor_id);
            pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
                         pc->subsystem_id);
        } else {
            pci_set_default_subsystem_id(pci_dev);
        }
    } else {
        /* subsystem_vendor_id/subsystem_id are only for header type 0 */
        assert(!pc->subsystem_vendor_id);
        assert(!pc->subsystem_id);
    }
    pci_init_cmask(pci_dev);
    pci_init_wmask(pci_dev);
    pci_init_w1cmask(pci_dev);
    if (is_bridge) {
        pci_init_mask_bridge(pci_dev);
    }
    pci_init_multifunction(bus, pci_dev, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        do_pci_unregister_device(pci_dev);
        return NULL;
    }

4)然后是设置设备的config_read和config_write函数。如果相关的子类自己没有设置,那么就使用默认的pci_default_read/write_config函数。代码片段如下:

    if (!config_read)
        config_read = pci_default_read_config;
    if (!config_write)
        config_write = pci_default_write_config;
    pci_dev->config_read = config_read;
    pci_dev->config_write = config_write;

5)最后,将该设备(的指针)保存到bus->devices数组中以devfn为下标的位置。代码片段如下:

    bus->devices[devfn] = pci_dev;
    pci_dev->version_id = 2; /* Current pci device vmstate version */

至此,pci_qdev_realize函数所做的第一方面工作即所调用的第1个函数do_pci_register_device()就解析完了。

欲知后事如何,且看下回分解。

文章来源:https://blog.csdn.net/phmatthaus/article/details/135496556
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。