From 5d758f1d5fadcb3cb5ec0e82a5c4d8bfc08f94d1 Mon Sep 17 00:00:00 2001 From: Paul Praschl <101004482+Pauli1Go@users.noreply.github.com> Date: Sun, 28 Jun 2026 22:52:55 +0200 Subject: [PATCH 1/3] apple: t8010: add clean H9P NVMe storage --- .../bindings/pci/apple,t8010-pcie.yaml | 237 +++ arch/arm64/boot/dts/apple/t8010-ipad7.dtsi | 117 ++ drivers/iommu/Kconfig | 11 + drivers/iommu/Makefile | 1 + drivers/iommu/apple-t8010-dart.c | 680 ++++++++ drivers/nvme/host/pci.c | 444 +++++- drivers/pci/controller/Kconfig | 13 + drivers/pci/controller/Makefile | 1 + drivers/pci/controller/pcie-apple-h9p.c | 1368 +++++++++++++++++ include/linux/apple-h9p-pcie.h | 28 + 10 files changed, 2889 insertions(+), 11 deletions(-) create mode 100644 Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml create mode 100644 drivers/iommu/apple-t8010-dart.c create mode 100644 drivers/pci/controller/pcie-apple-h9p.c create mode 100644 include/linux/apple-h9p-pcie.h diff --git a/Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml b/Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml new file mode 100644 index 00000000000000..db6b1e88180262 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml @@ -0,0 +1,237 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/apple,t8010-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Apple H9P/T8010 PCIe Host Controller + +maintainers: + - Hector Martin + +description: + Apple A10/T8010 devices use an older H9P PCIe root complex for the + internal storage path. It exposes one ECAM window shared by up to four + root ports, controller-specific PHY and port register windows, and a + controller-local MSI block. 
+ +allOf: + - $ref: /schemas/pci/pci-host-bridge.yaml# + - $ref: /schemas/interrupt-controller/msi-controller.yaml# + +properties: + compatible: + const: apple,t8010-pcie + + reg: + minItems: 10 + maxItems: 10 + + reg-names: + items: + - const: config + - const: phy0 + - const: phy1 + - const: phy2 + - const: port0 + - const: port1 + - const: port2 + - const: port3 + - const: nvmmu0 + - const: pcieclk-postup + + interrupts: + description: + Four port state interrupts followed by 32 MSI interrupts and the + NVMMU fault interrupt for the active storage port (37 in total). + minItems: 37 + maxItems: 37 + + clocks: + minItems: 3 + maxItems: 3 + + clock-names: + items: + - const: core + - const: aux + - const: ref + + power-domains: + minItems: 3 + maxItems: 3 + + power-domain-names: + items: + - const: core + - const: aux + - const: ref + + reset-gpios: + description: + PERST# GPIOs indexed by PCIe root port. + minItems: 1 + maxItems: 4 + + clkreq-gpios: + description: + CLKREQ# GPIOs indexed by PCIe root port. + minItems: 1 + maxItems: 4 + + msi-controller: true + + msi-parent: true + + apple,msi-doorbell: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + MSI doorbell address programmed into downstream endpoints. + + apple,enabled-ports: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Bitmask of root ports that should be powered and trained. + minimum: 1 + maximum: 15 + + apple,nvmmu-iova: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Base device-visible address for the reserved NVMMU/SART window. + + memory-region: + description: + Reserved physical window programmed into the H9P NVMMU/SART path. 
+ maxItems: 1 + + interrupt-controller: true + + '#interrupt-cells': + const: 1 + +required: + - compatible + - reg + - reg-names + - interrupts + - clocks + - clock-names + - power-domains + - power-domain-names + - reset-gpios + - clkreq-gpios + - bus-range + - ranges + - msi-controller + - msi-parent + - apple,enabled-ports + - apple,nvmmu-iova + - memory-region + - '#interrupt-cells' + +unevaluatedProperties: false + +examples: + - | + #include + #include + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + pcie0_nvmmu_window: nvmmu-window@8bee00000 { + reg = <0x8 0xbee00000 0x0 0x01200000>; + no-map; + }; + }; + + soc { + #address-cells = <2>; + #size-cells = <2>; + + pcie0: pcie@610000000 { + compatible = "apple,t8010-pcie"; + device_type = "pci"; + + reg = <0x6 0x10000000 0x0 0x1000000>, + <0x6 0x00000000 0x0 0x8000>, + <0x6 0x00008000 0x0 0x4000>, + <0x6 0x0a000000 0x0 0x40000>, + <0x6 0x01000000 0x0 0x4000>, + <0x6 0x02000000 0x0 0x4000>, + <0x6 0x03000000 0x0 0x4000>, + <0x6 0x04000000 0x0 0x4000>, + <0x6 0x01004000 0x0 0x4000>, + <0x6 0x00010000 0x0 0x8000>; + reg-names = "config", "phy0", "phy1", "phy2", + "port0", "port1", "port2", "port3", + "nvmmu0", "pcieclk-postup"; + + interrupt-parent = <&aic>; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + + msi-controller; + msi-parent = <&pcie0>; + apple,msi-doorbell = <0xbffff000>; + apple,enabled-ports = <0x1>; + apple,nvmmu-iova = <0xbc000000>; + memory-region = <&pcie0_nvmmu_window>; + + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + bus-range = <0x00 0x0f>; + ranges = <0x03000000 0x0 0xc0000000 + 0x7 0xc0000000 0x0 0x40000000>; + + clocks = <&clkref>, <&clkref>, <&clkref>; + clock-names = "core", "aux", "ref"; + power-domains = <&ps_pcie>, <&ps_pcie_aux>, + <&ps_pcie_ref>; + power-domain-names = "core", "aux", "ref"; + + reset-gpios = 
<&pinctrl_ap 12 GPIO_ACTIVE_HIGH>; + clkreq-gpios = <&pinctrl_ap 16 GPIO_ACTIVE_HIGH>; + }; + }; + +... diff --git a/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi b/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi index bd0e9c0b5696fa..bb89ca3e5df7b9 100644 --- a/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi +++ b/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi @@ -12,3 +12,120 @@ * now. */ #include "t8010-ipad6.dtsi" + +/ { + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + pcie0_nvmmu_window: nvmmu-window@8bee00000 { + reg = <0x8 0xbee00000 0x0 0x01200000>; + no-map; + }; + }; + + soc { + pcie0_dart0: iommu@601008000 { + compatible = "apple,t8010-dart"; + reg = <0x6 0x01008000 0x0 0x4000>; + #iommu-cells = <1>; + interrupt-parent = <&aic>; + interrupts = ; + power-domains = <&ps_pcie>; + pcie-dart; + }; + + pcie0: pcie@610000000 { + compatible = "apple,t8010-pcie"; + device_type = "pci"; + status = "okay"; + + reg = <0x6 0x10000000 0x0 0x1000000>, + <0x6 0x00000000 0x0 0x8000>, + <0x6 0x00008000 0x0 0x4000>, + <0x6 0x0a000000 0x0 0x40000>, + <0x6 0x01000000 0x0 0x4000>, + <0x6 0x02000000 0x0 0x4000>, + <0x6 0x03000000 0x0 0x4000>, + <0x6 0x04000000 0x0 0x4000>, + <0x6 0x01004000 0x0 0x4000>, + <0x6 0x00010000 0x0 0x8000>; + reg-names = "config", "phy0", "phy1", "phy2", + "port0", "port1", "port2", "port3", + "nvmmu0", "pcieclk-postup"; + + interrupt-parent = <&aic>; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + + msi-controller; + msi-parent = <&pcie0>; + apple,msi-doorbell = <0xbffff000>; + apple,enabled-ports = <0x1>; + apple,nvmmu-iova = <0xbc000000>; + memory-region = <&pcie0_nvmmu_window>; + + iommu-map = <0x100 &pcie0_dart0 0 1>; + iommu-map-mask = <0xff00>; + + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + interrupt-controller; + bus-range = <0x00 0x0f>; + ranges = <0x03000000 0x0 0xc0000000 + 0x7 
0xc0000000 0x0 0x40000000>; + + clocks = <&clkref>, <&clkref>, <&clkref>; + clock-names = "core", "aux", "ref"; + power-domains = <&ps_pcie>, <&ps_pcie_aux>, + <&ps_pcie_ref>; + power-domain-names = "core", "aux", "ref"; + + reset-gpios = <&pinctrl_ap 12 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 13 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 14 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 15 GPIO_ACTIVE_HIGH>; + clkreq-gpios = <&pinctrl_ap 16 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 17 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 18 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 19 GPIO_ACTIVE_HIGH>; + }; + }; +}; diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index f86262b11416d1..a2ba9de4375878 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -315,6 +315,17 @@ config APPLE_DART Say Y here if you are using an Apple SoC. +config APPLE_T8010_DART + tristate "Apple T8010 legacy DART IOMMU support" + depends on ARCH_APPLE || COMPILE_TEST + select IOMMU_API + help + Support for the older DART layout used by Apple A10/T8010 PCIe. + This covers the pre-M1 register layout where TTBRs start at 0x40 + and the PCIe aperture is offset into the device IOVA space. + Enable this when bringing up H9P/Apple NVMe storage on T8010 + devices that cannot use the newer apple-dart register layout. 
+ config S390_IOMMU def_bool y if S390 && PCI depends on S390 && PCI diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 0275821f4ef985..dc0ef895c74c0c 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -36,4 +36,5 @@ obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o +obj-$(CONFIG_APPLE_T8010_DART) += apple-t8010-dart.o obj-$(CONFIG_IOMMU_DEBUG_PAGEALLOC) += iommu-debug-pagealloc.o diff --git a/drivers/iommu/apple-t8010-dart.c b/drivers/iommu/apple-t8010-dart.c new file mode 100644 index 00000000000000..9d927a4e46593a --- /dev/null +++ b/drivers/iommu/apple-t8010-dart.c @@ -0,0 +1,680 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DART IOMMU on Apple T8010/A10 SoCs + * + * Copyright (C) 2020 Corellium LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DART_TLB_OP 0x0000 +#define DART_TLB_OP_FLUSH 0x00000002 +#define DART_TLB_OP_SID_SHIFT 8 +#define DART_TLB_OP_SID(sid4) (1 << ((sid4) + 8)) +#define DART_TLB_OP_BUSY BIT(3) +#define DART_CONFIG 0x000C +#define DART_CONFIG_TXEN(sid4) (1 << ((sid4) * 8 + 7)) +#define DART_ERROR_STATUS 0x0010 +#define DART_ERROR_AXI_REQ0 0x0014 +#define DART_ERROR_AXI_REQ1 0x0018 +#define DART_ERROR_ADDRESS 0x001C +#define DART_DIAG_CONFIG 0x0020 +#define DART_UNKNOWN_24 0x0024 +#define DART_SID_REMAP 0x0028 +#define DART_UNKNOWN_2C 0x002C +#define DART_FETCH_CONFIG 0x0030 +#define DART_PERF_CONFIG 0x0078 +#define DART_TLB_MISS 0x007C +#define DART_TLB_WAIT 0x0080 +#define DART_TLB_HIT 0x0084 +#define DART_ST_MISS 0x0088 +#define DART_ST_WAIT 0x008C +#define DART_ST_HIT 0x0090 +#define DART_TTBR(sid4, l1idx4) (0x0040 + 16 * (sid4) + 4 * (l1idx4)) +#define DART_TTBR_VALID BIT(31) +#define DART_TTBR_MASK 0x00FFFFFF +#define DART_TLB_STATUS 0x1000 +#define DART_TLB_UNKNOWN(idx) (0x1004 + 
4 * (idx)) +#define DART_STT_PA_DATA(idx) (0x2000 + 4 * (idx)) +#define DART_STT_PA_DATA_COUNT 1024 +#define DART_SMMU_TLB_CFG 0x3000 +#define DART_SMMU_TLB_DATA_RD 0x3100 +#define DART_SMMU_TLB_DATA_RD_COUNT 4 +#define DART_DATA_DEBUG_IDX 0x3120 +#define DART_DATA_DEBUG_CNTL 0x3124 +#define DART_DATA_DEBUG_CNTL_READ BIT(0) +#define DART_DATA_DEBUG_CNTL_BUSY BIT(2) +#define DART_TLB_TAG(idx) (0x3800 + 4 * (idx)) +#define DART_TLB_TAG_COUNT 128 + +#define DART_PTE_STATE_MASK 3 +#define DART_PTE_STATE_INVALID 0 +#define DART_PTE_STATE_NEXT 3 +#define DART_PTE_STATE_VALID 3 +#define DART_PTE_ADDR_MASK 0xFFFFFF000ull + +#define DART_NUM_SID 4 +#define DART_PAGE_SHIFT 12 + +#define DART_PAGE_SIZE BIT(DART_PAGE_SHIFT) +#define DART_PAGE_MASK (DART_PAGE_SIZE - 1ul) + +struct apple_t8010_dart_iommu { + struct device *dev; + struct iommu_device iommu; + void __iomem *base; + int is_init; + int is_pcie; + u64 iova_offset; + u64 **l2dma[DART_NUM_SID]; + u64 *l1dma[DART_NUM_SID]; + /* Protects DART page table allocation and register updates. 
*/ + spinlock_t dart_lock; +}; + +struct apple_t8010_dart_iommu_domain { + struct iommu_domain domain; + struct apple_t8010_dart_iommu *iommu; + int sid; +}; + +struct apple_t8010_dart_iommu_devdata { + struct apple_t8010_dart_iommu *iommu; + u32 sid; +}; + +static irqreturn_t apple_t8010_dart_iommu_irq(int irq, void *dev_id) +{ + struct apple_t8010_dart_iommu *im = dev_id; + u32 status, axi_req[2], addr, tlbstat; + + status = readl(im->base + DART_ERROR_STATUS); + tlbstat = readl(im->base + DART_TLB_STATUS); + axi_req[0] = readl(im->base + DART_ERROR_AXI_REQ0); + axi_req[1] = readl(im->base + DART_ERROR_AXI_REQ1); + addr = readl(im->base + DART_ERROR_ADDRESS); + + writel(status, im->base + DART_ERROR_STATUS); + writel(tlbstat, im->base + DART_TLB_STATUS); + + dev_err(im->dev, + "STATUS %08x AXI_REQ %08x:%08x ADDR %08x TLBSTAT %08x\n", + status, axi_req[0], axi_req[1], addr, tlbstat); + + return IRQ_HANDLED; +} + +static void apple_t8010_dart_tlb_flush(struct apple_t8010_dart_iommu *im, + u32 sidmask, int need_lock) +{ + unsigned long flags; + u32 status; + + if (need_lock) + spin_lock_irqsave(&im->dart_lock, flags); + writel(DART_TLB_OP_FLUSH | (sidmask << DART_TLB_OP_SID_SHIFT), + im->base + DART_TLB_OP); + while (1) { + status = readl(im->base + DART_TLB_OP); + if (!(status & DART_TLB_OP_BUSY)) + break; + } + if (need_lock) + spin_unlock_irqrestore(&im->dart_lock, flags); +} + +static u64 *apple_t8010_dart_get_pte(struct apple_t8010_dart_iommu *im, u32 sid, + u64 iova, int optional, + unsigned long *flags) +{ + unsigned int i, l1idx, l1base, l2idx, npgs, npg; + u64 phys, **l1pt, *l1dma, *l2dma; + void *dmava, *ptva; + dma_addr_t dmah; + + if (im->is_pcie) + sid = 0; + + if (!im->l1dma[sid]) { + spin_unlock_irqrestore(&im->dart_lock, *flags); + ptva = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + DART_PAGE_SHIFT + 2 - + PAGE_SHIFT); + dmava = dma_alloc_attrs(im->dev, DART_PAGE_SIZE * 4, &dmah, + GFP_KERNEL | __GFP_ZERO, + DMA_ATTR_WRITE_COMBINE); + 
spin_lock_irqsave(&im->dart_lock, *flags); + if (!im->l1dma[sid]) { + if (!ptva || !dmava) { + if (ptva) + free_pages((unsigned long)ptva, + DART_PAGE_SHIFT + 2 - + PAGE_SHIFT); + else + dev_err(im->dev, + "failed to allocate shadow L1 pagetable\n"); + if (dmava) + dma_free_attrs(im->dev, + DART_PAGE_SIZE * 4, + dmava, dmah, + DMA_ATTR_WRITE_COMBINE); + else + dev_err(im->dev, + "failed to allocate uncached L1 pagetable\n"); + return NULL; + } + im->l2dma[sid] = ptva; + im->l1dma[sid] = dmava; + phys = dmah; + for (i = 0; i < 4; i++) + writel((((phys >> DART_PAGE_SHIFT) + i) & + DART_TTBR_MASK) | + DART_TTBR_VALID, + im->base + DART_TTBR(sid, i)); + } else { + if (ptva) + free_pages((unsigned long)ptva, + DART_PAGE_SHIFT + 2 - PAGE_SHIFT); + if (dmava) + dma_free_attrs(im->dev, DART_PAGE_SIZE * 4, + dmava, dmah, + DMA_ATTR_WRITE_COMBINE); + } + } + + l1pt = im->l2dma[sid]; + l1idx = (iova >> 21) & 0x7FF; + + if (!l1pt[l1idx]) { + if (optional) + return NULL; + if (DART_PAGE_SHIFT < PAGE_SHIFT) + npgs = PAGE_SHIFT - DART_PAGE_SHIFT; + else + npgs = 0; + spin_unlock_irqrestore(&im->dart_lock, *flags); + dmava = dma_alloc_attrs(im->dev, DART_PAGE_SIZE << npgs, &dmah, + GFP_KERNEL | __GFP_ZERO, + DMA_ATTR_WRITE_COMBINE); + spin_lock_irqsave(&im->dart_lock, *flags); + if (!l1pt[l1idx]) { + if (!dmava) { + dev_err(im->dev, + "failed to allocate uncached L2 pagetable\n"); + return NULL; + } + npg = 1 << npgs; + phys = dmah; + l1dma = im->l1dma[sid]; + l1base = (l1idx >> npgs) << npgs; + for (i = 0; i < npg; i++) { + l1pt[l1base + i] = + dmava + (i << DART_PAGE_SHIFT); + l1dma[l1base + i] = + ((phys + (i << DART_PAGE_SHIFT)) & + DART_PTE_ADDR_MASK) | + DART_PTE_STATE_NEXT; + } + } else if (dmava) + dma_free_attrs(im->dev, DART_PAGE_SIZE << npgs, dmava, + dmah, DMA_ATTR_WRITE_COMBINE); + } + + l2dma = l1pt[l1idx]; + l2idx = (iova >> 12) & 0x1FF; + return &l2dma[l2idx]; +} + +static void apple_t8010_dart_iommu_enable(struct apple_t8010_dart_iommu *im, + u32 sid) +{ + u32 
val; + + val = readl(im->base + DART_CONFIG); + if (val & DART_CONFIG_TXEN(sid)) + return; + writel(val | DART_CONFIG_TXEN(sid), im->base + DART_CONFIG); + if (!(readl(im->base + DART_CONFIG) & DART_CONFIG_TXEN(sid))) + dev_err(im->dev, "failed to enable SID %d: 0x%08x.\n", sid, + readl(im->base + DART_CONFIG)); +} + +static bool apple_t8010_dart_iommu_capable(struct device *dev, + enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + return false; + } +} + +static struct apple_t8010_dart_iommu_domain * +to_apple_t8010_dart_iommu_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct apple_t8010_dart_iommu_domain, domain); +} + +static struct iommu_domain * +apple_t8010_dart_iommu_domain_alloc_paging(struct device *dev) +{ + struct apple_t8010_dart_iommu_domain *idom; + + idom = kzalloc_obj(*idom, GFP_KERNEL); + if (!idom) + return NULL; + + idom->domain.pgsize_bitmap = SZ_4K; + idom->sid = -1; + + return &idom->domain; +} + +static void apple_t8010_dart_iommu_domain_free(struct iommu_domain *domain) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + + kfree(idom); +} + +static int apple_t8010_dart_iommu_attach_device(struct iommu_domain *domain, + struct device *dev, + struct iommu_domain *old) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + struct apple_t8010_dart_iommu_devdata *idd; + struct apple_t8010_dart_iommu *im; + unsigned long flags; + u32 sid, i, j; + + idd = dev_iommu_priv_get(dev); + if (!idd) + return -ENODEV; + im = idd->iommu; + + if (idom->iommu && idom->iommu != im) { + dev_err(dev, + "different DART already assigned to IOMMU domain.\n"); + return -EINVAL; + } + + if (!idom->iommu) { + idom->iommu = im; + if (im->is_pcie) { + idom->domain.geometry.aperture_start = 0x80000000ul; + idom->domain.geometry.aperture_end = 0xBBFFFFFFul; + } else { + idom->domain.geometry.aperture_start = 
0x00004000ul; + idom->domain.geometry.aperture_end = 0xFFFFFFFFul; + } + idom->domain.geometry.force_aperture = true; + } + + sid = im->is_pcie ? 0 : idd->sid; + if (idom->sid >= 0 && idom->sid != sid) { + dev_err(dev, + "multiple SIDs mapped to the same IOMMU domain.\n"); + return -EEXIST; + } + idom->sid = sid; + + spin_lock_irqsave(&im->dart_lock, flags); + + if (!im->is_init) { + im->is_init = 1; + writel(0x0020FFFC, im->base + DART_UNKNOWN_24); + writel(0x00000000, im->base + DART_UNKNOWN_2C); + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + writel(0x00000000, im->base + DART_TTBR(i, j)); + writel(0x000E0303, im->base + DART_FETCH_CONFIG); + writel(0x00000100, im->base + DART_DIAG_CONFIG); + for (i = 0; i < 6; i++) + writel(0x00000000, im->base + DART_TLB_UNKNOWN(i)); + writel(0x03F3FFFF, im->base + DART_TLB_STATUS); + + apple_t8010_dart_tlb_flush(im, 15, 0); + } + apple_t8010_dart_iommu_enable(im, sid); + + spin_unlock_irqrestore(&im->dart_lock, flags); + + return 0; +} + +static int apple_t8010_dart_iommu_map_pages(struct iommu_domain *domain, + unsigned long iova, + phys_addr_t paddr, size_t pgsize, + size_t pgcount, int prot, gfp_t gfp, + size_t *mapped) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + struct apple_t8010_dart_iommu *im = idom->iommu; + u64 len = (u64)pgsize * pgcount; + u64 end = iova + len - 1; + unsigned int i, npg; + unsigned long flags; + u64 *ptep; + int ret = 0; + + if (!im || idom->sid < 0) + return -EINVAL; + + if (!len || end < iova || iova < domain->geometry.aperture_start || + end > domain->geometry.aperture_end) + return -EINVAL; + + npg = (len + DART_PAGE_MASK) >> DART_PAGE_SHIFT; + + if (iova < im->iova_offset) + return -EINVAL; + iova -= im->iova_offset; + + spin_lock_irqsave(&im->dart_lock, flags); + for (i = 0; i < npg; i++) { + ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 0, &flags); + if (!ptep) { + ret = -ENOMEM; + break; + } + *ptep = (paddr & 
DART_PTE_ADDR_MASK) | DART_PTE_STATE_VALID; + iova += DART_PAGE_SIZE; + paddr += DART_PAGE_SIZE; + if (mapped) + *mapped += DART_PAGE_SIZE; + } + spin_unlock_irqrestore(&im->dart_lock, flags); + + return ret; +} + +static phys_addr_t +apple_t8010_dart_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + struct apple_t8010_dart_iommu *im = idom->iommu; + unsigned long flags; + u64 *ptep, result = 0; + + if (idom->sid < 0) + return 0; + + if (iova < im->iova_offset) + return 0; + iova -= im->iova_offset; + + spin_lock_irqsave(&im->dart_lock, flags); + ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 1, &flags); + if (ptep) + result = *ptep; + spin_unlock_irqrestore(&im->dart_lock, flags); + + if (result & DART_PTE_STATE_MASK) + result = (result & DART_PTE_ADDR_MASK) | + (iova & DART_PAGE_MASK); + return result; +} + +static size_t apple_t8010_dart_iommu_unmap_pages(struct iommu_domain *domain, + unsigned long iova, + size_t pgsize, + size_t pgcount, + struct iommu_iotlb_gather *gather) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + struct apple_t8010_dart_iommu *im = idom->iommu; + size_t size = pgsize * pgcount; + unsigned int i, npg = (size + DART_PAGE_MASK) >> DART_PAGE_SHIFT; + unsigned long flags; + u64 *ptep; + + if (idom->sid < 0) + return 0; + + if (iova < im->iova_offset) + return 0; + iova -= im->iova_offset; + + spin_lock_irqsave(&im->dart_lock, flags); + for (i = 0; i < npg; i++) { + ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 1, &flags); + if (ptep) + *ptep = 0; + iova += DART_PAGE_SIZE; + } + spin_unlock_irqrestore(&im->dart_lock, flags); + + return size; +} + +static void apple_t8010_dart_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + + if (!idom->iommu) + return; + + if (idom->sid >= 
0) + apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); +} + +static void apple_t8010_dart_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + + if (!idom->iommu) + return; + + if (idom->sid >= 0) + apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); +} + +static int apple_t8010_dart_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, + size_t size) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + + if (!idom->iommu) + return 0; + + if (idom->sid >= 0) + apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); + return 0; +} + +static const struct iommu_ops apple_t8010_dart_iommu_ops; + +static struct iommu_device * +apple_t8010_dart_iommu_probe_device(struct device *dev) +{ + struct apple_t8010_dart_iommu_devdata *idd = dev_iommu_priv_get(dev); + + if (!idd || !idd->iommu) + return ERR_PTR(-ENODEV); + + device_link_add(dev, idd->iommu->dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); + + return &idd->iommu->iommu; +} + +static void apple_t8010_dart_iommu_release_device(struct device *dev) +{ + struct apple_t8010_dart_iommu_devdata *idd = dev_iommu_priv_get(dev); + + dev_iommu_priv_set(dev, NULL); + kfree(idd); +} + +static struct iommu_group * +apple_t8010_dart_iommu_device_group(struct device *dev) +{ +#ifdef CONFIG_PCI + if (dev_is_pci(dev)) + return pci_device_group(dev); +#endif + + return generic_device_group(dev); +} + +static int apple_t8010_dart_iommu_of_xlate(struct device *dev, + const struct of_phandle_args *args) +{ + struct platform_device *iommu_dev; + struct apple_t8010_dart_iommu_devdata *data; + + data = kzalloc_obj(*data, GFP_KERNEL); + if (!data) + return -ENOMEM; + + iommu_dev = of_find_device_by_node(args->np); + if (!iommu_dev) { + kfree(data); + return -ENODEV; + } + + data->iommu = platform_get_drvdata(iommu_dev); + if 
(!data->iommu) { + platform_device_put(iommu_dev); + kfree(data); + return -ENODEV; + } + + data->sid = args->args[0]; + dev_iommu_priv_set(dev, data); + + platform_device_put(iommu_dev); + + return 0; +} + +static const struct iommu_ops apple_t8010_dart_iommu_ops = { + .capable = apple_t8010_dart_iommu_capable, + .of_xlate = apple_t8010_dart_iommu_of_xlate, + .domain_alloc_paging = apple_t8010_dart_iommu_domain_alloc_paging, + .probe_device = apple_t8010_dart_iommu_probe_device, + .release_device = apple_t8010_dart_iommu_release_device, + .device_group = apple_t8010_dart_iommu_device_group, + .owner = THIS_MODULE, + .default_domain_ops = + &(const struct iommu_domain_ops){ + .attach_dev = apple_t8010_dart_iommu_attach_device, + .map_pages = apple_t8010_dart_iommu_map_pages, + .unmap_pages = apple_t8010_dart_iommu_unmap_pages, + .iova_to_phys = apple_t8010_dart_iommu_iova_to_phys, + .flush_iotlb_all = + apple_t8010_dart_iommu_flush_iotlb_all, + .iotlb_sync = apple_t8010_dart_iommu_iotlb_sync, + .iotlb_sync_map = apple_t8010_dart_iommu_iotlb_sync_map, + .free = apple_t8010_dart_iommu_domain_free, + }, +}; + +static int apple_t8010_dart_iommu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *node = dev->of_node; + struct apple_t8010_dart_iommu *im; + struct resource *r; + int ret = 0, irq; + + im = devm_kzalloc(dev, sizeof(struct apple_t8010_dart_iommu), + GFP_KERNEL); + if (!im) + return -ENOMEM; + + im->dev = &pdev->dev; + platform_set_drvdata(pdev, im); + + spin_lock_init(&im->dart_lock); + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + return dev_err_probe(dev, ret, "failed to set DMA mask\n"); + + if (of_property_read_bool(pdev->dev.of_node, "pcie-dart")) { + im->is_pcie = 1; + im->iova_offset = 0x80000000ul; + } + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + im->base = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(im->base)) + return PTR_ERR(im->base); + + irq = 
platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(&pdev->dev, irq, apple_t8010_dart_iommu_irq, 0, + dev_name(&pdev->dev), im); + if (ret < 0) + return ret; + + ret = iommu_device_sysfs_add(&im->iommu, dev, NULL, node->name); + if (ret) + return ret; + + ret = iommu_device_register(&im->iommu, &apple_t8010_dart_iommu_ops, + dev); + if (ret) + goto err_sysfs_remove; + + return 0; + +err_sysfs_remove: + iommu_device_sysfs_remove(&im->iommu); + return ret; +} + +static void apple_t8010_dart_iommu_remove(struct platform_device *pdev) +{ + struct apple_t8010_dart_iommu *im = platform_get_drvdata(pdev); + + iommu_device_unregister(&im->iommu); + iommu_device_sysfs_remove(&im->iommu); +} + +static const struct of_device_id apple_t8010_dart_iommu_match[] = { + { .compatible = "apple,t8010-dart" }, + {}, +}; +MODULE_DEVICE_TABLE(of, apple_t8010_dart_iommu_match); + +static struct platform_driver apple_t8010_dart_iommu_driver = { + .probe = apple_t8010_dart_iommu_probe, + .remove = apple_t8010_dart_iommu_remove, + .driver = { + .name = "apple-t8010-dart", + .of_match_table = apple_t8010_dart_iommu_match, + }, +}; +module_platform_driver(apple_t8010_dart_iommu_driver); + +MODULE_DESCRIPTION("Apple T8010 legacy DART IOMMU driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b5f84620067899..2c443efa450c28 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -5,7 +5,9 @@ */ #include +#include #include +#include #include #include #include @@ -13,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +23,8 @@ #include #include #include +#include +#include #include #include #include @@ -43,6 +48,22 @@ #define NVME_MAX_BYTES SZ_8M #define NVME_MAX_NR_DESCRIPTORS 5 +#define NVME_CMD_APPLE_H9P_FLATDMA BIT(5) + +#define APPLE_H9P_REG_INIT 0x1800 +#define APPLE_H9P_REG_INIT_REGULAR 0 +#define APPLE_H9P_REG_SCRATCH_SIZE_REQ 0x1808 +#define 
APPLE_H9P_REG_SCRATCH_ALIGN_REQ 0x180c +#define APPLE_H9P_REG_SCRATCH_BASE_LO 0x1810 +#define APPLE_H9P_REG_SCRATCH_BASE_HI 0x1814 +#define APPLE_H9P_REG_SCRATCH_SIZE 0x1818 +#define APPLE_H9P_REG_CORE_MASK 0x1824 +#define APPLE_H9P_REG_LOG_SIZE 0x1828 +#define APPLE_H9P_REG_BOOT_STATE 0x1b18 +#define APPLE_H9P_REG_BOOT_STATE_MAGIC 0xbfbfbfbfu +#define APPLE_H9P_NVME_MAX_SECTORS \ + (APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE >> SECTOR_SHIFT) + /* * For data SGLs we support a single descriptors worth of SGL entries. * For PRPs, segments don't matter at all. @@ -278,6 +299,7 @@ MODULE_PARM_DESC(noacpi, "disable acpi bios quirks"); struct nvme_dev; struct nvme_queue; +struct apple_h9p_nvme; static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); static void nvme_delete_io_queues(struct nvme_dev *dev); @@ -317,6 +339,7 @@ struct nvme_dev { bool hmb; struct sg_table *hmb_sgt; mempool_t *dmavec_mempool; + struct apple_h9p_nvme *apple_h9p; /* shadow doorbell buffer support: */ __le32 *dbbuf_dbs; @@ -444,6 +467,7 @@ struct nvme_iod { size_t meta_total_len; struct dma_iova_state meta_dma_state; struct nvme_sgl_desc *meta_descriptor; + void *apple_h9p_req; }; static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) @@ -886,6 +910,345 @@ static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge, le32_to_cpu(sg_list[i].length), dir, attrs); } +struct apple_h9p_nvme_req { + u64 pages[APPLE_H9P_NVMMU_MAX_PAGES]; + unsigned int npages; +}; + +struct apple_h9p_nvme { + dma_addr_t scratch_dma; + u32 scratch_size; + struct apple_h9p_nvme_req req[APPLE_H9P_NVMMU_MAX_REQS]; + DECLARE_BITMAP(used_req, APPLE_H9P_NVMMU_MAX_REQS); + unsigned int last_req; + /* Protects the FlatDMA request-slot bitmap. 
*/ + spinlock_t req_lock; +}; + +static bool nvme_pci_is_apple_h9p(struct pci_dev *pdev) +{ + return pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2002; +} + +static int nvme_pci_apple_h9p_find_scratch(struct nvme_dev *dev, + u32 scratch_size_req, + u32 scratch_align_req) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + struct pci_dev *pdev = to_pci_dev(dev->dev); + struct pci_host_bridge *bridge; + struct device_node *pcie_np; + struct device_node *mem_np; + struct resource res; + resource_size_t size; + u32 iova; + int ret; + + bridge = pci_find_host_bridge(pdev->bus); + if (!bridge) + return -ENODEV; + + pcie_np = bridge->dev.parent ? bridge->dev.parent->of_node : NULL; + if (!pcie_np) + pcie_np = bridge->dev.of_node; + if (!pcie_np) + return -ENODEV; + + mem_np = of_parse_phandle(pcie_np, "memory-region", 0); + if (!mem_np) + return -ENODEV; + + ret = of_address_to_resource(mem_np, 0, &res); + if (ret) + goto out_put_mem; + + ret = of_property_read_u32(pcie_np, "apple,nvmmu-iova", &iova); + if (ret) + goto out_put_mem; + + if (!scratch_size_req || scratch_size_req == U32_MAX) { + ret = -EINVAL; + goto out_put_mem; + } + if (!scratch_align_req || scratch_align_req == U32_MAX) + scratch_align_req = 1; + + size = resource_size(&res); + if (size < scratch_size_req || size > U32_MAX) { + ret = -ENOSPC; + goto out_put_mem; + } + if (!IS_ALIGNED(res.start, scratch_align_req) || + !IS_ALIGNED(iova, scratch_align_req)) { + ret = -EINVAL; + goto out_put_mem; + } + + h9p->scratch_dma = iova; + h9p->scratch_size = size; + dev_dbg(dev->dev, "Apple H9P NVMe scratch %#x@%pa as dma %#llx\n", + h9p->scratch_size, &res.start, (u64)h9p->scratch_dma); + +out_put_mem: + of_node_put(mem_np); + return ret; +} + +static int nvme_pci_apple_h9p_preinit(struct nvme_dev *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + u32 csts, core_mask, log_size; + u32 scratch_size, scratch_align; + int ret; + + if (!dev->apple_h9p) + return 0; + + if (pci_write_config_byte(pdev, 
PCI_LATENCY_TIMER, 0x40) != + PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x17c, 0x10081008) != + PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x18c, 0) != PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x188, 0x40550000) != + PCIBIOS_SUCCESSFUL) + return -EIO; + + if (readl(dev->bar + APPLE_H9P_REG_BOOT_STATE) == + APPLE_H9P_REG_BOOT_STATE_MAGIC) + dev_dbg(dev->dev, "Apple H9P NVMe boot-state magic present\n"); + + core_mask = readl(dev->bar + APPLE_H9P_REG_CORE_MASK); + log_size = readl(dev->bar + APPLE_H9P_REG_LOG_SIZE); + scratch_size = readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE_REQ); + scratch_align = readl(dev->bar + APPLE_H9P_REG_SCRATCH_ALIGN_REQ); + + writel(0, dev->bar + NVME_REG_CC); + ret = readl_poll_timeout(dev->bar + NVME_REG_CSTS, csts, + !(csts & (NVME_CSTS_RDY | NVME_CSTS_CFS)), + 1000, 2000000); + if (ret) + return ret; + + ret = nvme_pci_apple_h9p_find_scratch(dev, scratch_size, + scratch_align); + if (ret) + return ret; + + dev_dbg(dev->dev, + "Apple H9P NVMe core_mask=%#x log_size=%#x scratch_req=%#x align=%#x\n", + core_mask, log_size, scratch_size, scratch_align); + return 0; +} + +static int nvme_pci_apple_h9p_prepare_enable(struct nvme_dev *dev) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + + if (!h9p) + return 0; + if (!h9p->scratch_size) + return -EINVAL; + + writel(APPLE_H9P_REG_INIT_REGULAR, dev->bar + APPLE_H9P_REG_INIT); + writel(lower_32_bits(h9p->scratch_dma), + dev->bar + APPLE_H9P_REG_SCRATCH_BASE_LO); + writel(upper_32_bits(h9p->scratch_dma), + dev->bar + APPLE_H9P_REG_SCRATCH_BASE_HI); + writel(h9p->scratch_size, dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); + readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); + + return 0; +} + +static blk_status_t nvme_pci_apple_h9p_alloc_req(struct nvme_dev *dev, + struct apple_h9p_nvme_req **req, + unsigned int *tag) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + unsigned long flags; + unsigned int idx; + + spin_lock_irqsave(&h9p->req_lock, flags); + idx = 
find_next_zero_bit(h9p->used_req, APPLE_H9P_NVMMU_MAX_REQS,
+				 (h9p->last_req + 1) %
+				 APPLE_H9P_NVMMU_MAX_REQS);
+	if (idx >= APPLE_H9P_NVMMU_MAX_REQS)	/* nothing free above last_req: wrap */
+		idx = find_first_zero_bit(h9p->used_req,
+					  APPLE_H9P_NVMMU_MAX_REQS);
+	if (idx >= APPLE_H9P_NVMMU_MAX_REQS) {
+		spin_unlock_irqrestore(&h9p->req_lock, flags);
+		dev_dbg_ratelimited(dev->dev,
+				    "Apple H9P NVMe FlatDMA slots exhausted\n");
+		return BLK_STS_RESOURCE;
+	}
+
+	h9p->last_req = idx;
+	__set_bit(idx, h9p->used_req);
+	spin_unlock_irqrestore(&h9p->req_lock, flags);
+	/* The slot now belongs to the caller; reset it outside the lock. */
+	*req = &h9p->req[idx];
+	*tag = idx;
+	(*req)->npages = 0;
+	memset((*req)->pages, 0, sizeof((*req)->pages));
+	return BLK_STS_OK;
+}
+/*
+ * Return a FlatDMA slot to the pool.
+ *
+ * The tag is recovered from the slot's position in h9p->req[]; the NVMMU
+ * mapping for that tag is replaced with an empty page list before the bit in
+ * used_req is cleared under req_lock.  A NULL h9p/req or a pointer that does
+ * not index into the slot array is ignored.
+ */
+static void nvme_pci_apple_h9p_free_req(struct nvme_dev *dev,
+					struct apple_h9p_nvme_req *req)
+{
+	struct apple_h9p_nvme *h9p = dev->apple_h9p;
+	unsigned long flags;
+	unsigned int tag;
+
+	if (!h9p || !req)
+		return;
+
+	tag = req - h9p->req;
+	if (tag >= APPLE_H9P_NVMMU_MAX_REQS)
+		return;
+	/* Zero pages and no IOVA out-pointer: drop the mapping for this tag. */
+	apple_h9p_pcie_map_nvmmu(dev->dev, tag, NULL, 0, NULL);
+	req->npages = 0;
+
+	spin_lock_irqsave(&h9p->req_lock, flags);
+	__clear_bit(tag, h9p->used_req);
+	spin_unlock_irqrestore(&h9p->req_lock, flags);
+}
+/*
+ * Undo nvme_pci_apple_h9p_map_data() for @req.
+ *
+ * Unmaps every recorded DMA vector, returns the vector array to the mempool,
+ * releases the FlatDMA slot and clears the FlatDMA flag in the command.
+ *
+ * Returns false when the request holds no FlatDMA slot, so nvme_unmap_data()
+ * continues with the regular unmap path; true once the H9P state is released.
+ */
+static bool nvme_pci_apple_h9p_unmap_data(struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+	struct nvme_dev *dev = nvmeq->dev;
+	unsigned int i;
+
+	if (!iod->apple_h9p_req)
+		return false;
+
+	for (i = 0; i < iod->nr_dma_vecs; i++)
+		dma_unmap_page(dev->dev, iod->dma_vecs[i].addr,
+			       iod->dma_vecs[i].len, rq_dma_dir(req));
+	if (iod->dma_vecs) {
+		mempool_free(iod->dma_vecs, dev->dmavec_mempool);
+		iod->dma_vecs = NULL;
+	}
+	iod->nr_dma_vecs = 0;
+
+	nvme_pci_apple_h9p_free_req(dev, iod->apple_h9p_req);
+	iod->apple_h9p_req = NULL;
+	iod->cmd.common.flags &= ~NVME_CMD_APPLE_H9P_FLATDMA;
+	return true;
+}
+/*
+ * Map the data of @req through an NVMMU FlatDMA slot.
+ *
+ * Reserves a slot, DMA-maps each bvec, builds the slot's page list from the
+ * bvecs' physical addresses and hands it to apple_h9p_pcie_map_nvmmu().  PRP1
+ * is set to the returned flat address plus the offset of the data within its
+ * first NVMMU page; PRP2 is zero.
+ *
+ * Returns BLK_STS_NOTSUPP when the device has no apple_h9p state,
+ * BLK_STS_IOERR for oversized or misaligned requests, BLK_STS_RESOURCE when
+ * no slot, vector array or DMA mapping is available.  Every failure after the
+ * slot is reserved is unwound through nvme_pci_apple_h9p_unmap_data().
+ */
+static blk_status_t nvme_pci_apple_h9p_map_data(struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; + struct apple_h9p_nvme_req *hreq; + struct req_iterator iter; + struct bio_vec bv; + dma_addr_t flatdma; + u64 phys, offs = 0; + unsigned int tag, npages = 0, consumed = 0; + unsigned int total = blk_rq_payload_bytes(req); + blk_status_t status; + int ret; + + if (!dev->apple_h9p) + return BLK_STS_NOTSUPP; + if (total > APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE) + return BLK_STS_IOERR; + + status = nvme_pci_apple_h9p_alloc_req(dev, &hreq, &tag); + if (status) + return status; + + iod->apple_h9p_req = hreq; + iod->dma_vecs = mempool_alloc(dev->dmavec_mempool, GFP_ATOMIC); + if (!iod->dma_vecs) { + status = BLK_STS_RESOURCE; + goto out_unmap; + } + + rq_for_each_bvec(bv, req, iter) { + dma_addr_t dma_addr; + unsigned int len = bv.bv_len; + + if (WARN_ON_ONCE(iod->nr_dma_vecs >= + blk_rq_nr_phys_segments(req))) { + status = BLK_STS_IOERR; + goto out_unmap; + } + + dma_addr = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0); + if (dma_mapping_error(dev->dev, dma_addr)) { + status = BLK_STS_RESOURCE; + goto out_unmap; + } + + iod->dma_vecs[iod->nr_dma_vecs].addr = dma_addr; + iod->dma_vecs[iod->nr_dma_vecs].len = len; + iod->nr_dma_vecs++; + + phys = page_to_phys(bv.bv_page) + bv.bv_offset; + if (!consumed) { + offs = phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1); + phys -= offs; + len += offs; + } else if (phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1)) { + dev_err_ratelimited(dev->dev, + "Apple H9P FlatDMA segment is not page-aligned: phys=%#llx\n", + phys); + status = BLK_STS_IOERR; + goto out_unmap; + } + + if (consumed + bv.bv_len != total && + (len & (APPLE_H9P_NVMMU_PAGE_SIZE - 1))) { + dev_err_ratelimited(dev->dev, + "Apple H9P FlatDMA segment length is not page-aligned: len=%#x\n", + len); + status = BLK_STS_IOERR; + goto out_unmap; + } + + while (len) { + if (npages >= APPLE_H9P_NVMMU_MAX_PAGES) { + status = BLK_STS_IOERR; + goto out_unmap; + } + + hreq->pages[npages++] = 
phys; + phys += APPLE_H9P_NVMMU_PAGE_SIZE; + len = len > APPLE_H9P_NVMMU_PAGE_SIZE ? + len - APPLE_H9P_NVMMU_PAGE_SIZE : 0; + } + + consumed += bv.bv_len; + } + + ret = apple_h9p_pcie_map_nvmmu(dev->dev, tag, hreq->pages, npages, + &flatdma); + if (ret) { + status = errno_to_blk_status(ret); + if (status == BLK_STS_NOTSUPP) + status = BLK_STS_IOERR; + goto out_unmap; + } + + hreq->npages = npages; + iod->total_len = total; + iod->cmd.common.flags |= NVME_CMD_APPLE_H9P_FLATDMA; + iod->cmd.common.dptr.prp1 = cpu_to_le64(flatdma + offs); + iod->cmd.common.dptr.prp2 = 0; + return BLK_STS_OK; + +out_unmap: + nvme_pci_apple_h9p_unmap_data(req); + return status; +} + static void nvme_unmap_metadata(struct request *req) { struct nvme_queue *nvmeq = req->mq_hctx->driver_data; @@ -932,6 +1295,9 @@ static void nvme_unmap_data(struct request *req) struct device *dma_dev = nvmeq->dev->dev; unsigned int attrs = 0; + if (nvmeq->dev->apple_h9p && nvme_pci_apple_h9p_unmap_data(req)) + return; + if (iod->flags & IOD_SINGLE_SEGMENT) { static_assert(offsetof(union nvme_data_ptr, prp1) == offsetof(union nvme_data_ptr, sgl.addr)); @@ -1246,6 +1612,12 @@ static blk_status_t nvme_map_data(struct request *req) struct blk_dma_iter iter; blk_status_t ret; + if (dev->apple_h9p) { + ret = nvme_pci_apple_h9p_map_data(req); + if (ret != BLK_STS_NOTSUPP) + return ret; + } + /* * Try to skip the DMA iterator for single segment requests, as that * significantly improves performances for small I/O sizes. 
@@ -1401,6 +1773,8 @@ static blk_status_t nvme_prep_rq(struct request *req) iod->total_len = 0; iod->meta_total_len = 0; iod->nr_dma_vecs = 0; + iod->dma_vecs = NULL; + iod->apple_h9p_req = NULL; ret = nvme_setup_cmd(req->q->queuedata, req); if (ret) @@ -1450,6 +1824,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, ret = nvme_prep_rq(req); if (unlikely(ret)) return ret; + spin_lock(&nvmeq->sq_lock); nvme_sq_copy_cmd(nvmeq, &iod->cmd); nvme_write_sq_db(nvmeq, bd->last); @@ -2386,6 +2761,10 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ); lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ); + result = nvme_pci_apple_h9p_prepare_enable(dev); + if (result) + return result; + result = nvme_enable_ctrl(&dev->ctrl); if (result) return result; @@ -2942,6 +3321,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) unsigned int nr_io_queues; unsigned long size; int result; + bool reuse_single_vector = false; /* * Sample the module parameters once at reset time so that we have @@ -2986,7 +3366,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) result = nvme_setup_io_queues_trylock(dev); if (result) return result; - if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) + reuse_single_vector = dev->apple_h9p && + (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR) && + pdev->msi_enabled && + test_bit(NVMEQ_ENABLED, &adminq->flags); + + if (!reuse_single_vector && + test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) pci_free_irq(pdev, 0, adminq); if (dev->cmb_use_sqes) { @@ -3014,19 +3400,27 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) retry: /* Deregister the admin queue's interrupt */ - if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) + if (!reuse_single_vector && + test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) pci_free_irq(pdev, 0, adminq); /* * If we enable msix early due to not intx, disable it again before * setting up the full range we need. 
*/ - pci_free_irq_vectors(pdev); + if (reuse_single_vector) { + result = 1; + dev->io_queues[HCTX_TYPE_DEFAULT] = 1; + dev->io_queues[HCTX_TYPE_READ] = 0; + dev->io_queues[HCTX_TYPE_POLL] = 0; + } else { + pci_free_irq_vectors(pdev); - result = nvme_setup_irqs(dev, nr_io_queues); - if (result <= 0) { - result = -EIO; - goto out_unlock; + result = nvme_setup_irqs(dev, nr_io_queues); + if (result <= 0) { + result = -EIO; + goto out_unlock; + } } dev->num_vecs = result; @@ -3039,10 +3433,14 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * path to scale better, even if the receive path is limited by the * number of interrupts. */ - result = queue_request_irq(adminq); - if (result) - goto out_unlock; - set_bit(NVMEQ_ENABLED, &adminq->flags); + if (reuse_single_vector) { + result = 0; + } else { + result = queue_request_irq(adminq); + if (result) + goto out_unlock; + set_bit(NVMEQ_ENABLED, &adminq->flags); + } mutex_unlock(&dev->shutdown_lock); result = nvme_create_io_queues(dev); @@ -3249,8 +3647,15 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n", dev->q_depth); } + if (dev->apple_h9p && + dev->q_depth > APPLE_H9P_NVMMU_MAX_REQS) + dev->q_depth = APPLE_H9P_NVMMU_MAX_REQS; dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */ + result = nvme_pci_apple_h9p_preinit(dev); + if (result) + goto free_irq; + nvme_map_cmb(dev); pci_save_state(pdev); @@ -3373,6 +3778,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) nvme_free_tagset(dev); put_device(dev->dev); kfree(dev->queues); + kfree(dev->apple_h9p); kfree(dev); } @@ -3707,6 +4113,14 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, quirks |= qentry->enabled_quirks; quirks &= ~qentry->disabled_quirks; } + if (nvme_pci_is_apple_h9p(pdev)) { + quirks |= NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_SHARED_TAGS; + dev->apple_h9p = kzalloc_node(sizeof(*dev->apple_h9p), + GFP_KERNEL, node); + if (!dev->apple_h9p) + goto 
out_put_device; + spin_lock_init(&dev->apple_h9p->req_lock); + } ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); if (ret) @@ -3726,6 +4140,10 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, dev->ctrl.max_hw_sectors = min_t(u32, NVME_MAX_BYTES >> SECTOR_SHIFT, dma_opt_mapping_size(&pdev->dev) >> 9); + if (dev->apple_h9p) + dev->ctrl.max_hw_sectors = + min_t(u32, dev->ctrl.max_hw_sectors, + APPLE_H9P_NVME_MAX_SECTORS); dev->ctrl.max_segments = NVME_MAX_SEGS; dev->ctrl.max_integrity_segments = 1; return dev; @@ -3733,6 +4151,7 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, out_put_device: put_device(dev->dev); kfree(dev->queues); + kfree(dev->apple_h9p); out_free_dev: kfree(dev); return ERR_PTR(ret); @@ -4271,6 +4690,9 @@ static const struct pci_device_id nvme_id_table[] = { */ .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_QDEPTH_ONE }, + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2002), + .driver_data = NVME_QUIRK_SINGLE_VECTOR | + NVME_QUIRK_SHARED_TAGS }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005), .driver_data = NVME_QUIRK_SINGLE_VECTOR | diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 2247709ef6d696..0b9e77af5aac39 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -54,6 +54,19 @@ config PCIE_APPLE If unsure, say Y if you have an Apple Silicon system. +config PCIE_APPLE_H9P + tristate "Apple H9P/T8010 PCIe controller" + depends on ARCH_APPLE || COMPILE_TEST + depends on OF + depends on PCI_MSI + select PCI_HOST_COMMON + help + Say Y here to enable the PCIe root complex found in Apple A10 + (T8010/H9P) devices. This controller is used for the internal + Apple NVMe storage path on devices such as iPad7,12. + It provides the legacy PHY/link setup and NVMMU/SART mapping + hooks needed before the standard PCI and NVMe layers can bind. 
+ config PCI_VERSATILE bool "ARM Versatile PB PCI controller" depends on ARCH_VERSATILE || COMPILE_TEST diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile index ac8db283f0fea7..a4b489bd4f473f 100644 --- a/drivers/pci/controller/Makefile +++ b/drivers/pci/controller/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o obj-$(CONFIG_PCI_LOONGSON) += pci-loongson.o obj-$(CONFIG_PCIE_HISI_ERR) += pcie-hisi-error.o obj-$(CONFIG_PCIE_APPLE) += pcie-apple.o +obj-$(CONFIG_PCIE_APPLE_H9P) += pcie-apple-h9p.o obj-$(CONFIG_PCIE_MT7621) += pcie-mt7621.o obj-$(CONFIG_PCIE_ASPEED) += pcie-aspeed.o diff --git a/drivers/pci/controller/pcie-apple-h9p.c b/drivers/pci/controller/pcie-apple-h9p.c new file mode 100644 index 00000000000000..5ea301e2e2a7f9 --- /dev/null +++ b/drivers/pci/controller/pcie-apple-h9p.c @@ -0,0 +1,1368 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCIe host bridge driver for Apple H9P/T8010 SoCs. + * + * The controller exposes an ECAM-compatible root complex after the SoC-specific + * power, clock and PHY sequence has brought a port out of reset. The hardware + * differs enough from the Apple Silicon PCIe controller to keep the early H9P + * bring-up sequence separate, while still using the generic PCI host bridge + * and MSI subsystems. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "pci-host-common.h" + +#define H9P_NUM_PORTS 4 +#define H9P_NUM_MSI 32 +#define H9P_MSI_PER_PORT (H9P_NUM_MSI / H9P_NUM_PORTS) + +#define H9P_CFG_PORT_STRIDE 0x8000 + +#define H9P_PHY0_PORTSTAT(port) (0x0100 + (port) * 0x0080) +#define H9P_PHY1_PORTMASK 0x000c + +#define H9P_PORT_LTSSMCTL 0x0080 +#define H9P_PORT_IRQSTAT 0x0100 +#define H9P_PORT_IRQMASK 0x0104 +#define H9P_PORT_MSIVECBASE 0x0128 +#define H9P_PORT_ENABLE 0x0140 +#define H9P_PORT_LINKSTS 0x0208 + +#define H9P_LINK_SPEED_2_5GT 1 +#define H9P_LINK_SPEED_8GT 3 + +#define H9P_PCIECLK_POSTUP0 0x0000 +#define H9P_PCIECLK_POSTUP1 0x000c +#define H9P_PCIECLK_POSTUP2 0x4104 +#define H9P_PCIECLK_POSTUP3 0x4100 + +#define H9P_NVMMU_TCB_CTRL 0x0004 +#define H9P_NVMMU_TCB_BASE_LO 0x0008 +#define H9P_NVMMU_TCB_BASE_HI 0x000c +#define H9P_NVMMU_TCB_TABLE_LO 0x0010 +#define H9P_NVMMU_TCB_TABLE_HI 0x0014 +#define H9P_NVMMU_SART_CTRL 0x0020 +#define H9P_NVMMU_SART_VA_BASE 0x0024 +#define H9P_NVMMU_SART_VA_END 0x0028 +#define H9P_NVMMU_SART_PA_BASE 0x002c + +#define H9P_NVMMU_TCB_BYTES 0x80 +#define H9P_NVMMU_TCB_DWORDS (H9P_NVMMU_TCB_BYTES / sizeof(u32)) +#define H9P_NVMMU_SGL_WORDS APPLE_H9P_NVMMU_MAX_PAGES +#define H9P_NVMMU_FLATDMA_BASE 0x40000000ULL +#define H9P_NVMMU_FLATDMA_STRIDE SZ_8M +#define H9P_NVMMU_SART_ALIGNMENT SZ_1M +#define H9P_NVMMU_TCB_READ 0x100 +#define H9P_NVMMU_TCB_WRITE 0x200 + +#define H9P_DEFAULT_MSI_DOORBELL 0xbffff000ULL + +#define APPLE_PMGR_AUTO_ENABLE BIT(28) +#define APPLE_PMGR_WAS_CLKGATED BIT(9) +#define APPLE_PMGR_WAS_PWRGATED BIT(8) +#define APPLE_PMGR_PS_ACTUAL GENMASK(7, 4) +#define APPLE_PMGR_PS_TARGET GENMASK(3, 0) +#define APPLE_PMGR_FLAGS (APPLE_PMGR_WAS_CLKGATED | \ + APPLE_PMGR_WAS_PWRGATED) +#define 
APPLE_PMGR_PS_ACTIVE 0xf +#define APPLE_PMGR_PS_SET_TIMEOUT_US 10000 + +struct apple_h9p_tunable { + u32 offset; + u32 size; + u64 mask; + u64 data; +}; + +static const struct apple_h9p_tunable h9p_phy0_tunables[] = { + { 0x0008, 4, 0x7f7f7f7f, 0x00000000 }, + { 0x000c, 4, 0x00073f3f, 0x00043f00 }, + { 0x0010, 4, 0x00000700, 0x00000000 }, + { 0x0018, 4, 0x00ffffff, 0x000c0960 }, + { 0x001c, 4, 0x00001fff, 0x0000092c }, + { 0x002c, 4, 0x000000ff, 0x00000009 }, + { 0x003c, 4, 0x80000000, 0x00000000 }, + { 0x0100, 4, 0x31100010, 0x01000000 }, + { 0x0108, 4, 0x00000707, 0x00000000 }, + { 0x010c, 4, 0x00073f3f, 0x00043f00 }, + { 0x0110, 4, 0x00000011, 0x00000001 }, + { 0x0114, 4, 0x00000007, 0x00000000 }, + { 0x0118, 4, 0x00073f3f, 0x00043f00 }, + { 0x0120, 4, 0x0333003f, 0x0111000f }, + { 0x0130, 4, 0x000000ff, 0x0000000f }, + { 0x0138, 4, 0x0000007f, 0x0000003e }, + { 0x0180, 4, 0x31100010, 0x01000000 }, + { 0x0188, 4, 0x00000707, 0x00000000 }, + { 0x018c, 4, 0x00073f3f, 0x00043f00 }, + { 0x01a0, 4, 0x0333003f, 0x0111000f }, + { 0x01b0, 4, 0x000000ff, 0x0000000f }, + { 0x01b8, 4, 0x0000007f, 0x0000003e }, + { 0x0200, 4, 0x31100010, 0x01000000 }, + { 0x0208, 4, 0x00000707, 0x00000000 }, + { 0x020c, 4, 0x00073f3f, 0x00043f00 }, + { 0x0220, 4, 0x0333003f, 0x0111000f }, + { 0x0230, 4, 0x000000ff, 0x0000000f }, + { 0x0238, 4, 0x0000007f, 0x0000003e }, + { 0x0280, 4, 0x31100010, 0x01000000 }, + { 0x0288, 4, 0x00000707, 0x00000000 }, + { 0x028c, 4, 0x00073f3f, 0x00043f00 }, + { 0x02a0, 4, 0x0333003f, 0x0111000f }, + { 0x02b0, 4, 0x000000ff, 0x0000000f }, + { 0x02b8, 4, 0x0000007f, 0x0000003e }, + { 0x0100, 4, 0x00000010, 0x00000010 }, + { 0x0180, 4, 0x00000010, 0x00000000 }, + { 0x0200, 4, 0x00000010, 0x00000000 }, + { 0x0280, 4, 0x00000010, 0x00000000 }, +}; + +static const struct apple_h9p_tunable h9p_config_tunables[] = { + { 0x0098, 4, 0x0000000f, 0x00000000 }, + { 0x0164, 4, 0x00f8ff00, 0x00000000 }, + { 0x08e0, 4, 0x00000005, 0x00000005 }, +}; + +static const struct 
apple_h9p_tunable h9p_port_tunables[] = { + { 0x0090, 4, 0x000000ff, 0x00000028 }, + { 0x0130, 4, 0x0000000d, 0x00000005 }, + { 0x0134, 4, 0x00000001, 0x00000001 }, + { 0x0138, 4, 0x00007f7f, 0x00000000 }, + { 0x013c, 4, 0x00000002, 0x00000002 }, + { 0x0140, 4, 0x0073ffff, 0x00704c4b }, +}; + +struct apple_h9p_pcie { + struct device *dev; + struct platform_device *pdev; + struct pci_host_bridge *bridge; + struct pci_config_window *cfgwin; + + void __iomem *base_config; + void __iomem *base_phy[3]; + void __iomem *base_port[H9P_NUM_PORTS]; + void __iomem *base_pcieclk_postup; + + struct clk_bulk_data clks[3]; + struct gpio_desc *perst[H9P_NUM_PORTS]; + struct gpio_desc *clkreq[H9P_NUM_PORTS]; + struct gpio_descs *devpwr; + struct pinctrl *pinctrl; + u32 enabled_ports; + + struct apple_h9p_nvmmu { + struct apple_h9p_pcie *pcie; + void __iomem *base; + u64 pa_base; + u32 va_base; + u32 size; + void *tcb; + void *tcb_table; + void *tcb_sgl; + size_t tcb_size; + size_t tcb_table_size; + size_t tcb_sgl_size; + dma_addr_t tcb_dma; + dma_addr_t tcb_table_dma; + dma_addr_t tcb_sgl_dma; + } nvmmu[H9P_NUM_PORTS]; + + struct device **pd_dev; + struct device_link **pd_link; + int pd_count; + + DECLARE_BITMAP(used_msi[H9P_NUM_PORTS], H9P_MSI_PER_PORT); + u64 msi_doorbell; + /* Protects the per-port MSI allocation bitmaps. 
*/ + spinlock_t used_msi_lock; + struct irq_domain *irq_dom; + struct irq_domain *msi_dom; + + struct apple_h9p_msi { + struct apple_h9p_pcie *pcie; + int virq; + bool disabled; + } msi[H9P_NUM_MSI]; +}; + +static inline void h9p_rmw(void __iomem *addr, u32 clear, u32 set) +{ + writel((readl(addr) & ~clear) | set, addr); +} + +static inline void h9p_rmww(void __iomem *addr, u16 clear, u16 set) +{ + writew((readw(addr) & ~clear) | set, addr); +} + +static inline u64 h9p_readsz(void __iomem *addr, u32 size) +{ + switch (size) { + case 1: + return readb(addr); + case 2: + return readw(addr); + case 4: + return readl(addr); + case 8: + return readq(addr); + default: + return 0; + } +} + +static inline void h9p_writesz(u64 value, void __iomem *addr, u32 size) +{ + switch (size) { + case 1: + writeb(value, addr); + break; + case 2: + writew(value, addr); + break; + case 4: + writel(value, addr); + break; + case 8: + writeq(value, addr); + break; + } +} + +static inline void h9p_writel_flush(u32 value, void __iomem *addr) +{ + writel(value, addr); + readl(addr); +} + +static void apple_h9p_pcie_detach_genpd(struct apple_h9p_pcie *pcie) +{ + int i; + + for (i = pcie->pd_count - 1; i >= 0; i--) { + if (pcie->pd_link[i]) + device_link_del(pcie->pd_link[i]); + if (!IS_ERR_OR_NULL(pcie->pd_dev[i])) + dev_pm_domain_detach(pcie->pd_dev[i], true); + } +} + +static int apple_h9p_pcie_attach_genpd(struct apple_h9p_pcie *pcie) +{ + struct device *dev = pcie->dev; + int i; + + pcie->pd_count = of_count_phandle_with_args(dev->of_node, + "power-domains", + "#power-domain-cells"); + if (pcie->pd_count <= 1) + return 0; + + pcie->pd_dev = devm_kcalloc(dev, pcie->pd_count, + sizeof(*pcie->pd_dev), GFP_KERNEL); + if (!pcie->pd_dev) + return -ENOMEM; + + pcie->pd_link = devm_kcalloc(dev, pcie->pd_count, + sizeof(*pcie->pd_link), GFP_KERNEL); + if (!pcie->pd_link) + return -ENOMEM; + + for (i = 0; i < pcie->pd_count; i++) { + pcie->pd_dev[i] = dev_pm_domain_attach_by_id(dev, i); + if 
(IS_ERR(pcie->pd_dev[i])) { + apple_h9p_pcie_detach_genpd(pcie); + return PTR_ERR(pcie->pd_dev[i]); + } + + pcie->pd_link[i] = device_link_add(dev, pcie->pd_dev[i], + DL_FLAG_STATELESS | + DL_FLAG_PM_RUNTIME | + DL_FLAG_RPM_ACTIVE); + if (!pcie->pd_link[i]) { + apple_h9p_pcie_detach_genpd(pcie); + return -EINVAL; + } + } + + return 0; +} + +static void apple_h9p_pcie_genpd_cleanup(void *data) +{ + apple_h9p_pcie_detach_genpd(data); +} + +static void apple_h9p_pcie_clk_cleanup(void *data) +{ + struct apple_h9p_pcie *pcie = data; + + clk_bulk_disable_unprepare(ARRAY_SIZE(pcie->clks), pcie->clks); +} + +static struct apple_h9p_pcie *apple_h9p_pcie_lookup(struct device *dev) +{ + struct pci_host_bridge *bridge = dev_get_drvdata(dev); + + return bridge ? pci_host_bridge_priv(bridge) : NULL; +} + +static int apple_h9p_pcie_force_power_domain(struct apple_h9p_pcie *pcie, + struct device_node *pd_np) +{ + struct device *dev = pcie->dev; + struct device_node *pmgr_np; + struct regmap *regmap; + u32 offset; + u32 val; + int ret; + + ret = of_property_read_u32_index(pd_np, "reg", 0, &offset); + if (ret) + return dev_err_probe(dev, ret, "%pOF missing PMGR reg\n", + pd_np); + + pmgr_np = of_get_parent(pd_np); + if (!pmgr_np) + return dev_err_probe(dev, -EINVAL, + "%pOF has no PMGR parent\n", pd_np); + + regmap = syscon_node_to_regmap(pmgr_np); + of_node_put(pmgr_np); + if (IS_ERR(regmap)) + return dev_err_probe(dev, PTR_ERR(regmap), + "%pOF missing PMGR regmap\n", pd_np); + + ret = regmap_read(regmap, offset, &val); + if (ret) + return dev_err_probe(dev, ret, + "%pOF PMGR read failed\n", pd_np); + + val &= ~(APPLE_PMGR_AUTO_ENABLE | APPLE_PMGR_FLAGS | + APPLE_PMGR_PS_TARGET); + val |= FIELD_PREP(APPLE_PMGR_PS_TARGET, APPLE_PMGR_PS_ACTIVE); + + ret = regmap_write(regmap, offset, val); + if (ret) + return dev_err_probe(dev, ret, + "%pOF PMGR write failed\n", pd_np); + + ret = regmap_read_poll_timeout_atomic(regmap, offset, val, + FIELD_GET(APPLE_PMGR_PS_ACTUAL, + val) == + 
APPLE_PMGR_PS_ACTIVE, 1, + APPLE_PMGR_PS_SET_TIMEOUT_US); + if (ret) + return dev_err_probe(dev, ret, + "%pOF PMGR active timeout\n", pd_np); + + val &= ~APPLE_PMGR_FLAGS; + val |= APPLE_PMGR_AUTO_ENABLE; + + ret = regmap_write(regmap, offset, val); + if (ret) + return dev_err_probe(dev, ret, + "%pOF PMGR auto-enable failed\n", + pd_np); + + return 0; +} + +static int apple_h9p_pcie_force_power_domains(struct apple_h9p_pcie *pcie) +{ + struct device *dev = pcie->dev; + struct device_node *pd_np; + int count; + int i; + int ret; + + count = of_count_phandle_with_args(dev->of_node, "power-domains", + "#power-domain-cells"); + if (count <= 0) + return 0; + + for (i = 0; i < count; i++) { + pd_np = of_parse_phandle(dev->of_node, "power-domains", i); + if (!pd_np) + return dev_err_probe(dev, -EINVAL, + "missing power-domain %d\n", i); + + ret = apple_h9p_pcie_force_power_domain(pcie, pd_np); + of_node_put(pd_np); + if (ret) + return ret; + } + + return 0; +} + +static int apple_h9p_pcie_config_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + struct pci_config_window *cfg = bus->sysdata; + + if (bus->number == cfg->busr.start && PCI_SLOT(devfn) >= H9P_NUM_PORTS) + return PCIBIOS_DEVICE_NOT_FOUND; + + return pci_generic_config_read(bus, devfn, where, size, val); +} + +static int apple_h9p_pcie_config_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct pci_config_window *cfg = bus->sysdata; + + if (bus->number == cfg->busr.start && PCI_SLOT(devfn) >= H9P_NUM_PORTS) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (where <= PCI_INTERRUPT_LINE && where + size > PCI_INTERRUPT_LINE) + val |= 0xffu << ((PCI_INTERRUPT_LINE - where) << 3); + + return pci_generic_config_write(bus, devfn, where, size, val); +} + +static unsigned int apple_h9p_pcie_bus_to_port(struct apple_h9p_pcie *pcie, + unsigned int bus) +{ + unsigned int port; + + for (port = 0; port < H9P_NUM_PORTS; port++) { + u32 cfg, sec, sub; + + cfg = 
readl(pcie->base_config + port * H9P_CFG_PORT_STRIDE + + PCI_PRIMARY_BUS); + sec = (cfg >> 8) & 0xff; + sub = (cfg >> 16) & 0xff; + + if (!sec || !sub || sec == 0xff || sub == 0xff) + continue; + if (bus >= sec && bus <= sub) + return port; + } + + return H9P_NUM_PORTS; +} + +static int apple_h9p_pcie_device_port(struct apple_h9p_pcie *pcie, + struct device *dev) +{ + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) + return -ENODEV; + + pdev = to_pci_dev(dev); + if (!pdev->bus) + return -ENODEV; + + return apple_h9p_pcie_bus_to_port(pcie, pdev->bus->number); +} + +static void apple_h9p_msi_compose_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct apple_h9p_pcie *pcie = irq_data_get_irq_chip_data(d); + + if (!pcie) { + memset(msg, 0, sizeof(*msg)); + return; + } + + msg->address_lo = lower_32_bits(pcie->msi_doorbell); + msg->address_hi = upper_32_bits(pcie->msi_doorbell); + msg->data = d->hwirq; +} + +static void apple_h9p_msi_write_msg(struct irq_data *d, struct msi_msg *msg) +{ + pci_write_msi_msg(d->irq, msg); +} + +static int apple_h9p_msi_set_affinity(struct irq_data *d, + const struct cpumask *mask, bool force) +{ + return -EINVAL; +} + +static void apple_h9p_msi_mask(struct irq_data *d) +{ + struct apple_h9p_pcie *pcie = irq_data_get_irq_chip_data(d); + + if (!pcie || d->hwirq >= H9P_NUM_MSI || pcie->msi[d->hwirq].virq <= 0) + return; + + if (!pcie->msi[d->hwirq].disabled) { + disable_irq_nosync(pcie->msi[d->hwirq].virq); + pcie->msi[d->hwirq].disabled = true; + } +} + +static void apple_h9p_msi_unmask(struct irq_data *d) +{ + struct apple_h9p_pcie *pcie = irq_data_get_irq_chip_data(d); + + if (!pcie || d->hwirq >= H9P_NUM_MSI || pcie->msi[d->hwirq].virq <= 0) + return; + + if (pcie->msi[d->hwirq].disabled) { + enable_irq(pcie->msi[d->hwirq].virq); + pcie->msi[d->hwirq].disabled = false; + } +} + +static void apple_h9p_msi_ack(struct irq_data *d) +{ +} + +static struct irq_chip apple_h9p_msi_chip = { + .name = "Apple H9P PCIe MSI", + .irq_ack = 
apple_h9p_msi_ack, + .irq_mask = apple_h9p_msi_mask, + .irq_unmask = apple_h9p_msi_unmask, + .irq_compose_msi_msg = apple_h9p_msi_compose_msg, + .irq_write_msi_msg = apple_h9p_msi_write_msg, + .irq_set_affinity = apple_h9p_msi_set_affinity, +}; + +static void apple_h9p_msi_isr(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct apple_h9p_msi *msi = irq_desc_get_handler_data(desc); + struct apple_h9p_pcie *pcie = msi->pcie; + unsigned int idx = msi - pcie->msi; + unsigned int virq; + + chained_irq_enter(chip, desc); + virq = irq_find_mapping(pcie->irq_dom, idx); + if (virq) + generic_handle_irq(virq); + chained_irq_exit(chip, desc); +} + +static int apple_h9p_msi_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + struct apple_h9p_pcie *pcie = domain->host_data; + msi_alloc_info_t *info = args; + struct msi_desc *desc = info ? info->desc : NULL; + struct pci_dev *pdev = NULL; + unsigned long flags; + unsigned int bus = 0; + unsigned int port; + int slot; + + if (nr_irqs != 1) + return -ENOSPC; + + if (desc && desc->dev && dev_is_pci(desc->dev)) { + pdev = to_pci_dev(desc->dev); + if (pdev->bus) + bus = pdev->bus->number; + } + + if (bus < 1) + return -ENOSPC; + + port = apple_h9p_pcie_bus_to_port(pcie, bus); + if (port >= H9P_NUM_PORTS) + return -ENOSPC; + if (!(pcie->enabled_ports & BIT(port))) + return -ENOSPC; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + slot = find_first_zero_bit(pcie->used_msi[port], H9P_MSI_PER_PORT); + if (slot >= H9P_MSI_PER_PORT) { + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); + return -ENOSPC; + } + __set_bit(slot, pcie->used_msi[port]); + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); + + irq_domain_set_info(domain, virq, port * H9P_MSI_PER_PORT + slot, + &apple_h9p_msi_chip, pcie, handle_edge_irq, + NULL, NULL); + return 0; +} + +static void apple_h9p_msi_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct 
irq_data *d = irq_domain_get_irq_data(domain, virq); + struct apple_h9p_pcie *pcie = d ? irq_data_get_irq_chip_data(d) : NULL; + unsigned long flags; + unsigned int i; + + if (!pcie || !d) + return; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + for (i = 0; i < nr_irqs; i++) { + unsigned long hwirq = d->hwirq + i; + unsigned int port = hwirq / H9P_MSI_PER_PORT; + unsigned int slot = hwirq % H9P_MSI_PER_PORT; + + if (port < H9P_NUM_PORTS) + __clear_bit(slot, pcie->used_msi[port]); + } + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); +} + +static const struct irq_domain_ops apple_h9p_msi_domain_ops = { + .alloc = apple_h9p_msi_alloc, + .free = apple_h9p_msi_free, +}; + +static struct irq_chip apple_h9p_msi_parent_chip = { + .name = "Apple H9P PCIe MSI parent", + .irq_ack = irq_chip_ack_parent, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_write_msi_msg = apple_h9p_msi_write_msg, +}; + +static struct msi_domain_info apple_h9p_msi_domain_info = { + .flags = MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX | + MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSI_MASK_PARENT, + .chip = &apple_h9p_msi_parent_chip, +}; + +static void apple_h9p_pcie_msi_cleanup(void *data) +{ + struct apple_h9p_pcie *pcie = data; + unsigned int i; + + for (i = 0; i < H9P_NUM_MSI; i++) { + if (pcie->msi[i].virq <= 0) + continue; + + irq_set_chained_handler_and_data(pcie->msi[i].virq, NULL, + NULL); + if (pcie->msi[i].disabled) { + enable_irq(pcie->msi[i].virq); + pcie->msi[i].disabled = false; + } + } + + if (pcie->msi_dom) { + irq_domain_remove(pcie->msi_dom); + pcie->msi_dom = NULL; + } + + if (pcie->irq_dom) { + irq_domain_remove(pcie->irq_dom); + pcie->irq_dom = NULL; + } +} + +static int apple_h9p_pcie_setup_msi(struct apple_h9p_pcie *pcie) +{ + struct device *dev = pcie->dev; + struct fwnode_handle *fwnode = dev_fwnode(dev); + unsigned int i; + int ret; + + pcie->irq_dom = irq_domain_create_linear(fwnode, H9P_NUM_MSI, + 
&apple_h9p_msi_domain_ops, + pcie); + if (!pcie->irq_dom) + return -ENOMEM; + + pcie->msi_dom = msi_create_irq_domain(fwnode, + &apple_h9p_msi_domain_info, + pcie->irq_dom); + if (!pcie->msi_dom) { + irq_domain_remove(pcie->irq_dom); + pcie->irq_dom = NULL; + return -ENOMEM; + } + + ret = devm_add_action_or_reset(dev, apple_h9p_pcie_msi_cleanup, + pcie); + if (ret) + return ret; + + for (i = 0; i < H9P_NUM_MSI; i++) { + int irq = platform_get_irq(pcie->pdev, H9P_NUM_PORTS + i); + + if (irq < 0) + return irq; + + pcie->msi[i].pcie = pcie; + pcie->msi[i].virq = irq; + irq_set_chained_handler_and_data(irq, apple_h9p_msi_isr, + &pcie->msi[i]); + disable_irq(irq); + pcie->msi[i].disabled = true; + } + + return 0; +} + +static u64 apple_h9p_read_pci_cap(struct apple_h9p_pcie *pcie, + unsigned int busdevfn, u32 type) +{ + void __iomem *cfg = pcie->base_config + (busdevfn << 12); + u32 ptr = readl(cfg + PCI_CAPABILITY_LIST) & 0xff; + + while (ptr) { + u32 next = readl(cfg + ptr); + + if ((next & 0xff) == type) + return ptr; + ptr = (next >> 8) & 0xff; + } + + return 0; +} + +static int apple_h9p_wait(void __iomem *addr, u32 mask, u32 min, u32 max, + unsigned long timeout_us) +{ + u32 val; + + return readl_poll_timeout(addr, val, (val & mask) >= min && + (val & mask) <= max, 1000, timeout_us); +} + +static int apple_h9p_wait_gpio(struct gpio_desc *desc, int value, + unsigned long timeout_us) +{ + ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); + + do { + if (gpiod_get_raw_value(desc) == value) + return 0; + usleep_range(1000, 2000); + } while (ktime_before(ktime_get(), timeout)); + + return -ETIMEDOUT; +} + +static irqreturn_t apple_h9p_nvmmu_irq(int irq, void *data) +{ + struct apple_h9p_nvmmu *nvmmu = data; + struct apple_h9p_pcie *pcie = nvmmu->pcie; + unsigned int port = nvmmu - pcie->nvmmu; + + dev_err_ratelimited(pcie->dev, "port %u NVMMU fault interrupt\n", port); + return IRQ_HANDLED; +} + +static int apple_h9p_setup_nvmmu_port(struct apple_h9p_pcie *pcie, 
+ unsigned int port) +{ + struct apple_h9p_nvmmu *nvmmu = &pcie->nvmmu[port]; + struct device *dev = pcie->dev; + struct device_node *mem_np; + struct resource res; + u32 iova; + int irq; + int ret; + + if (!nvmmu->base) + return 0; + + mem_np = of_parse_phandle(dev->of_node, "memory-region", port); + if (!mem_np) + return dev_err_probe(dev, -EINVAL, + "port %u NVMMU missing memory-region\n", + port); + + ret = of_address_to_resource(mem_np, 0, &res); + if (ret) + goto out_put_node; + + ret = of_property_read_u32(dev->of_node, "apple,nvmmu-iova", &iova); + if (ret) + goto out_put_node; + + if (resource_size(&res) < H9P_NVMMU_SART_ALIGNMENT || + !IS_ALIGNED(res.start, H9P_NVMMU_SART_ALIGNMENT) || + !IS_ALIGNED(iova, H9P_NVMMU_SART_ALIGNMENT)) { + ret = -EINVAL; + goto out_put_node; + } + + nvmmu->pcie = pcie; + nvmmu->pa_base = res.start; + nvmmu->va_base = iova; + nvmmu->size = resource_size(&res); + nvmmu->tcb_size = round_up(APPLE_H9P_NVMMU_MAX_REQS * + H9P_NVMMU_TCB_BYTES, PAGE_SIZE); + nvmmu->tcb_table_size = PAGE_SIZE * 16; + nvmmu->tcb_sgl_size = round_up(APPLE_H9P_NVMMU_MAX_REQS * + H9P_NVMMU_SGL_WORDS * sizeof(u32), + PAGE_SIZE); + + nvmmu->tcb = dmam_alloc_attrs(dev, nvmmu->tcb_size, &nvmmu->tcb_dma, + GFP_KERNEL | __GFP_ZERO, + DMA_ATTR_WRITE_COMBINE); + if (!nvmmu->tcb) { + ret = -ENOMEM; + goto out_put_node; + } + + nvmmu->tcb_table = dmam_alloc_attrs(dev, nvmmu->tcb_table_size, + &nvmmu->tcb_table_dma, + GFP_KERNEL | __GFP_ZERO, + DMA_ATTR_WRITE_COMBINE); + if (!nvmmu->tcb_table) { + ret = -ENOMEM; + goto out_put_node; + } + + nvmmu->tcb_sgl = dmam_alloc_attrs(dev, nvmmu->tcb_sgl_size, + &nvmmu->tcb_sgl_dma, + GFP_KERNEL | __GFP_ZERO, + DMA_ATTR_WRITE_COMBINE); + if (!nvmmu->tcb_sgl) { + ret = -ENOMEM; + goto out_put_node; + } + + h9p_writel_flush(lower_32_bits(nvmmu->tcb_dma), + nvmmu->base + H9P_NVMMU_TCB_BASE_LO); + h9p_writel_flush(upper_32_bits(nvmmu->tcb_dma), + nvmmu->base + H9P_NVMMU_TCB_BASE_HI); + 
h9p_writel_flush(lower_32_bits(nvmmu->tcb_table_dma), + nvmmu->base + H9P_NVMMU_TCB_TABLE_LO); + h9p_writel_flush(upper_32_bits(nvmmu->tcb_table_dma), + nvmmu->base + H9P_NVMMU_TCB_TABLE_HI); + h9p_writel_flush(0x10000, nvmmu->base + H9P_NVMMU_TCB_CTRL); + + ret = apple_h9p_wait(nvmmu->base + H9P_NVMMU_TCB_CTRL, 0x10, 0, 0, + 250000); + if (ret) + goto out_put_node; + + h9p_writel_flush(nvmmu->va_base - 0x80000000U, + nvmmu->base + H9P_NVMMU_SART_VA_BASE); + h9p_writel_flush(round_up(nvmmu->va_base + nvmmu->size, + H9P_NVMMU_SART_ALIGNMENT) - 0x80100000U, + nvmmu->base + H9P_NVMMU_SART_VA_END); + h9p_writel_flush(nvmmu->pa_base >> 20, + nvmmu->base + H9P_NVMMU_SART_PA_BASE); + h9p_writel_flush(1, nvmmu->base + H9P_NVMMU_SART_CTRL); + + irq = platform_get_irq_optional(pcie->pdev, H9P_NUM_PORTS + + H9P_NUM_MSI + port); + if (irq > 0) { + ret = devm_request_irq(dev, irq, apple_h9p_nvmmu_irq, 0, + dev_name(dev), nvmmu); + if (ret) + goto out_put_node; + } else if (irq != -ENXIO) { + ret = irq; + goto out_put_node; + } + + dev_dbg(dev, "port %u NVMMU window %#x@%pa size %#x\n", port, + nvmmu->va_base, &res.start, nvmmu->size); + +out_put_node: + of_node_put(mem_np); + return ret; +} + +static int apple_h9p_setup_nvmmu(struct apple_h9p_pcie *pcie) +{ + unsigned int port; + int ret; + + for (port = 0; port < H9P_NUM_PORTS; port++) { + if (!(pcie->enabled_ports & BIT(port))) + continue; + + ret = apple_h9p_setup_nvmmu_port(pcie, port); + if (ret) + return dev_err_probe(pcie->dev, ret, + "port %u NVMMU setup failed\n", + port); + } + + return 0; +} + +int apple_h9p_pcie_map_nvmmu(struct device *dev, unsigned int tag, + const u64 *pages, unsigned int npages, + dma_addr_t *iova) +{ + struct apple_h9p_nvmmu *nvmmu; + struct apple_h9p_pcie *pcie; + struct device *host_dev = dev; + unsigned int port; + unsigned int i; + u64 sgl_dma; + u32 *tcb; + u32 *sgl; + int ret; + + if (tag >= APPLE_H9P_NVMMU_MAX_REQS || + npages > APPLE_H9P_NVMMU_MAX_PAGES) + return -EINVAL; + if (npages 
&& !pages) + return -EINVAL; + + while (host_dev && host_dev->bus == dev->bus) + host_dev = host_dev->parent; + if (!host_dev || !host_dev->parent) + return -ENODEV; + + pcie = apple_h9p_pcie_lookup(host_dev->parent); + if (!pcie) + return -ENODEV; + + ret = apple_h9p_pcie_device_port(pcie, dev); + if (ret < 0) + return ret; + port = ret; + if (port >= H9P_NUM_PORTS || !(pcie->enabled_ports & BIT(port))) + return -ENODEV; + + nvmmu = &pcie->nvmmu[port]; + if (!nvmmu->base || !nvmmu->tcb || !nvmmu->tcb_sgl) + return -EOPNOTSUPP; + + tcb = (u32 *)nvmmu->tcb + tag * H9P_NVMMU_TCB_DWORDS; + sgl = (u32 *)nvmmu->tcb_sgl + tag * H9P_NVMMU_SGL_WORDS; + memset(tcb, 0, H9P_NVMMU_TCB_BYTES); + memset(sgl, 0, H9P_NVMMU_SGL_WORDS * sizeof(*sgl)); + + if (npages) { + tcb[0] = H9P_NVMMU_TCB_READ | H9P_NVMMU_TCB_WRITE; + tcb[1] = npages; + tcb[2] = pages[0] >> ilog2(APPLE_H9P_NVMMU_PAGE_SIZE); + for (i = 0; i < npages; i++) + sgl[i] = pages[i] >> ilog2(APPLE_H9P_NVMMU_PAGE_SIZE); + + sgl_dma = nvmmu->tcb_sgl_dma + + tag * H9P_NVMMU_SGL_WORDS * sizeof(*sgl); + memcpy(&tcb[4], &sgl_dma, sizeof(sgl_dma)); + if (iova) + *iova = H9P_NVMMU_FLATDMA_BASE + + tag * H9P_NVMMU_FLATDMA_STRIDE; + } else { + dma_wmb(); + h9p_writel_flush(tag, nvmmu->base + H9P_NVMMU_TCB_CTRL); + if (iova) + *iova = 0; + return 0; + } + + dma_wmb(); + return 0; +} +EXPORT_SYMBOL_GPL(apple_h9p_pcie_map_nvmmu); + +static void apple_h9p_apply_tunables(void __iomem *base, + const struct apple_h9p_tunable *tunables, + unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) { + u64 val = h9p_readsz(base + tunables[i].offset, tunables[i].size); + + if ((val & tunables[i].mask) == tunables[i].data) + continue; + val &= ~tunables[i].mask; + val |= tunables[i].data; + h9p_writesz(val, base + tunables[i].offset, tunables[i].size); + } +} + +static int apple_h9p_pcieclk_postup(struct apple_h9p_pcie *pcie) +{ + if (!pcie->base_pcieclk_postup) + return 0; + + writel(0x00000007, pcie->base_pcieclk_postup + 
H9P_PCIECLK_POSTUP0); + writel(0x80010005, pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP1); + writel(0x00000003, pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP2); + writel(0x00000003, pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP3); + + return 0; +} + +static bool apple_h9p_link_up(struct apple_h9p_pcie *pcie, unsigned int port) +{ + u32 linksts = readl(pcie->base_port[port] + H9P_PORT_LINKSTS); + + linksts = (linksts >> 8) & 0x3f; + return linksts >= 0x11 && linksts <= 0x14; +} + +static int apple_h9p_setup_port(struct apple_h9p_pcie *pcie, unsigned int port) +{ + struct device *dev = pcie->dev; + u64 cap; + int ret; + + if (apple_h9p_link_up(pcie, port)) + return 0; + + gpiod_direction_output(pcie->perst[port], 0); + + h9p_rmw(pcie->base_phy[0] + 0x134 + 0x80 * port, 1, 0); + h9p_rmw(pcie->base_phy[0] + 0x124 + 0x80 * port, 0, 1); + + ret = apple_h9p_wait(pcie->base_phy[0] + 0x28, 0x10, 0x10, 0x10, + 250000); + if (ret) + return dev_err_probe(dev, ret, "port %u init timeout\n", port); + + usleep_range(250, 1000); + h9p_rmw(pcie->base_phy[0] + 0x100 + 0x80 * port, 0, 1); + h9p_rmw(pcie->base_phy[0] + 0x100 + 0x80 * port, 0x100, 0); + usleep_range(500, 1000); + h9p_rmw(pcie->base_phy[0] + 0x134 + 0x80 * port, 0, 1); + + writel(port ? 0 : H9P_LINK_SPEED_8GT, + pcie->base_phy[0] + 0x4020 + 0x40 * port); + h9p_rmw(pcie->base_phy[0] + 0x124 + 0x80 * port, 0x100, 0); + + cap = apple_h9p_read_pci_cap(pcie, port << 3, PCI_CAP_ID_EXP); + if (cap) + h9p_rmww(pcie->base_config + port * H9P_CFG_PORT_STRIDE + + cap + PCI_EXP_LNKCTL2, PCI_EXP_LNKCTL2_TLS, + port ? 
H9P_LINK_SPEED_2_5GT : H9P_LINK_SPEED_8GT); + + apple_h9p_apply_tunables(pcie->base_config + port * H9P_CFG_PORT_STRIDE, + h9p_config_tunables, + ARRAY_SIZE(h9p_config_tunables)); + apple_h9p_apply_tunables(pcie->base_port[port], h9p_port_tunables, + ARRAY_SIZE(h9p_port_tunables)); + + h9p_rmw(pcie->base_config + port * H9P_CFG_PORT_STRIDE + 0x8e0, + 0, 1); + + writel(0xff002fff, pcie->base_port[port] + H9P_PORT_IRQMASK); + writel(0x00ffd000, pcie->base_port[port] + H9P_PORT_IRQSTAT); + + h9p_rmw(pcie->base_port[port] + H9P_PORT_ENABLE, 0, 0x80000000); + writel(0x31, pcie->base_port[port] + 0x124); + writel(port * 0x10001 * H9P_MSI_PER_PORT, + pcie->base_port[port] + H9P_PORT_MSIVECBASE); + + usleep_range(250, 1000); + ret = apple_h9p_wait_gpio(pcie->clkreq[port], 0, 250000); + if (ret) + return dev_err_probe(dev, ret, "port %u CLKREQ# timeout\n", + port); + + gpiod_direction_output(pcie->perst[port], 1); + usleep_range(250, 1000); + + ret = apple_h9p_wait(pcie->base_phy[1] + H9P_PHY1_PORTMASK, + BIT(port), BIT(port), BIT(port), 250000); + if (ret) + return dev_err_probe(dev, ret, "port %u PHY up timeout\n", + port); + + h9p_rmw(pcie->base_phy[2] + 0x180, 0, 0x4000); + h9p_rmw(pcie->base_phy[2] + 0x184, 0, 0x4000); + h9p_rmw(pcie->base_phy[2] + 0x90, 0xfff, 100); + h9p_rmw(pcie->base_phy[2] + 0x98, 0xfff, 25); + h9p_rmw(pcie->base_phy[2] + 0x10088 + 0x800 * port, 0, 0x4000); + writel(0, pcie->base_phy[2] + 0x10784 + 0x800 * port); + h9p_rmw(pcie->base_phy[2] + 0x10004 + 0x800 * port, 0xfff, 0x600); + writel(0x3105, pcie->base_phy[2] + 0x20788 + 0x800 * port); + h9p_rmw(pcie->base_phy[2] + 0x207a0 + 0x800 * port, 0xff, 0x9f); + h9p_rmw(pcie->base_phy[2] + 0x207a8 + 0x800 * port, 0xff, 0x01); + h9p_rmw(pcie->base_phy[2] + 0x20400 + 0x800 * port, 0x1f, 0x0a); + writel(175, pcie->base_phy[2] + 0x2009c + 0x800 * port); + writel(175, pcie->base_phy[2] + 0x200dc + 0x800 * port); + writel(333, pcie->base_phy[2] + 0x200a0 + 0x800 * port); + writel(333, pcie->base_phy[2] + 
0x200e0 + 0x800 * port); + writel(530, pcie->base_phy[2] + 0x200a4 + 0x800 * port); + writel(530, pcie->base_phy[2] + 0x200e4 + 0x800 * port); + writel(0, pcie->base_phy[2] + 0x20330 + 0x800 * port); + writel(0, pcie->base_phy[2] + 0x20340 + 0x800 * port); + writel(0, pcie->base_phy[2] + 0x20350 + 0x800 * port); + + writel(0xff002f0f, pcie->base_port[port] + H9P_PORT_IRQMASK); + usleep_range(5000, 10000); + + h9p_rmw(pcie->base_port[port] + H9P_PORT_LTSSMCTL, 0, 1); + ret = apple_h9p_wait(pcie->base_port[port] + H9P_PORT_LINKSTS, + 0x3f00, 0x1100, 0x1400, 500000); + if (ret) + dev_warn(dev, "port %u link did not reach L0\n", port); + + return 0; +} + +static int apple_h9p_setup_ports(struct apple_h9p_pcie *pcie) +{ + unsigned int port; + int ret; + + writel(0x10, pcie->base_phy[0] + 0x0004); + h9p_rmw(pcie->base_phy[0] + 0x124, 0, 1); + + ret = apple_h9p_wait(pcie->base_phy[0] + 0x28, 0x10, 0x10, 0x10, + 250000); + if (ret) + return dev_err_probe(pcie->dev, ret, + "global PHY init timeout\n"); + + ret = apple_h9p_wait(pcie->base_phy[0] + 0x28, 1, 1, 1, 250000); + if (ret) + return dev_err_probe(pcie->dev, ret, + "global PHY ready timeout\n"); + + writel(1, pcie->base_phy[0] + 0x34); + apple_h9p_apply_tunables(pcie->base_phy[0], h9p_phy0_tunables, + ARRAY_SIZE(h9p_phy0_tunables)); + writel(1, pcie->base_phy[0] + 0x14); + usleep_range(5000, 10000); + writel(1, pcie->base_phy[0] + 0x24); + usleep_range(500, 1000); + + for (port = 0; port < H9P_NUM_PORTS; port++) { + if (!(pcie->enabled_ports & BIT(port))) + continue; + + ret = apple_h9p_setup_port(pcie, port); + if (ret) + return ret; + } + + return 0; +} + +static int apple_h9p_pcie_init(struct pci_config_window *cfg) +{ + struct apple_h9p_pcie *pcie = apple_h9p_pcie_lookup(cfg->parent); + int ret; + + if (!pcie) + return -ENODEV; + + pcie->cfgwin = cfg; + pcie->base_config = cfg->win; + + ret = apple_h9p_pcieclk_postup(pcie); + if (ret) + return ret; + + ret = apple_h9p_setup_ports(pcie); + if (ret) + return ret; + 
+ ret = apple_h9p_setup_nvmmu(pcie); + return ret; +} + +static const struct pci_ecam_ops apple_h9p_pcie_ecam_ops = { + .bus_shift = 20, + .init = apple_h9p_pcie_init, + .pci_ops = { + .map_bus = pci_ecam_map_bus, + .read = apple_h9p_pcie_config_read, + .write = apple_h9p_pcie_config_write, + }, +}; + +static int apple_h9p_pcie_map_resources(struct platform_device *pdev, + struct apple_h9p_pcie *pcie) +{ + struct device *dev = &pdev->dev; + unsigned int i; + + for (i = 0; i < 3; i++) { + char name[8]; + + snprintf(name, sizeof(name), "phy%u", i); + pcie->base_phy[i] = devm_platform_ioremap_resource_byname(pdev, + name); + if (IS_ERR(pcie->base_phy[i])) + return PTR_ERR(pcie->base_phy[i]); + } + + for (i = 0; i < H9P_NUM_PORTS; i++) { + char name[8]; + struct resource *res; + + snprintf(name, sizeof(name), "port%u", i); + pcie->base_port[i] = devm_platform_ioremap_resource_byname(pdev, + name); + if (IS_ERR(pcie->base_port[i])) + return PTR_ERR(pcie->base_port[i]); + + snprintf(name, sizeof(name), "nvmmu%u", i); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (res) { + pcie->nvmmu[i].base = devm_ioremap_resource(dev, res); + if (IS_ERR(pcie->nvmmu[i].base)) + return PTR_ERR(pcie->nvmmu[i].base); + } + } + + pcie->base_pcieclk_postup = + devm_platform_ioremap_resource_byname(pdev, "pcieclk-postup"); + if (IS_ERR(pcie->base_pcieclk_postup)) { + if (PTR_ERR(pcie->base_pcieclk_postup) == -EINVAL) + pcie->base_pcieclk_postup = NULL; + else + return dev_err_probe(dev, + PTR_ERR(pcie->base_pcieclk_postup), + "failed to map pcieclk post-up\n"); + } + + return 0; +} + +static int apple_h9p_pcie_get_gpios(struct apple_h9p_pcie *pcie) +{ + struct device *dev = pcie->dev; + unsigned int i; + + for (i = 0; i < H9P_NUM_PORTS; i++) { + if (!(pcie->enabled_ports & BIT(i))) + continue; + + pcie->perst[i] = devm_gpiod_get_index(dev, "reset", i, + GPIOD_OUT_LOW); + if (IS_ERR(pcie->perst[i])) + return dev_err_probe(dev, PTR_ERR(pcie->perst[i]), + "failed to get 
PERST#%u\n", i); + + pcie->clkreq[i] = devm_gpiod_get_index(dev, "clkreq", i, + GPIOD_IN); + if (IS_ERR(pcie->clkreq[i])) + return dev_err_probe(dev, PTR_ERR(pcie->clkreq[i]), + "failed to get CLKREQ#%u\n", i); + } + + pcie->devpwr = devm_gpiod_get_array_optional(dev, "devpwr", GPIOD_ASIS); + if (IS_ERR(pcie->devpwr)) + return dev_err_probe(dev, PTR_ERR(pcie->devpwr), + "failed to get device power GPIOs\n"); + + return 0; +} + +static int apple_h9p_pcie_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct pci_host_bridge *bridge; + struct apple_h9p_pcie *pcie; + int ret; + + bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie)); + if (!bridge) + return -ENOMEM; + + pcie = pci_host_bridge_priv(bridge); + pcie->dev = dev; + pcie->pdev = pdev; + pcie->bridge = bridge; + spin_lock_init(&pcie->used_msi_lock); + + ret = of_property_read_u32(dev->of_node, "apple,enabled-ports", + &pcie->enabled_ports); + if (ret) + pcie->enabled_ports = BIT(0); + pcie->enabled_ports &= GENMASK(H9P_NUM_PORTS - 1, 0); + if (!pcie->enabled_ports) + return dev_err_probe(dev, -EINVAL, "no enabled ports\n"); + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (ret) + return dev_err_probe(dev, ret, "failed to set DMA mask\n"); + + ret = apple_h9p_pcie_attach_genpd(pcie); + if (ret) + return dev_err_probe(dev, ret, "failed to attach power domains\n"); + ret = devm_add_action_or_reset(dev, apple_h9p_pcie_genpd_cleanup, pcie); + if (ret) + return ret; + + pcie->clks[0].id = "core"; + pcie->clks[1].id = "aux"; + pcie->clks[2].id = "ref"; + ret = devm_clk_bulk_get(dev, ARRAY_SIZE(pcie->clks), pcie->clks); + if (ret) + return dev_err_probe(dev, ret, "failed to get clocks\n"); + + ret = clk_bulk_prepare_enable(ARRAY_SIZE(pcie->clks), pcie->clks); + if (ret) + return dev_err_probe(dev, ret, "failed to enable clocks\n"); + ret = devm_add_action_or_reset(dev, apple_h9p_pcie_clk_cleanup, pcie); + if (ret) + return ret; + + ret = 
apple_h9p_pcie_force_power_domains(pcie); + if (ret) + return ret; + + pcie->pinctrl = devm_pinctrl_get_select_default(dev); + if (PTR_ERR(pcie->pinctrl) == -ENODEV) + pcie->pinctrl = NULL; + else if (IS_ERR(pcie->pinctrl)) + return dev_err_probe(dev, PTR_ERR(pcie->pinctrl), + "failed to select pinctrl state\n"); + + ret = apple_h9p_pcie_map_resources(pdev, pcie); + if (ret) + return ret; + + ret = apple_h9p_pcie_get_gpios(pcie); + if (ret) + return ret; + + ret = of_property_read_u64(dev->of_node, "apple,msi-doorbell", + &pcie->msi_doorbell); + if (ret) + pcie->msi_doorbell = H9P_DEFAULT_MSI_DOORBELL; + + ret = apple_h9p_pcie_setup_msi(pcie); + if (ret) + return dev_err_probe(dev, ret, "failed to set up MSI\n"); + + return pci_host_common_init(pdev, bridge, &apple_h9p_pcie_ecam_ops); +} + +static const struct of_device_id apple_h9p_pcie_of_match[] = { + { .compatible = "apple,t8010-pcie" }, + { } +}; +MODULE_DEVICE_TABLE(of, apple_h9p_pcie_of_match); + +static struct platform_driver apple_h9p_pcie_driver = { + .probe = apple_h9p_pcie_probe, + .driver = { + .name = "pcie-apple-h9p", + .of_match_table = apple_h9p_pcie_of_match, + .suppress_bind_attrs = true, + }, +}; +module_platform_driver(apple_h9p_pcie_driver); + +MODULE_DESCRIPTION("Apple H9P/T8010 PCIe host bridge driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/apple-h9p-pcie.h b/include/linux/apple-h9p-pcie.h new file mode 100644 index 00000000000000..c29219c281b208 --- /dev/null +++ b/include/linux/apple-h9p-pcie.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_APPLE_H9P_PCIE_H +#define _LINUX_APPLE_H9P_PCIE_H + +#include +#include +#include + +struct device; + +#define APPLE_H9P_NVMMU_MAX_REQS 36 +#define APPLE_H9P_NVMMU_MAX_PAGES 256 +#define APPLE_H9P_NVMMU_PAGE_SIZE 4096 + +#if IS_REACHABLE(CONFIG_PCIE_APPLE_H9P) +int apple_h9p_pcie_map_nvmmu(struct device *dev, unsigned int tag, + const u64 *pages, unsigned int npages, + dma_addr_t *iova); +#else +static inline int 
apple_h9p_pcie_map_nvmmu(struct device *dev, + unsigned int tag, const u64 *pages, + unsigned int npages, dma_addr_t *iova) +{ + return -EOPNOTSUPP; +} +#endif + +#endif /* _LINUX_APPLE_H9P_PCIE_H */ From 60287da0389f466209a01a7f58fa7a2a278b27fe Mon Sep 17 00:00:00 2001 From: Pauli1Go <101004482+Pauli1Go@users.noreply.github.com> Date: Sun, 28 Jun 2026 23:34:21 +0200 Subject: [PATCH 2/3] apple: t8010: use upstream DART for H9P NVMe Drop the temporary T8010 DART driver. The Hoolock base now has S5L8960X/T8010 support in apple-dart, so the iPad7 PCIe DART can use the upstream compatible fallback plus apple,dma-range. Move the Apple H9P FlatDMA helpers out of the main nvme-pci body into a private include file. The remaining pci.c changes are limited to the H9P hooks that still need private nvme-pci access. Signed-off-by: Pauli1Go <101004482+Pauli1Go@users.noreply.github.com> --- arch/arm64/boot/dts/apple/t8010-ipad7.dtsi | 4 +- drivers/iommu/Kconfig | 11 - drivers/iommu/Makefile | 1 - drivers/iommu/apple-t8010-dart.c | 680 --------------------- drivers/nvme/host/pci-apple-h9p.c | 364 +++++++++++ drivers/nvme/host/pci.c | 357 +---------- 6 files changed, 368 insertions(+), 1049 deletions(-) delete mode 100644 drivers/iommu/apple-t8010-dart.c create mode 100644 drivers/nvme/host/pci-apple-h9p.c diff --git a/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi b/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi index bb89ca3e5df7b9..882fec4e69b5f3 100644 --- a/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi +++ b/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi @@ -27,13 +27,13 @@ soc { pcie0_dart0: iommu@601008000 { - compatible = "apple,t8010-dart"; + compatible = "apple,t8010-dart", "apple,s5l8960x-dart"; reg = <0x6 0x01008000 0x0 0x4000>; #iommu-cells = <1>; interrupt-parent = <&aic>; interrupts = ; power-domains = <&ps_pcie>; - pcie-dart; + apple,dma-range = <0x0 0x80000000 0x0 0x3c000000>; }; pcie0: pcie@610000000 { diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 
a2ba9de4375878..f86262b11416d1 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -315,17 +315,6 @@ config APPLE_DART Say Y here if you are using an Apple SoC. -config APPLE_T8010_DART - tristate "Apple T8010 legacy DART IOMMU support" - depends on ARCH_APPLE || COMPILE_TEST - select IOMMU_API - help - Support for the older DART layout used by Apple A10/T8010 PCIe. - This covers the pre-M1 register layout where TTBRs start at 0x40 - and the PCIe aperture is offset into the device IOVA space. - Enable this when bringing up H9P/Apple NVMe storage on T8010 - devices that cannot use the newer apple-dart register layout. - config S390_IOMMU def_bool y if S390 && PCI depends on S390 && PCI diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index dc0ef895c74c0c..0275821f4ef985 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -36,5 +36,4 @@ obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o -obj-$(CONFIG_APPLE_T8010_DART) += apple-t8010-dart.o obj-$(CONFIG_IOMMU_DEBUG_PAGEALLOC) += iommu-debug-pagealloc.o diff --git a/drivers/iommu/apple-t8010-dart.c b/drivers/iommu/apple-t8010-dart.c deleted file mode 100644 index 9d927a4e46593a..00000000000000 --- a/drivers/iommu/apple-t8010-dart.c +++ /dev/null @@ -1,680 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DART IOMMU on Apple T8010/A10 SoCs - * - * Copyright (C) 2020 Corellium LLC - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DART_TLB_OP 0x0000 -#define DART_TLB_OP_FLUSH 0x00000002 -#define DART_TLB_OP_SID_SHIFT 8 -#define DART_TLB_OP_SID(sid4) (1 << ((sid4) + 8)) -#define DART_TLB_OP_BUSY BIT(3) -#define DART_CONFIG 0x000C -#define DART_CONFIG_TXEN(sid4) (1 << ((sid4) * 8 + 7)) -#define DART_ERROR_STATUS 0x0010 -#define DART_ERROR_AXI_REQ0 0x0014 -#define 
DART_ERROR_AXI_REQ1 0x0018 -#define DART_ERROR_ADDRESS 0x001C -#define DART_DIAG_CONFIG 0x0020 -#define DART_UNKNOWN_24 0x0024 -#define DART_SID_REMAP 0x0028 -#define DART_UNKNOWN_2C 0x002C -#define DART_FETCH_CONFIG 0x0030 -#define DART_PERF_CONFIG 0x0078 -#define DART_TLB_MISS 0x007C -#define DART_TLB_WAIT 0x0080 -#define DART_TLB_HIT 0x0084 -#define DART_ST_MISS 0x0088 -#define DART_ST_WAIT 0x008C -#define DART_ST_HIT 0x0090 -#define DART_TTBR(sid4, l1idx4) (0x0040 + 16 * (sid4) + 4 * (l1idx4)) -#define DART_TTBR_VALID BIT(31) -#define DART_TTBR_MASK 0x00FFFFFF -#define DART_TLB_STATUS 0x1000 -#define DART_TLB_UNKNOWN(idx) (0x1004 + 4 * (idx)) -#define DART_STT_PA_DATA(idx) (0x2000 + 4 * (idx)) -#define DART_STT_PA_DATA_COUNT 1024 -#define DART_SMMU_TLB_CFG 0x3000 -#define DART_SMMU_TLB_DATA_RD 0x3100 -#define DART_SMMU_TLB_DATA_RD_COUNT 4 -#define DART_DATA_DEBUG_IDX 0x3120 -#define DART_DATA_DEBUG_CNTL 0x3124 -#define DART_DATA_DEBUG_CNTL_READ BIT(0) -#define DART_DATA_DEBUG_CNTL_BUSY BIT(2) -#define DART_TLB_TAG(idx) (0x3800 + 4 * (idx)) -#define DART_TLB_TAG_COUNT 128 - -#define DART_PTE_STATE_MASK 3 -#define DART_PTE_STATE_INVALID 0 -#define DART_PTE_STATE_NEXT 3 -#define DART_PTE_STATE_VALID 3 -#define DART_PTE_ADDR_MASK 0xFFFFFF000ull - -#define DART_NUM_SID 4 -#define DART_PAGE_SHIFT 12 - -#define DART_PAGE_SIZE BIT(DART_PAGE_SHIFT) -#define DART_PAGE_MASK (DART_PAGE_SIZE - 1ul) - -struct apple_t8010_dart_iommu { - struct device *dev; - struct iommu_device iommu; - void __iomem *base; - int is_init; - int is_pcie; - u64 iova_offset; - u64 **l2dma[DART_NUM_SID]; - u64 *l1dma[DART_NUM_SID]; - /* Protects DART page table allocation and register updates. 
*/ - spinlock_t dart_lock; -}; - -struct apple_t8010_dart_iommu_domain { - struct iommu_domain domain; - struct apple_t8010_dart_iommu *iommu; - int sid; -}; - -struct apple_t8010_dart_iommu_devdata { - struct apple_t8010_dart_iommu *iommu; - u32 sid; -}; - -static irqreturn_t apple_t8010_dart_iommu_irq(int irq, void *dev_id) -{ - struct apple_t8010_dart_iommu *im = dev_id; - u32 status, axi_req[2], addr, tlbstat; - - status = readl(im->base + DART_ERROR_STATUS); - tlbstat = readl(im->base + DART_TLB_STATUS); - axi_req[0] = readl(im->base + DART_ERROR_AXI_REQ0); - axi_req[1] = readl(im->base + DART_ERROR_AXI_REQ1); - addr = readl(im->base + DART_ERROR_ADDRESS); - - writel(status, im->base + DART_ERROR_STATUS); - writel(tlbstat, im->base + DART_TLB_STATUS); - - dev_err(im->dev, - "STATUS %08x AXI_REQ %08x:%08x ADDR %08x TLBSTAT %08x\n", - status, axi_req[0], axi_req[1], addr, tlbstat); - - return IRQ_HANDLED; -} - -static void apple_t8010_dart_tlb_flush(struct apple_t8010_dart_iommu *im, - u32 sidmask, int need_lock) -{ - unsigned long flags; - u32 status; - - if (need_lock) - spin_lock_irqsave(&im->dart_lock, flags); - writel(DART_TLB_OP_FLUSH | (sidmask << DART_TLB_OP_SID_SHIFT), - im->base + DART_TLB_OP); - while (1) { - status = readl(im->base + DART_TLB_OP); - if (!(status & DART_TLB_OP_BUSY)) - break; - } - if (need_lock) - spin_unlock_irqrestore(&im->dart_lock, flags); -} - -static u64 *apple_t8010_dart_get_pte(struct apple_t8010_dart_iommu *im, u32 sid, - u64 iova, int optional, - unsigned long *flags) -{ - unsigned int i, l1idx, l1base, l2idx, npgs, npg; - u64 phys, **l1pt, *l1dma, *l2dma; - void *dmava, *ptva; - dma_addr_t dmah; - - if (im->is_pcie) - sid = 0; - - if (!im->l1dma[sid]) { - spin_unlock_irqrestore(&im->dart_lock, *flags); - ptva = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - DART_PAGE_SHIFT + 2 - - PAGE_SHIFT); - dmava = dma_alloc_attrs(im->dev, DART_PAGE_SIZE * 4, &dmah, - GFP_KERNEL | __GFP_ZERO, - DMA_ATTR_WRITE_COMBINE); - 
spin_lock_irqsave(&im->dart_lock, *flags); - if (!im->l1dma[sid]) { - if (!ptva || !dmava) { - if (ptva) - free_pages((unsigned long)ptva, - DART_PAGE_SHIFT + 2 - - PAGE_SHIFT); - else - dev_err(im->dev, - "failed to allocate shadow L1 pagetable\n"); - if (dmava) - dma_free_attrs(im->dev, - DART_PAGE_SIZE * 4, - dmava, dmah, - DMA_ATTR_WRITE_COMBINE); - else - dev_err(im->dev, - "failed to allocate uncached L1 pagetable\n"); - return NULL; - } - im->l2dma[sid] = ptva; - im->l1dma[sid] = dmava; - phys = dmah; - for (i = 0; i < 4; i++) - writel((((phys >> DART_PAGE_SHIFT) + i) & - DART_TTBR_MASK) | - DART_TTBR_VALID, - im->base + DART_TTBR(sid, i)); - } else { - if (ptva) - free_pages((unsigned long)ptva, - DART_PAGE_SHIFT + 2 - PAGE_SHIFT); - if (dmava) - dma_free_attrs(im->dev, DART_PAGE_SIZE * 4, - dmava, dmah, - DMA_ATTR_WRITE_COMBINE); - } - } - - l1pt = im->l2dma[sid]; - l1idx = (iova >> 21) & 0x7FF; - - if (!l1pt[l1idx]) { - if (optional) - return NULL; - if (DART_PAGE_SHIFT < PAGE_SHIFT) - npgs = PAGE_SHIFT - DART_PAGE_SHIFT; - else - npgs = 0; - spin_unlock_irqrestore(&im->dart_lock, *flags); - dmava = dma_alloc_attrs(im->dev, DART_PAGE_SIZE << npgs, &dmah, - GFP_KERNEL | __GFP_ZERO, - DMA_ATTR_WRITE_COMBINE); - spin_lock_irqsave(&im->dart_lock, *flags); - if (!l1pt[l1idx]) { - if (!dmava) { - dev_err(im->dev, - "failed to allocate uncached L2 pagetable\n"); - return NULL; - } - npg = 1 << npgs; - phys = dmah; - l1dma = im->l1dma[sid]; - l1base = (l1idx >> npgs) << npgs; - for (i = 0; i < npg; i++) { - l1pt[l1base + i] = - dmava + (i << DART_PAGE_SHIFT); - l1dma[l1base + i] = - ((phys + (i << DART_PAGE_SHIFT)) & - DART_PTE_ADDR_MASK) | - DART_PTE_STATE_NEXT; - } - } else if (dmava) - dma_free_attrs(im->dev, DART_PAGE_SIZE << npgs, dmava, - dmah, DMA_ATTR_WRITE_COMBINE); - } - - l2dma = l1pt[l1idx]; - l2idx = (iova >> 12) & 0x1FF; - return &l2dma[l2idx]; -} - -static void apple_t8010_dart_iommu_enable(struct apple_t8010_dart_iommu *im, - u32 sid) -{ - u32 
val; - - val = readl(im->base + DART_CONFIG); - if (val & DART_CONFIG_TXEN(sid)) - return; - writel(val | DART_CONFIG_TXEN(sid), im->base + DART_CONFIG); - if (!(readl(im->base + DART_CONFIG) & DART_CONFIG_TXEN(sid))) - dev_err(im->dev, "failed to enable SID %d: 0x%08x.\n", sid, - readl(im->base + DART_CONFIG)); -} - -static bool apple_t8010_dart_iommu_capable(struct device *dev, - enum iommu_cap cap) -{ - switch (cap) { - case IOMMU_CAP_CACHE_COHERENCY: - return true; - default: - return false; - } -} - -static struct apple_t8010_dart_iommu_domain * -to_apple_t8010_dart_iommu_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct apple_t8010_dart_iommu_domain, domain); -} - -static struct iommu_domain * -apple_t8010_dart_iommu_domain_alloc_paging(struct device *dev) -{ - struct apple_t8010_dart_iommu_domain *idom; - - idom = kzalloc_obj(*idom, GFP_KERNEL); - if (!idom) - return NULL; - - idom->domain.pgsize_bitmap = SZ_4K; - idom->sid = -1; - - return &idom->domain; -} - -static void apple_t8010_dart_iommu_domain_free(struct iommu_domain *domain) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - - kfree(idom); -} - -static int apple_t8010_dart_iommu_attach_device(struct iommu_domain *domain, - struct device *dev, - struct iommu_domain *old) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - struct apple_t8010_dart_iommu_devdata *idd; - struct apple_t8010_dart_iommu *im; - unsigned long flags; - u32 sid, i, j; - - idd = dev_iommu_priv_get(dev); - if (!idd) - return -ENODEV; - im = idd->iommu; - - if (idom->iommu && idom->iommu != im) { - dev_err(dev, - "different DART already assigned to IOMMU domain.\n"); - return -EINVAL; - } - - if (!idom->iommu) { - idom->iommu = im; - if (im->is_pcie) { - idom->domain.geometry.aperture_start = 0x80000000ul; - idom->domain.geometry.aperture_end = 0xBBFFFFFFul; - } else { - idom->domain.geometry.aperture_start = 
0x00004000ul; - idom->domain.geometry.aperture_end = 0xFFFFFFFFul; - } - idom->domain.geometry.force_aperture = true; - } - - sid = im->is_pcie ? 0 : idd->sid; - if (idom->sid >= 0 && idom->sid != sid) { - dev_err(dev, - "multiple SIDs mapped to the same IOMMU domain.\n"); - return -EEXIST; - } - idom->sid = sid; - - spin_lock_irqsave(&im->dart_lock, flags); - - if (!im->is_init) { - im->is_init = 1; - writel(0x0020FFFC, im->base + DART_UNKNOWN_24); - writel(0x00000000, im->base + DART_UNKNOWN_2C); - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - writel(0x00000000, im->base + DART_TTBR(i, j)); - writel(0x000E0303, im->base + DART_FETCH_CONFIG); - writel(0x00000100, im->base + DART_DIAG_CONFIG); - for (i = 0; i < 6; i++) - writel(0x00000000, im->base + DART_TLB_UNKNOWN(i)); - writel(0x03F3FFFF, im->base + DART_TLB_STATUS); - - apple_t8010_dart_tlb_flush(im, 15, 0); - } - apple_t8010_dart_iommu_enable(im, sid); - - spin_unlock_irqrestore(&im->dart_lock, flags); - - return 0; -} - -static int apple_t8010_dart_iommu_map_pages(struct iommu_domain *domain, - unsigned long iova, - phys_addr_t paddr, size_t pgsize, - size_t pgcount, int prot, gfp_t gfp, - size_t *mapped) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - struct apple_t8010_dart_iommu *im = idom->iommu; - u64 len = (u64)pgsize * pgcount; - u64 end = iova + len - 1; - unsigned int i, npg; - unsigned long flags; - u64 *ptep; - int ret = 0; - - if (!im || idom->sid < 0) - return -EINVAL; - - if (!len || end < iova || iova < domain->geometry.aperture_start || - end > domain->geometry.aperture_end) - return -EINVAL; - - npg = (len + DART_PAGE_MASK) >> DART_PAGE_SHIFT; - - if (iova < im->iova_offset) - return -EINVAL; - iova -= im->iova_offset; - - spin_lock_irqsave(&im->dart_lock, flags); - for (i = 0; i < npg; i++) { - ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 0, &flags); - if (!ptep) { - ret = -ENOMEM; - break; - } - *ptep = (paddr & 
DART_PTE_ADDR_MASK) | DART_PTE_STATE_VALID; - iova += DART_PAGE_SIZE; - paddr += DART_PAGE_SIZE; - if (mapped) - *mapped += DART_PAGE_SIZE; - } - spin_unlock_irqrestore(&im->dart_lock, flags); - - return ret; -} - -static phys_addr_t -apple_t8010_dart_iommu_iova_to_phys(struct iommu_domain *domain, - dma_addr_t iova) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - struct apple_t8010_dart_iommu *im = idom->iommu; - unsigned long flags; - u64 *ptep, result = 0; - - if (idom->sid < 0) - return 0; - - if (iova < im->iova_offset) - return 0; - iova -= im->iova_offset; - - spin_lock_irqsave(&im->dart_lock, flags); - ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 1, &flags); - if (ptep) - result = *ptep; - spin_unlock_irqrestore(&im->dart_lock, flags); - - if (result & DART_PTE_STATE_MASK) - result = (result & DART_PTE_ADDR_MASK) | - (iova & DART_PAGE_MASK); - return result; -} - -static size_t apple_t8010_dart_iommu_unmap_pages(struct iommu_domain *domain, - unsigned long iova, - size_t pgsize, - size_t pgcount, - struct iommu_iotlb_gather *gather) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - struct apple_t8010_dart_iommu *im = idom->iommu; - size_t size = pgsize * pgcount; - unsigned int i, npg = (size + DART_PAGE_MASK) >> DART_PAGE_SHIFT; - unsigned long flags; - u64 *ptep; - - if (idom->sid < 0) - return 0; - - if (iova < im->iova_offset) - return 0; - iova -= im->iova_offset; - - spin_lock_irqsave(&im->dart_lock, flags); - for (i = 0; i < npg; i++) { - ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 1, &flags); - if (ptep) - *ptep = 0; - iova += DART_PAGE_SIZE; - } - spin_unlock_irqrestore(&im->dart_lock, flags); - - return size; -} - -static void apple_t8010_dart_iommu_flush_iotlb_all(struct iommu_domain *domain) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - - if (!idom->iommu) - return; - - if (idom->sid >= 
0) - apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); -} - -static void apple_t8010_dart_iommu_iotlb_sync(struct iommu_domain *domain, - struct iommu_iotlb_gather *gather) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - - if (!idom->iommu) - return; - - if (idom->sid >= 0) - apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); -} - -static int apple_t8010_dart_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, - size_t size) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - - if (!idom->iommu) - return 0; - - if (idom->sid >= 0) - apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); - return 0; -} - -static const struct iommu_ops apple_t8010_dart_iommu_ops; - -static struct iommu_device * -apple_t8010_dart_iommu_probe_device(struct device *dev) -{ - struct apple_t8010_dart_iommu_devdata *idd = dev_iommu_priv_get(dev); - - if (!idd || !idd->iommu) - return ERR_PTR(-ENODEV); - - device_link_add(dev, idd->iommu->dev, - DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); - - return &idd->iommu->iommu; -} - -static void apple_t8010_dart_iommu_release_device(struct device *dev) -{ - struct apple_t8010_dart_iommu_devdata *idd = dev_iommu_priv_get(dev); - - dev_iommu_priv_set(dev, NULL); - kfree(idd); -} - -static struct iommu_group * -apple_t8010_dart_iommu_device_group(struct device *dev) -{ -#ifdef CONFIG_PCI - if (dev_is_pci(dev)) - return pci_device_group(dev); -#endif - - return generic_device_group(dev); -} - -static int apple_t8010_dart_iommu_of_xlate(struct device *dev, - const struct of_phandle_args *args) -{ - struct platform_device *iommu_dev; - struct apple_t8010_dart_iommu_devdata *data; - - data = kzalloc_obj(*data, GFP_KERNEL); - if (!data) - return -ENOMEM; - - iommu_dev = of_find_device_by_node(args->np); - if (!iommu_dev) { - kfree(data); - return -ENODEV; - } - - data->iommu = platform_get_drvdata(iommu_dev); - if 
(!data->iommu) { - platform_device_put(iommu_dev); - kfree(data); - return -ENODEV; - } - - data->sid = args->args[0]; - dev_iommu_priv_set(dev, data); - - platform_device_put(iommu_dev); - - return 0; -} - -static const struct iommu_ops apple_t8010_dart_iommu_ops = { - .capable = apple_t8010_dart_iommu_capable, - .of_xlate = apple_t8010_dart_iommu_of_xlate, - .domain_alloc_paging = apple_t8010_dart_iommu_domain_alloc_paging, - .probe_device = apple_t8010_dart_iommu_probe_device, - .release_device = apple_t8010_dart_iommu_release_device, - .device_group = apple_t8010_dart_iommu_device_group, - .owner = THIS_MODULE, - .default_domain_ops = - &(const struct iommu_domain_ops){ - .attach_dev = apple_t8010_dart_iommu_attach_device, - .map_pages = apple_t8010_dart_iommu_map_pages, - .unmap_pages = apple_t8010_dart_iommu_unmap_pages, - .iova_to_phys = apple_t8010_dart_iommu_iova_to_phys, - .flush_iotlb_all = - apple_t8010_dart_iommu_flush_iotlb_all, - .iotlb_sync = apple_t8010_dart_iommu_iotlb_sync, - .iotlb_sync_map = apple_t8010_dart_iommu_iotlb_sync_map, - .free = apple_t8010_dart_iommu_domain_free, - }, -}; - -static int apple_t8010_dart_iommu_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct device_node *node = dev->of_node; - struct apple_t8010_dart_iommu *im; - struct resource *r; - int ret = 0, irq; - - im = devm_kzalloc(dev, sizeof(struct apple_t8010_dart_iommu), - GFP_KERNEL); - if (!im) - return -ENOMEM; - - im->dev = &pdev->dev; - platform_set_drvdata(pdev, im); - - spin_lock_init(&im->dart_lock); - - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (ret) - return dev_err_probe(dev, ret, "failed to set DMA mask\n"); - - if (of_property_read_bool(pdev->dev.of_node, "pcie-dart")) { - im->is_pcie = 1; - im->iova_offset = 0x80000000ul; - } - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - im->base = devm_ioremap_resource(&pdev->dev, r); - if (IS_ERR(im->base)) - return PTR_ERR(im->base); - - irq = 
platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, apple_t8010_dart_iommu_irq, 0, - dev_name(&pdev->dev), im); - if (ret < 0) - return ret; - - ret = iommu_device_sysfs_add(&im->iommu, dev, NULL, node->name); - if (ret) - return ret; - - ret = iommu_device_register(&im->iommu, &apple_t8010_dart_iommu_ops, - dev); - if (ret) - goto err_sysfs_remove; - - return 0; - -err_sysfs_remove: - iommu_device_sysfs_remove(&im->iommu); - return ret; -} - -static void apple_t8010_dart_iommu_remove(struct platform_device *pdev) -{ - struct apple_t8010_dart_iommu *im = platform_get_drvdata(pdev); - - iommu_device_unregister(&im->iommu); - iommu_device_sysfs_remove(&im->iommu); -} - -static const struct of_device_id apple_t8010_dart_iommu_match[] = { - { .compatible = "apple,t8010-dart" }, - {}, -}; -MODULE_DEVICE_TABLE(of, apple_t8010_dart_iommu_match); - -static struct platform_driver apple_t8010_dart_iommu_driver = { - .probe = apple_t8010_dart_iommu_probe, - .remove = apple_t8010_dart_iommu_remove, - .driver = { - .name = "apple-t8010-dart", - .of_match_table = apple_t8010_dart_iommu_match, - }, -}; -module_platform_driver(apple_t8010_dart_iommu_driver); - -MODULE_DESCRIPTION("Apple T8010 legacy DART IOMMU driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/nvme/host/pci-apple-h9p.c b/drivers/nvme/host/pci-apple-h9p.c new file mode 100644 index 00000000000000..06cc12995f8312 --- /dev/null +++ b/drivers/nvme/host/pci-apple-h9p.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Apple H9P/T8010 PCI NVMe glue. + * + * This file is included by pci.c because the FlatDMA path needs access to + * nvme-pci's private request, queue and controller structs. 
+ */ + +#define PCI_DEVICE_ID_APPLE_H9P_NVME 0x2002 +#define NVME_CMD_APPLE_H9P_FLATDMA BIT(5) + +#define APPLE_H9P_REG_INIT 0x1800 +#define APPLE_H9P_REG_INIT_REGULAR 0 +#define APPLE_H9P_REG_SCRATCH_SIZE_REQ 0x1808 +#define APPLE_H9P_REG_SCRATCH_ALIGN_REQ 0x180c +#define APPLE_H9P_REG_SCRATCH_BASE_LO 0x1810 +#define APPLE_H9P_REG_SCRATCH_BASE_HI 0x1814 +#define APPLE_H9P_REG_SCRATCH_SIZE 0x1818 +#define APPLE_H9P_REG_CORE_MASK 0x1824 +#define APPLE_H9P_REG_LOG_SIZE 0x1828 +#define APPLE_H9P_REG_BOOT_STATE 0x1b18 +#define APPLE_H9P_REG_BOOT_STATE_MAGIC 0xbfbfbfbfu +#define APPLE_H9P_NVME_MAX_SECTORS \ + (APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE >> SECTOR_SHIFT) + +struct apple_h9p_nvme_req { + u64 pages[APPLE_H9P_NVMMU_MAX_PAGES]; + unsigned int npages; +}; + +struct apple_h9p_nvme { + dma_addr_t scratch_dma; + u32 scratch_size; + struct apple_h9p_nvme_req req[APPLE_H9P_NVMMU_MAX_REQS]; + DECLARE_BITMAP(used_req, APPLE_H9P_NVMMU_MAX_REQS); + unsigned int last_req; + /* Protects the FlatDMA request-slot bitmap. */ + spinlock_t req_lock; +}; + +static bool nvme_pci_is_apple_h9p(struct pci_dev *pdev) +{ + return pdev->vendor == PCI_VENDOR_ID_APPLE && + pdev->device == PCI_DEVICE_ID_APPLE_H9P_NVME; +} + +static int nvme_pci_apple_h9p_find_scratch(struct nvme_dev *dev, + u32 scratch_size_req, + u32 scratch_align_req) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + struct pci_dev *pdev = to_pci_dev(dev->dev); + struct pci_host_bridge *bridge; + struct device_node *pcie_np; + struct device_node *mem_np; + struct resource res; + resource_size_t size; + u32 iova; + int ret; + + bridge = pci_find_host_bridge(pdev->bus); + if (!bridge) + return -ENODEV; + + pcie_np = bridge->dev.parent ? 
bridge->dev.parent->of_node : NULL; + if (!pcie_np) + pcie_np = bridge->dev.of_node; + if (!pcie_np) + return -ENODEV; + + mem_np = of_parse_phandle(pcie_np, "memory-region", 0); + if (!mem_np) + return -ENODEV; + + ret = of_address_to_resource(mem_np, 0, &res); + if (ret) + goto out_put_mem; + + ret = of_property_read_u32(pcie_np, "apple,nvmmu-iova", &iova); + if (ret) + goto out_put_mem; + + if (!scratch_size_req || scratch_size_req == U32_MAX) { + ret = -EINVAL; + goto out_put_mem; + } + if (!scratch_align_req || scratch_align_req == U32_MAX) + scratch_align_req = 1; + + size = resource_size(&res); + if (size < scratch_size_req || size > U32_MAX) { + ret = -ENOSPC; + goto out_put_mem; + } + if (!IS_ALIGNED(res.start, scratch_align_req) || + !IS_ALIGNED(iova, scratch_align_req)) { + ret = -EINVAL; + goto out_put_mem; + } + + h9p->scratch_dma = iova; + h9p->scratch_size = size; + dev_dbg(dev->dev, "Apple H9P NVMe scratch %#x@%pa as dma %#llx\n", + h9p->scratch_size, &res.start, (u64)h9p->scratch_dma); + +out_put_mem: + of_node_put(mem_np); + return ret; +} + +static int nvme_pci_apple_h9p_preinit(struct nvme_dev *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + u32 csts, core_mask, log_size; + u32 scratch_size, scratch_align; + int ret; + + if (!dev->apple_h9p) + return 0; + + if (pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x40) != + PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x17c, 0x10081008) != + PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x18c, 0) != PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x188, 0x40550000) != + PCIBIOS_SUCCESSFUL) + return -EIO; + + if (readl(dev->bar + APPLE_H9P_REG_BOOT_STATE) == + APPLE_H9P_REG_BOOT_STATE_MAGIC) + dev_dbg(dev->dev, "Apple H9P NVMe boot-state magic present\n"); + + core_mask = readl(dev->bar + APPLE_H9P_REG_CORE_MASK); + log_size = readl(dev->bar + APPLE_H9P_REG_LOG_SIZE); + scratch_size = readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE_REQ); + scratch_align = readl(dev->bar + 
APPLE_H9P_REG_SCRATCH_ALIGN_REQ); + + writel(0, dev->bar + NVME_REG_CC); + ret = readl_poll_timeout(dev->bar + NVME_REG_CSTS, csts, + !(csts & (NVME_CSTS_RDY | NVME_CSTS_CFS)), + 1000, 2000000); + if (ret) + return ret; + + ret = nvme_pci_apple_h9p_find_scratch(dev, scratch_size, + scratch_align); + if (ret) + return ret; + + dev_dbg(dev->dev, + "Apple H9P NVMe core_mask=%#x log_size=%#x scratch_req=%#x align=%#x\n", + core_mask, log_size, scratch_size, scratch_align); + return 0; +} + +static int nvme_pci_apple_h9p_prepare_enable(struct nvme_dev *dev) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + + if (!h9p) + return 0; + if (!h9p->scratch_size) + return -EINVAL; + + writel(APPLE_H9P_REG_INIT_REGULAR, dev->bar + APPLE_H9P_REG_INIT); + writel(lower_32_bits(h9p->scratch_dma), + dev->bar + APPLE_H9P_REG_SCRATCH_BASE_LO); + writel(upper_32_bits(h9p->scratch_dma), + dev->bar + APPLE_H9P_REG_SCRATCH_BASE_HI); + writel(h9p->scratch_size, dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); + readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); + + return 0; +} + +static blk_status_t nvme_pci_apple_h9p_alloc_req(struct nvme_dev *dev, + struct apple_h9p_nvme_req **req, + unsigned int *tag) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + unsigned long flags; + unsigned int idx; + + spin_lock_irqsave(&h9p->req_lock, flags); + idx = find_next_zero_bit(h9p->used_req, APPLE_H9P_NVMMU_MAX_REQS, + (h9p->last_req + 1) % + APPLE_H9P_NVMMU_MAX_REQS); + if (idx >= APPLE_H9P_NVMMU_MAX_REQS) + idx = find_first_zero_bit(h9p->used_req, + APPLE_H9P_NVMMU_MAX_REQS); + if (idx >= APPLE_H9P_NVMMU_MAX_REQS) { + spin_unlock_irqrestore(&h9p->req_lock, flags); + dev_dbg_ratelimited(dev->dev, + "Apple H9P NVMe FlatDMA slots exhausted\n"); + return BLK_STS_RESOURCE; + } + + h9p->last_req = idx; + __set_bit(idx, h9p->used_req); + spin_unlock_irqrestore(&h9p->req_lock, flags); + + *req = &h9p->req[idx]; + *tag = idx; + (*req)->npages = 0; + memset((*req)->pages, 0, sizeof((*req)->pages)); + return BLK_STS_OK; 
+} + +static void nvme_pci_apple_h9p_free_req(struct nvme_dev *dev, + struct apple_h9p_nvme_req *req) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + unsigned long flags; + unsigned int tag; + + if (!h9p || !req) + return; + + tag = req - h9p->req; + if (tag >= APPLE_H9P_NVMMU_MAX_REQS) + return; + + apple_h9p_pcie_map_nvmmu(dev->dev, tag, NULL, 0, NULL); + req->npages = 0; + + spin_lock_irqsave(&h9p->req_lock, flags); + __clear_bit(tag, h9p->used_req); + spin_unlock_irqrestore(&h9p->req_lock, flags); +} + +static bool nvme_pci_apple_h9p_unmap_data(struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; + unsigned int i; + + if (!iod->apple_h9p_req) + return false; + + for (i = 0; i < iod->nr_dma_vecs; i++) + dma_unmap_page(dev->dev, iod->dma_vecs[i].addr, + iod->dma_vecs[i].len, rq_dma_dir(req)); + if (iod->dma_vecs) { + mempool_free(iod->dma_vecs, dev->dmavec_mempool); + iod->dma_vecs = NULL; + } + iod->nr_dma_vecs = 0; + + nvme_pci_apple_h9p_free_req(dev, iod->apple_h9p_req); + iod->apple_h9p_req = NULL; + iod->cmd.common.flags &= ~NVME_CMD_APPLE_H9P_FLATDMA; + return true; +} + +static blk_status_t nvme_pci_apple_h9p_map_data(struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; + struct apple_h9p_nvme_req *hreq; + struct req_iterator iter; + struct bio_vec bv; + dma_addr_t flatdma; + u64 phys, offs = 0; + unsigned int tag, npages = 0, consumed = 0; + unsigned int total = blk_rq_payload_bytes(req); + blk_status_t status; + int ret; + + if (!dev->apple_h9p) + return BLK_STS_NOTSUPP; + if (total > APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE) + return BLK_STS_IOERR; + + status = nvme_pci_apple_h9p_alloc_req(dev, &hreq, &tag); + if (status) + return status; + + iod->apple_h9p_req = hreq; + iod->dma_vecs = 
mempool_alloc(dev->dmavec_mempool, GFP_ATOMIC); + if (!iod->dma_vecs) { + status = BLK_STS_RESOURCE; + goto out_unmap; + } + + rq_for_each_bvec(bv, req, iter) { + dma_addr_t dma_addr; + unsigned int len = bv.bv_len; + + if (WARN_ON_ONCE(iod->nr_dma_vecs >= + blk_rq_nr_phys_segments(req))) { + status = BLK_STS_IOERR; + goto out_unmap; + } + + dma_addr = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0); + if (dma_mapping_error(dev->dev, dma_addr)) { + status = BLK_STS_RESOURCE; + goto out_unmap; + } + + iod->dma_vecs[iod->nr_dma_vecs].addr = dma_addr; + iod->dma_vecs[iod->nr_dma_vecs].len = len; + iod->nr_dma_vecs++; + + phys = page_to_phys(bv.bv_page) + bv.bv_offset; + if (!consumed) { + offs = phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1); + phys -= offs; + len += offs; + } else if (phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1)) { + dev_err_ratelimited(dev->dev, + "Apple H9P FlatDMA segment is not page-aligned: phys=%#llx\n", + phys); + status = BLK_STS_IOERR; + goto out_unmap; + } + + if (consumed + bv.bv_len != total && + (len & (APPLE_H9P_NVMMU_PAGE_SIZE - 1))) { + dev_err_ratelimited(dev->dev, + "Apple H9P FlatDMA segment length is not page-aligned: len=%#x\n", + len); + status = BLK_STS_IOERR; + goto out_unmap; + } + + while (len) { + if (npages >= APPLE_H9P_NVMMU_MAX_PAGES) { + status = BLK_STS_IOERR; + goto out_unmap; + } + + hreq->pages[npages++] = phys; + phys += APPLE_H9P_NVMMU_PAGE_SIZE; + len = len > APPLE_H9P_NVMMU_PAGE_SIZE ? 
+ len - APPLE_H9P_NVMMU_PAGE_SIZE : 0; + } + + consumed += bv.bv_len; + } + + ret = apple_h9p_pcie_map_nvmmu(dev->dev, tag, hreq->pages, npages, + &flatdma); + if (ret) { + status = errno_to_blk_status(ret); + if (status == BLK_STS_NOTSUPP) + status = BLK_STS_IOERR; + goto out_unmap; + } + + hreq->npages = npages; + iod->total_len = total; + iod->cmd.common.flags |= NVME_CMD_APPLE_H9P_FLATDMA; + iod->cmd.common.dptr.prp1 = cpu_to_le64(flatdma + offs); + iod->cmd.common.dptr.prp2 = 0; + return BLK_STS_OK; + +out_unmap: + nvme_pci_apple_h9p_unmap_data(req); + return status; +} diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2c443efa450c28..3e03b898fbbc20 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -48,22 +48,6 @@ #define NVME_MAX_BYTES SZ_8M #define NVME_MAX_NR_DESCRIPTORS 5 -#define NVME_CMD_APPLE_H9P_FLATDMA BIT(5) - -#define APPLE_H9P_REG_INIT 0x1800 -#define APPLE_H9P_REG_INIT_REGULAR 0 -#define APPLE_H9P_REG_SCRATCH_SIZE_REQ 0x1808 -#define APPLE_H9P_REG_SCRATCH_ALIGN_REQ 0x180c -#define APPLE_H9P_REG_SCRATCH_BASE_LO 0x1810 -#define APPLE_H9P_REG_SCRATCH_BASE_HI 0x1814 -#define APPLE_H9P_REG_SCRATCH_SIZE 0x1818 -#define APPLE_H9P_REG_CORE_MASK 0x1824 -#define APPLE_H9P_REG_LOG_SIZE 0x1828 -#define APPLE_H9P_REG_BOOT_STATE 0x1b18 -#define APPLE_H9P_REG_BOOT_STATE_MAGIC 0xbfbfbfbfu -#define APPLE_H9P_NVME_MAX_SECTORS \ - (APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE >> SECTOR_SHIFT) - /* * For data SGLs we support a single descriptors worth of SGL entries. * For PRPs, segments don't matter at all. 
@@ -910,344 +894,7 @@ static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge, le32_to_cpu(sg_list[i].length), dir, attrs); } -struct apple_h9p_nvme_req { - u64 pages[APPLE_H9P_NVMMU_MAX_PAGES]; - unsigned int npages; -}; - -struct apple_h9p_nvme { - dma_addr_t scratch_dma; - u32 scratch_size; - struct apple_h9p_nvme_req req[APPLE_H9P_NVMMU_MAX_REQS]; - DECLARE_BITMAP(used_req, APPLE_H9P_NVMMU_MAX_REQS); - unsigned int last_req; - /* Protects the FlatDMA request-slot bitmap. */ - spinlock_t req_lock; -}; - -static bool nvme_pci_is_apple_h9p(struct pci_dev *pdev) -{ - return pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2002; -} - -static int nvme_pci_apple_h9p_find_scratch(struct nvme_dev *dev, - u32 scratch_size_req, - u32 scratch_align_req) -{ - struct apple_h9p_nvme *h9p = dev->apple_h9p; - struct pci_dev *pdev = to_pci_dev(dev->dev); - struct pci_host_bridge *bridge; - struct device_node *pcie_np; - struct device_node *mem_np; - struct resource res; - resource_size_t size; - u32 iova; - int ret; - - bridge = pci_find_host_bridge(pdev->bus); - if (!bridge) - return -ENODEV; - - pcie_np = bridge->dev.parent ? 
bridge->dev.parent->of_node : NULL; - if (!pcie_np) - pcie_np = bridge->dev.of_node; - if (!pcie_np) - return -ENODEV; - - mem_np = of_parse_phandle(pcie_np, "memory-region", 0); - if (!mem_np) - return -ENODEV; - - ret = of_address_to_resource(mem_np, 0, &res); - if (ret) - goto out_put_mem; - - ret = of_property_read_u32(pcie_np, "apple,nvmmu-iova", &iova); - if (ret) - goto out_put_mem; - - if (!scratch_size_req || scratch_size_req == U32_MAX) { - ret = -EINVAL; - goto out_put_mem; - } - if (!scratch_align_req || scratch_align_req == U32_MAX) - scratch_align_req = 1; - - size = resource_size(&res); - if (size < scratch_size_req || size > U32_MAX) { - ret = -ENOSPC; - goto out_put_mem; - } - if (!IS_ALIGNED(res.start, scratch_align_req) || - !IS_ALIGNED(iova, scratch_align_req)) { - ret = -EINVAL; - goto out_put_mem; - } - - h9p->scratch_dma = iova; - h9p->scratch_size = size; - dev_dbg(dev->dev, "Apple H9P NVMe scratch %#x@%pa as dma %#llx\n", - h9p->scratch_size, &res.start, (u64)h9p->scratch_dma); - -out_put_mem: - of_node_put(mem_np); - return ret; -} - -static int nvme_pci_apple_h9p_preinit(struct nvme_dev *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev->dev); - u32 csts, core_mask, log_size; - u32 scratch_size, scratch_align; - int ret; - - if (!dev->apple_h9p) - return 0; - - if (pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x40) != - PCIBIOS_SUCCESSFUL || - pci_write_config_dword(pdev, 0x17c, 0x10081008) != - PCIBIOS_SUCCESSFUL || - pci_write_config_dword(pdev, 0x18c, 0) != PCIBIOS_SUCCESSFUL || - pci_write_config_dword(pdev, 0x188, 0x40550000) != - PCIBIOS_SUCCESSFUL) - return -EIO; - - if (readl(dev->bar + APPLE_H9P_REG_BOOT_STATE) == - APPLE_H9P_REG_BOOT_STATE_MAGIC) - dev_dbg(dev->dev, "Apple H9P NVMe boot-state magic present\n"); - - core_mask = readl(dev->bar + APPLE_H9P_REG_CORE_MASK); - log_size = readl(dev->bar + APPLE_H9P_REG_LOG_SIZE); - scratch_size = readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE_REQ); - scratch_align = readl(dev->bar + 
APPLE_H9P_REG_SCRATCH_ALIGN_REQ); - - writel(0, dev->bar + NVME_REG_CC); - ret = readl_poll_timeout(dev->bar + NVME_REG_CSTS, csts, - !(csts & (NVME_CSTS_RDY | NVME_CSTS_CFS)), - 1000, 2000000); - if (ret) - return ret; - - ret = nvme_pci_apple_h9p_find_scratch(dev, scratch_size, - scratch_align); - if (ret) - return ret; - - dev_dbg(dev->dev, - "Apple H9P NVMe core_mask=%#x log_size=%#x scratch_req=%#x align=%#x\n", - core_mask, log_size, scratch_size, scratch_align); - return 0; -} - -static int nvme_pci_apple_h9p_prepare_enable(struct nvme_dev *dev) -{ - struct apple_h9p_nvme *h9p = dev->apple_h9p; - - if (!h9p) - return 0; - if (!h9p->scratch_size) - return -EINVAL; - - writel(APPLE_H9P_REG_INIT_REGULAR, dev->bar + APPLE_H9P_REG_INIT); - writel(lower_32_bits(h9p->scratch_dma), - dev->bar + APPLE_H9P_REG_SCRATCH_BASE_LO); - writel(upper_32_bits(h9p->scratch_dma), - dev->bar + APPLE_H9P_REG_SCRATCH_BASE_HI); - writel(h9p->scratch_size, dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); - readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); - - return 0; -} - -static blk_status_t nvme_pci_apple_h9p_alloc_req(struct nvme_dev *dev, - struct apple_h9p_nvme_req **req, - unsigned int *tag) -{ - struct apple_h9p_nvme *h9p = dev->apple_h9p; - unsigned long flags; - unsigned int idx; - - spin_lock_irqsave(&h9p->req_lock, flags); - idx = find_next_zero_bit(h9p->used_req, APPLE_H9P_NVMMU_MAX_REQS, - (h9p->last_req + 1) % - APPLE_H9P_NVMMU_MAX_REQS); - if (idx >= APPLE_H9P_NVMMU_MAX_REQS) - idx = find_first_zero_bit(h9p->used_req, - APPLE_H9P_NVMMU_MAX_REQS); - if (idx >= APPLE_H9P_NVMMU_MAX_REQS) { - spin_unlock_irqrestore(&h9p->req_lock, flags); - dev_dbg_ratelimited(dev->dev, - "Apple H9P NVMe FlatDMA slots exhausted\n"); - return BLK_STS_RESOURCE; - } - - h9p->last_req = idx; - __set_bit(idx, h9p->used_req); - spin_unlock_irqrestore(&h9p->req_lock, flags); - - *req = &h9p->req[idx]; - *tag = idx; - (*req)->npages = 0; - memset((*req)->pages, 0, sizeof((*req)->pages)); - return BLK_STS_OK; 
-} - -static void nvme_pci_apple_h9p_free_req(struct nvme_dev *dev, - struct apple_h9p_nvme_req *req) -{ - struct apple_h9p_nvme *h9p = dev->apple_h9p; - unsigned long flags; - unsigned int tag; - - if (!h9p || !req) - return; - - tag = req - h9p->req; - if (tag >= APPLE_H9P_NVMMU_MAX_REQS) - return; - - apple_h9p_pcie_map_nvmmu(dev->dev, tag, NULL, 0, NULL); - req->npages = 0; - - spin_lock_irqsave(&h9p->req_lock, flags); - __clear_bit(tag, h9p->used_req); - spin_unlock_irqrestore(&h9p->req_lock, flags); -} - -static bool nvme_pci_apple_h9p_unmap_data(struct request *req) -{ - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = req->mq_hctx->driver_data; - struct nvme_dev *dev = nvmeq->dev; - unsigned int i; - - if (!iod->apple_h9p_req) - return false; - - for (i = 0; i < iod->nr_dma_vecs; i++) - dma_unmap_page(dev->dev, iod->dma_vecs[i].addr, - iod->dma_vecs[i].len, rq_dma_dir(req)); - if (iod->dma_vecs) { - mempool_free(iod->dma_vecs, dev->dmavec_mempool); - iod->dma_vecs = NULL; - } - iod->nr_dma_vecs = 0; - - nvme_pci_apple_h9p_free_req(dev, iod->apple_h9p_req); - iod->apple_h9p_req = NULL; - iod->cmd.common.flags &= ~NVME_CMD_APPLE_H9P_FLATDMA; - return true; -} - -static blk_status_t nvme_pci_apple_h9p_map_data(struct request *req) -{ - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = req->mq_hctx->driver_data; - struct nvme_dev *dev = nvmeq->dev; - struct apple_h9p_nvme_req *hreq; - struct req_iterator iter; - struct bio_vec bv; - dma_addr_t flatdma; - u64 phys, offs = 0; - unsigned int tag, npages = 0, consumed = 0; - unsigned int total = blk_rq_payload_bytes(req); - blk_status_t status; - int ret; - - if (!dev->apple_h9p) - return BLK_STS_NOTSUPP; - if (total > APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE) - return BLK_STS_IOERR; - - status = nvme_pci_apple_h9p_alloc_req(dev, &hreq, &tag); - if (status) - return status; - - iod->apple_h9p_req = hreq; - iod->dma_vecs = 
mempool_alloc(dev->dmavec_mempool, GFP_ATOMIC); - if (!iod->dma_vecs) { - status = BLK_STS_RESOURCE; - goto out_unmap; - } - - rq_for_each_bvec(bv, req, iter) { - dma_addr_t dma_addr; - unsigned int len = bv.bv_len; - - if (WARN_ON_ONCE(iod->nr_dma_vecs >= - blk_rq_nr_phys_segments(req))) { - status = BLK_STS_IOERR; - goto out_unmap; - } - - dma_addr = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0); - if (dma_mapping_error(dev->dev, dma_addr)) { - status = BLK_STS_RESOURCE; - goto out_unmap; - } - - iod->dma_vecs[iod->nr_dma_vecs].addr = dma_addr; - iod->dma_vecs[iod->nr_dma_vecs].len = len; - iod->nr_dma_vecs++; - - phys = page_to_phys(bv.bv_page) + bv.bv_offset; - if (!consumed) { - offs = phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1); - phys -= offs; - len += offs; - } else if (phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1)) { - dev_err_ratelimited(dev->dev, - "Apple H9P FlatDMA segment is not page-aligned: phys=%#llx\n", - phys); - status = BLK_STS_IOERR; - goto out_unmap; - } - - if (consumed + bv.bv_len != total && - (len & (APPLE_H9P_NVMMU_PAGE_SIZE - 1))) { - dev_err_ratelimited(dev->dev, - "Apple H9P FlatDMA segment length is not page-aligned: len=%#x\n", - len); - status = BLK_STS_IOERR; - goto out_unmap; - } - - while (len) { - if (npages >= APPLE_H9P_NVMMU_MAX_PAGES) { - status = BLK_STS_IOERR; - goto out_unmap; - } - - hreq->pages[npages++] = phys; - phys += APPLE_H9P_NVMMU_PAGE_SIZE; - len = len > APPLE_H9P_NVMMU_PAGE_SIZE ? 
- len - APPLE_H9P_NVMMU_PAGE_SIZE : 0; - } - - consumed += bv.bv_len; - } - - ret = apple_h9p_pcie_map_nvmmu(dev->dev, tag, hreq->pages, npages, - &flatdma); - if (ret) { - status = errno_to_blk_status(ret); - if (status == BLK_STS_NOTSUPP) - status = BLK_STS_IOERR; - goto out_unmap; - } - - hreq->npages = npages; - iod->total_len = total; - iod->cmd.common.flags |= NVME_CMD_APPLE_H9P_FLATDMA; - iod->cmd.common.dptr.prp1 = cpu_to_le64(flatdma + offs); - iod->cmd.common.dptr.prp2 = 0; - return BLK_STS_OK; - -out_unmap: - nvme_pci_apple_h9p_unmap_data(req); - return status; -} +#include "pci-apple-h9p.c" static void nvme_unmap_metadata(struct request *req) { @@ -4690,7 +4337,7 @@ static const struct pci_device_id nvme_id_table[] = { */ .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_QDEPTH_ONE }, - { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2002), + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_H9P_NVME), .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_SHARED_TAGS }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, From 602a5a24c5f88ca737e24a5330b322d457741178 Mon Sep 17 00:00:00 2001 From: Pauli1Go <101004482+Pauli1Go@users.noreply.github.com> Date: Mon, 29 Jun 2026 00:23:06 +0200 Subject: [PATCH 3/3] apple: clean up H9P NVMe integration Build the Apple H9P FlatDMA glue as a normal nvme-pci object instead of including a .c file from pci.c. Add a small internal dma_ops hook table so the remaining H9P integration points are explicit and generic. Remove the direct PMGR regmap power forcing from the H9P PCIe host. The live iPad7 boot now relies on the attached genpd power domains and still enumerates the Apple NVMe endpoint. Name the remaining PCIe/PHY bring-up constants used by the H9P port setup sequence, keeping the tested register sequence unchanged. 
Signed-off-by: Pauli1Go <101004482+Pauli1Go@users.noreply.github.com> --- drivers/nvme/host/Makefile | 2 +- drivers/nvme/host/pci-apple-h9p.c | 92 ++++++-- drivers/nvme/host/pci-internal.h | 202 ++++++++++++++++ drivers/nvme/host/pci.c | 245 ++++--------------- drivers/pci/controller/pcie-apple-h9p.c | 299 +++++++++++------------- 5 files changed, 466 insertions(+), 374 deletions(-) create mode 100644 drivers/nvme/host/pci-internal.h diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 6414ec968f99ae..c07c3a77a1b7c4 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -19,7 +19,7 @@ nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o nvme-core-$(CONFIG_NVME_HOST_AUTH) += auth.o -nvme-y += pci.o +nvme-y += pci.o pci-apple-h9p.o nvme-fabrics-y += fabrics.o diff --git a/drivers/nvme/host/pci-apple-h9p.c b/drivers/nvme/host/pci-apple-h9p.c index 06cc12995f8312..6edac48f84f4da 100644 --- a/drivers/nvme/host/pci-apple-h9p.c +++ b/drivers/nvme/host/pci-apple-h9p.c @@ -1,12 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 /* * Apple H9P/T8010 PCI NVMe glue. - * - * This file is included by pci.c because the FlatDMA path needs access to - * nvme-pci's private request, queue and controller structs. 
*/ -#define PCI_DEVICE_ID_APPLE_H9P_NVME 0x2002 +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pci-internal.h" + #define NVME_CMD_APPLE_H9P_FLATDMA BIT(5) #define APPLE_H9P_REG_INIT 0x1800 @@ -38,17 +45,35 @@ struct apple_h9p_nvme { spinlock_t req_lock; }; -static bool nvme_pci_is_apple_h9p(struct pci_dev *pdev) +static struct apple_h9p_nvme *nvme_pci_apple_h9p(struct nvme_dev *dev) +{ + return dev->dma_data; +} + +static int nvme_pci_apple_h9p_init(struct nvme_dev *dev, int node) +{ + struct apple_h9p_nvme *h9p; + + h9p = kzalloc_node(sizeof(*h9p), GFP_KERNEL, node); + if (!h9p) + return -ENOMEM; + + spin_lock_init(&h9p->req_lock); + dev->dma_data = h9p; + return 0; +} + +static void nvme_pci_apple_h9p_exit(struct nvme_dev *dev) { - return pdev->vendor == PCI_VENDOR_ID_APPLE && - pdev->device == PCI_DEVICE_ID_APPLE_H9P_NVME; + kfree(dev->dma_data); + dev->dma_data = NULL; } static int nvme_pci_apple_h9p_find_scratch(struct nvme_dev *dev, u32 scratch_size_req, u32 scratch_align_req) { - struct apple_h9p_nvme *h9p = dev->apple_h9p; + struct apple_h9p_nvme *h9p = nvme_pci_apple_h9p(dev); struct pci_dev *pdev = to_pci_dev(dev->dev); struct pci_host_bridge *bridge; struct device_node *pcie_np; @@ -115,7 +140,7 @@ static int nvme_pci_apple_h9p_preinit(struct nvme_dev *dev) u32 scratch_size, scratch_align; int ret; - if (!dev->apple_h9p) + if (!nvme_pci_apple_h9p(dev)) return 0; if (pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x40) != @@ -156,7 +181,7 @@ static int nvme_pci_apple_h9p_preinit(struct nvme_dev *dev) static int nvme_pci_apple_h9p_prepare_enable(struct nvme_dev *dev) { - struct apple_h9p_nvme *h9p = dev->apple_h9p; + struct apple_h9p_nvme *h9p = nvme_pci_apple_h9p(dev); if (!h9p) return 0; @@ -178,7 +203,7 @@ static blk_status_t nvme_pci_apple_h9p_alloc_req(struct nvme_dev *dev, struct apple_h9p_nvme_req **req, unsigned int *tag) { - struct apple_h9p_nvme *h9p = dev->apple_h9p; + struct apple_h9p_nvme *h9p = 
nvme_pci_apple_h9p(dev); unsigned long flags; unsigned int idx; @@ -210,7 +235,7 @@ static blk_status_t nvme_pci_apple_h9p_alloc_req(struct nvme_dev *dev, static void nvme_pci_apple_h9p_free_req(struct nvme_dev *dev, struct apple_h9p_nvme_req *req) { - struct apple_h9p_nvme *h9p = dev->apple_h9p; + struct apple_h9p_nvme *h9p = nvme_pci_apple_h9p(dev); unsigned long flags; unsigned int tag; @@ -236,7 +261,7 @@ static bool nvme_pci_apple_h9p_unmap_data(struct request *req) struct nvme_dev *dev = nvmeq->dev; unsigned int i; - if (!iod->apple_h9p_req) + if (!iod->dma_private) return false; for (i = 0; i < iod->nr_dma_vecs; i++) @@ -248,8 +273,8 @@ static bool nvme_pci_apple_h9p_unmap_data(struct request *req) } iod->nr_dma_vecs = 0; - nvme_pci_apple_h9p_free_req(dev, iod->apple_h9p_req); - iod->apple_h9p_req = NULL; + nvme_pci_apple_h9p_free_req(dev, iod->dma_private); + iod->dma_private = NULL; iod->cmd.common.flags &= ~NVME_CMD_APPLE_H9P_FLATDMA; return true; } @@ -269,7 +294,7 @@ static blk_status_t nvme_pci_apple_h9p_map_data(struct request *req) blk_status_t status; int ret; - if (!dev->apple_h9p) + if (!nvme_pci_apple_h9p(dev)) return BLK_STS_NOTSUPP; if (total > APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE) return BLK_STS_IOERR; @@ -278,7 +303,7 @@ static blk_status_t nvme_pci_apple_h9p_map_data(struct request *req) if (status) return status; - iod->apple_h9p_req = hreq; + iod->dma_private = hreq; iod->dma_vecs = mempool_alloc(dev->dmavec_mempool, GFP_ATOMIC); if (!iod->dma_vecs) { status = BLK_STS_RESOURCE; @@ -362,3 +387,36 @@ static blk_status_t nvme_pci_apple_h9p_map_data(struct request *req) nvme_pci_apple_h9p_unmap_data(req); return status; } + +static bool nvme_pci_apple_h9p_reuse_admin_irq(struct nvme_dev *dev, + struct pci_dev *pdev, + struct nvme_queue *adminq) +{ + return (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR) && + pdev->msi_enabled && + test_bit(NVMEQ_ENABLED, &adminq->flags); +} + +static u32 nvme_pci_apple_h9p_queue_depth(struct 
nvme_dev *dev, u32 depth) +{ + return min_t(u32, depth, APPLE_H9P_NVMMU_MAX_REQS); +} + +static u32 nvme_pci_apple_h9p_max_hw_sectors(struct nvme_dev *dev, + u32 max_hw_sectors) +{ + return min_t(u32, max_hw_sectors, APPLE_H9P_NVME_MAX_SECTORS); +} + +const struct nvme_pci_dma_ops nvme_pci_apple_h9p_ops = { + .quirks = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_SHARED_TAGS, + .init = nvme_pci_apple_h9p_init, + .exit = nvme_pci_apple_h9p_exit, + .preinit = nvme_pci_apple_h9p_preinit, + .prepare_enable = nvme_pci_apple_h9p_prepare_enable, + .map_data = nvme_pci_apple_h9p_map_data, + .unmap_data = nvme_pci_apple_h9p_unmap_data, + .reuse_admin_irq = nvme_pci_apple_h9p_reuse_admin_irq, + .queue_depth = nvme_pci_apple_h9p_queue_depth, + .max_hw_sectors = nvme_pci_apple_h9p_max_hw_sectors, +}; diff --git a/drivers/nvme/host/pci-internal.h b/drivers/nvme/host/pci-internal.h new file mode 100644 index 00000000000000..c8175821d94c30 --- /dev/null +++ b/drivers/nvme/host/pci-internal.h @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Private nvme-pci structures shared with controller-specific glue. 
+ */ + +#ifndef _NVME_PCI_INTERNAL_H +#define _NVME_PCI_INTERNAL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nvme.h" + +#define PCI_DEVICE_ID_APPLE_H9P_NVME 0x2002 + +#define NVME_MAX_NR_DESCRIPTORS 5 + +struct nvme_dev; +struct nvme_queue; + +struct nvme_descriptor_pools { + struct dma_pool *large; + struct dma_pool *small; +}; + +struct nvme_pci_dma_ops { + u32 quirks; + int (*init)(struct nvme_dev *dev, int node); + void (*exit)(struct nvme_dev *dev); + int (*preinit)(struct nvme_dev *dev); + int (*prepare_enable)(struct nvme_dev *dev); + blk_status_t (*map_data)(struct request *req); + bool (*unmap_data)(struct request *req); + bool (*reuse_admin_irq)(struct nvme_dev *dev, struct pci_dev *pdev, + struct nvme_queue *adminq); + u32 (*queue_depth)(struct nvme_dev *dev, u32 depth); + u32 (*max_hw_sectors)(struct nvme_dev *dev, u32 max_hw_sectors); +}; + +/* + * Represents an NVM Express device. Each nvme_dev is a PCI function. 
+ */ +struct nvme_dev { + struct nvme_queue *queues; + struct blk_mq_tag_set tagset; + struct blk_mq_tag_set admin_tagset; + u32 __iomem *dbs; + struct device *dev; + unsigned int online_queues; + unsigned int max_qid; + unsigned int io_queues[HCTX_MAX_TYPES]; + unsigned int num_vecs; + u32 q_depth; + int io_sqes; + u32 db_stride; + void __iomem *bar; + unsigned long bar_mapped_size; + /* protects shutdown sequencing against reset and remove paths */ + struct mutex shutdown_lock; + bool subsystem; + u64 cmb_size; + bool cmb_use_sqes; + u32 cmbsz; + u32 cmbloc; + struct nvme_ctrl ctrl; + u32 last_ps; + bool hmb; + struct sg_table *hmb_sgt; + mempool_t *dmavec_mempool; + const struct nvme_pci_dma_ops *dma_ops; + void *dma_data; + + /* shadow doorbell buffer support: */ + __le32 *dbbuf_dbs; + dma_addr_t dbbuf_dbs_dma_addr; + __le32 *dbbuf_eis; + dma_addr_t dbbuf_eis_dma_addr; + + /* host memory buffer support: */ + u64 host_mem_size; + u32 nr_host_mem_descs; + u32 host_mem_descs_size; + dma_addr_t host_mem_descs_dma; + struct nvme_host_mem_buf_desc *host_mem_descs; + void **host_mem_desc_bufs; + unsigned int nr_allocated_queues; + unsigned int nr_write_queues; + unsigned int nr_poll_queues; + struct nvme_descriptor_pools descriptor_pools[]; +}; + +/* + * An NVM Express queue. Each device has at least two (one for admin + * commands and one for I/O commands). 
+ */ +struct nvme_queue { + struct nvme_dev *dev; + struct nvme_descriptor_pools descriptor_pools; + /* protects SQ tail updates */ + spinlock_t sq_lock; + void *sq_cmds; + /* protects CQ polling state; only used for poll queues */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u32 q_depth; + u16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + __le32 *dbbuf_sq_db; + __le32 *dbbuf_cq_db; + __le32 *dbbuf_sq_ei; + __le32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static inline size_t nvme_pci_sq_size(const struct nvme_queue *q) +{ + return q->q_depth << q->sqes; +} + +static inline size_t nvme_pci_cq_size(const struct nvme_queue *q) +{ + return q->q_depth * sizeof(struct nvme_completion); +} + +/* bits for iod->flags */ +enum nvme_iod_flags { + /* this command has been aborted by the timeout handler */ + IOD_ABORTED = 1U << 0, + + /* uses the small descriptor pool */ + IOD_SMALL_DESCRIPTOR = 1U << 1, + + /* single segment dma mapping */ + IOD_SINGLE_SEGMENT = 1U << 2, + + /* Data payload contains p2p memory */ + IOD_DATA_P2P = 1U << 3, + + /* Metadata contains p2p memory */ + IOD_META_P2P = 1U << 4, + + /* Data payload contains MMIO memory */ + IOD_DATA_MMIO = 1U << 5, + + /* Metadata contains MMIO memory */ + IOD_META_MMIO = 1U << 6, + + /* Metadata using non-coalesced MPTR */ + IOD_SINGLE_META_SEGMENT = 1U << 7, +}; + +struct nvme_dma_vec { + dma_addr_t addr; + unsigned int len; +}; + +/* + * The nvme_iod describes the data in an I/O. 
+ */ +struct nvme_iod { + struct nvme_request req; + struct nvme_command cmd; + u8 flags; + u8 nr_descriptors; + + size_t total_len; + struct dma_iova_state dma_state; + void *descriptors[NVME_MAX_NR_DESCRIPTORS]; + struct nvme_dma_vec *dma_vecs; + unsigned int nr_dma_vecs; + + dma_addr_t meta_dma; + size_t meta_total_len; + struct dma_iova_state meta_dma_state; + struct nvme_sgl_desc *meta_descriptor; + void *dma_private; +}; + +extern const struct nvme_pci_dma_ops nvme_pci_apple_h9p_ops; + +#endif /* _NVME_PCI_INTERNAL_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3e03b898fbbc20..7ff959e6ce8e0a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -5,7 +5,6 @@ */ #include -#include #include #include #include @@ -35,9 +34,7 @@ #include "trace.h" #include "nvme.h" - -#define SQ_SIZE(q) ((q)->q_depth << (q)->sqes) -#define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) +#include "pci-internal.h" /* Optimisation for I/Os between 4k and 128k */ #define NVME_SMALL_POOL_SIZE 256 @@ -46,7 +43,6 @@ * Arbitrary upper bound. */ #define NVME_MAX_BYTES SZ_8M -#define NVME_MAX_NR_DESCRIPTORS 5 /* * For data SGLs we support a single descriptors worth of SGL entries. @@ -281,69 +277,10 @@ static bool noacpi; module_param(noacpi, bool, 0444); MODULE_PARM_DESC(noacpi, "disable acpi bios quirks"); -struct nvme_dev; -struct nvme_queue; -struct apple_h9p_nvme; - static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); static void nvme_delete_io_queues(struct nvme_dev *dev); static void nvme_update_attrs(struct nvme_dev *dev); -struct nvme_descriptor_pools { - struct dma_pool *large; - struct dma_pool *small; -}; - -/* - * Represents an NVM Express device. Each nvme_dev is a PCI function. 
- */ -struct nvme_dev { - struct nvme_queue *queues; - struct blk_mq_tag_set tagset; - struct blk_mq_tag_set admin_tagset; - u32 __iomem *dbs; - struct device *dev; - unsigned online_queues; - unsigned max_qid; - unsigned io_queues[HCTX_MAX_TYPES]; - unsigned int num_vecs; - u32 q_depth; - int io_sqes; - u32 db_stride; - void __iomem *bar; - unsigned long bar_mapped_size; - struct mutex shutdown_lock; - bool subsystem; - u64 cmb_size; - bool cmb_use_sqes; - u32 cmbsz; - u32 cmbloc; - struct nvme_ctrl ctrl; - u32 last_ps; - bool hmb; - struct sg_table *hmb_sgt; - mempool_t *dmavec_mempool; - struct apple_h9p_nvme *apple_h9p; - - /* shadow doorbell buffer support: */ - __le32 *dbbuf_dbs; - dma_addr_t dbbuf_dbs_dma_addr; - __le32 *dbbuf_eis; - dma_addr_t dbbuf_eis_dma_addr; - - /* host memory buffer support: */ - u64 host_mem_size; - u32 nr_host_mem_descs; - u32 host_mem_descs_size; - dma_addr_t host_mem_descs_dma; - struct nvme_host_mem_buf_desc *host_mem_descs; - void **host_mem_desc_bufs; - unsigned int nr_allocated_queues; - unsigned int nr_write_queues; - unsigned int nr_poll_queues; - struct nvme_descriptor_pools descriptor_pools[]; -}; - static int io_queue_depth_set(const char *val, const struct kernel_param *kp) { return param_set_uint_minmax(val, kp, NVME_PCI_MIN_QUEUE_SIZE, @@ -365,95 +302,6 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) return container_of(ctrl, struct nvme_dev, ctrl); } -/* - * An NVM Express queue. Each device has at least two (one for admin - * commands and one for I/O commands). 
- */ -struct nvme_queue { - struct nvme_dev *dev; - struct nvme_descriptor_pools descriptor_pools; - spinlock_t sq_lock; - void *sq_cmds; - /* only used for poll queues: */ - spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; - struct nvme_completion *cqes; - dma_addr_t sq_dma_addr; - dma_addr_t cq_dma_addr; - u32 __iomem *q_db; - u32 q_depth; - u16 cq_vector; - u16 sq_tail; - u16 last_sq_tail; - u16 cq_head; - u16 qid; - u8 cq_phase; - u8 sqes; - unsigned long flags; -#define NVMEQ_ENABLED 0 -#define NVMEQ_SQ_CMB 1 -#define NVMEQ_DELETE_ERROR 2 -#define NVMEQ_POLLED 3 - __le32 *dbbuf_sq_db; - __le32 *dbbuf_cq_db; - __le32 *dbbuf_sq_ei; - __le32 *dbbuf_cq_ei; - struct completion delete_done; -}; - -/* bits for iod->flags */ -enum nvme_iod_flags { - /* this command has been aborted by the timeout handler */ - IOD_ABORTED = 1U << 0, - - /* uses the small descriptor pool */ - IOD_SMALL_DESCRIPTOR = 1U << 1, - - /* single segment dma mapping */ - IOD_SINGLE_SEGMENT = 1U << 2, - - /* Data payload contains p2p memory */ - IOD_DATA_P2P = 1U << 3, - - /* Metadata contains p2p memory */ - IOD_META_P2P = 1U << 4, - - /* Data payload contains MMIO memory */ - IOD_DATA_MMIO = 1U << 5, - - /* Metadata contains MMIO memory */ - IOD_META_MMIO = 1U << 6, - - /* Metadata using non-coalesced MPTR */ - IOD_SINGLE_META_SEGMENT = 1U << 7, -}; - -struct nvme_dma_vec { - dma_addr_t addr; - unsigned int len; -}; - -/* - * The nvme_iod describes the data in an I/O. 
- */ -struct nvme_iod { - struct nvme_request req; - struct nvme_command cmd; - u8 flags; - u8 nr_descriptors; - - size_t total_len; - struct dma_iova_state dma_state; - void *descriptors[NVME_MAX_NR_DESCRIPTORS]; - struct nvme_dma_vec *dma_vecs; - unsigned int nr_dma_vecs; - - dma_addr_t meta_dma; - size_t meta_total_len; - struct dma_iova_state meta_dma_state; - struct nvme_sgl_desc *meta_descriptor; - void *apple_h9p_req; -}; - static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) { return dev->nr_allocated_queues * 8 * dev->db_stride; @@ -894,8 +742,6 @@ static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge, le32_to_cpu(sg_list[i].length), dir, attrs); } -#include "pci-apple-h9p.c" - static void nvme_unmap_metadata(struct request *req) { struct nvme_queue *nvmeq = req->mq_hctx->driver_data; @@ -942,7 +788,8 @@ static void nvme_unmap_data(struct request *req) struct device *dma_dev = nvmeq->dev->dev; unsigned int attrs = 0; - if (nvmeq->dev->apple_h9p && nvme_pci_apple_h9p_unmap_data(req)) + if (nvmeq->dev->dma_ops && nvmeq->dev->dma_ops->unmap_data && + nvmeq->dev->dma_ops->unmap_data(req)) return; if (iod->flags & IOD_SINGLE_SEGMENT) { @@ -1259,8 +1106,8 @@ static blk_status_t nvme_map_data(struct request *req) struct blk_dma_iter iter; blk_status_t ret; - if (dev->apple_h9p) { - ret = nvme_pci_apple_h9p_map_data(req); + if (dev->dma_ops && dev->dma_ops->map_data) { + ret = dev->dma_ops->map_data(req); if (ret != BLK_STS_NOTSUPP) return ret; } @@ -1421,7 +1268,7 @@ static blk_status_t nvme_prep_rq(struct request *req) iod->meta_total_len = 0; iod->nr_dma_vecs = 0; iod->dma_vecs = NULL; - iod->apple_h9p_req = NULL; + iod->dma_private = NULL; ret = nvme_setup_cmd(req->q->queuedata, req); if (ret) @@ -2031,17 +1878,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) static void nvme_free_queue(struct nvme_queue *nvmeq) { - dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq), - (void *)nvmeq->cqes, 
nvmeq->cq_dma_addr); + dma_free_coherent(nvmeq->dev->dev, nvme_pci_cq_size(nvmeq), + (void *)nvmeq->cqes, nvmeq->cq_dma_addr); if (!nvmeq->sq_cmds) return; if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev), - nvmeq->sq_cmds, SQ_SIZE(nvmeq)); + nvmeq->sq_cmds, nvme_pci_sq_size(nvmeq)); } else { - dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq), - nvmeq->sq_cmds, nvmeq->sq_dma_addr); + dma_free_coherent(nvmeq->dev->dev, nvme_pci_sq_size(nvmeq), + nvmeq->sq_cmds, nvmeq->sq_dma_addr); } } @@ -2128,7 +1975,8 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, struct pci_dev *pdev = to_pci_dev(dev->dev); if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { - nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq)); + nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, + nvme_pci_sq_size(nvmeq)); if (nvmeq->sq_cmds) { nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, nvmeq->sq_cmds); @@ -2137,12 +1985,13 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, return 0; } - pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(nvmeq)); + pci_free_p2pmem(pdev, nvmeq->sq_cmds, + nvme_pci_sq_size(nvmeq)); } } - nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq), - &nvmeq->sq_dma_addr, GFP_KERNEL); + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, nvme_pci_sq_size(nvmeq), + &nvmeq->sq_dma_addr, GFP_KERNEL); if (!nvmeq->sq_cmds) return -ENOMEM; return 0; @@ -2157,7 +2006,7 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) nvmeq->sqes = qid ? 
dev->io_sqes : NVME_ADM_SQES; nvmeq->q_depth = depth; - nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq), + nvmeq->cqes = dma_alloc_coherent(dev->dev, nvme_pci_cq_size(nvmeq), &nvmeq->cq_dma_addr, GFP_KERNEL); if (!nvmeq->cqes) goto free_nvmeq; @@ -2177,8 +2026,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) return 0; free_cqdma: - dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes, - nvmeq->cq_dma_addr); + dma_free_coherent(dev->dev, nvme_pci_cq_size(nvmeq), + (void *)nvmeq->cqes, nvmeq->cq_dma_addr); free_nvmeq: return -ENOMEM; } @@ -2206,7 +2055,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; - memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq)); + memset((void *)nvmeq->cqes, 0, nvme_pci_cq_size(nvmeq)); nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; wmb(); /* ensure the first interrupt sees the initialization */ @@ -2408,9 +2257,11 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ); lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ); - result = nvme_pci_apple_h9p_prepare_enable(dev); - if (result) - return result; + if (dev->dma_ops && dev->dma_ops->prepare_enable) { + result = dev->dma_ops->prepare_enable(dev); + if (result) + return result; + } result = nvme_enable_ctrl(&dev->ctrl); if (result) @@ -3013,10 +2864,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) result = nvme_setup_io_queues_trylock(dev); if (result) return result; - reuse_single_vector = dev->apple_h9p && - (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR) && - pdev->msi_enabled && - test_bit(NVMEQ_ENABLED, &adminq->flags); + reuse_single_vector = dev->dma_ops && dev->dma_ops->reuse_admin_irq && + dev->dma_ops->reuse_admin_irq(dev, pdev, adminq); if (!reuse_single_vector && test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) @@ -3294,14 +3143,15 
@@ static int nvme_pci_enable(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n", dev->q_depth); } - if (dev->apple_h9p && - dev->q_depth > APPLE_H9P_NVMMU_MAX_REQS) - dev->q_depth = APPLE_H9P_NVMMU_MAX_REQS; + if (dev->dma_ops && dev->dma_ops->queue_depth) + dev->q_depth = dev->dma_ops->queue_depth(dev, dev->q_depth); dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */ - result = nvme_pci_apple_h9p_preinit(dev); - if (result) - goto free_irq; + if (dev->dma_ops && dev->dma_ops->preinit) { + result = dev->dma_ops->preinit(dev); + if (result) + goto free_irq; + } nvme_map_cmb(dev); @@ -3425,7 +3275,8 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) nvme_free_tagset(dev); put_device(dev->dev); kfree(dev->queues); - kfree(dev->apple_h9p); + if (dev->dma_ops && dev->dma_ops->exit) + dev->dma_ops->exit(dev); kfree(dev); } @@ -3760,13 +3611,14 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, quirks |= qentry->enabled_quirks; quirks &= ~qentry->disabled_quirks; } - if (nvme_pci_is_apple_h9p(pdev)) { - quirks |= NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_SHARED_TAGS; - dev->apple_h9p = kzalloc_node(sizeof(*dev->apple_h9p), - GFP_KERNEL, node); - if (!dev->apple_h9p) + if (pdev->vendor == PCI_VENDOR_ID_APPLE && + pdev->device == PCI_DEVICE_ID_APPLE_H9P_NVME) + dev->dma_ops = &nvme_pci_apple_h9p_ops; + if (dev->dma_ops) { + quirks |= dev->dma_ops->quirks; + ret = dev->dma_ops->init(dev, node); + if (ret) goto out_put_device; - spin_lock_init(&dev->apple_h9p->req_lock); } ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); @@ -3787,10 +3639,10 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, dev->ctrl.max_hw_sectors = min_t(u32, NVME_MAX_BYTES >> SECTOR_SHIFT, dma_opt_mapping_size(&pdev->dev) >> 9); - if (dev->apple_h9p) + if (dev->dma_ops && dev->dma_ops->max_hw_sectors) dev->ctrl.max_hw_sectors = - min_t(u32, dev->ctrl.max_hw_sectors, - APPLE_H9P_NVME_MAX_SECTORS); + 
dev->dma_ops->max_hw_sectors(dev, + dev->ctrl.max_hw_sectors); dev->ctrl.max_segments = NVME_MAX_SEGS; dev->ctrl.max_integrity_segments = 1; return dev; @@ -3798,7 +3650,8 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, out_put_device: put_device(dev->dev); kfree(dev->queues); - kfree(dev->apple_h9p); + if (dev->dma_ops && dev->dma_ops->exit) + dev->dma_ops->exit(dev); out_free_dev: kfree(dev); return ERR_PTR(ret); diff --git a/drivers/pci/controller/pcie-apple-h9p.c b/drivers/pci/controller/pcie-apple-h9p.c index 5ea301e2e2a7f9..2b0c2bb9a3b22a 100644 --- a/drivers/pci/controller/pcie-apple-h9p.c +++ b/drivers/pci/controller/pcie-apple-h9p.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -31,7 +30,6 @@ #include #include #include -#include #include #include @@ -43,16 +41,64 @@ #define H9P_MSI_PER_PORT (H9P_NUM_MSI / H9P_NUM_PORTS) #define H9P_CFG_PORT_STRIDE 0x8000 - -#define H9P_PHY0_PORTSTAT(port) (0x0100 + (port) * 0x0080) +#define H9P_CFG_PORT_MISC 0x08e0 + +#define H9P_PHY0_COMMON_CTL0 0x0004 +#define H9P_PHY0_COMMON_CTL1 0x0014 +#define H9P_PHY0_COMMON_CTL2 0x0024 +#define H9P_PHY0_COMMON_CTL3 0x0034 +#define H9P_PHY0_COMMON_CTL_ENABLE BIT(0) +#define H9P_PHY0_COMMON_CTL_INIT BIT(4) +#define H9P_PHY0_PORT_STRIDE 0x0080 +#define H9P_PHY0_PORTSTAT(port) (0x0100 + (port) * H9P_PHY0_PORT_STRIDE) +#define H9P_PHY0_PORT_CTL0(port) (0x0100 + (port) * H9P_PHY0_PORT_STRIDE) +#define H9P_PHY0_PORT_CTL1(port) (0x0124 + (port) * H9P_PHY0_PORT_STRIDE) +#define H9P_PHY0_PORT_CTL2(port) (0x0134 + (port) * H9P_PHY0_PORT_STRIDE) +#define H9P_PHY0_COMMON_STAT 0x0028 +#define H9P_PHY0_COMMON_STAT_INIT_DONE BIT(4) +#define H9P_PHY0_COMMON_STAT_READY BIT(0) +#define H9P_PHY0_PORT_LINK_RATE(port) (0x4020 + (port) * 0x0040) #define H9P_PHY1_PORTMASK 0x000c +#define H9P_PHY2_EQ_COMMON0 0x0180 +#define H9P_PHY2_EQ_COMMON1 0x0184 +#define H9P_PHY2_EQ_TIME0 0x0090 +#define H9P_PHY2_EQ_TIME1 0x0098 +#define H9P_PHY2_PORT_STRIDE 
0x0800 +#define H9P_PHY2_PORT(port, reg) ((reg) + (port) * H9P_PHY2_PORT_STRIDE) +#define H9P_PHY2_PORT_EQ_CTL 0x10088 +#define H9P_PHY2_PORT_IDLE 0x10784 +#define H9P_PHY2_PORT_EQ_PRESET 0x10004 +#define H9P_PHY2_PORT_RX_CTL0 0x20788 +#define H9P_PHY2_PORT_RX_CTL1 0x207a0 +#define H9P_PHY2_PORT_RX_CTL2 0x207a8 +#define H9P_PHY2_PORT_RX_CTL3 0x20400 +#define H9P_PHY2_PORT_TIMER0 0x2009c +#define H9P_PHY2_PORT_TIMER1 0x200dc +#define H9P_PHY2_PORT_TIMER2 0x200a0 +#define H9P_PHY2_PORT_TIMER3 0x200e0 +#define H9P_PHY2_PORT_TIMER4 0x200a4 +#define H9P_PHY2_PORT_TIMER5 0x200e4 +#define H9P_PHY2_PORT_CLEAR0 0x20330 +#define H9P_PHY2_PORT_CLEAR1 0x20340 +#define H9P_PHY2_PORT_CLEAR2 0x20350 + #define H9P_PORT_LTSSMCTL 0x0080 +#define H9P_PORT_LTSSM_ENABLE BIT(0) #define H9P_PORT_IRQSTAT 0x0100 #define H9P_PORT_IRQMASK 0x0104 +#define H9P_PORT_IRQMASK_PRE_LINK 0xff002fff +#define H9P_PORT_IRQSTAT_PRE_LINK 0x00ffd000 +#define H9P_PORT_IRQMASK_LINK_UP 0xff002f0f +#define H9P_PORT_PWRCTL 0x0124 +#define H9P_PORT_PWRCTL_INIT 0x31 #define H9P_PORT_MSIVECBASE 0x0128 #define H9P_PORT_ENABLE 0x0140 +#define H9P_PORT_ENABLE_APPLE BIT(31) #define H9P_PORT_LINKSTS 0x0208 +#define H9P_PORT_LINKSTS_LTSSM GENMASK(13, 8) +#define H9P_PORT_LTSSM_DETECT 0x11 +#define H9P_PORT_LTSSM_L0 0x14 #define H9P_LINK_SPEED_2_5GT 1 #define H9P_LINK_SPEED_8GT 3 @@ -61,6 +107,10 @@ #define H9P_PCIECLK_POSTUP1 0x000c #define H9P_PCIECLK_POSTUP2 0x4104 #define H9P_PCIECLK_POSTUP3 0x4100 +#define H9P_PCIECLK_POSTUP0_VALUE 0x00000007 +#define H9P_PCIECLK_POSTUP1_VALUE 0x80010005 +#define H9P_PCIECLK_POSTUP2_VALUE 0x00000003 +#define H9P_PCIECLK_POSTUP3_VALUE 0x00000003 #define H9P_NVMMU_TCB_CTRL 0x0004 #define H9P_NVMMU_TCB_BASE_LO 0x0008 @@ -83,16 +133,6 @@ #define H9P_DEFAULT_MSI_DOORBELL 0xbffff000ULL -#define APPLE_PMGR_AUTO_ENABLE BIT(28) -#define APPLE_PMGR_WAS_CLKGATED BIT(9) -#define APPLE_PMGR_WAS_PWRGATED BIT(8) -#define APPLE_PMGR_PS_ACTUAL GENMASK(7, 4) -#define APPLE_PMGR_PS_TARGET GENMASK(3, 
0) -#define APPLE_PMGR_FLAGS (APPLE_PMGR_WAS_CLKGATED | \ - APPLE_PMGR_WAS_PWRGATED) -#define APPLE_PMGR_PS_ACTIVE 0xf -#define APPLE_PMGR_PS_SET_TIMEOUT_US 10000 - struct apple_h9p_tunable { u32 offset; u32 size; @@ -331,95 +371,6 @@ static struct apple_h9p_pcie *apple_h9p_pcie_lookup(struct device *dev) return bridge ? pci_host_bridge_priv(bridge) : NULL; } -static int apple_h9p_pcie_force_power_domain(struct apple_h9p_pcie *pcie, - struct device_node *pd_np) -{ - struct device *dev = pcie->dev; - struct device_node *pmgr_np; - struct regmap *regmap; - u32 offset; - u32 val; - int ret; - - ret = of_property_read_u32_index(pd_np, "reg", 0, &offset); - if (ret) - return dev_err_probe(dev, ret, "%pOF missing PMGR reg\n", - pd_np); - - pmgr_np = of_get_parent(pd_np); - if (!pmgr_np) - return dev_err_probe(dev, -EINVAL, - "%pOF has no PMGR parent\n", pd_np); - - regmap = syscon_node_to_regmap(pmgr_np); - of_node_put(pmgr_np); - if (IS_ERR(regmap)) - return dev_err_probe(dev, PTR_ERR(regmap), - "%pOF missing PMGR regmap\n", pd_np); - - ret = regmap_read(regmap, offset, &val); - if (ret) - return dev_err_probe(dev, ret, - "%pOF PMGR read failed\n", pd_np); - - val &= ~(APPLE_PMGR_AUTO_ENABLE | APPLE_PMGR_FLAGS | - APPLE_PMGR_PS_TARGET); - val |= FIELD_PREP(APPLE_PMGR_PS_TARGET, APPLE_PMGR_PS_ACTIVE); - - ret = regmap_write(regmap, offset, val); - if (ret) - return dev_err_probe(dev, ret, - "%pOF PMGR write failed\n", pd_np); - - ret = regmap_read_poll_timeout_atomic(regmap, offset, val, - FIELD_GET(APPLE_PMGR_PS_ACTUAL, - val) == - APPLE_PMGR_PS_ACTIVE, 1, - APPLE_PMGR_PS_SET_TIMEOUT_US); - if (ret) - return dev_err_probe(dev, ret, - "%pOF PMGR active timeout\n", pd_np); - - val &= ~APPLE_PMGR_FLAGS; - val |= APPLE_PMGR_AUTO_ENABLE; - - ret = regmap_write(regmap, offset, val); - if (ret) - return dev_err_probe(dev, ret, - "%pOF PMGR auto-enable failed\n", - pd_np); - - return 0; -} - -static int apple_h9p_pcie_force_power_domains(struct apple_h9p_pcie *pcie) -{ - struct 
device *dev = pcie->dev; - struct device_node *pd_np; - int count; - int i; - int ret; - - count = of_count_phandle_with_args(dev->of_node, "power-domains", - "#power-domain-cells"); - if (count <= 0) - return 0; - - for (i = 0; i < count; i++) { - pd_np = of_parse_phandle(dev->of_node, "power-domains", i); - if (!pd_np) - return dev_err_probe(dev, -EINVAL, - "missing power-domain %d\n", i); - - ret = apple_h9p_pcie_force_power_domain(pcie, pd_np); - of_node_put(pd_np); - if (ret) - return ret; - } - - return 0; -} - static int apple_h9p_pcie_config_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { @@ -1000,10 +951,14 @@ static int apple_h9p_pcieclk_postup(struct apple_h9p_pcie *pcie) if (!pcie->base_pcieclk_postup) return 0; - writel(0x00000007, pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP0); - writel(0x80010005, pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP1); - writel(0x00000003, pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP2); - writel(0x00000003, pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP3); + writel(H9P_PCIECLK_POSTUP0_VALUE, + pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP0); + writel(H9P_PCIECLK_POSTUP1_VALUE, + pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP1); + writel(H9P_PCIECLK_POSTUP2_VALUE, + pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP2); + writel(H9P_PCIECLK_POSTUP3_VALUE, + pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP3); return 0; } @@ -1012,8 +967,8 @@ static bool apple_h9p_link_up(struct apple_h9p_pcie *pcie, unsigned int port) { u32 linksts = readl(pcie->base_port[port] + H9P_PORT_LINKSTS); - linksts = (linksts >> 8) & 0x3f; - return linksts >= 0x11 && linksts <= 0x14; + linksts = FIELD_GET(H9P_PORT_LINKSTS_LTSSM, linksts); + return linksts >= H9P_PORT_LTSSM_DETECT && linksts <= H9P_PORT_LTSSM_L0; } static int apple_h9p_setup_port(struct apple_h9p_pcie *pcie, unsigned int port) @@ -1027,23 +982,25 @@ static int apple_h9p_setup_port(struct apple_h9p_pcie *pcie, unsigned int port) 
gpiod_direction_output(pcie->perst[port], 0); - h9p_rmw(pcie->base_phy[0] + 0x134 + 0x80 * port, 1, 0); - h9p_rmw(pcie->base_phy[0] + 0x124 + 0x80 * port, 0, 1); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL2(port), 1, 0); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL1(port), 0, 1); - ret = apple_h9p_wait(pcie->base_phy[0] + 0x28, 0x10, 0x10, 0x10, - 250000); + ret = apple_h9p_wait(pcie->base_phy[0] + H9P_PHY0_COMMON_STAT, + H9P_PHY0_COMMON_STAT_INIT_DONE, + H9P_PHY0_COMMON_STAT_INIT_DONE, + H9P_PHY0_COMMON_STAT_INIT_DONE, 250000); if (ret) return dev_err_probe(dev, ret, "port %u init timeout\n", port); usleep_range(250, 1000); - h9p_rmw(pcie->base_phy[0] + 0x100 + 0x80 * port, 0, 1); - h9p_rmw(pcie->base_phy[0] + 0x100 + 0x80 * port, 0x100, 0); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL0(port), 0, 1); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL0(port), 0x100, 0); usleep_range(500, 1000); - h9p_rmw(pcie->base_phy[0] + 0x134 + 0x80 * port, 0, 1); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL2(port), 0, 1); writel(port ? 
0 : H9P_LINK_SPEED_8GT, - pcie->base_phy[0] + 0x4020 + 0x40 * port); - h9p_rmw(pcie->base_phy[0] + 0x124 + 0x80 * port, 0x100, 0); + pcie->base_phy[0] + H9P_PHY0_PORT_LINK_RATE(port)); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL1(port), 0x100, 0); cap = apple_h9p_read_pci_cap(pcie, port << 3, PCI_CAP_ID_EXP); if (cap) @@ -1057,14 +1014,17 @@ static int apple_h9p_setup_port(struct apple_h9p_pcie *pcie, unsigned int port) apple_h9p_apply_tunables(pcie->base_port[port], h9p_port_tunables, ARRAY_SIZE(h9p_port_tunables)); - h9p_rmw(pcie->base_config + port * H9P_CFG_PORT_STRIDE + 0x8e0, - 0, 1); + h9p_rmw(pcie->base_config + port * H9P_CFG_PORT_STRIDE + + H9P_CFG_PORT_MISC, 0, 1); - writel(0xff002fff, pcie->base_port[port] + H9P_PORT_IRQMASK); - writel(0x00ffd000, pcie->base_port[port] + H9P_PORT_IRQSTAT); + writel(H9P_PORT_IRQMASK_PRE_LINK, + pcie->base_port[port] + H9P_PORT_IRQMASK); + writel(H9P_PORT_IRQSTAT_PRE_LINK, + pcie->base_port[port] + H9P_PORT_IRQSTAT); - h9p_rmw(pcie->base_port[port] + H9P_PORT_ENABLE, 0, 0x80000000); - writel(0x31, pcie->base_port[port] + 0x124); + h9p_rmw(pcie->base_port[port] + H9P_PORT_ENABLE, 0, + H9P_PORT_ENABLE_APPLE); + writel(H9P_PORT_PWRCTL_INIT, pcie->base_port[port] + H9P_PORT_PWRCTL); writel(port * 0x10001 * H9P_MSI_PER_PORT, pcie->base_port[port] + H9P_PORT_MSIVECBASE); @@ -1083,33 +1043,46 @@ static int apple_h9p_setup_port(struct apple_h9p_pcie *pcie, unsigned int port) return dev_err_probe(dev, ret, "port %u PHY up timeout\n", port); - h9p_rmw(pcie->base_phy[2] + 0x180, 0, 0x4000); - h9p_rmw(pcie->base_phy[2] + 0x184, 0, 0x4000); - h9p_rmw(pcie->base_phy[2] + 0x90, 0xfff, 100); - h9p_rmw(pcie->base_phy[2] + 0x98, 0xfff, 25); - h9p_rmw(pcie->base_phy[2] + 0x10088 + 0x800 * port, 0, 0x4000); - writel(0, pcie->base_phy[2] + 0x10784 + 0x800 * port); - h9p_rmw(pcie->base_phy[2] + 0x10004 + 0x800 * port, 0xfff, 0x600); - writel(0x3105, pcie->base_phy[2] + 0x20788 + 0x800 * port); - h9p_rmw(pcie->base_phy[2] + 0x207a0 + 0x800 
* port, 0xff, 0x9f); - h9p_rmw(pcie->base_phy[2] + 0x207a8 + 0x800 * port, 0xff, 0x01); - h9p_rmw(pcie->base_phy[2] + 0x20400 + 0x800 * port, 0x1f, 0x0a); - writel(175, pcie->base_phy[2] + 0x2009c + 0x800 * port); - writel(175, pcie->base_phy[2] + 0x200dc + 0x800 * port); - writel(333, pcie->base_phy[2] + 0x200a0 + 0x800 * port); - writel(333, pcie->base_phy[2] + 0x200e0 + 0x800 * port); - writel(530, pcie->base_phy[2] + 0x200a4 + 0x800 * port); - writel(530, pcie->base_phy[2] + 0x200e4 + 0x800 * port); - writel(0, pcie->base_phy[2] + 0x20330 + 0x800 * port); - writel(0, pcie->base_phy[2] + 0x20340 + 0x800 * port); - writel(0, pcie->base_phy[2] + 0x20350 + 0x800 * port); - - writel(0xff002f0f, pcie->base_port[port] + H9P_PORT_IRQMASK); + h9p_rmw(pcie->base_phy[2] + H9P_PHY2_EQ_COMMON0, 0, 0x4000); + h9p_rmw(pcie->base_phy[2] + H9P_PHY2_EQ_COMMON1, 0, 0x4000); + h9p_rmw(pcie->base_phy[2] + H9P_PHY2_EQ_TIME0, 0xfff, 100); + h9p_rmw(pcie->base_phy[2] + H9P_PHY2_EQ_TIME1, 0xfff, 25); + h9p_rmw(pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_EQ_CTL), + 0, 0x4000); + writel(0, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_IDLE)); + h9p_rmw(pcie->base_phy[2] + + H9P_PHY2_PORT(port, H9P_PHY2_PORT_EQ_PRESET), 0xfff, 0x600); + writel(0x3105, pcie->base_phy[2] + + H9P_PHY2_PORT(port, H9P_PHY2_PORT_RX_CTL0)); + h9p_rmw(pcie->base_phy[2] + + H9P_PHY2_PORT(port, H9P_PHY2_PORT_RX_CTL1), 0xff, 0x9f); + h9p_rmw(pcie->base_phy[2] + + H9P_PHY2_PORT(port, H9P_PHY2_PORT_RX_CTL2), 0xff, 0x01); + h9p_rmw(pcie->base_phy[2] + + H9P_PHY2_PORT(port, H9P_PHY2_PORT_RX_CTL3), 0x1f, 0x0a); + writel(175, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER0)); + writel(175, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER1)); + writel(333, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER2)); + writel(333, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER3)); + writel(530, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER4)); + 
writel(530, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_TIMER5)); + writel(0, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_CLEAR0)); + writel(0, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_CLEAR1)); + writel(0, pcie->base_phy[2] + H9P_PHY2_PORT(port, H9P_PHY2_PORT_CLEAR2)); + + writel(H9P_PORT_IRQMASK_LINK_UP, + pcie->base_port[port] + H9P_PORT_IRQMASK); usleep_range(5000, 10000); - h9p_rmw(pcie->base_port[port] + H9P_PORT_LTSSMCTL, 0, 1); + h9p_rmw(pcie->base_port[port] + H9P_PORT_LTSSMCTL, 0, + H9P_PORT_LTSSM_ENABLE); ret = apple_h9p_wait(pcie->base_port[port] + H9P_PORT_LINKSTS, - 0x3f00, 0x1100, 0x1400, 500000); + H9P_PORT_LINKSTS_LTSSM, + FIELD_PREP(H9P_PORT_LINKSTS_LTSSM, + H9P_PORT_LTSSM_DETECT), + FIELD_PREP(H9P_PORT_LINKSTS_LTSSM, + H9P_PORT_LTSSM_L0), + 500000); if (ret) dev_warn(dev, "port %u link did not reach L0\n", port); @@ -1121,26 +1094,36 @@ static int apple_h9p_setup_ports(struct apple_h9p_pcie *pcie) unsigned int port; int ret; - writel(0x10, pcie->base_phy[0] + 0x0004); - h9p_rmw(pcie->base_phy[0] + 0x124, 0, 1); + writel(H9P_PHY0_COMMON_CTL_INIT, + pcie->base_phy[0] + H9P_PHY0_COMMON_CTL0); + h9p_rmw(pcie->base_phy[0] + H9P_PHY0_PORT_CTL1(0), 0, + H9P_PHY0_COMMON_CTL_ENABLE); - ret = apple_h9p_wait(pcie->base_phy[0] + 0x28, 0x10, 0x10, 0x10, - 250000); + ret = apple_h9p_wait(pcie->base_phy[0] + H9P_PHY0_COMMON_STAT, + H9P_PHY0_COMMON_STAT_INIT_DONE, + H9P_PHY0_COMMON_STAT_INIT_DONE, + H9P_PHY0_COMMON_STAT_INIT_DONE, 250000); if (ret) return dev_err_probe(pcie->dev, ret, "global PHY init timeout\n"); - ret = apple_h9p_wait(pcie->base_phy[0] + 0x28, 1, 1, 1, 250000); + ret = apple_h9p_wait(pcie->base_phy[0] + H9P_PHY0_COMMON_STAT, + H9P_PHY0_COMMON_STAT_READY, + H9P_PHY0_COMMON_STAT_READY, + H9P_PHY0_COMMON_STAT_READY, 250000); if (ret) return dev_err_probe(pcie->dev, ret, "global PHY ready timeout\n"); - writel(1, pcie->base_phy[0] + 0x34); + writel(H9P_PHY0_COMMON_CTL_ENABLE, + pcie->base_phy[0] + 
H9P_PHY0_COMMON_CTL3); apple_h9p_apply_tunables(pcie->base_phy[0], h9p_phy0_tunables, ARRAY_SIZE(h9p_phy0_tunables)); - writel(1, pcie->base_phy[0] + 0x14); + writel(H9P_PHY0_COMMON_CTL_ENABLE, + pcie->base_phy[0] + H9P_PHY0_COMMON_CTL1); usleep_range(5000, 10000); - writel(1, pcie->base_phy[0] + 0x24); + writel(H9P_PHY0_COMMON_CTL_ENABLE, + pcie->base_phy[0] + H9P_PHY0_COMMON_CTL2); usleep_range(500, 1000); for (port = 0; port < H9P_NUM_PORTS; port++) { @@ -1317,10 +1300,6 @@ static int apple_h9p_pcie_probe(struct platform_device *pdev) if (ret) return ret; - ret = apple_h9p_pcie_force_power_domains(pcie); - if (ret) - return ret; - pcie->pinctrl = devm_pinctrl_get_select_default(dev); if (PTR_ERR(pcie->pinctrl) == -ENODEV) pcie->pinctrl = NULL;