From 5d758f1d5fadcb3cb5ec0e82a5c4d8bfc08f94d1 Mon Sep 17 00:00:00 2001 From: Paul Praschl <101004482+Pauli1Go@users.noreply.github.com> Date: Sun, 28 Jun 2026 22:52:55 +0200 Subject: [PATCH 1/2] apple: t8010: add clean H9P NVMe storage --- .../bindings/pci/apple,t8010-pcie.yaml | 237 +++ arch/arm64/boot/dts/apple/t8010-ipad7.dtsi | 117 ++ drivers/iommu/Kconfig | 11 + drivers/iommu/Makefile | 1 + drivers/iommu/apple-t8010-dart.c | 680 ++++++++ drivers/nvme/host/pci.c | 444 +++++- drivers/pci/controller/Kconfig | 13 + drivers/pci/controller/Makefile | 1 + drivers/pci/controller/pcie-apple-h9p.c | 1368 +++++++++++++++++ include/linux/apple-h9p-pcie.h | 28 + 10 files changed, 2889 insertions(+), 11 deletions(-) create mode 100644 Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml create mode 100644 drivers/iommu/apple-t8010-dart.c create mode 100644 drivers/pci/controller/pcie-apple-h9p.c create mode 100644 include/linux/apple-h9p-pcie.h diff --git a/Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml b/Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml new file mode 100644 index 00000000000000..db6b1e88180262 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/apple,t8010-pcie.yaml @@ -0,0 +1,237 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/apple,t8010-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Apple H9P/T8010 PCIe Host Controller + +maintainers: + - Hector Martin + +description: + Apple A10/T8010 devices use an older H9P PCIe root complex for the + internal storage path. It exposes one ECAM window shared by up to four + root ports, controller-specific PHY and port register windows, and a + controller-local MSI block. 
+ +allOf: + - $ref: /schemas/pci/pci-host-bridge.yaml# + - $ref: /schemas/interrupt-controller/msi-controller.yaml# + +properties: + compatible: + const: apple,t8010-pcie + + reg: + minItems: 10 + maxItems: 10 + + reg-names: + items: + - const: config + - const: phy0 + - const: phy1 + - const: phy2 + - const: port0 + - const: port1 + - const: port2 + - const: port3 + - const: nvmmu0 + - const: pcieclk-postup + + interrupts: + description: + Four port state interrupts followed by 32 MSI interrupts and the + NVMMU fault interrupt for the active storage port. + minItems: 37 + maxItems: 37 + + clocks: + minItems: 3 + maxItems: 3 + + clock-names: + items: + - const: core + - const: aux + - const: ref + + power-domains: + minItems: 3 + maxItems: 3 + + power-domain-names: + items: + - const: core + - const: aux + - const: ref + + reset-gpios: + description: + PERST# GPIOs indexed by PCIe root port. + minItems: 1 + maxItems: 4 + + clkreq-gpios: + description: + CLKREQ# GPIOs indexed by PCIe root port. + minItems: 1 + maxItems: 4 + + msi-controller: true + + msi-parent: true + + apple,msi-doorbell: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + MSI doorbell address programmed into downstream endpoints. + + apple,enabled-ports: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Bitmask of root ports that should be powered and trained. + minimum: 1 + maximum: 15 + + apple,nvmmu-iova: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Base device-visible address for the reserved NVMMU/SART window. + + memory-region: + description: + Reserved physical window programmed into the H9P NVMMU/SART path. 
+ maxItems: 1 + + interrupt-controller: true + + '#interrupt-cells': + const: 1 + +required: + - compatible + - reg + - reg-names + - interrupts + - clocks + - clock-names + - power-domains + - power-domain-names + - reset-gpios + - clkreq-gpios + - bus-range + - ranges + - msi-controller + - msi-parent + - apple,enabled-ports + - apple,nvmmu-iova + - memory-region + - '#interrupt-cells' + +unevaluatedProperties: false + +examples: + - | + #include + #include + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + pcie0_nvmmu_window: nvmmu-window@8bee00000 { + reg = <0x8 0xbee00000 0x0 0x01200000>; + no-map; + }; + }; + + soc { + #address-cells = <2>; + #size-cells = <2>; + + pcie0: pcie@610000000 { + compatible = "apple,t8010-pcie"; + device_type = "pci"; + + reg = <0x6 0x10000000 0x0 0x1000000>, + <0x6 0x00000000 0x0 0x8000>, + <0x6 0x00008000 0x0 0x4000>, + <0x6 0x0a000000 0x0 0x40000>, + <0x6 0x01000000 0x0 0x4000>, + <0x6 0x02000000 0x0 0x4000>, + <0x6 0x03000000 0x0 0x4000>, + <0x6 0x04000000 0x0 0x4000>, + <0x6 0x01004000 0x0 0x4000>, + <0x6 0x00010000 0x0 0x8000>; + reg-names = "config", "phy0", "phy1", "phy2", + "port0", "port1", "port2", "port3", + "nvmmu0", "pcieclk-postup"; + + interrupt-parent = <&aic>; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + + msi-controller; + msi-parent = <&pcie0>; + apple,msi-doorbell = <0xbffff000>; + apple,enabled-ports = <0x1>; + apple,nvmmu-iova = <0xbc000000>; + memory-region = <&pcie0_nvmmu_window>; + + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + bus-range = <0x00 0x0f>; + ranges = <0x03000000 0x0 0xc0000000 + 0x7 0xc0000000 0x0 0x40000000>; + + clocks = <&clkref>, <&clkref>, <&clkref>; + clock-names = "core", "aux", "ref"; + power-domains = <&ps_pcie>, <&ps_pcie_aux>, + <&ps_pcie_ref>; + power-domain-names = "core", "aux", "ref"; + + reset-gpios = 
<&pinctrl_ap 12 GPIO_ACTIVE_HIGH>; + clkreq-gpios = <&pinctrl_ap 16 GPIO_ACTIVE_HIGH>; + }; + }; + +... diff --git a/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi b/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi index bd0e9c0b5696fa..bb89ca3e5df7b9 100644 --- a/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi +++ b/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi @@ -12,3 +12,120 @@ * now. */ #include "t8010-ipad6.dtsi" + +/ { + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + pcie0_nvmmu_window: nvmmu-window@8bee00000 { + reg = <0x8 0xbee00000 0x0 0x01200000>; + no-map; + }; + }; + + soc { + pcie0_dart0: iommu@601008000 { + compatible = "apple,t8010-dart"; + reg = <0x6 0x01008000 0x0 0x4000>; + #iommu-cells = <1>; + interrupt-parent = <&aic>; + interrupts = ; + power-domains = <&ps_pcie>; + pcie-dart; + }; + + pcie0: pcie@610000000 { + compatible = "apple,t8010-pcie"; + device_type = "pci"; + status = "okay"; + + reg = <0x6 0x10000000 0x0 0x1000000>, + <0x6 0x00000000 0x0 0x8000>, + <0x6 0x00008000 0x0 0x4000>, + <0x6 0x0a000000 0x0 0x40000>, + <0x6 0x01000000 0x0 0x4000>, + <0x6 0x02000000 0x0 0x4000>, + <0x6 0x03000000 0x0 0x4000>, + <0x6 0x04000000 0x0 0x4000>, + <0x6 0x01004000 0x0 0x4000>, + <0x6 0x00010000 0x0 0x8000>; + reg-names = "config", "phy0", "phy1", "phy2", + "port0", "port1", "port2", "port3", + "nvmmu0", "pcieclk-postup"; + + interrupt-parent = <&aic>; + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + + msi-controller; + msi-parent = <&pcie0>; + apple,msi-doorbell = <0xbffff000>; + apple,enabled-ports = <0x1>; + apple,nvmmu-iova = <0xbc000000>; + memory-region = <&pcie0_nvmmu_window>; + + iommu-map = <0x100 &pcie0_dart0 0 1>; + iommu-map-mask = <0xff00>; + + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + interrupt-controller; + bus-range = <0x00 0x0f>; + ranges = <0x03000000 0x0 0xc0000000 + 0x7 
0xc0000000 0x0 0x40000000>; + + clocks = <&clkref>, <&clkref>, <&clkref>; + clock-names = "core", "aux", "ref"; + power-domains = <&ps_pcie>, <&ps_pcie_aux>, + <&ps_pcie_ref>; + power-domain-names = "core", "aux", "ref"; + + reset-gpios = <&pinctrl_ap 12 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 13 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 14 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 15 GPIO_ACTIVE_HIGH>; + clkreq-gpios = <&pinctrl_ap 16 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 17 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 18 GPIO_ACTIVE_HIGH>, + <&pinctrl_ap 19 GPIO_ACTIVE_HIGH>; + }; + }; +}; diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index f86262b11416d1..a2ba9de4375878 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -315,6 +315,17 @@ config APPLE_DART Say Y here if you are using an Apple SoC. +config APPLE_T8010_DART + tristate "Apple T8010 legacy DART IOMMU support" + depends on ARCH_APPLE || COMPILE_TEST + select IOMMU_API + help + Support for the older DART layout used by Apple A10/T8010 PCIe. + This covers the pre-M1 register layout where TTBRs start at 0x40 + and the PCIe aperture is offset into the device IOVA space. + Enable this when bringing up H9P/Apple NVMe storage on T8010 + devices that cannot use the newer apple-dart register layout. 
+ config S390_IOMMU def_bool y if S390 && PCI depends on S390 && PCI diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 0275821f4ef985..dc0ef895c74c0c 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -36,4 +36,5 @@ obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o +obj-$(CONFIG_APPLE_T8010_DART) += apple-t8010-dart.o obj-$(CONFIG_IOMMU_DEBUG_PAGEALLOC) += iommu-debug-pagealloc.o diff --git a/drivers/iommu/apple-t8010-dart.c b/drivers/iommu/apple-t8010-dart.c new file mode 100644 index 00000000000000..9d927a4e46593a --- /dev/null +++ b/drivers/iommu/apple-t8010-dart.c @@ -0,0 +1,680 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DART IOMMU on Apple T8010/A10 SoCs + * + * Copyright (C) 2020 Corellium LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DART_TLB_OP 0x0000 +#define DART_TLB_OP_FLUSH 0x00000002 +#define DART_TLB_OP_SID_SHIFT 8 +#define DART_TLB_OP_SID(sid4) (1 << ((sid4) + 8)) +#define DART_TLB_OP_BUSY BIT(3) +#define DART_CONFIG 0x000C +#define DART_CONFIG_TXEN(sid4) (1 << ((sid4) * 8 + 7)) +#define DART_ERROR_STATUS 0x0010 +#define DART_ERROR_AXI_REQ0 0x0014 +#define DART_ERROR_AXI_REQ1 0x0018 +#define DART_ERROR_ADDRESS 0x001C +#define DART_DIAG_CONFIG 0x0020 +#define DART_UNKNOWN_24 0x0024 +#define DART_SID_REMAP 0x0028 +#define DART_UNKNOWN_2C 0x002C +#define DART_FETCH_CONFIG 0x0030 +#define DART_PERF_CONFIG 0x0078 +#define DART_TLB_MISS 0x007C +#define DART_TLB_WAIT 0x0080 +#define DART_TLB_HIT 0x0084 +#define DART_ST_MISS 0x0088 +#define DART_ST_WAIT 0x008C +#define DART_ST_HIT 0x0090 +#define DART_TTBR(sid4, l1idx4) (0x0040 + 16 * (sid4) + 4 * (l1idx4)) +#define DART_TTBR_VALID BIT(31) +#define DART_TTBR_MASK 0x00FFFFFF +#define DART_TLB_STATUS 0x1000 +#define DART_TLB_UNKNOWN(idx) (0x1004 + 
4 * (idx)) +#define DART_STT_PA_DATA(idx) (0x2000 + 4 * (idx)) +#define DART_STT_PA_DATA_COUNT 1024 +#define DART_SMMU_TLB_CFG 0x3000 +#define DART_SMMU_TLB_DATA_RD 0x3100 +#define DART_SMMU_TLB_DATA_RD_COUNT 4 +#define DART_DATA_DEBUG_IDX 0x3120 +#define DART_DATA_DEBUG_CNTL 0x3124 +#define DART_DATA_DEBUG_CNTL_READ BIT(0) +#define DART_DATA_DEBUG_CNTL_BUSY BIT(2) +#define DART_TLB_TAG(idx) (0x3800 + 4 * (idx)) +#define DART_TLB_TAG_COUNT 128 + +#define DART_PTE_STATE_MASK 3 +#define DART_PTE_STATE_INVALID 0 +#define DART_PTE_STATE_NEXT 3 +#define DART_PTE_STATE_VALID 3 +#define DART_PTE_ADDR_MASK 0xFFFFFF000ull + +#define DART_NUM_SID 4 +#define DART_PAGE_SHIFT 12 + +#define DART_PAGE_SIZE BIT(DART_PAGE_SHIFT) +#define DART_PAGE_MASK (DART_PAGE_SIZE - 1ul) + +struct apple_t8010_dart_iommu { + struct device *dev; + struct iommu_device iommu; + void __iomem *base; + int is_init; + int is_pcie; + u64 iova_offset; + u64 **l2dma[DART_NUM_SID]; + u64 *l1dma[DART_NUM_SID]; + /* Protects DART page table allocation and register updates. 
+	 */
+	spinlock_t dart_lock;
+};
+
+/*
+ * Per-domain state: the generic iommu_domain plus the DART instance and
+ * stream ID the domain gets bound to on attach (sid is -1 until then).
+ */
+struct apple_t8010_dart_iommu_domain {
+	struct iommu_domain domain;
+	struct apple_t8010_dart_iommu *iommu;
+	int sid;
+};
+
+/* Per-device private data set up by of_xlate: owning DART and stream ID. */
+struct apple_t8010_dart_iommu_devdata {
+	struct apple_t8010_dart_iommu *iommu;
+	u32 sid;
+};
+
+/*
+ * Interrupt handler: snapshot the error/TLB status and fault details, write
+ * the two status values back (NOTE(review): presumably write-one-to-clear,
+ * confirm against hardware documentation) and log everything.
+ */
+static irqreturn_t apple_t8010_dart_iommu_irq(int irq, void *dev_id)
+{
+	struct apple_t8010_dart_iommu *im = dev_id;
+	u32 status, axi_req[2], addr, tlbstat;
+
+	status = readl(im->base + DART_ERROR_STATUS);
+	tlbstat = readl(im->base + DART_TLB_STATUS);
+	axi_req[0] = readl(im->base + DART_ERROR_AXI_REQ0);
+	axi_req[1] = readl(im->base + DART_ERROR_AXI_REQ1);
+	addr = readl(im->base + DART_ERROR_ADDRESS);
+
+	writel(status, im->base + DART_ERROR_STATUS);
+	writel(tlbstat, im->base + DART_TLB_STATUS);
+
+	dev_err(im->dev,
+		"STATUS %08x AXI_REQ %08x:%08x ADDR %08x TLBSTAT %08x\n",
+		status, axi_req[0], axi_req[1], addr, tlbstat);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Issue a TLB flush for the stream IDs set in @sidmask and spin until the
+ * DART_TLB_OP_BUSY bit clears.
+ *
+ * @need_lock: non-zero to take dart_lock here; pass 0 when the caller
+ *             already holds it.
+ *
+ * NOTE(review): the poll below has no timeout. It runs with dart_lock held
+ * and IRQs off, so a DART that never clears BUSY hangs the CPU; consider a
+ * bounded poll such as readl_poll_timeout_atomic().
+ */
+static void apple_t8010_dart_tlb_flush(struct apple_t8010_dart_iommu *im,
+				       u32 sidmask, int need_lock)
+{
+	unsigned long flags;
+	u32 status;
+
+	if (need_lock)
+		spin_lock_irqsave(&im->dart_lock, flags);
+	writel(DART_TLB_OP_FLUSH | (sidmask << DART_TLB_OP_SID_SHIFT),
+	       im->base + DART_TLB_OP);
+	while (1) {
+		status = readl(im->base + DART_TLB_OP);
+		if (!(status & DART_TLB_OP_BUSY))
+			break;
+	}
+	if (need_lock)
+		spin_unlock_irqrestore(&im->dart_lock, flags);
+}
+
+/*
+ * Return a pointer to the L2 PTE slot for @iova in stream @sid, allocating
+ * the L1 and L2 tables on demand.
+ *
+ * Must be called with dart_lock held via spin_lock_irqsave() and the saved
+ * state in *@flags. The lock is dropped around every allocation and every
+ * release of an unused allocation, then re-taken, so it is always held again
+ * on return (including the NULL returns).
+ *
+ * @optional: non-zero makes a missing L2 table return NULL instead of being
+ *            allocated. A missing L1 table is still allocated.
+ *
+ * Returns NULL when an allocation fails or an optional L2 table is absent.
+ *
+ * NOTE(review): allocations use GFP_KERNEL after restoring the caller's IRQ
+ * state; this is only safe if callers are never in atomic context.
+ */
+static u64 *apple_t8010_dart_get_pte(struct apple_t8010_dart_iommu *im, u32 sid,
+				     u64 iova, int optional,
+				     unsigned long *flags)
+{
+	unsigned int i, l1idx, l1base, l2idx, npgs, npg;
+	u64 phys, **l1pt, *l1dma, *l2dma;
+	void *dmava, *ptva;
+	dma_addr_t dmah;
+	bool failed;
+
+	/* The PCIe DART only ever uses stream 0. */
+	if (im->is_pcie)
+		sid = 0;
+
+	if (!im->l1dma[sid]) {
+		spin_unlock_irqrestore(&im->dart_lock, *flags);
+		/*
+		 * ptva is the CPU-side shadow holding pointers to the L2
+		 * tables; dmava is the table the hardware walks.
+		 * NOTE(review): the order below goes negative when
+		 * PAGE_SHIFT > DART_PAGE_SHIFT + 2; confirm such page sizes
+		 * are not a target.
+		 */
+		ptva = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+						DART_PAGE_SHIFT + 2 -
+							PAGE_SHIFT);
+		dmava = dma_alloc_attrs(im->dev, DART_PAGE_SIZE * 4, &dmah,
+					GFP_KERNEL | __GFP_ZERO,
+					DMA_ATTR_WRITE_COMBINE);
+		spin_lock_irqsave(&im->dart_lock, *flags);
+		if (!im->l1dma[sid] && ptva && dmava) {
+			im->l2dma[sid] = ptva;
+			im->l1dma[sid] = dmava;
+			phys = dmah;
+			/* Four TTBRs, one per DART page of the L1 table. */
+			for (i = 0; i < 4; i++)
+				writel((((phys >> DART_PAGE_SHIFT) + i) &
+					DART_TTBR_MASK) |
+					       DART_TTBR_VALID,
+				       im->base + DART_TTBR(sid, i));
+		} else {
+			/*
+			 * Either an allocation failed (L1 still missing) or
+			 * another CPU installed the L1 table while the lock
+			 * was dropped. In both cases release what we hold.
+			 * dma_free_attrs() must not run with IRQs disabled,
+			 * so drop the lock around the release.
+			 */
+			failed = !im->l1dma[sid];
+
+			spin_unlock_irqrestore(&im->dart_lock, *flags);
+			if (ptva)
+				free_pages((unsigned long)ptva,
+					   DART_PAGE_SHIFT + 2 - PAGE_SHIFT);
+			else if (failed)
+				dev_err(im->dev,
+					"failed to allocate shadow L1 pagetable\n");
+			if (dmava)
+				dma_free_attrs(im->dev, DART_PAGE_SIZE * 4,
+					       dmava, dmah,
+					       DMA_ATTR_WRITE_COMBINE);
+			else if (failed)
+				dev_err(im->dev,
+					"failed to allocate uncached L1 pagetable\n");
+			spin_lock_irqsave(&im->dart_lock, *flags);
+			if (failed)
+				return NULL;
+		}
+	}
+
+	l1pt = im->l2dma[sid];
+	l1idx = (iova >> 21) & 0x7FF;
+
+	if (!l1pt[l1idx]) {
+		if (optional)
+			return NULL;
+		/*
+		 * Allocate at least one CPU page: when the CPU page is larger
+		 * than a DART page, one allocation backs 1 << npgs
+		 * consecutive L2 tables.
+		 */
+		if (DART_PAGE_SHIFT < PAGE_SHIFT)
+			npgs = PAGE_SHIFT - DART_PAGE_SHIFT;
+		else
+			npgs = 0;
+		spin_unlock_irqrestore(&im->dart_lock, *flags);
+		dmava = dma_alloc_attrs(im->dev, DART_PAGE_SIZE << npgs, &dmah,
+					GFP_KERNEL | __GFP_ZERO,
+					DMA_ATTR_WRITE_COMBINE);
+		spin_lock_irqsave(&im->dart_lock, *flags);
+		if (!l1pt[l1idx]) {
+			if (!dmava) {
+				dev_err(im->dev,
+					"failed to allocate uncached L2 pagetable\n");
+				return NULL;
+			}
+			npg = 1 << npgs;
+			phys = dmah;
+			l1dma = im->l1dma[sid];
+			l1base = (l1idx >> npgs) << npgs;
+			for (i = 0; i < npg; i++) {
+				l1pt[l1base + i] =
+					dmava + (i << DART_PAGE_SHIFT);
+				l1dma[l1base + i] =
+					((phys + (i << DART_PAGE_SHIFT)) &
+					 DART_PTE_ADDR_MASK) |
+					DART_PTE_STATE_NEXT;
+			}
+		} else if (dmava) {
+			/*
+			 * Lost the race for this L2 table. Free our copy with
+			 * the lock dropped, since dma_free_attrs() requires
+			 * IRQs to be enabled.
+			 */
+			spin_unlock_irqrestore(&im->dart_lock, *flags);
+			dma_free_attrs(im->dev, DART_PAGE_SIZE << npgs, dmava,
+				       dmah, DMA_ATTR_WRITE_COMBINE);
+			spin_lock_irqsave(&im->dart_lock, *flags);
+		}
+	}
+
+	l2dma = l1pt[l1idx];
+	l2idx = (iova >> 12) & 0x1FF;
+	return &l2dma[l2idx];
+}
+
+static void apple_t8010_dart_iommu_enable(struct apple_t8010_dart_iommu *im,
+					  u32 sid)
+{
+	u32
+ val;
+
+	val = readl(im->base + DART_CONFIG);
+	if (val & DART_CONFIG_TXEN(sid))
+		return;
+	writel(val | DART_CONFIG_TXEN(sid), im->base + DART_CONFIG);
+	/* Read back to check that the enable bit actually stuck. */
+	if (!(readl(im->base + DART_CONFIG) & DART_CONFIG_TXEN(sid)))
+		dev_err(im->dev, "failed to enable SID %u: 0x%08x.\n", sid,
+			readl(im->base + DART_CONFIG));
+}
+
+/* Report which IOMMU capabilities are supported: only cache coherency. */
+static bool apple_t8010_dart_iommu_capable(struct device *dev,
+					   enum iommu_cap cap)
+{
+	switch (cap) {
+	case IOMMU_CAP_CACHE_COHERENCY:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Convert a generic iommu_domain to the driver domain that embeds it. */
+static struct apple_t8010_dart_iommu_domain *
+to_apple_t8010_dart_iommu_domain(struct iommu_domain *dom)
+{
+	return container_of(dom, struct apple_t8010_dart_iommu_domain, domain);
+}
+
+/*
+ * Allocate a paging domain with 4K granularity. The domain is not bound to
+ * a DART or stream ID yet (sid = -1); that happens on attach.
+ * Returns NULL on allocation failure.
+ */
+static struct iommu_domain *
+apple_t8010_dart_iommu_domain_alloc_paging(struct device *dev)
+{
+	struct apple_t8010_dart_iommu_domain *idom;
+
+	idom = kzalloc_obj(*idom, GFP_KERNEL);
+	if (!idom)
+		return NULL;
+
+	idom->domain.pgsize_bitmap = SZ_4K;
+	idom->sid = -1;
+
+	return &idom->domain;
+}
+
+/* Free the driver domain; the DART page tables are not touched here. */
+static void apple_t8010_dart_iommu_domain_free(struct iommu_domain *domain)
+{
+	struct apple_t8010_dart_iommu_domain *idom =
+		to_apple_t8010_dart_iommu_domain(domain);
+
+	kfree(idom);
+}
+
+/*
+ * Attach @dev to @domain.
+ *
+ * On first attach the domain is bound to the device's DART and its aperture
+ * is set. The domain is bound to a single stream ID. The DART registers are
+ * initialised once per instance, then translation is enabled for the SID.
+ *
+ * Returns 0 on success, -ENODEV if the device has no DART private data,
+ * -EINVAL for an out-of-range SID or a domain already bound to another
+ * DART, and -EEXIST if the domain is already bound to a different SID.
+ */
+static int apple_t8010_dart_iommu_attach_device(struct iommu_domain *domain,
+						struct device *dev,
+						struct iommu_domain *old)
+{
+	struct apple_t8010_dart_iommu_domain *idom =
+		to_apple_t8010_dart_iommu_domain(domain);
+	struct apple_t8010_dart_iommu_devdata *idd;
+	struct apple_t8010_dart_iommu *im;
+	unsigned long flags;
+	u32 sid, i, j;
+
+	idd = dev_iommu_priv_get(dev);
+	if (!idd)
+		return -ENODEV;
+	im = idd->iommu;
+
+	/*
+	 * The SID comes straight from the devicetree. It indexes l1dma[] and
+	 * l2dma[] and selects the DART_CONFIG/TTBR fields, so reject anything
+	 * outside the DART_NUM_SID streams before touching the domain.
+	 */
+	sid = im->is_pcie ? 0 : idd->sid;
+	if (sid >= DART_NUM_SID) {
+		dev_err(dev, "SID %u out of range (max %d).\n", sid,
+			DART_NUM_SID - 1);
+		return -EINVAL;
+	}
+
+	if (idom->iommu && idom->iommu != im) {
+		dev_err(dev,
+			"different DART already assigned to IOMMU domain.\n");
+		return -EINVAL;
+	}
+
+	if (!idom->iommu) {
+		idom->iommu = im;
+		if (im->is_pcie) {
+			idom->domain.geometry.aperture_start = 0x80000000ul;
+			idom->domain.geometry.aperture_end = 0xBBFFFFFFul;
+		} else {
+			idom->domain.geometry.aperture_start = 0x00004000ul;
+			idom->domain.geometry.aperture_end = 0xFFFFFFFFul;
+		}
+		idom->domain.geometry.force_aperture = true;
+	}
+
+	if (idom->sid >= 0 && idom->sid != sid) {
+		dev_err(dev,
+			"multiple SIDs mapped to the same IOMMU domain.\n");
+		return -EEXIST;
+	}
+	idom->sid = sid;
+
+	spin_lock_irqsave(&im->dart_lock, flags);
+
+	/*
+	 * One-time register setup on first attach to this DART.
+	 * NOTE(review): the constants are undocumented magic values; confirm
+	 * them against a hardware reference.
+	 */
+	if (!im->is_init) {
+		im->is_init = 1;
+		writel(0x0020FFFC, im->base + DART_UNKNOWN_24);
+		writel(0x00000000, im->base + DART_UNKNOWN_2C);
+		for (i = 0; i < 4; i++)
+			for (j = 0; j < 4; j++)
+				writel(0x00000000, im->base + DART_TTBR(i, j));
+		writel(0x000E0303, im->base + DART_FETCH_CONFIG);
+		writel(0x00000100, im->base + DART_DIAG_CONFIG);
+		for (i = 0; i < 6; i++)
+			writel(0x00000000, im->base + DART_TLB_UNKNOWN(i));
+		writel(0x03F3FFFF, im->base + DART_TLB_STATUS);
+
+		/* Flush all four streams; dart_lock is already held. */
+		apple_t8010_dart_tlb_flush(im, 15, 0);
+	}
+	apple_t8010_dart_iommu_enable(im, sid);
+
+	spin_unlock_irqrestore(&im->dart_lock, flags);
+
+	return 0;
+}
+
+/*
+ * Map @pgcount pages of @pgsize bytes from @iova to @paddr, one DART page
+ * at a time, adding each mapped page to *@mapped.
+ *
+ * The range must lie inside the domain aperture. The DART's iova_offset is
+ * subtracted before the tables are indexed. @prot and @gfp are currently
+ * ignored.
+ *
+ * Returns 0 on success, -EINVAL for an unattached domain or a bad range,
+ * and -ENOMEM if a page table cannot be allocated.
+ */
+static int apple_t8010_dart_iommu_map_pages(struct iommu_domain *domain,
+					    unsigned long iova,
+					    phys_addr_t paddr, size_t pgsize,
+					    size_t pgcount, int prot, gfp_t gfp,
+					    size_t *mapped)
+{
+	struct apple_t8010_dart_iommu_domain *idom =
+		to_apple_t8010_dart_iommu_domain(domain);
+	struct apple_t8010_dart_iommu *im = idom->iommu;
+	u64 len = (u64)pgsize * pgcount;
+	u64 end = iova + len - 1;
+	unsigned int i, npg;
+	unsigned long flags;
+	u64 *ptep;
+	int ret = 0;
+
+	if (!im || idom->sid < 0)
+		return -EINVAL;
+
+	/* Reject empty, wrapping or out-of-aperture ranges. */
+	if (!len || end < iova || iova < domain->geometry.aperture_start ||
+	    end > domain->geometry.aperture_end)
+		return -EINVAL;
+
+	npg = (len + DART_PAGE_MASK) >> DART_PAGE_SHIFT;
+
+	if (iova < im->iova_offset)
+		return -EINVAL;
+	iova -= im->iova_offset;
+
+	spin_lock_irqsave(&im->dart_lock, flags);
+	for (i = 0; i < npg; i++) {
+		ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 0, &flags);
+		if (!ptep) {
+			ret = -ENOMEM;
+			break;
+		}
+		*ptep = (paddr &
DART_PTE_ADDR_MASK) | DART_PTE_STATE_VALID; + iova += DART_PAGE_SIZE; + paddr += DART_PAGE_SIZE; + if (mapped) + *mapped += DART_PAGE_SIZE; + } + spin_unlock_irqrestore(&im->dart_lock, flags); + + return ret; +} + +static phys_addr_t +apple_t8010_dart_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + struct apple_t8010_dart_iommu *im = idom->iommu; + unsigned long flags; + u64 *ptep, result = 0; + + if (idom->sid < 0) + return 0; + + if (iova < im->iova_offset) + return 0; + iova -= im->iova_offset; + + spin_lock_irqsave(&im->dart_lock, flags); + ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 1, &flags); + if (ptep) + result = *ptep; + spin_unlock_irqrestore(&im->dart_lock, flags); + + if (result & DART_PTE_STATE_MASK) + result = (result & DART_PTE_ADDR_MASK) | + (iova & DART_PAGE_MASK); + return result; +} + +static size_t apple_t8010_dart_iommu_unmap_pages(struct iommu_domain *domain, + unsigned long iova, + size_t pgsize, + size_t pgcount, + struct iommu_iotlb_gather *gather) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + struct apple_t8010_dart_iommu *im = idom->iommu; + size_t size = pgsize * pgcount; + unsigned int i, npg = (size + DART_PAGE_MASK) >> DART_PAGE_SHIFT; + unsigned long flags; + u64 *ptep; + + if (idom->sid < 0) + return 0; + + if (iova < im->iova_offset) + return 0; + iova -= im->iova_offset; + + spin_lock_irqsave(&im->dart_lock, flags); + for (i = 0; i < npg; i++) { + ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 1, &flags); + if (ptep) + *ptep = 0; + iova += DART_PAGE_SIZE; + } + spin_unlock_irqrestore(&im->dart_lock, flags); + + return size; +} + +static void apple_t8010_dart_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + + if (!idom->iommu) + return; + + if (idom->sid >= 
0) + apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); +} + +static void apple_t8010_dart_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + + if (!idom->iommu) + return; + + if (idom->sid >= 0) + apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); +} + +static int apple_t8010_dart_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, + size_t size) +{ + struct apple_t8010_dart_iommu_domain *idom = + to_apple_t8010_dart_iommu_domain(domain); + + if (!idom->iommu) + return 0; + + if (idom->sid >= 0) + apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); + return 0; +} + +static const struct iommu_ops apple_t8010_dart_iommu_ops; + +static struct iommu_device * +apple_t8010_dart_iommu_probe_device(struct device *dev) +{ + struct apple_t8010_dart_iommu_devdata *idd = dev_iommu_priv_get(dev); + + if (!idd || !idd->iommu) + return ERR_PTR(-ENODEV); + + device_link_add(dev, idd->iommu->dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); + + return &idd->iommu->iommu; +} + +static void apple_t8010_dart_iommu_release_device(struct device *dev) +{ + struct apple_t8010_dart_iommu_devdata *idd = dev_iommu_priv_get(dev); + + dev_iommu_priv_set(dev, NULL); + kfree(idd); +} + +static struct iommu_group * +apple_t8010_dart_iommu_device_group(struct device *dev) +{ +#ifdef CONFIG_PCI + if (dev_is_pci(dev)) + return pci_device_group(dev); +#endif + + return generic_device_group(dev); +} + +static int apple_t8010_dart_iommu_of_xlate(struct device *dev, + const struct of_phandle_args *args) +{ + struct platform_device *iommu_dev; + struct apple_t8010_dart_iommu_devdata *data; + + data = kzalloc_obj(*data, GFP_KERNEL); + if (!data) + return -ENOMEM; + + iommu_dev = of_find_device_by_node(args->np); + if (!iommu_dev) { + kfree(data); + return -ENODEV; + } + + data->iommu = platform_get_drvdata(iommu_dev); + if 
(!data->iommu) { + platform_device_put(iommu_dev); + kfree(data); + return -ENODEV; + } + + data->sid = args->args[0]; + dev_iommu_priv_set(dev, data); + + platform_device_put(iommu_dev); + + return 0; +} + +static const struct iommu_ops apple_t8010_dart_iommu_ops = { + .capable = apple_t8010_dart_iommu_capable, + .of_xlate = apple_t8010_dart_iommu_of_xlate, + .domain_alloc_paging = apple_t8010_dart_iommu_domain_alloc_paging, + .probe_device = apple_t8010_dart_iommu_probe_device, + .release_device = apple_t8010_dart_iommu_release_device, + .device_group = apple_t8010_dart_iommu_device_group, + .owner = THIS_MODULE, + .default_domain_ops = + &(const struct iommu_domain_ops){ + .attach_dev = apple_t8010_dart_iommu_attach_device, + .map_pages = apple_t8010_dart_iommu_map_pages, + .unmap_pages = apple_t8010_dart_iommu_unmap_pages, + .iova_to_phys = apple_t8010_dart_iommu_iova_to_phys, + .flush_iotlb_all = + apple_t8010_dart_iommu_flush_iotlb_all, + .iotlb_sync = apple_t8010_dart_iommu_iotlb_sync, + .iotlb_sync_map = apple_t8010_dart_iommu_iotlb_sync_map, + .free = apple_t8010_dart_iommu_domain_free, + }, +}; + +static int apple_t8010_dart_iommu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *node = dev->of_node; + struct apple_t8010_dart_iommu *im; + struct resource *r; + int ret = 0, irq; + + im = devm_kzalloc(dev, sizeof(struct apple_t8010_dart_iommu), + GFP_KERNEL); + if (!im) + return -ENOMEM; + + im->dev = &pdev->dev; + platform_set_drvdata(pdev, im); + + spin_lock_init(&im->dart_lock); + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + return dev_err_probe(dev, ret, "failed to set DMA mask\n"); + + if (of_property_read_bool(pdev->dev.of_node, "pcie-dart")) { + im->is_pcie = 1; + im->iova_offset = 0x80000000ul; + } + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + im->base = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(im->base)) + return PTR_ERR(im->base); + + irq = 
platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(&pdev->dev, irq, apple_t8010_dart_iommu_irq, 0, + dev_name(&pdev->dev), im); + if (ret < 0) + return ret; + + ret = iommu_device_sysfs_add(&im->iommu, dev, NULL, node->name); + if (ret) + return ret; + + ret = iommu_device_register(&im->iommu, &apple_t8010_dart_iommu_ops, + dev); + if (ret) + goto err_sysfs_remove; + + return 0; + +err_sysfs_remove: + iommu_device_sysfs_remove(&im->iommu); + return ret; +} + +static void apple_t8010_dart_iommu_remove(struct platform_device *pdev) +{ + struct apple_t8010_dart_iommu *im = platform_get_drvdata(pdev); + + iommu_device_unregister(&im->iommu); + iommu_device_sysfs_remove(&im->iommu); +} + +static const struct of_device_id apple_t8010_dart_iommu_match[] = { + { .compatible = "apple,t8010-dart" }, + {}, +}; +MODULE_DEVICE_TABLE(of, apple_t8010_dart_iommu_match); + +static struct platform_driver apple_t8010_dart_iommu_driver = { + .probe = apple_t8010_dart_iommu_probe, + .remove = apple_t8010_dart_iommu_remove, + .driver = { + .name = "apple-t8010-dart", + .of_match_table = apple_t8010_dart_iommu_match, + }, +}; +module_platform_driver(apple_t8010_dart_iommu_driver); + +MODULE_DESCRIPTION("Apple T8010 legacy DART IOMMU driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b5f84620067899..2c443efa450c28 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -5,7 +5,9 @@ */ #include +#include #include +#include #include #include #include @@ -13,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +23,8 @@ #include #include #include +#include +#include #include #include #include @@ -43,6 +48,22 @@ #define NVME_MAX_BYTES SZ_8M #define NVME_MAX_NR_DESCRIPTORS 5 +#define NVME_CMD_APPLE_H9P_FLATDMA BIT(5) + +#define APPLE_H9P_REG_INIT 0x1800 +#define APPLE_H9P_REG_INIT_REGULAR 0 +#define APPLE_H9P_REG_SCRATCH_SIZE_REQ 0x1808 +#define 
APPLE_H9P_REG_SCRATCH_ALIGN_REQ 0x180c +#define APPLE_H9P_REG_SCRATCH_BASE_LO 0x1810 +#define APPLE_H9P_REG_SCRATCH_BASE_HI 0x1814 +#define APPLE_H9P_REG_SCRATCH_SIZE 0x1818 +#define APPLE_H9P_REG_CORE_MASK 0x1824 +#define APPLE_H9P_REG_LOG_SIZE 0x1828 +#define APPLE_H9P_REG_BOOT_STATE 0x1b18 +#define APPLE_H9P_REG_BOOT_STATE_MAGIC 0xbfbfbfbfu +#define APPLE_H9P_NVME_MAX_SECTORS \ + (APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE >> SECTOR_SHIFT) + /* * For data SGLs we support a single descriptors worth of SGL entries. * For PRPs, segments don't matter at all. @@ -278,6 +299,7 @@ MODULE_PARM_DESC(noacpi, "disable acpi bios quirks"); struct nvme_dev; struct nvme_queue; +struct apple_h9p_nvme; static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); static void nvme_delete_io_queues(struct nvme_dev *dev); @@ -317,6 +339,7 @@ struct nvme_dev { bool hmb; struct sg_table *hmb_sgt; mempool_t *dmavec_mempool; + struct apple_h9p_nvme *apple_h9p; /* shadow doorbell buffer support: */ __le32 *dbbuf_dbs; @@ -444,6 +467,7 @@ struct nvme_iod { size_t meta_total_len; struct dma_iova_state meta_dma_state; struct nvme_sgl_desc *meta_descriptor; + void *apple_h9p_req; }; static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) @@ -886,6 +910,345 @@ static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge, le32_to_cpu(sg_list[i].length), dir, attrs); } +struct apple_h9p_nvme_req { + u64 pages[APPLE_H9P_NVMMU_MAX_PAGES]; + unsigned int npages; +}; + +struct apple_h9p_nvme { + dma_addr_t scratch_dma; + u32 scratch_size; + struct apple_h9p_nvme_req req[APPLE_H9P_NVMMU_MAX_REQS]; + DECLARE_BITMAP(used_req, APPLE_H9P_NVMMU_MAX_REQS); + unsigned int last_req; + /* Protects the FlatDMA request-slot bitmap. 
*/ + spinlock_t req_lock; +}; + +static bool nvme_pci_is_apple_h9p(struct pci_dev *pdev) +{ + return pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2002; +} + +static int nvme_pci_apple_h9p_find_scratch(struct nvme_dev *dev, + u32 scratch_size_req, + u32 scratch_align_req) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + struct pci_dev *pdev = to_pci_dev(dev->dev); + struct pci_host_bridge *bridge; + struct device_node *pcie_np; + struct device_node *mem_np; + struct resource res; + resource_size_t size; + u32 iova; + int ret; + + bridge = pci_find_host_bridge(pdev->bus); + if (!bridge) + return -ENODEV; + + pcie_np = bridge->dev.parent ? bridge->dev.parent->of_node : NULL; + if (!pcie_np) + pcie_np = bridge->dev.of_node; + if (!pcie_np) + return -ENODEV; + + mem_np = of_parse_phandle(pcie_np, "memory-region", 0); + if (!mem_np) + return -ENODEV; + + ret = of_address_to_resource(mem_np, 0, &res); + if (ret) + goto out_put_mem; + + ret = of_property_read_u32(pcie_np, "apple,nvmmu-iova", &iova); + if (ret) + goto out_put_mem; + + if (!scratch_size_req || scratch_size_req == U32_MAX) { + ret = -EINVAL; + goto out_put_mem; + } + if (!scratch_align_req || scratch_align_req == U32_MAX) + scratch_align_req = 1; + + size = resource_size(&res); + if (size < scratch_size_req || size > U32_MAX) { + ret = -ENOSPC; + goto out_put_mem; + } + if (!IS_ALIGNED(res.start, scratch_align_req) || + !IS_ALIGNED(iova, scratch_align_req)) { + ret = -EINVAL; + goto out_put_mem; + } + + h9p->scratch_dma = iova; + h9p->scratch_size = size; + dev_dbg(dev->dev, "Apple H9P NVMe scratch %#x@%pa as dma %#llx\n", + h9p->scratch_size, &res.start, (u64)h9p->scratch_dma); + +out_put_mem: + of_node_put(mem_np); + return ret; +} + +static int nvme_pci_apple_h9p_preinit(struct nvme_dev *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + u32 csts, core_mask, log_size; + u32 scratch_size, scratch_align; + int ret; + + if (!dev->apple_h9p) + return 0; + + if (pci_write_config_byte(pdev, 
PCI_LATENCY_TIMER, 0x40) != + PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x17c, 0x10081008) != + PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x18c, 0) != PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x188, 0x40550000) != + PCIBIOS_SUCCESSFUL) + return -EIO; + + if (readl(dev->bar + APPLE_H9P_REG_BOOT_STATE) == + APPLE_H9P_REG_BOOT_STATE_MAGIC) + dev_dbg(dev->dev, "Apple H9P NVMe boot-state magic present\n"); + + core_mask = readl(dev->bar + APPLE_H9P_REG_CORE_MASK); + log_size = readl(dev->bar + APPLE_H9P_REG_LOG_SIZE); + scratch_size = readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE_REQ); + scratch_align = readl(dev->bar + APPLE_H9P_REG_SCRATCH_ALIGN_REQ); + + writel(0, dev->bar + NVME_REG_CC); + ret = readl_poll_timeout(dev->bar + NVME_REG_CSTS, csts, + !(csts & (NVME_CSTS_RDY | NVME_CSTS_CFS)), + 1000, 2000000); + if (ret) + return ret; + + ret = nvme_pci_apple_h9p_find_scratch(dev, scratch_size, + scratch_align); + if (ret) + return ret; + + dev_dbg(dev->dev, + "Apple H9P NVMe core_mask=%#x log_size=%#x scratch_req=%#x align=%#x\n", + core_mask, log_size, scratch_size, scratch_align); + return 0; +} + +static int nvme_pci_apple_h9p_prepare_enable(struct nvme_dev *dev) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + + if (!h9p) + return 0; + if (!h9p->scratch_size) + return -EINVAL; + + writel(APPLE_H9P_REG_INIT_REGULAR, dev->bar + APPLE_H9P_REG_INIT); + writel(lower_32_bits(h9p->scratch_dma), + dev->bar + APPLE_H9P_REG_SCRATCH_BASE_LO); + writel(upper_32_bits(h9p->scratch_dma), + dev->bar + APPLE_H9P_REG_SCRATCH_BASE_HI); + writel(h9p->scratch_size, dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); + readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); + + return 0; +} + +static blk_status_t nvme_pci_apple_h9p_alloc_req(struct nvme_dev *dev, + struct apple_h9p_nvme_req **req, + unsigned int *tag) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + unsigned long flags; + unsigned int idx; + + spin_lock_irqsave(&h9p->req_lock, flags); + idx = 
find_next_zero_bit(h9p->used_req, APPLE_H9P_NVMMU_MAX_REQS, + (h9p->last_req + 1) % + APPLE_H9P_NVMMU_MAX_REQS); + if (idx >= APPLE_H9P_NVMMU_MAX_REQS) + idx = find_first_zero_bit(h9p->used_req, + APPLE_H9P_NVMMU_MAX_REQS); + if (idx >= APPLE_H9P_NVMMU_MAX_REQS) { + spin_unlock_irqrestore(&h9p->req_lock, flags); + dev_dbg_ratelimited(dev->dev, + "Apple H9P NVMe FlatDMA slots exhausted\n"); + return BLK_STS_RESOURCE; + } + + h9p->last_req = idx; + __set_bit(idx, h9p->used_req); + spin_unlock_irqrestore(&h9p->req_lock, flags); + + *req = &h9p->req[idx]; + *tag = idx; + (*req)->npages = 0; + memset((*req)->pages, 0, sizeof((*req)->pages)); + return BLK_STS_OK; +} + +static void nvme_pci_apple_h9p_free_req(struct nvme_dev *dev, + struct apple_h9p_nvme_req *req) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + unsigned long flags; + unsigned int tag; + + if (!h9p || !req) + return; + + tag = req - h9p->req; + if (tag >= APPLE_H9P_NVMMU_MAX_REQS) + return; + + apple_h9p_pcie_map_nvmmu(dev->dev, tag, NULL, 0, NULL); + req->npages = 0; + + spin_lock_irqsave(&h9p->req_lock, flags); + __clear_bit(tag, h9p->used_req); + spin_unlock_irqrestore(&h9p->req_lock, flags); +} + +static bool nvme_pci_apple_h9p_unmap_data(struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; + unsigned int i; + + if (!iod->apple_h9p_req) + return false; + + for (i = 0; i < iod->nr_dma_vecs; i++) + dma_unmap_page(dev->dev, iod->dma_vecs[i].addr, + iod->dma_vecs[i].len, rq_dma_dir(req)); + if (iod->dma_vecs) { + mempool_free(iod->dma_vecs, dev->dmavec_mempool); + iod->dma_vecs = NULL; + } + iod->nr_dma_vecs = 0; + + nvme_pci_apple_h9p_free_req(dev, iod->apple_h9p_req); + iod->apple_h9p_req = NULL; + iod->cmd.common.flags &= ~NVME_CMD_APPLE_H9P_FLATDMA; + return true; +} + +static blk_status_t nvme_pci_apple_h9p_map_data(struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 
+ struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; + struct apple_h9p_nvme_req *hreq; + struct req_iterator iter; + struct bio_vec bv; + dma_addr_t flatdma; + u64 phys, offs = 0; + unsigned int tag, npages = 0, consumed = 0; + unsigned int total = blk_rq_payload_bytes(req); + blk_status_t status; + int ret; + + if (!dev->apple_h9p) + return BLK_STS_NOTSUPP; + if (total > APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE) + return BLK_STS_IOERR; + + status = nvme_pci_apple_h9p_alloc_req(dev, &hreq, &tag); + if (status) + return status; + + iod->apple_h9p_req = hreq; + iod->dma_vecs = mempool_alloc(dev->dmavec_mempool, GFP_ATOMIC); + if (!iod->dma_vecs) { + status = BLK_STS_RESOURCE; + goto out_unmap; + } + + rq_for_each_bvec(bv, req, iter) { + dma_addr_t dma_addr; + unsigned int len = bv.bv_len; + + if (WARN_ON_ONCE(iod->nr_dma_vecs >= + blk_rq_nr_phys_segments(req))) { + status = BLK_STS_IOERR; + goto out_unmap; + } + + dma_addr = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0); + if (dma_mapping_error(dev->dev, dma_addr)) { + status = BLK_STS_RESOURCE; + goto out_unmap; + } + + iod->dma_vecs[iod->nr_dma_vecs].addr = dma_addr; + iod->dma_vecs[iod->nr_dma_vecs].len = len; + iod->nr_dma_vecs++; + + phys = page_to_phys(bv.bv_page) + bv.bv_offset; + if (!consumed) { + offs = phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1); + phys -= offs; + len += offs; + } else if (phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1)) { + dev_err_ratelimited(dev->dev, + "Apple H9P FlatDMA segment is not page-aligned: phys=%#llx\n", + phys); + status = BLK_STS_IOERR; + goto out_unmap; + } + + if (consumed + bv.bv_len != total && + (len & (APPLE_H9P_NVMMU_PAGE_SIZE - 1))) { + dev_err_ratelimited(dev->dev, + "Apple H9P FlatDMA segment length is not page-aligned: len=%#x\n", + len); + status = BLK_STS_IOERR; + goto out_unmap; + } + + while (len) { + if (npages >= APPLE_H9P_NVMMU_MAX_PAGES) { + status = BLK_STS_IOERR; + goto out_unmap; + } + + hreq->pages[npages++] = 
phys; + phys += APPLE_H9P_NVMMU_PAGE_SIZE; + len = len > APPLE_H9P_NVMMU_PAGE_SIZE ? + len - APPLE_H9P_NVMMU_PAGE_SIZE : 0; + } + + consumed += bv.bv_len; + } + + ret = apple_h9p_pcie_map_nvmmu(dev->dev, tag, hreq->pages, npages, + &flatdma); + if (ret) { + status = errno_to_blk_status(ret); + if (status == BLK_STS_NOTSUPP) + status = BLK_STS_IOERR; + goto out_unmap; + } + + hreq->npages = npages; + iod->total_len = total; + iod->cmd.common.flags |= NVME_CMD_APPLE_H9P_FLATDMA; + iod->cmd.common.dptr.prp1 = cpu_to_le64(flatdma + offs); + iod->cmd.common.dptr.prp2 = 0; + return BLK_STS_OK; + +out_unmap: + nvme_pci_apple_h9p_unmap_data(req); + return status; +} + static void nvme_unmap_metadata(struct request *req) { struct nvme_queue *nvmeq = req->mq_hctx->driver_data; @@ -932,6 +1295,9 @@ static void nvme_unmap_data(struct request *req) struct device *dma_dev = nvmeq->dev->dev; unsigned int attrs = 0; + if (nvmeq->dev->apple_h9p && nvme_pci_apple_h9p_unmap_data(req)) + return; + if (iod->flags & IOD_SINGLE_SEGMENT) { static_assert(offsetof(union nvme_data_ptr, prp1) == offsetof(union nvme_data_ptr, sgl.addr)); @@ -1246,6 +1612,12 @@ static blk_status_t nvme_map_data(struct request *req) struct blk_dma_iter iter; blk_status_t ret; + if (dev->apple_h9p) { + ret = nvme_pci_apple_h9p_map_data(req); + if (ret != BLK_STS_NOTSUPP) + return ret; + } + /* * Try to skip the DMA iterator for single segment requests, as that * significantly improves performances for small I/O sizes. 
@@ -1401,6 +1773,8 @@ static blk_status_t nvme_prep_rq(struct request *req) iod->total_len = 0; iod->meta_total_len = 0; iod->nr_dma_vecs = 0; + iod->dma_vecs = NULL; + iod->apple_h9p_req = NULL; ret = nvme_setup_cmd(req->q->queuedata, req); if (ret) @@ -1450,6 +1824,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, ret = nvme_prep_rq(req); if (unlikely(ret)) return ret; + spin_lock(&nvmeq->sq_lock); nvme_sq_copy_cmd(nvmeq, &iod->cmd); nvme_write_sq_db(nvmeq, bd->last); @@ -2386,6 +2761,10 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ); lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ); + result = nvme_pci_apple_h9p_prepare_enable(dev); + if (result) + return result; + result = nvme_enable_ctrl(&dev->ctrl); if (result) return result; @@ -2942,6 +3321,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) unsigned int nr_io_queues; unsigned long size; int result; + bool reuse_single_vector = false; /* * Sample the module parameters once at reset time so that we have @@ -2986,7 +3366,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) result = nvme_setup_io_queues_trylock(dev); if (result) return result; - if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) + reuse_single_vector = dev->apple_h9p && + (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR) && + pdev->msi_enabled && + test_bit(NVMEQ_ENABLED, &adminq->flags); + + if (!reuse_single_vector && + test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) pci_free_irq(pdev, 0, adminq); if (dev->cmb_use_sqes) { @@ -3014,19 +3400,27 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) retry: /* Deregister the admin queue's interrupt */ - if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) + if (!reuse_single_vector && + test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags)) pci_free_irq(pdev, 0, adminq); /* * If we enable msix early due to not intx, disable it again before * setting up the full range we need. 
*/ - pci_free_irq_vectors(pdev); + if (reuse_single_vector) { + result = 1; + dev->io_queues[HCTX_TYPE_DEFAULT] = 1; + dev->io_queues[HCTX_TYPE_READ] = 0; + dev->io_queues[HCTX_TYPE_POLL] = 0; + } else { + pci_free_irq_vectors(pdev); - result = nvme_setup_irqs(dev, nr_io_queues); - if (result <= 0) { - result = -EIO; - goto out_unlock; + result = nvme_setup_irqs(dev, nr_io_queues); + if (result <= 0) { + result = -EIO; + goto out_unlock; + } } dev->num_vecs = result; @@ -3039,10 +3433,14 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * path to scale better, even if the receive path is limited by the * number of interrupts. */ - result = queue_request_irq(adminq); - if (result) - goto out_unlock; - set_bit(NVMEQ_ENABLED, &adminq->flags); + if (reuse_single_vector) { + result = 0; + } else { + result = queue_request_irq(adminq); + if (result) + goto out_unlock; + set_bit(NVMEQ_ENABLED, &adminq->flags); + } mutex_unlock(&dev->shutdown_lock); result = nvme_create_io_queues(dev); @@ -3249,8 +3647,15 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n", dev->q_depth); } + if (dev->apple_h9p && + dev->q_depth > APPLE_H9P_NVMMU_MAX_REQS) + dev->q_depth = APPLE_H9P_NVMMU_MAX_REQS; dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */ + result = nvme_pci_apple_h9p_preinit(dev); + if (result) + goto free_irq; + nvme_map_cmb(dev); pci_save_state(pdev); @@ -3373,6 +3778,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) nvme_free_tagset(dev); put_device(dev->dev); kfree(dev->queues); + kfree(dev->apple_h9p); kfree(dev); } @@ -3707,6 +4113,14 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, quirks |= qentry->enabled_quirks; quirks &= ~qentry->disabled_quirks; } + if (nvme_pci_is_apple_h9p(pdev)) { + quirks |= NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_SHARED_TAGS; + dev->apple_h9p = kzalloc_node(sizeof(*dev->apple_h9p), + GFP_KERNEL, node); + if (!dev->apple_h9p) + goto 
out_put_device; + spin_lock_init(&dev->apple_h9p->req_lock); + } ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); if (ret) @@ -3726,6 +4140,10 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, dev->ctrl.max_hw_sectors = min_t(u32, NVME_MAX_BYTES >> SECTOR_SHIFT, dma_opt_mapping_size(&pdev->dev) >> 9); + if (dev->apple_h9p) + dev->ctrl.max_hw_sectors = + min_t(u32, dev->ctrl.max_hw_sectors, + APPLE_H9P_NVME_MAX_SECTORS); dev->ctrl.max_segments = NVME_MAX_SEGS; dev->ctrl.max_integrity_segments = 1; return dev; @@ -3733,6 +4151,7 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, out_put_device: put_device(dev->dev); kfree(dev->queues); + kfree(dev->apple_h9p); out_free_dev: kfree(dev); return ERR_PTR(ret); @@ -4271,6 +4690,9 @@ static const struct pci_device_id nvme_id_table[] = { */ .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_QDEPTH_ONE }, + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2002), + .driver_data = NVME_QUIRK_SINGLE_VECTOR | + NVME_QUIRK_SHARED_TAGS }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005), .driver_data = NVME_QUIRK_SINGLE_VECTOR | diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 2247709ef6d696..0b9e77af5aac39 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -54,6 +54,19 @@ config PCIE_APPLE If unsure, say Y if you have an Apple Silicon system. +config PCIE_APPLE_H9P + tristate "Apple H9P/T8010 PCIe controller" + depends on ARCH_APPLE || COMPILE_TEST + depends on OF + depends on PCI_MSI + select PCI_HOST_COMMON + help + Say Y here to enable the PCIe root complex found in Apple A10 + (T8010/H9P) devices. This controller is used for the internal + Apple NVMe storage path on devices such as iPad7,12. + It provides the legacy PHY/link setup and NVMMU/SART mapping + hooks needed before the standard PCI and NVMe layers can bind. 
+ config PCI_VERSATILE bool "ARM Versatile PB PCI controller" depends on ARCH_VERSATILE || COMPILE_TEST diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile index ac8db283f0fea7..a4b489bd4f473f 100644 --- a/drivers/pci/controller/Makefile +++ b/drivers/pci/controller/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o obj-$(CONFIG_PCI_LOONGSON) += pci-loongson.o obj-$(CONFIG_PCIE_HISI_ERR) += pcie-hisi-error.o obj-$(CONFIG_PCIE_APPLE) += pcie-apple.o +obj-$(CONFIG_PCIE_APPLE_H9P) += pcie-apple-h9p.o obj-$(CONFIG_PCIE_MT7621) += pcie-mt7621.o obj-$(CONFIG_PCIE_ASPEED) += pcie-aspeed.o diff --git a/drivers/pci/controller/pcie-apple-h9p.c b/drivers/pci/controller/pcie-apple-h9p.c new file mode 100644 index 00000000000000..5ea301e2e2a7f9 --- /dev/null +++ b/drivers/pci/controller/pcie-apple-h9p.c @@ -0,0 +1,1368 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCIe host bridge driver for Apple H9P/T8010 SoCs. + * + * The controller exposes an ECAM-compatible root complex after the SoC-specific + * power, clock and PHY sequence has brought a port out of reset. The hardware + * differs enough from the Apple Silicon PCIe controller to keep the early H9P + * bring-up sequence separate, while still using the generic PCI host bridge + * and MSI subsystems. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "pci-host-common.h" + +#define H9P_NUM_PORTS 4 +#define H9P_NUM_MSI 32 +#define H9P_MSI_PER_PORT (H9P_NUM_MSI / H9P_NUM_PORTS) + +#define H9P_CFG_PORT_STRIDE 0x8000 + +#define H9P_PHY0_PORTSTAT(port) (0x0100 + (port) * 0x0080) +#define H9P_PHY1_PORTMASK 0x000c + +#define H9P_PORT_LTSSMCTL 0x0080 +#define H9P_PORT_IRQSTAT 0x0100 +#define H9P_PORT_IRQMASK 0x0104 +#define H9P_PORT_MSIVECBASE 0x0128 +#define H9P_PORT_ENABLE 0x0140 +#define H9P_PORT_LINKSTS 0x0208 + +#define H9P_LINK_SPEED_2_5GT 1 +#define H9P_LINK_SPEED_8GT 3 + +#define H9P_PCIECLK_POSTUP0 0x0000 +#define H9P_PCIECLK_POSTUP1 0x000c +#define H9P_PCIECLK_POSTUP2 0x4104 +#define H9P_PCIECLK_POSTUP3 0x4100 + +#define H9P_NVMMU_TCB_CTRL 0x0004 +#define H9P_NVMMU_TCB_BASE_LO 0x0008 +#define H9P_NVMMU_TCB_BASE_HI 0x000c +#define H9P_NVMMU_TCB_TABLE_LO 0x0010 +#define H9P_NVMMU_TCB_TABLE_HI 0x0014 +#define H9P_NVMMU_SART_CTRL 0x0020 +#define H9P_NVMMU_SART_VA_BASE 0x0024 +#define H9P_NVMMU_SART_VA_END 0x0028 +#define H9P_NVMMU_SART_PA_BASE 0x002c + +#define H9P_NVMMU_TCB_BYTES 0x80 +#define H9P_NVMMU_TCB_DWORDS (H9P_NVMMU_TCB_BYTES / sizeof(u32)) +#define H9P_NVMMU_SGL_WORDS APPLE_H9P_NVMMU_MAX_PAGES +#define H9P_NVMMU_FLATDMA_BASE 0x40000000ULL +#define H9P_NVMMU_FLATDMA_STRIDE SZ_8M +#define H9P_NVMMU_SART_ALIGNMENT SZ_1M +#define H9P_NVMMU_TCB_READ 0x100 +#define H9P_NVMMU_TCB_WRITE 0x200 + +#define H9P_DEFAULT_MSI_DOORBELL 0xbffff000ULL + +#define APPLE_PMGR_AUTO_ENABLE BIT(28) +#define APPLE_PMGR_WAS_CLKGATED BIT(9) +#define APPLE_PMGR_WAS_PWRGATED BIT(8) +#define APPLE_PMGR_PS_ACTUAL GENMASK(7, 4) +#define APPLE_PMGR_PS_TARGET GENMASK(3, 0) +#define APPLE_PMGR_FLAGS (APPLE_PMGR_WAS_CLKGATED | \ + APPLE_PMGR_WAS_PWRGATED) +#define 
APPLE_PMGR_PS_ACTIVE 0xf +#define APPLE_PMGR_PS_SET_TIMEOUT_US 10000 + +struct apple_h9p_tunable { + u32 offset; + u32 size; + u64 mask; + u64 data; +}; + +static const struct apple_h9p_tunable h9p_phy0_tunables[] = { + { 0x0008, 4, 0x7f7f7f7f, 0x00000000 }, + { 0x000c, 4, 0x00073f3f, 0x00043f00 }, + { 0x0010, 4, 0x00000700, 0x00000000 }, + { 0x0018, 4, 0x00ffffff, 0x000c0960 }, + { 0x001c, 4, 0x00001fff, 0x0000092c }, + { 0x002c, 4, 0x000000ff, 0x00000009 }, + { 0x003c, 4, 0x80000000, 0x00000000 }, + { 0x0100, 4, 0x31100010, 0x01000000 }, + { 0x0108, 4, 0x00000707, 0x00000000 }, + { 0x010c, 4, 0x00073f3f, 0x00043f00 }, + { 0x0110, 4, 0x00000011, 0x00000001 }, + { 0x0114, 4, 0x00000007, 0x00000000 }, + { 0x0118, 4, 0x00073f3f, 0x00043f00 }, + { 0x0120, 4, 0x0333003f, 0x0111000f }, + { 0x0130, 4, 0x000000ff, 0x0000000f }, + { 0x0138, 4, 0x0000007f, 0x0000003e }, + { 0x0180, 4, 0x31100010, 0x01000000 }, + { 0x0188, 4, 0x00000707, 0x00000000 }, + { 0x018c, 4, 0x00073f3f, 0x00043f00 }, + { 0x01a0, 4, 0x0333003f, 0x0111000f }, + { 0x01b0, 4, 0x000000ff, 0x0000000f }, + { 0x01b8, 4, 0x0000007f, 0x0000003e }, + { 0x0200, 4, 0x31100010, 0x01000000 }, + { 0x0208, 4, 0x00000707, 0x00000000 }, + { 0x020c, 4, 0x00073f3f, 0x00043f00 }, + { 0x0220, 4, 0x0333003f, 0x0111000f }, + { 0x0230, 4, 0x000000ff, 0x0000000f }, + { 0x0238, 4, 0x0000007f, 0x0000003e }, + { 0x0280, 4, 0x31100010, 0x01000000 }, + { 0x0288, 4, 0x00000707, 0x00000000 }, + { 0x028c, 4, 0x00073f3f, 0x00043f00 }, + { 0x02a0, 4, 0x0333003f, 0x0111000f }, + { 0x02b0, 4, 0x000000ff, 0x0000000f }, + { 0x02b8, 4, 0x0000007f, 0x0000003e }, + { 0x0100, 4, 0x00000010, 0x00000010 }, + { 0x0180, 4, 0x00000010, 0x00000000 }, + { 0x0200, 4, 0x00000010, 0x00000000 }, + { 0x0280, 4, 0x00000010, 0x00000000 }, +}; + +static const struct apple_h9p_tunable h9p_config_tunables[] = { + { 0x0098, 4, 0x0000000f, 0x00000000 }, + { 0x0164, 4, 0x00f8ff00, 0x00000000 }, + { 0x08e0, 4, 0x00000005, 0x00000005 }, +}; + +static const struct 
apple_h9p_tunable h9p_port_tunables[] = { + { 0x0090, 4, 0x000000ff, 0x00000028 }, + { 0x0130, 4, 0x0000000d, 0x00000005 }, + { 0x0134, 4, 0x00000001, 0x00000001 }, + { 0x0138, 4, 0x00007f7f, 0x00000000 }, + { 0x013c, 4, 0x00000002, 0x00000002 }, + { 0x0140, 4, 0x0073ffff, 0x00704c4b }, +}; + +struct apple_h9p_pcie { + struct device *dev; + struct platform_device *pdev; + struct pci_host_bridge *bridge; + struct pci_config_window *cfgwin; + + void __iomem *base_config; + void __iomem *base_phy[3]; + void __iomem *base_port[H9P_NUM_PORTS]; + void __iomem *base_pcieclk_postup; + + struct clk_bulk_data clks[3]; + struct gpio_desc *perst[H9P_NUM_PORTS]; + struct gpio_desc *clkreq[H9P_NUM_PORTS]; + struct gpio_descs *devpwr; + struct pinctrl *pinctrl; + u32 enabled_ports; + + struct apple_h9p_nvmmu { + struct apple_h9p_pcie *pcie; + void __iomem *base; + u64 pa_base; + u32 va_base; + u32 size; + void *tcb; + void *tcb_table; + void *tcb_sgl; + size_t tcb_size; + size_t tcb_table_size; + size_t tcb_sgl_size; + dma_addr_t tcb_dma; + dma_addr_t tcb_table_dma; + dma_addr_t tcb_sgl_dma; + } nvmmu[H9P_NUM_PORTS]; + + struct device **pd_dev; + struct device_link **pd_link; + int pd_count; + + DECLARE_BITMAP(used_msi[H9P_NUM_PORTS], H9P_MSI_PER_PORT); + u64 msi_doorbell; + /* Protects the per-port MSI allocation bitmaps. 
*/
+	spinlock_t used_msi_lock;
+	struct irq_domain *irq_dom;
+	struct irq_domain *msi_dom;
+
+	struct apple_h9p_msi {
+		struct apple_h9p_pcie *pcie;
+		int virq;
+		bool disabled;
+	} msi[H9P_NUM_MSI];
+};
+
+/* Read-modify-write a 32-bit register: clear the @clear bits, then OR in @set. */
+static inline void h9p_rmw(void __iomem *addr, u32 clear, u32 set)
+{
+	writel((readl(addr) & ~clear) | set, addr);
+}
+
+/* 16-bit variant of h9p_rmw(). */
+static inline void h9p_rmww(void __iomem *addr, u16 clear, u16 set)
+{
+	writew((readw(addr) & ~clear) | set, addr);
+}
+
+/* Read a register of @size bytes (1, 2, 4 or 8); any other size reads as 0. */
+static inline u64 h9p_readsz(void __iomem *addr, u32 size)
+{
+	switch (size) {
+	case 1:
+		return readb(addr);
+	case 2:
+		return readw(addr);
+	case 4:
+		return readl(addr);
+	case 8:
+		return readq(addr);
+	default:
+		return 0;
+	}
+}
+
+/* Write a register of @size bytes (1, 2, 4 or 8); any other size is ignored. */
+static inline void h9p_writesz(u64 value, void __iomem *addr, u32 size)
+{
+	switch (size) {
+	case 1:
+		writeb(value, addr);
+		break;
+	case 2:
+		writew(value, addr);
+		break;
+	case 4:
+		writel(value, addr);
+		break;
+	case 8:
+		writeq(value, addr);
+		break;
+	}
+}
+
+/* Write a 32-bit register and immediately read the same register back. */
+static inline void h9p_writel_flush(u32 value, void __iomem *addr)
+{
+	writel(value, addr);
+	readl(addr);
+}
+
+/*
+ * Drop the device links and detach the per-domain devices recorded in
+ * pd_link[]/pd_dev[], walking the arrays from the last entry to the first.
+ */
+static void apple_h9p_pcie_detach_genpd(struct apple_h9p_pcie *pcie)
+{
+	int i;
+
+	/*
+	 * apple_h9p_pcie_attach_genpd() returns success without allocating
+	 * the arrays when the node has at most one power domain, yet it
+	 * leaves pd_count at that value. With pd_count == 1 the loop below
+	 * would dereference NULL, so bail out when there is nothing to walk.
+	 */
+	if (!pcie->pd_dev || !pcie->pd_link)
+		return;
+
+	for (i = pcie->pd_count - 1; i >= 0; i--) {
+		if (pcie->pd_link[i])
+			device_link_del(pcie->pd_link[i]);
+		if (!IS_ERR_OR_NULL(pcie->pd_dev[i]))
+			dev_pm_domain_detach(pcie->pd_dev[i], true);
+	}
+}
+
+static int apple_h9p_pcie_attach_genpd(struct apple_h9p_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	int i;
+
+	pcie->pd_count = of_count_phandle_with_args(dev->of_node,
+						    "power-domains",
+						    "#power-domain-cells");
+	if (pcie->pd_count <= 1)
+		return 0;
+
+	pcie->pd_dev = devm_kcalloc(dev, pcie->pd_count,
+				    sizeof(*pcie->pd_dev), GFP_KERNEL);
+	if (!pcie->pd_dev)
+		return -ENOMEM;
+
+	pcie->pd_link = devm_kcalloc(dev, pcie->pd_count,
+				     sizeof(*pcie->pd_link), GFP_KERNEL);
+	if (!pcie->pd_link)
+		return -ENOMEM;
+
+	for (i = 0; i < pcie->pd_count; i++) {
+		pcie->pd_dev[i] = dev_pm_domain_attach_by_id(dev, i);
+		if
(IS_ERR(pcie->pd_dev[i])) { + apple_h9p_pcie_detach_genpd(pcie); + return PTR_ERR(pcie->pd_dev[i]); + } + + pcie->pd_link[i] = device_link_add(dev, pcie->pd_dev[i], + DL_FLAG_STATELESS | + DL_FLAG_PM_RUNTIME | + DL_FLAG_RPM_ACTIVE); + if (!pcie->pd_link[i]) { + apple_h9p_pcie_detach_genpd(pcie); + return -EINVAL; + } + } + + return 0; +} + +static void apple_h9p_pcie_genpd_cleanup(void *data) +{ + apple_h9p_pcie_detach_genpd(data); +} + +static void apple_h9p_pcie_clk_cleanup(void *data) +{ + struct apple_h9p_pcie *pcie = data; + + clk_bulk_disable_unprepare(ARRAY_SIZE(pcie->clks), pcie->clks); +} + +static struct apple_h9p_pcie *apple_h9p_pcie_lookup(struct device *dev) +{ + struct pci_host_bridge *bridge = dev_get_drvdata(dev); + + return bridge ? pci_host_bridge_priv(bridge) : NULL; +} + +static int apple_h9p_pcie_force_power_domain(struct apple_h9p_pcie *pcie, + struct device_node *pd_np) +{ + struct device *dev = pcie->dev; + struct device_node *pmgr_np; + struct regmap *regmap; + u32 offset; + u32 val; + int ret; + + ret = of_property_read_u32_index(pd_np, "reg", 0, &offset); + if (ret) + return dev_err_probe(dev, ret, "%pOF missing PMGR reg\n", + pd_np); + + pmgr_np = of_get_parent(pd_np); + if (!pmgr_np) + return dev_err_probe(dev, -EINVAL, + "%pOF has no PMGR parent\n", pd_np); + + regmap = syscon_node_to_regmap(pmgr_np); + of_node_put(pmgr_np); + if (IS_ERR(regmap)) + return dev_err_probe(dev, PTR_ERR(regmap), + "%pOF missing PMGR regmap\n", pd_np); + + ret = regmap_read(regmap, offset, &val); + if (ret) + return dev_err_probe(dev, ret, + "%pOF PMGR read failed\n", pd_np); + + val &= ~(APPLE_PMGR_AUTO_ENABLE | APPLE_PMGR_FLAGS | + APPLE_PMGR_PS_TARGET); + val |= FIELD_PREP(APPLE_PMGR_PS_TARGET, APPLE_PMGR_PS_ACTIVE); + + ret = regmap_write(regmap, offset, val); + if (ret) + return dev_err_probe(dev, ret, + "%pOF PMGR write failed\n", pd_np); + + ret = regmap_read_poll_timeout_atomic(regmap, offset, val, + FIELD_GET(APPLE_PMGR_PS_ACTUAL, + val) == + 
APPLE_PMGR_PS_ACTIVE, 1, + APPLE_PMGR_PS_SET_TIMEOUT_US); + if (ret) + return dev_err_probe(dev, ret, + "%pOF PMGR active timeout\n", pd_np); + + val &= ~APPLE_PMGR_FLAGS; + val |= APPLE_PMGR_AUTO_ENABLE; + + ret = regmap_write(regmap, offset, val); + if (ret) + return dev_err_probe(dev, ret, + "%pOF PMGR auto-enable failed\n", + pd_np); + + return 0; +} + +static int apple_h9p_pcie_force_power_domains(struct apple_h9p_pcie *pcie) +{ + struct device *dev = pcie->dev; + struct device_node *pd_np; + int count; + int i; + int ret; + + count = of_count_phandle_with_args(dev->of_node, "power-domains", + "#power-domain-cells"); + if (count <= 0) + return 0; + + for (i = 0; i < count; i++) { + pd_np = of_parse_phandle(dev->of_node, "power-domains", i); + if (!pd_np) + return dev_err_probe(dev, -EINVAL, + "missing power-domain %d\n", i); + + ret = apple_h9p_pcie_force_power_domain(pcie, pd_np); + of_node_put(pd_np); + if (ret) + return ret; + } + + return 0; +} + +static int apple_h9p_pcie_config_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + struct pci_config_window *cfg = bus->sysdata; + + if (bus->number == cfg->busr.start && PCI_SLOT(devfn) >= H9P_NUM_PORTS) + return PCIBIOS_DEVICE_NOT_FOUND; + + return pci_generic_config_read(bus, devfn, where, size, val); +} + +static int apple_h9p_pcie_config_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct pci_config_window *cfg = bus->sysdata; + + if (bus->number == cfg->busr.start && PCI_SLOT(devfn) >= H9P_NUM_PORTS) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (where <= PCI_INTERRUPT_LINE && where + size > PCI_INTERRUPT_LINE) + val |= 0xffu << ((PCI_INTERRUPT_LINE - where) << 3); + + return pci_generic_config_write(bus, devfn, where, size, val); +} + +static unsigned int apple_h9p_pcie_bus_to_port(struct apple_h9p_pcie *pcie, + unsigned int bus) +{ + unsigned int port; + + for (port = 0; port < H9P_NUM_PORTS; port++) { + u32 cfg, sec, sub; + + cfg = 
readl(pcie->base_config + port * H9P_CFG_PORT_STRIDE + + PCI_PRIMARY_BUS); + sec = (cfg >> 8) & 0xff; + sub = (cfg >> 16) & 0xff; + + if (!sec || !sub || sec == 0xff || sub == 0xff) + continue; + if (bus >= sec && bus <= sub) + return port; + } + + return H9P_NUM_PORTS; +} + +static int apple_h9p_pcie_device_port(struct apple_h9p_pcie *pcie, + struct device *dev) +{ + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) + return -ENODEV; + + pdev = to_pci_dev(dev); + if (!pdev->bus) + return -ENODEV; + + return apple_h9p_pcie_bus_to_port(pcie, pdev->bus->number); +} + +static void apple_h9p_msi_compose_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct apple_h9p_pcie *pcie = irq_data_get_irq_chip_data(d); + + if (!pcie) { + memset(msg, 0, sizeof(*msg)); + return; + } + + msg->address_lo = lower_32_bits(pcie->msi_doorbell); + msg->address_hi = upper_32_bits(pcie->msi_doorbell); + msg->data = d->hwirq; +} + +static void apple_h9p_msi_write_msg(struct irq_data *d, struct msi_msg *msg) +{ + pci_write_msi_msg(d->irq, msg); +} + +static int apple_h9p_msi_set_affinity(struct irq_data *d, + const struct cpumask *mask, bool force) +{ + return -EINVAL; +} + +static void apple_h9p_msi_mask(struct irq_data *d) +{ + struct apple_h9p_pcie *pcie = irq_data_get_irq_chip_data(d); + + if (!pcie || d->hwirq >= H9P_NUM_MSI || pcie->msi[d->hwirq].virq <= 0) + return; + + if (!pcie->msi[d->hwirq].disabled) { + disable_irq_nosync(pcie->msi[d->hwirq].virq); + pcie->msi[d->hwirq].disabled = true; + } +} + +static void apple_h9p_msi_unmask(struct irq_data *d) +{ + struct apple_h9p_pcie *pcie = irq_data_get_irq_chip_data(d); + + if (!pcie || d->hwirq >= H9P_NUM_MSI || pcie->msi[d->hwirq].virq <= 0) + return; + + if (pcie->msi[d->hwirq].disabled) { + enable_irq(pcie->msi[d->hwirq].virq); + pcie->msi[d->hwirq].disabled = false; + } +} + +static void apple_h9p_msi_ack(struct irq_data *d) +{ +} + +static struct irq_chip apple_h9p_msi_chip = { + .name = "Apple H9P PCIe MSI", + .irq_ack = 
apple_h9p_msi_ack, + .irq_mask = apple_h9p_msi_mask, + .irq_unmask = apple_h9p_msi_unmask, + .irq_compose_msi_msg = apple_h9p_msi_compose_msg, + .irq_write_msi_msg = apple_h9p_msi_write_msg, + .irq_set_affinity = apple_h9p_msi_set_affinity, +}; + +static void apple_h9p_msi_isr(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct apple_h9p_msi *msi = irq_desc_get_handler_data(desc); + struct apple_h9p_pcie *pcie = msi->pcie; + unsigned int idx = msi - pcie->msi; + unsigned int virq; + + chained_irq_enter(chip, desc); + virq = irq_find_mapping(pcie->irq_dom, idx); + if (virq) + generic_handle_irq(virq); + chained_irq_exit(chip, desc); +} + +static int apple_h9p_msi_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + struct apple_h9p_pcie *pcie = domain->host_data; + msi_alloc_info_t *info = args; + struct msi_desc *desc = info ? info->desc : NULL; + struct pci_dev *pdev = NULL; + unsigned long flags; + unsigned int bus = 0; + unsigned int port; + int slot; + + if (nr_irqs != 1) + return -ENOSPC; + + if (desc && desc->dev && dev_is_pci(desc->dev)) { + pdev = to_pci_dev(desc->dev); + if (pdev->bus) + bus = pdev->bus->number; + } + + if (bus < 1) + return -ENOSPC; + + port = apple_h9p_pcie_bus_to_port(pcie, bus); + if (port >= H9P_NUM_PORTS) + return -ENOSPC; + if (!(pcie->enabled_ports & BIT(port))) + return -ENOSPC; + + spin_lock_irqsave(&pcie->used_msi_lock, flags); + slot = find_first_zero_bit(pcie->used_msi[port], H9P_MSI_PER_PORT); + if (slot >= H9P_MSI_PER_PORT) { + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); + return -ENOSPC; + } + __set_bit(slot, pcie->used_msi[port]); + spin_unlock_irqrestore(&pcie->used_msi_lock, flags); + + irq_domain_set_info(domain, virq, port * H9P_MSI_PER_PORT + slot, + &apple_h9p_msi_chip, pcie, handle_edge_irq, + NULL, NULL); + return 0; +} + +static void apple_h9p_msi_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct 
irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct apple_h9p_pcie *pcie = d ? irq_data_get_irq_chip_data(d) : NULL;
+	unsigned long flags;
+	unsigned int i;
+
+	if (!pcie || !d)
+		return;
+
+	spin_lock_irqsave(&pcie->used_msi_lock, flags);
+	for (i = 0; i < nr_irqs; i++) {
+		unsigned long hwirq = d->hwirq + i;
+		unsigned int port = hwirq / H9P_MSI_PER_PORT;
+		unsigned int slot = hwirq % H9P_MSI_PER_PORT;
+
+		if (port < H9P_NUM_PORTS)
+			__clear_bit(slot, pcie->used_msi[port]);
+	}
+	spin_unlock_irqrestore(&pcie->used_msi_lock, flags);
+}
+
+static const struct irq_domain_ops apple_h9p_msi_domain_ops = {
+	.alloc = apple_h9p_msi_alloc,
+	.free = apple_h9p_msi_free,
+};
+
+static struct irq_chip apple_h9p_msi_parent_chip = {
+	.name = "Apple H9P PCIe MSI parent",
+	.irq_ack = irq_chip_ack_parent,
+	.irq_mask = irq_chip_mask_parent,
+	.irq_unmask = irq_chip_unmask_parent,
+	.irq_write_msi_msg = apple_h9p_msi_write_msg,
+};
+
+/*
+ * MSI_FLAG_MULTI_PCI_MSI is intentionally not set: apple_h9p_msi_alloc()
+ * rejects every request with nr_irqs != 1, so advertising multi-vector MSI
+ * would turn such requests into a hard -ENOSPC failure. Without the flag the
+ * PCI MSI core limits plain MSI to a single vector before calling .alloc.
+ */
+static struct msi_domain_info apple_h9p_msi_domain_info = {
+	.flags = MSI_FLAG_PCI_MSIX | MSI_FLAG_USE_DEF_DOM_OPS |
+		 MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_PCI_MSI_MASK_PARENT,
+	.chip = &apple_h9p_msi_parent_chip,
+};
+
+/*
+ * devm action undoing apple_h9p_pcie_setup_msi(): unhook the chained handler
+ * from every recorded parent interrupt, re-enable those still marked
+ * disabled, then remove the MSI domain and the linear domain.
+ */
+static void apple_h9p_pcie_msi_cleanup(void *data)
+{
+	struct apple_h9p_pcie *pcie = data;
+	unsigned int i;
+
+	for (i = 0; i < H9P_NUM_MSI; i++) {
+		if (pcie->msi[i].virq <= 0)
+			continue;
+
+		irq_set_chained_handler_and_data(pcie->msi[i].virq, NULL,
+						 NULL);
+		if (pcie->msi[i].disabled) {
+			enable_irq(pcie->msi[i].virq);
+			pcie->msi[i].disabled = false;
+		}
+	}
+
+	if (pcie->msi_dom) {
+		irq_domain_remove(pcie->msi_dom);
+		pcie->msi_dom = NULL;
+	}
+
+	if (pcie->irq_dom) {
+		irq_domain_remove(pcie->irq_dom);
+		pcie->irq_dom = NULL;
+	}
+}
+
+static int apple_h9p_pcie_setup_msi(struct apple_h9p_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
+	unsigned int i;
+	int ret;
+
+	pcie->irq_dom = irq_domain_create_linear(fwnode, H9P_NUM_MSI, +
&apple_h9p_msi_domain_ops,
+						pcie);
+	if (!pcie->irq_dom)
+		return -ENOMEM;
+
+	pcie->msi_dom = msi_create_irq_domain(fwnode,
+					      &apple_h9p_msi_domain_info,
+					      pcie->irq_dom);
+	if (!pcie->msi_dom) {
+		irq_domain_remove(pcie->irq_dom);
+		pcie->irq_dom = NULL;
+		return -ENOMEM;
+	}
+
+	ret = devm_add_action_or_reset(dev, apple_h9p_pcie_msi_cleanup,
+				       pcie);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < H9P_NUM_MSI; i++) {
+		int irq = platform_get_irq(pcie->pdev, H9P_NUM_PORTS + i);
+
+		if (irq < 0)
+			return irq;
+
+		pcie->msi[i].pcie = pcie;
+		pcie->msi[i].virq = irq;
+		irq_set_chained_handler_and_data(irq, apple_h9p_msi_isr,
+						 &pcie->msi[i]);
+		disable_irq(irq);
+		pcie->msi[i].disabled = true;
+	}
+
+	return 0;
+}
+
+/* Upper bound on capability-list entries visited before giving up. */
+#define H9P_CAP_WALK_TTL		48
+
+/*
+ * Walk the standard capability list of the function at @busdevfn through the
+ * config window and return the config-space offset of the first capability
+ * whose ID equals @type, or 0 when none is found.
+ *
+ * The walk is bounded and validated so that a missing device (all-ones
+ * reads) or a malformed/looping list cannot spin forever:
+ *  - the two reserved low bits of every pointer are masked off, which also
+ *    keeps the readl() below dword-aligned;
+ *  - pointers below 0x40 (inside the standard header) end the walk;
+ *  - a capability ID of 0xff ends the walk;
+ *  - at most H9P_CAP_WALK_TTL entries are visited.
+ */
+static u64 apple_h9p_read_pci_cap(struct apple_h9p_pcie *pcie,
+				  unsigned int busdevfn, u32 type)
+{
+	void __iomem *cfg = pcie->base_config + (busdevfn << 12);
+	u32 ptr = readl(cfg + PCI_CAPABILITY_LIST) & 0xfc;
+	unsigned int ttl = H9P_CAP_WALK_TTL;
+
+	while (ptr >= 0x40 && ttl--) {
+		u32 next = readl(cfg + ptr);
+
+		if ((next & 0xff) == 0xff)
+			break;
+		if ((next & 0xff) == type)
+			return ptr;
+		ptr = (next >> 8) & 0xfc;
+	}
+
+	return 0;
+}
+
+/*
+ * Poll @addr every 1000 us until (value & @mask) lies within [@min, @max],
+ * or fail with the readl_poll_timeout() error once @timeout_us has elapsed.
+ */
+static int apple_h9p_wait(void __iomem *addr, u32 mask, u32 min, u32 max,
+			  unsigned long timeout_us)
+{
+	u32 val;
+
+	return readl_poll_timeout(addr, val, (val & mask) >= min &&
+				  (val & mask) <= max, 1000, timeout_us);
+}
+
+/*
+ * Sleep-poll the raw level of @desc until it equals @value.
+ * Returns 0 on match, -ETIMEDOUT if the level still differs after
+ * @timeout_us.
+ */
+static int apple_h9p_wait_gpio(struct gpio_desc *desc, int value,
+			       unsigned long timeout_us)
+{
+	ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
+
+	do {
+		if (gpiod_get_raw_value(desc) == value)
+			return 0;
+		usleep_range(1000, 2000);
+	} while (ktime_before(ktime_get(), timeout));
+
+	/*
+	 * The deadline may have passed during the last sleep; sample once
+	 * more so a line that settled in that window is not reported as a
+	 * timeout.
+	 */
+	return gpiod_get_raw_value(desc) == value ? 0 : -ETIMEDOUT;
+}
+
+/* NVMMU fault interrupt: only logs (rate-limited) which port raised it. */
+static irqreturn_t apple_h9p_nvmmu_irq(int irq, void *data)
+{
+	struct apple_h9p_nvmmu *nvmmu = data;
+	struct apple_h9p_pcie *pcie = nvmmu->pcie;
+	unsigned int port = nvmmu - pcie->nvmmu;
+
+	dev_err_ratelimited(pcie->dev, "port %u NVMMU fault interrupt\n", port);
+	return IRQ_HANDLED;
+}
+
+static int apple_h9p_setup_nvmmu_port(struct apple_h9p_pcie *pcie,
+				      unsigned int port)
+{
+	struct apple_h9p_nvmmu *nvmmu = &pcie->nvmmu[port];
+	struct device *dev = pcie->dev;
+	struct device_node *mem_np;
+	struct resource res;
+	u32 iova;
+	int irq;
+	int ret;
+	/* A port whose NVMMU register window was never mapped has nothing to set up. */
+	if (!nvmmu->base)
+		return 0;
+	/* The port number doubles as the index into the "memory-region" phandle list. */
+	mem_np = of_parse_phandle(dev->of_node, "memory-region", port);
+	if (!mem_np)
+		return dev_err_probe(dev, -EINVAL,
+				     "port %u NVMMU missing memory-region\n",
+				     port);
+	/* mem_np now holds a node reference: every later exit goes through out_put_node. */
+	ret = of_address_to_resource(mem_np, 0, &res);
+	if (ret)
+		goto out_put_node;
+	/* "apple,nvmmu-iova" is read without an index, so all ports share one IOVA base. */
+	ret = of_property_read_u32(dev->of_node, "apple,nvmmu-iova", &iova);
+	if (ret)
+		goto out_put_node;
+	/* Window must be at least H9P_NVMMU_SART_ALIGNMENT big and aligned to it (PA and IOVA). */
+	if (resource_size(&res) < H9P_NVMMU_SART_ALIGNMENT ||
+	    !IS_ALIGNED(res.start, H9P_NVMMU_SART_ALIGNMENT) ||
+	    !IS_ALIGNED(iova, H9P_NVMMU_SART_ALIGNMENT)) {
+		ret = -EINVAL;
+		goto out_put_node;
+	}
+	/* Record the window and size the three tables; TCB and SGL sizes are page-rounded. */
+	nvmmu->pcie = pcie;
+	nvmmu->pa_base = res.start;
+	nvmmu->va_base = iova;
+	nvmmu->size = resource_size(&res);
+	nvmmu->tcb_size = round_up(APPLE_H9P_NVMMU_MAX_REQS *
+				   H9P_NVMMU_TCB_BYTES, PAGE_SIZE);
+	nvmmu->tcb_table_size = PAGE_SIZE * 16;
+	nvmmu->tcb_sgl_size = round_up(APPLE_H9P_NVMMU_MAX_REQS *
+				       H9P_NVMMU_SGL_WORDS * sizeof(u32),
+				       PAGE_SIZE);
+	/* All three tables are device-managed, zeroed, write-combined DMA allocations. */
+	nvmmu->tcb = dmam_alloc_attrs(dev, nvmmu->tcb_size, &nvmmu->tcb_dma,
+				      GFP_KERNEL | __GFP_ZERO,
+				      DMA_ATTR_WRITE_COMBINE);
+	if (!nvmmu->tcb) {
+		ret = -ENOMEM;
+		goto out_put_node;
+	}
+
+	nvmmu->tcb_table = dmam_alloc_attrs(dev, nvmmu->tcb_table_size,
+					    &nvmmu->tcb_table_dma,
+					    GFP_KERNEL | __GFP_ZERO,
+					    DMA_ATTR_WRITE_COMBINE);
+	if (!nvmmu->tcb_table) {
+		ret = -ENOMEM;
+		goto out_put_node;
+	}
+
+	nvmmu->tcb_sgl = dmam_alloc_attrs(dev, nvmmu->tcb_sgl_size,
+					  &nvmmu->tcb_sgl_dma,
+					  GFP_KERNEL | __GFP_ZERO,
+					  DMA_ATTR_WRITE_COMBINE);
+	if (!nvmmu->tcb_sgl) {
+		ret = -ENOMEM;
+		goto out_put_node;
+	}
+	/* Hand the hardware the TCB array's DMA address as separate LO/HI 32-bit halves. */
+	h9p_writel_flush(lower_32_bits(nvmmu->tcb_dma),
+			 nvmmu->base + H9P_NVMMU_TCB_BASE_LO);
+	h9p_writel_flush(upper_32_bits(nvmmu->tcb_dma),
+			 nvmmu->base + H9P_NVMMU_TCB_BASE_HI);
+
h9p_writel_flush(lower_32_bits(nvmmu->tcb_table_dma),
+			 nvmmu->base + H9P_NVMMU_TCB_TABLE_LO);
+	h9p_writel_flush(upper_32_bits(nvmmu->tcb_table_dma),
+			 nvmmu->base + H9P_NVMMU_TCB_TABLE_HI);
+	h9p_writel_flush(0x10000, nvmmu->base + H9P_NVMMU_TCB_CTRL);
+	/* Allow up to 250 ms for the TCB_CTRL bit selected by mask 0x10 to read back as 0. */
+	ret = apple_h9p_wait(nvmmu->base + H9P_NVMMU_TCB_CTRL, 0x10, 0, 0,
+			     250000);
+	if (ret)
+		goto out_put_node;
+	/* SART window: VA base/end are written relative to 0x80000000/0x80100000, PA base as >> 20. */
+	h9p_writel_flush(nvmmu->va_base - 0x80000000U,
+			 nvmmu->base + H9P_NVMMU_SART_VA_BASE);
+	h9p_writel_flush(round_up(nvmmu->va_base + nvmmu->size,
+				  H9P_NVMMU_SART_ALIGNMENT) - 0x80100000U,
+			 nvmmu->base + H9P_NVMMU_SART_VA_END);
+	h9p_writel_flush(nvmmu->pa_base >> 20,
+			 nvmmu->base + H9P_NVMMU_SART_PA_BASE);
+	h9p_writel_flush(1, nvmmu->base + H9P_NVMMU_SART_CTRL);
+	/* Fault IRQ index follows the port and MSI interrupts; -ENXIO (absent) is not an error. */
+	irq = platform_get_irq_optional(pcie->pdev, H9P_NUM_PORTS +
+					H9P_NUM_MSI + port);
+	if (irq > 0) {
+		ret = devm_request_irq(dev, irq, apple_h9p_nvmmu_irq, 0,
+				       dev_name(dev), nvmmu);
+		if (ret)
+			goto out_put_node;
+	} else if (irq != -ENXIO) {
+		ret = irq;
+		goto out_put_node;
+	}
+
+	dev_dbg(dev, "port %u NVMMU window %#x@%pa size %#x\n", port,
+		nvmmu->va_base, &res.start, nvmmu->size);
+	/* Success falls through to the label too, with ret == 0; the node reference is dropped. */
+out_put_node:
+	of_node_put(mem_np);
+	return ret;
+}
+/* Run the per-port NVMMU setup for every port set in enabled_ports; stop at the first error. */
+static int apple_h9p_setup_nvmmu(struct apple_h9p_pcie *pcie)
+{
+	unsigned int port;
+	int ret;
+
+	for (port = 0; port < H9P_NUM_PORTS; port++) {
+		if (!(pcie->enabled_ports & BIT(port)))
+			continue;
+
+		ret = apple_h9p_setup_nvmmu_port(pcie, port);
+		if (ret)
+			return dev_err_probe(pcie->dev, ret,
+					     "port %u NVMMU setup failed\n",
+					     port);
+	}
+
+	return 0;
+}
+/* Exported: fill the NVMMU slot of @tag from @pages, or (npages == 0) zero it and write @tag to TCB_CTRL. */
+int apple_h9p_pcie_map_nvmmu(struct device *dev, unsigned int tag,
+			     const u64 *pages, unsigned int npages,
+			     dma_addr_t *iova)
+{
+	struct apple_h9p_nvmmu *nvmmu;
+	struct apple_h9p_pcie *pcie;
+	struct device *host_dev = dev;
+	unsigned int port;
+	unsigned int i;
+	u64 sgl_dma;
+	u32 *tcb;
+	u32 *sgl;
+	int ret;
+	/* -EINVAL for an out-of-range tag or page count, or a missing page list when npages != 0. */
+	if (tag >= APPLE_H9P_NVMMU_MAX_REQS ||
+	    npages > APPLE_H9P_NVMMU_MAX_PAGES)
+		return -EINVAL;
+	if (npages
&& !pages)
+		return -EINVAL;
+	/* Climb the parent chain while devices share @dev's bus, then look up that device's parent. */
+	while (host_dev && host_dev->bus == dev->bus)
+		host_dev = host_dev->parent;
+	if (!host_dev || !host_dev->parent)
+		return -ENODEV;
+
+	pcie = apple_h9p_pcie_lookup(host_dev->parent);
+	if (!pcie)
+		return -ENODEV;
+	/* Resolve the root port @dev sits behind; it must be a valid, enabled port. */
+	ret = apple_h9p_pcie_device_port(pcie, dev);
+	if (ret < 0)
+		return ret;
+	port = ret;
+	if (port >= H9P_NUM_PORTS || !(pcie->enabled_ports & BIT(port)))
+		return -ENODEV;
+	/* -EOPNOTSUPP when this port has no NVMMU window or its tables were never allocated. */
+	nvmmu = &pcie->nvmmu[port];
+	if (!nvmmu->base || !nvmmu->tcb || !nvmmu->tcb_sgl)
+		return -EOPNOTSUPP;
+	/* Each tag owns one fixed-size slot in the TCB array and one in the SGL array; zero both. */
+	tcb = (u32 *)nvmmu->tcb + tag * H9P_NVMMU_TCB_DWORDS;
+	sgl = (u32 *)nvmmu->tcb_sgl + tag * H9P_NVMMU_SGL_WORDS;
+	memset(tcb, 0, H9P_NVMMU_TCB_BYTES);
+	memset(sgl, 0, H9P_NVMMU_SGL_WORDS * sizeof(*sgl));
+	/* NOTE(review): sgl[i] assumes H9P_NVMMU_SGL_WORDS >= APPLE_H9P_NVMMU_MAX_PAGES - confirm. */
+	if (npages) {
+		tcb[0] = H9P_NVMMU_TCB_READ | H9P_NVMMU_TCB_WRITE;
+		tcb[1] = npages;
+		tcb[2] = pages[0] >> ilog2(APPLE_H9P_NVMMU_PAGE_SIZE);
+		for (i = 0; i < npages; i++)
+			sgl[i] = pages[i] >> ilog2(APPLE_H9P_NVMMU_PAGE_SIZE);
+		/* tcb[4] and tcb[5] receive the 64-bit DMA address of this tag's scatter list. */
+		sgl_dma = nvmmu->tcb_sgl_dma +
+			  tag * H9P_NVMMU_SGL_WORDS * sizeof(*sgl);
+		memcpy(&tcb[4], &sgl_dma, sizeof(sgl_dma));
+		if (iova)
+			*iova = H9P_NVMMU_FLATDMA_BASE +
+				tag * H9P_NVMMU_FLATDMA_STRIDE;
+	} else {
+		dma_wmb();	/* barrier between the memset()s above and the TCB_CTRL write */
+		h9p_writel_flush(tag, nvmmu->base + H9P_NVMMU_TCB_CTRL);
+		if (iova)
+			*iova = 0;
+		return 0;
+	}
+	/* Mapping path: write barrier after the table updates, then report success. */
+	dma_wmb();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(apple_h9p_pcie_map_nvmmu);
+/* For each table entry, force the .mask bits of the register at base + .offset to .data. */
+static void apple_h9p_apply_tunables(void __iomem *base,
+				     const struct apple_h9p_tunable *tunables,
+				     unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		u64 val = h9p_readsz(base + tunables[i].offset, tunables[i].size);
+		/* Registers that already hold the wanted value are not written again. */
+		if ((val & tunables[i].mask) == tunables[i].data)
+			continue;
+		val &= ~tunables[i].mask;
+		val |= tunables[i].data;
+		h9p_writesz(val, base + tunables[i].offset, tunables[i].size);
+	}
+}
+/* Write four fixed values to the POSTUP0..3 registers; a no-op when the window is unmapped. */
+static int apple_h9p_pcieclk_postup(struct apple_h9p_pcie *pcie)
+{
+	if (!pcie->base_pcieclk_postup)
+		return 0;
+
+	writel(0x00000007, pcie->base_pcieclk_postup +
H9P_PCIECLK_POSTUP0);
+	writel(0x80010005, pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP1);
+	writel(0x00000003, pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP2);
+	writel(0x00000003, pcie->base_pcieclk_postup + H9P_PCIECLK_POSTUP3);
+
+	return 0;
+}
+/* True when bits 13:8 of the port's LINKSTS register hold a value in 0x11..0x14. */
+static bool apple_h9p_link_up(struct apple_h9p_pcie *pcie, unsigned int port)
+{
+	u32 linksts = readl(pcie->base_port[port] + H9P_PORT_LINKSTS);
+
+	linksts = (linksts >> 8) & 0x3f;
+	return linksts >= 0x11 && linksts <= 0x14;
+}
+/* Bring up one root port: PHY and port registers, CLKREQ#/PERST# handshake, then link training. */
+static int apple_h9p_setup_port(struct apple_h9p_pcie *pcie, unsigned int port)
+{
+	struct device *dev = pcie->dev;
+	u64 cap;
+	int ret;
+	/* A port whose link already reads as up is left untouched. */
+	if (apple_h9p_link_up(pcie, port))
+		return 0;
+	/* The PERST# GPIO is driven to 0 here and only switched to 1 after CLKREQ# reads 0. */
+	gpiod_direction_output(pcie->perst[port], 0);
+	/* Per-port PHY0 control registers are spaced 0x80 apart. */
+	h9p_rmw(pcie->base_phy[0] + 0x134 + 0x80 * port, 1, 0);
+	h9p_rmw(pcie->base_phy[0] + 0x124 + 0x80 * port, 0, 1);
+	/* Allow 250 ms for the bit selected by mask 0x10 in PHY0 + 0x28 to become set. */
+	ret = apple_h9p_wait(pcie->base_phy[0] + 0x28, 0x10, 0x10, 0x10,
+			     250000);
+	if (ret)
+		return dev_err_probe(dev, ret, "port %u init timeout\n", port);
+
+	usleep_range(250, 1000);
+	h9p_rmw(pcie->base_phy[0] + 0x100 + 0x80 * port, 0, 1);
+	h9p_rmw(pcie->base_phy[0] + 0x100 + 0x80 * port, 0x100, 0);
+	usleep_range(500, 1000);
+	h9p_rmw(pcie->base_phy[0] + 0x134 + 0x80 * port, 0, 1);
+	/* Port 0 gets H9P_LINK_SPEED_8GT in this PHY0 register; every other port gets 0. */
+	writel(port ? 0 : H9P_LINK_SPEED_8GT,
+	       pcie->base_phy[0] + 0x4020 + 0x40 * port);
+	h9p_rmw(pcie->base_phy[0] + 0x124 + 0x80 * port, 0x100, 0);
+	/* LNKCTL2 target speed is only programmed if function (port << 3) has a PCIe capability. */
+	cap = apple_h9p_read_pci_cap(pcie, port << 3, PCI_CAP_ID_EXP);
+	if (cap)
+		h9p_rmww(pcie->base_config + port * H9P_CFG_PORT_STRIDE +
+			 cap + PCI_EXP_LNKCTL2, PCI_EXP_LNKCTL2_TLS,
+			 port ?
H9P_LINK_SPEED_2_5GT : H9P_LINK_SPEED_8GT);
+	/* Apply the config-space tunables, then the port-register tunables, for this port. */
+	apple_h9p_apply_tunables(pcie->base_config + port * H9P_CFG_PORT_STRIDE,
+				 h9p_config_tunables,
+				 ARRAY_SIZE(h9p_config_tunables));
+	apple_h9p_apply_tunables(pcie->base_port[port], h9p_port_tunables,
+				 ARRAY_SIZE(h9p_port_tunables));
+
+	h9p_rmw(pcie->base_config + port * H9P_CFG_PORT_STRIDE + 0x8e0,
+		0, 1);
+	/* NOTE(review): by register name these program the port IRQ mask and status - confirm values. */
+	writel(0xff002fff, pcie->base_port[port] + H9P_PORT_IRQMASK);
+	writel(0x00ffd000, pcie->base_port[port] + H9P_PORT_IRQSTAT);
+	/* MSIVECBASE receives port * H9P_MSI_PER_PORT replicated into both 16-bit halves (x 0x10001). */
+	h9p_rmw(pcie->base_port[port] + H9P_PORT_ENABLE, 0, 0x80000000);
+	writel(0x31, pcie->base_port[port] + 0x124);
+	writel(port * 0x10001 * H9P_MSI_PER_PORT,
+	       pcie->base_port[port] + H9P_PORT_MSIVECBASE);
+	/* Allow 250 ms for the CLKREQ# GPIO to read as raw 0; fail the port otherwise. */
+	usleep_range(250, 1000);
+	ret = apple_h9p_wait_gpio(pcie->clkreq[port], 0, 250000);
+	if (ret)
+		return dev_err_probe(dev, ret, "port %u CLKREQ# timeout\n",
+				     port);
+	/* Only now is the PERST# GPIO switched to 1. */
+	gpiod_direction_output(pcie->perst[port], 1);
+	usleep_range(250, 1000);
+	/* PHY1 PORTMASK must show this port's bit set within 250 ms. */
+	ret = apple_h9p_wait(pcie->base_phy[1] + H9P_PHY1_PORTMASK,
+			     BIT(port), BIT(port), BIT(port), 250000);
+	if (ret)
+		return dev_err_probe(dev, ret, "port %u PHY up timeout\n",
+				     port);
+	/* Fixed PHY2 programming with opaque constants; per-port registers use a 0x800 stride. */
+	h9p_rmw(pcie->base_phy[2] + 0x180, 0, 0x4000);
+	h9p_rmw(pcie->base_phy[2] + 0x184, 0, 0x4000);
+	h9p_rmw(pcie->base_phy[2] + 0x90, 0xfff, 100);
+	h9p_rmw(pcie->base_phy[2] + 0x98, 0xfff, 25);
+	h9p_rmw(pcie->base_phy[2] + 0x10088 + 0x800 * port, 0, 0x4000);
+	writel(0, pcie->base_phy[2] + 0x10784 + 0x800 * port);
+	h9p_rmw(pcie->base_phy[2] + 0x10004 + 0x800 * port, 0xfff, 0x600);
+	writel(0x3105, pcie->base_phy[2] + 0x20788 + 0x800 * port);
+	h9p_rmw(pcie->base_phy[2] + 0x207a0 + 0x800 * port, 0xff, 0x9f);
+	h9p_rmw(pcie->base_phy[2] + 0x207a8 + 0x800 * port, 0xff, 0x01);
+	h9p_rmw(pcie->base_phy[2] + 0x20400 + 0x800 * port, 0x1f, 0x0a);
+	writel(175, pcie->base_phy[2] + 0x2009c + 0x800 * port);
+	writel(175, pcie->base_phy[2] + 0x200dc + 0x800 * port);
+	writel(333, pcie->base_phy[2] + 0x200a0 + 0x800 * port);
+	writel(333, pcie->base_phy[2] +
0x200e0 + 0x800 * port);
+	writel(530, pcie->base_phy[2] + 0x200a4 + 0x800 * port);
+	writel(530, pcie->base_phy[2] + 0x200e4 + 0x800 * port);
+	writel(0, pcie->base_phy[2] + 0x20330 + 0x800 * port);
+	writel(0, pcie->base_phy[2] + 0x20340 + 0x800 * port);
+	writel(0, pcie->base_phy[2] + 0x20350 + 0x800 * port);
+	/* IRQMASK is rewritten with a different value than earlier, followed by a 5-10 ms pause. */
+	writel(0xff002f0f, pcie->base_port[port] + H9P_PORT_IRQMASK);
+	usleep_range(5000, 10000);
+	/* Update LTSSMCTL, then allow 500 ms for LINKSTS bits 13:8 to enter 0x11..0x14. */
+	h9p_rmw(pcie->base_port[port] + H9P_PORT_LTSSMCTL, 0, 1);
+	ret = apple_h9p_wait(pcie->base_port[port] + H9P_PORT_LINKSTS,
+			     0x3f00, 0x1100, 0x1400, 500000);
+	if (ret)
+		dev_warn(dev, "port %u link did not reach L0\n", port);
+	/* A link that fails to train is only warned about; port setup still reports success. */
+	return 0;
+}
+/* Global PHY0 bring-up, then apple_h9p_setup_port() for each enabled port; first error aborts. */
+static int apple_h9p_setup_ports(struct apple_h9p_pcie *pcie)
+{
+	unsigned int port;
+	int ret;
+
+	writel(0x10, pcie->base_phy[0] + 0x0004);
+	h9p_rmw(pcie->base_phy[0] + 0x124, 0, 1);
+	/* PHY0 + 0x28: wait (250 ms each) for the bit under mask 0x10, then the bit under mask 1. */
+	ret = apple_h9p_wait(pcie->base_phy[0] + 0x28, 0x10, 0x10, 0x10,
+			     250000);
+	if (ret)
+		return dev_err_probe(pcie->dev, ret,
+				     "global PHY init timeout\n");
+
+	ret = apple_h9p_wait(pcie->base_phy[0] + 0x28, 1, 1, 1, 250000);
+	if (ret)
+		return dev_err_probe(pcie->dev, ret,
+				     "global PHY ready timeout\n");
+	/* PHY0 tunables are applied between the fixed writes to PHY0 + 0x34 and PHY0 + 0x14. */
+	writel(1, pcie->base_phy[0] + 0x34);
+	apple_h9p_apply_tunables(pcie->base_phy[0], h9p_phy0_tunables,
+				 ARRAY_SIZE(h9p_phy0_tunables));
+	writel(1, pcie->base_phy[0] + 0x14);
+	usleep_range(5000, 10000);
+	writel(1, pcie->base_phy[0] + 0x24);
+	usleep_range(500, 1000);
+	/* Ports not set in enabled_ports are skipped entirely. */
+	for (port = 0; port < H9P_NUM_PORTS; port++) {
+		if (!(pcie->enabled_ports & BIT(port)))
+			continue;
+
+		ret = apple_h9p_setup_port(pcie, port);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+/* ECAM init callback: find the driver state from cfg->parent, then bring up clocks, ports, NVMMU. */
+static int apple_h9p_pcie_init(struct pci_config_window *cfg)
+{
+	struct apple_h9p_pcie *pcie = apple_h9p_pcie_lookup(cfg->parent);
+	int ret;
+
+	if (!pcie)
+		return -ENODEV;
+	/* Remember the config window; base_config is what the raw config accesses above use. */
+	pcie->cfgwin = cfg;
+	pcie->base_config = cfg->win;
+
+	ret = apple_h9p_pcieclk_postup(pcie);
+	if (ret)
+		return ret;
+
+	ret = apple_h9p_setup_ports(pcie);
+	if (ret)
+		return ret;
+
+ ret = apple_h9p_setup_nvmmu(pcie); + return ret; +} + +static const struct pci_ecam_ops apple_h9p_pcie_ecam_ops = { + .bus_shift = 20, + .init = apple_h9p_pcie_init, + .pci_ops = { + .map_bus = pci_ecam_map_bus, + .read = apple_h9p_pcie_config_read, + .write = apple_h9p_pcie_config_write, + }, +}; + +static int apple_h9p_pcie_map_resources(struct platform_device *pdev, + struct apple_h9p_pcie *pcie) +{ + struct device *dev = &pdev->dev; + unsigned int i; + + for (i = 0; i < 3; i++) { + char name[8]; + + snprintf(name, sizeof(name), "phy%u", i); + pcie->base_phy[i] = devm_platform_ioremap_resource_byname(pdev, + name); + if (IS_ERR(pcie->base_phy[i])) + return PTR_ERR(pcie->base_phy[i]); + } + + for (i = 0; i < H9P_NUM_PORTS; i++) { + char name[8]; + struct resource *res; + + snprintf(name, sizeof(name), "port%u", i); + pcie->base_port[i] = devm_platform_ioremap_resource_byname(pdev, + name); + if (IS_ERR(pcie->base_port[i])) + return PTR_ERR(pcie->base_port[i]); + + snprintf(name, sizeof(name), "nvmmu%u", i); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (res) { + pcie->nvmmu[i].base = devm_ioremap_resource(dev, res); + if (IS_ERR(pcie->nvmmu[i].base)) + return PTR_ERR(pcie->nvmmu[i].base); + } + } + + pcie->base_pcieclk_postup = + devm_platform_ioremap_resource_byname(pdev, "pcieclk-postup"); + if (IS_ERR(pcie->base_pcieclk_postup)) { + if (PTR_ERR(pcie->base_pcieclk_postup) == -EINVAL) + pcie->base_pcieclk_postup = NULL; + else + return dev_err_probe(dev, + PTR_ERR(pcie->base_pcieclk_postup), + "failed to map pcieclk post-up\n"); + } + + return 0; +} + +static int apple_h9p_pcie_get_gpios(struct apple_h9p_pcie *pcie) +{ + struct device *dev = pcie->dev; + unsigned int i; + + for (i = 0; i < H9P_NUM_PORTS; i++) { + if (!(pcie->enabled_ports & BIT(i))) + continue; + + pcie->perst[i] = devm_gpiod_get_index(dev, "reset", i, + GPIOD_OUT_LOW); + if (IS_ERR(pcie->perst[i])) + return dev_err_probe(dev, PTR_ERR(pcie->perst[i]), + "failed to get 
PERST#%u\n", i); + + pcie->clkreq[i] = devm_gpiod_get_index(dev, "clkreq", i, + GPIOD_IN); + if (IS_ERR(pcie->clkreq[i])) + return dev_err_probe(dev, PTR_ERR(pcie->clkreq[i]), + "failed to get CLKREQ#%u\n", i); + } + + pcie->devpwr = devm_gpiod_get_array_optional(dev, "devpwr", GPIOD_ASIS); + if (IS_ERR(pcie->devpwr)) + return dev_err_probe(dev, PTR_ERR(pcie->devpwr), + "failed to get device power GPIOs\n"); + + return 0; +} + +static int apple_h9p_pcie_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct pci_host_bridge *bridge; + struct apple_h9p_pcie *pcie; + int ret; + + bridge = devm_pci_alloc_host_bridge(dev, sizeof(*pcie)); + if (!bridge) + return -ENOMEM; + + pcie = pci_host_bridge_priv(bridge); + pcie->dev = dev; + pcie->pdev = pdev; + pcie->bridge = bridge; + spin_lock_init(&pcie->used_msi_lock); + + ret = of_property_read_u32(dev->of_node, "apple,enabled-ports", + &pcie->enabled_ports); + if (ret) + pcie->enabled_ports = BIT(0); + pcie->enabled_ports &= GENMASK(H9P_NUM_PORTS - 1, 0); + if (!pcie->enabled_ports) + return dev_err_probe(dev, -EINVAL, "no enabled ports\n"); + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (ret) + return dev_err_probe(dev, ret, "failed to set DMA mask\n"); + + ret = apple_h9p_pcie_attach_genpd(pcie); + if (ret) + return dev_err_probe(dev, ret, "failed to attach power domains\n"); + ret = devm_add_action_or_reset(dev, apple_h9p_pcie_genpd_cleanup, pcie); + if (ret) + return ret; + + pcie->clks[0].id = "core"; + pcie->clks[1].id = "aux"; + pcie->clks[2].id = "ref"; + ret = devm_clk_bulk_get(dev, ARRAY_SIZE(pcie->clks), pcie->clks); + if (ret) + return dev_err_probe(dev, ret, "failed to get clocks\n"); + + ret = clk_bulk_prepare_enable(ARRAY_SIZE(pcie->clks), pcie->clks); + if (ret) + return dev_err_probe(dev, ret, "failed to enable clocks\n"); + ret = devm_add_action_or_reset(dev, apple_h9p_pcie_clk_cleanup, pcie); + if (ret) + return ret; + + ret = 
apple_h9p_pcie_force_power_domains(pcie); + if (ret) + return ret; + + pcie->pinctrl = devm_pinctrl_get_select_default(dev); + if (PTR_ERR(pcie->pinctrl) == -ENODEV) + pcie->pinctrl = NULL; + else if (IS_ERR(pcie->pinctrl)) + return dev_err_probe(dev, PTR_ERR(pcie->pinctrl), + "failed to select pinctrl state\n"); + + ret = apple_h9p_pcie_map_resources(pdev, pcie); + if (ret) + return ret; + + ret = apple_h9p_pcie_get_gpios(pcie); + if (ret) + return ret; + + ret = of_property_read_u64(dev->of_node, "apple,msi-doorbell", + &pcie->msi_doorbell); + if (ret) + pcie->msi_doorbell = H9P_DEFAULT_MSI_DOORBELL; + + ret = apple_h9p_pcie_setup_msi(pcie); + if (ret) + return dev_err_probe(dev, ret, "failed to set up MSI\n"); + + return pci_host_common_init(pdev, bridge, &apple_h9p_pcie_ecam_ops); +} + +static const struct of_device_id apple_h9p_pcie_of_match[] = { + { .compatible = "apple,t8010-pcie" }, + { } +}; +MODULE_DEVICE_TABLE(of, apple_h9p_pcie_of_match); + +static struct platform_driver apple_h9p_pcie_driver = { + .probe = apple_h9p_pcie_probe, + .driver = { + .name = "pcie-apple-h9p", + .of_match_table = apple_h9p_pcie_of_match, + .suppress_bind_attrs = true, + }, +}; +module_platform_driver(apple_h9p_pcie_driver); + +MODULE_DESCRIPTION("Apple H9P/T8010 PCIe host bridge driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/apple-h9p-pcie.h b/include/linux/apple-h9p-pcie.h new file mode 100644 index 00000000000000..c29219c281b208 --- /dev/null +++ b/include/linux/apple-h9p-pcie.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_APPLE_H9P_PCIE_H +#define _LINUX_APPLE_H9P_PCIE_H + +#include +#include +#include + +struct device; + +#define APPLE_H9P_NVMMU_MAX_REQS 36 +#define APPLE_H9P_NVMMU_MAX_PAGES 256 +#define APPLE_H9P_NVMMU_PAGE_SIZE 4096 + +#if IS_REACHABLE(CONFIG_PCIE_APPLE_H9P) +int apple_h9p_pcie_map_nvmmu(struct device *dev, unsigned int tag, + const u64 *pages, unsigned int npages, + dma_addr_t *iova); +#else +static inline int 
apple_h9p_pcie_map_nvmmu(struct device *dev, + unsigned int tag, const u64 *pages, + unsigned int npages, dma_addr_t *iova) +{ + return -EOPNOTSUPP; +} +#endif + +#endif /* _LINUX_APPLE_H9P_PCIE_H */ From 60287da0389f466209a01a7f58fa7a2a278b27fe Mon Sep 17 00:00:00 2001 From: Pauli1Go <101004482+Pauli1Go@users.noreply.github.com> Date: Sun, 28 Jun 2026 23:34:21 +0200 Subject: [PATCH 2/2] apple: t8010: use upstream DART for H9P NVMe Drop the temporary T8010 DART driver. The Hoolock base now has S5L8960X/T8010 support in apple-dart, so the iPad7 PCIe DART can use the upstream compatible fallback plus apple,dma-range. Move the Apple H9P FlatDMA helpers out of the main nvme-pci body into a private include file. The remaining pci.c changes are limited to the H9P hooks that still need private nvme-pci access. Signed-off-by: Pauli1Go <101004482+Pauli1Go@users.noreply.github.com> --- arch/arm64/boot/dts/apple/t8010-ipad7.dtsi | 4 +- drivers/iommu/Kconfig | 11 - drivers/iommu/Makefile | 1 - drivers/iommu/apple-t8010-dart.c | 680 --------------------- drivers/nvme/host/pci-apple-h9p.c | 364 +++++++++++ drivers/nvme/host/pci.c | 357 +---------- 6 files changed, 368 insertions(+), 1049 deletions(-) delete mode 100644 drivers/iommu/apple-t8010-dart.c create mode 100644 drivers/nvme/host/pci-apple-h9p.c diff --git a/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi b/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi index bb89ca3e5df7b9..882fec4e69b5f3 100644 --- a/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi +++ b/arch/arm64/boot/dts/apple/t8010-ipad7.dtsi @@ -27,13 +27,13 @@ soc { pcie0_dart0: iommu@601008000 { - compatible = "apple,t8010-dart"; + compatible = "apple,t8010-dart", "apple,s5l8960x-dart"; reg = <0x6 0x01008000 0x0 0x4000>; #iommu-cells = <1>; interrupt-parent = <&aic>; interrupts = ; power-domains = <&ps_pcie>; - pcie-dart; + apple,dma-range = <0x0 0x80000000 0x0 0x3c000000>; }; pcie0: pcie@610000000 { diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 
a2ba9de4375878..f86262b11416d1 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -315,17 +315,6 @@ config APPLE_DART Say Y here if you are using an Apple SoC. -config APPLE_T8010_DART - tristate "Apple T8010 legacy DART IOMMU support" - depends on ARCH_APPLE || COMPILE_TEST - select IOMMU_API - help - Support for the older DART layout used by Apple A10/T8010 PCIe. - This covers the pre-M1 register layout where TTBRs start at 0x40 - and the PCIe aperture is offset into the device IOVA space. - Enable this when bringing up H9P/Apple NVMe storage on T8010 - devices that cannot use the newer apple-dart register layout. - config S390_IOMMU def_bool y if S390 && PCI depends on S390 && PCI diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index dc0ef895c74c0c..0275821f4ef985 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -36,5 +36,4 @@ obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o -obj-$(CONFIG_APPLE_T8010_DART) += apple-t8010-dart.o obj-$(CONFIG_IOMMU_DEBUG_PAGEALLOC) += iommu-debug-pagealloc.o diff --git a/drivers/iommu/apple-t8010-dart.c b/drivers/iommu/apple-t8010-dart.c deleted file mode 100644 index 9d927a4e46593a..00000000000000 --- a/drivers/iommu/apple-t8010-dart.c +++ /dev/null @@ -1,680 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DART IOMMU on Apple T8010/A10 SoCs - * - * Copyright (C) 2020 Corellium LLC - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DART_TLB_OP 0x0000 -#define DART_TLB_OP_FLUSH 0x00000002 -#define DART_TLB_OP_SID_SHIFT 8 -#define DART_TLB_OP_SID(sid4) (1 << ((sid4) + 8)) -#define DART_TLB_OP_BUSY BIT(3) -#define DART_CONFIG 0x000C -#define DART_CONFIG_TXEN(sid4) (1 << ((sid4) * 8 + 7)) -#define DART_ERROR_STATUS 0x0010 -#define DART_ERROR_AXI_REQ0 0x0014 -#define 
DART_ERROR_AXI_REQ1 0x0018 -#define DART_ERROR_ADDRESS 0x001C -#define DART_DIAG_CONFIG 0x0020 -#define DART_UNKNOWN_24 0x0024 -#define DART_SID_REMAP 0x0028 -#define DART_UNKNOWN_2C 0x002C -#define DART_FETCH_CONFIG 0x0030 -#define DART_PERF_CONFIG 0x0078 -#define DART_TLB_MISS 0x007C -#define DART_TLB_WAIT 0x0080 -#define DART_TLB_HIT 0x0084 -#define DART_ST_MISS 0x0088 -#define DART_ST_WAIT 0x008C -#define DART_ST_HIT 0x0090 -#define DART_TTBR(sid4, l1idx4) (0x0040 + 16 * (sid4) + 4 * (l1idx4)) -#define DART_TTBR_VALID BIT(31) -#define DART_TTBR_MASK 0x00FFFFFF -#define DART_TLB_STATUS 0x1000 -#define DART_TLB_UNKNOWN(idx) (0x1004 + 4 * (idx)) -#define DART_STT_PA_DATA(idx) (0x2000 + 4 * (idx)) -#define DART_STT_PA_DATA_COUNT 1024 -#define DART_SMMU_TLB_CFG 0x3000 -#define DART_SMMU_TLB_DATA_RD 0x3100 -#define DART_SMMU_TLB_DATA_RD_COUNT 4 -#define DART_DATA_DEBUG_IDX 0x3120 -#define DART_DATA_DEBUG_CNTL 0x3124 -#define DART_DATA_DEBUG_CNTL_READ BIT(0) -#define DART_DATA_DEBUG_CNTL_BUSY BIT(2) -#define DART_TLB_TAG(idx) (0x3800 + 4 * (idx)) -#define DART_TLB_TAG_COUNT 128 - -#define DART_PTE_STATE_MASK 3 -#define DART_PTE_STATE_INVALID 0 -#define DART_PTE_STATE_NEXT 3 -#define DART_PTE_STATE_VALID 3 -#define DART_PTE_ADDR_MASK 0xFFFFFF000ull - -#define DART_NUM_SID 4 -#define DART_PAGE_SHIFT 12 - -#define DART_PAGE_SIZE BIT(DART_PAGE_SHIFT) -#define DART_PAGE_MASK (DART_PAGE_SIZE - 1ul) - -struct apple_t8010_dart_iommu { - struct device *dev; - struct iommu_device iommu; - void __iomem *base; - int is_init; - int is_pcie; - u64 iova_offset; - u64 **l2dma[DART_NUM_SID]; - u64 *l1dma[DART_NUM_SID]; - /* Protects DART page table allocation and register updates. 
*/ - spinlock_t dart_lock; -}; - -struct apple_t8010_dart_iommu_domain { - struct iommu_domain domain; - struct apple_t8010_dart_iommu *iommu; - int sid; -}; - -struct apple_t8010_dart_iommu_devdata { - struct apple_t8010_dart_iommu *iommu; - u32 sid; -}; - -static irqreturn_t apple_t8010_dart_iommu_irq(int irq, void *dev_id) -{ - struct apple_t8010_dart_iommu *im = dev_id; - u32 status, axi_req[2], addr, tlbstat; - - status = readl(im->base + DART_ERROR_STATUS); - tlbstat = readl(im->base + DART_TLB_STATUS); - axi_req[0] = readl(im->base + DART_ERROR_AXI_REQ0); - axi_req[1] = readl(im->base + DART_ERROR_AXI_REQ1); - addr = readl(im->base + DART_ERROR_ADDRESS); - - writel(status, im->base + DART_ERROR_STATUS); - writel(tlbstat, im->base + DART_TLB_STATUS); - - dev_err(im->dev, - "STATUS %08x AXI_REQ %08x:%08x ADDR %08x TLBSTAT %08x\n", - status, axi_req[0], axi_req[1], addr, tlbstat); - - return IRQ_HANDLED; -} - -static void apple_t8010_dart_tlb_flush(struct apple_t8010_dart_iommu *im, - u32 sidmask, int need_lock) -{ - unsigned long flags; - u32 status; - - if (need_lock) - spin_lock_irqsave(&im->dart_lock, flags); - writel(DART_TLB_OP_FLUSH | (sidmask << DART_TLB_OP_SID_SHIFT), - im->base + DART_TLB_OP); - while (1) { - status = readl(im->base + DART_TLB_OP); - if (!(status & DART_TLB_OP_BUSY)) - break; - } - if (need_lock) - spin_unlock_irqrestore(&im->dart_lock, flags); -} - -static u64 *apple_t8010_dart_get_pte(struct apple_t8010_dart_iommu *im, u32 sid, - u64 iova, int optional, - unsigned long *flags) -{ - unsigned int i, l1idx, l1base, l2idx, npgs, npg; - u64 phys, **l1pt, *l1dma, *l2dma; - void *dmava, *ptva; - dma_addr_t dmah; - - if (im->is_pcie) - sid = 0; - - if (!im->l1dma[sid]) { - spin_unlock_irqrestore(&im->dart_lock, *flags); - ptva = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - DART_PAGE_SHIFT + 2 - - PAGE_SHIFT); - dmava = dma_alloc_attrs(im->dev, DART_PAGE_SIZE * 4, &dmah, - GFP_KERNEL | __GFP_ZERO, - DMA_ATTR_WRITE_COMBINE); - 
spin_lock_irqsave(&im->dart_lock, *flags); - if (!im->l1dma[sid]) { - if (!ptva || !dmava) { - if (ptva) - free_pages((unsigned long)ptva, - DART_PAGE_SHIFT + 2 - - PAGE_SHIFT); - else - dev_err(im->dev, - "failed to allocate shadow L1 pagetable\n"); - if (dmava) - dma_free_attrs(im->dev, - DART_PAGE_SIZE * 4, - dmava, dmah, - DMA_ATTR_WRITE_COMBINE); - else - dev_err(im->dev, - "failed to allocate uncached L1 pagetable\n"); - return NULL; - } - im->l2dma[sid] = ptva; - im->l1dma[sid] = dmava; - phys = dmah; - for (i = 0; i < 4; i++) - writel((((phys >> DART_PAGE_SHIFT) + i) & - DART_TTBR_MASK) | - DART_TTBR_VALID, - im->base + DART_TTBR(sid, i)); - } else { - if (ptva) - free_pages((unsigned long)ptva, - DART_PAGE_SHIFT + 2 - PAGE_SHIFT); - if (dmava) - dma_free_attrs(im->dev, DART_PAGE_SIZE * 4, - dmava, dmah, - DMA_ATTR_WRITE_COMBINE); - } - } - - l1pt = im->l2dma[sid]; - l1idx = (iova >> 21) & 0x7FF; - - if (!l1pt[l1idx]) { - if (optional) - return NULL; - if (DART_PAGE_SHIFT < PAGE_SHIFT) - npgs = PAGE_SHIFT - DART_PAGE_SHIFT; - else - npgs = 0; - spin_unlock_irqrestore(&im->dart_lock, *flags); - dmava = dma_alloc_attrs(im->dev, DART_PAGE_SIZE << npgs, &dmah, - GFP_KERNEL | __GFP_ZERO, - DMA_ATTR_WRITE_COMBINE); - spin_lock_irqsave(&im->dart_lock, *flags); - if (!l1pt[l1idx]) { - if (!dmava) { - dev_err(im->dev, - "failed to allocate uncached L2 pagetable\n"); - return NULL; - } - npg = 1 << npgs; - phys = dmah; - l1dma = im->l1dma[sid]; - l1base = (l1idx >> npgs) << npgs; - for (i = 0; i < npg; i++) { - l1pt[l1base + i] = - dmava + (i << DART_PAGE_SHIFT); - l1dma[l1base + i] = - ((phys + (i << DART_PAGE_SHIFT)) & - DART_PTE_ADDR_MASK) | - DART_PTE_STATE_NEXT; - } - } else if (dmava) - dma_free_attrs(im->dev, DART_PAGE_SIZE << npgs, dmava, - dmah, DMA_ATTR_WRITE_COMBINE); - } - - l2dma = l1pt[l1idx]; - l2idx = (iova >> 12) & 0x1FF; - return &l2dma[l2idx]; -} - -static void apple_t8010_dart_iommu_enable(struct apple_t8010_dart_iommu *im, - u32 sid) -{ - u32 
val; - - val = readl(im->base + DART_CONFIG); - if (val & DART_CONFIG_TXEN(sid)) - return; - writel(val | DART_CONFIG_TXEN(sid), im->base + DART_CONFIG); - if (!(readl(im->base + DART_CONFIG) & DART_CONFIG_TXEN(sid))) - dev_err(im->dev, "failed to enable SID %d: 0x%08x.\n", sid, - readl(im->base + DART_CONFIG)); -} - -static bool apple_t8010_dart_iommu_capable(struct device *dev, - enum iommu_cap cap) -{ - switch (cap) { - case IOMMU_CAP_CACHE_COHERENCY: - return true; - default: - return false; - } -} - -static struct apple_t8010_dart_iommu_domain * -to_apple_t8010_dart_iommu_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct apple_t8010_dart_iommu_domain, domain); -} - -static struct iommu_domain * -apple_t8010_dart_iommu_domain_alloc_paging(struct device *dev) -{ - struct apple_t8010_dart_iommu_domain *idom; - - idom = kzalloc_obj(*idom, GFP_KERNEL); - if (!idom) - return NULL; - - idom->domain.pgsize_bitmap = SZ_4K; - idom->sid = -1; - - return &idom->domain; -} - -static void apple_t8010_dart_iommu_domain_free(struct iommu_domain *domain) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - - kfree(idom); -} - -static int apple_t8010_dart_iommu_attach_device(struct iommu_domain *domain, - struct device *dev, - struct iommu_domain *old) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - struct apple_t8010_dart_iommu_devdata *idd; - struct apple_t8010_dart_iommu *im; - unsigned long flags; - u32 sid, i, j; - - idd = dev_iommu_priv_get(dev); - if (!idd) - return -ENODEV; - im = idd->iommu; - - if (idom->iommu && idom->iommu != im) { - dev_err(dev, - "different DART already assigned to IOMMU domain.\n"); - return -EINVAL; - } - - if (!idom->iommu) { - idom->iommu = im; - if (im->is_pcie) { - idom->domain.geometry.aperture_start = 0x80000000ul; - idom->domain.geometry.aperture_end = 0xBBFFFFFFul; - } else { - idom->domain.geometry.aperture_start = 
0x00004000ul; - idom->domain.geometry.aperture_end = 0xFFFFFFFFul; - } - idom->domain.geometry.force_aperture = true; - } - - sid = im->is_pcie ? 0 : idd->sid; - if (idom->sid >= 0 && idom->sid != sid) { - dev_err(dev, - "multiple SIDs mapped to the same IOMMU domain.\n"); - return -EEXIST; - } - idom->sid = sid; - - spin_lock_irqsave(&im->dart_lock, flags); - - if (!im->is_init) { - im->is_init = 1; - writel(0x0020FFFC, im->base + DART_UNKNOWN_24); - writel(0x00000000, im->base + DART_UNKNOWN_2C); - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - writel(0x00000000, im->base + DART_TTBR(i, j)); - writel(0x000E0303, im->base + DART_FETCH_CONFIG); - writel(0x00000100, im->base + DART_DIAG_CONFIG); - for (i = 0; i < 6; i++) - writel(0x00000000, im->base + DART_TLB_UNKNOWN(i)); - writel(0x03F3FFFF, im->base + DART_TLB_STATUS); - - apple_t8010_dart_tlb_flush(im, 15, 0); - } - apple_t8010_dart_iommu_enable(im, sid); - - spin_unlock_irqrestore(&im->dart_lock, flags); - - return 0; -} - -static int apple_t8010_dart_iommu_map_pages(struct iommu_domain *domain, - unsigned long iova, - phys_addr_t paddr, size_t pgsize, - size_t pgcount, int prot, gfp_t gfp, - size_t *mapped) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - struct apple_t8010_dart_iommu *im = idom->iommu; - u64 len = (u64)pgsize * pgcount; - u64 end = iova + len - 1; - unsigned int i, npg; - unsigned long flags; - u64 *ptep; - int ret = 0; - - if (!im || idom->sid < 0) - return -EINVAL; - - if (!len || end < iova || iova < domain->geometry.aperture_start || - end > domain->geometry.aperture_end) - return -EINVAL; - - npg = (len + DART_PAGE_MASK) >> DART_PAGE_SHIFT; - - if (iova < im->iova_offset) - return -EINVAL; - iova -= im->iova_offset; - - spin_lock_irqsave(&im->dart_lock, flags); - for (i = 0; i < npg; i++) { - ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 0, &flags); - if (!ptep) { - ret = -ENOMEM; - break; - } - *ptep = (paddr & 
DART_PTE_ADDR_MASK) | DART_PTE_STATE_VALID; - iova += DART_PAGE_SIZE; - paddr += DART_PAGE_SIZE; - if (mapped) - *mapped += DART_PAGE_SIZE; - } - spin_unlock_irqrestore(&im->dart_lock, flags); - - return ret; -} - -static phys_addr_t -apple_t8010_dart_iommu_iova_to_phys(struct iommu_domain *domain, - dma_addr_t iova) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - struct apple_t8010_dart_iommu *im = idom->iommu; - unsigned long flags; - u64 *ptep, result = 0; - - if (idom->sid < 0) - return 0; - - if (iova < im->iova_offset) - return 0; - iova -= im->iova_offset; - - spin_lock_irqsave(&im->dart_lock, flags); - ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 1, &flags); - if (ptep) - result = *ptep; - spin_unlock_irqrestore(&im->dart_lock, flags); - - if (result & DART_PTE_STATE_MASK) - result = (result & DART_PTE_ADDR_MASK) | - (iova & DART_PAGE_MASK); - return result; -} - -static size_t apple_t8010_dart_iommu_unmap_pages(struct iommu_domain *domain, - unsigned long iova, - size_t pgsize, - size_t pgcount, - struct iommu_iotlb_gather *gather) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - struct apple_t8010_dart_iommu *im = idom->iommu; - size_t size = pgsize * pgcount; - unsigned int i, npg = (size + DART_PAGE_MASK) >> DART_PAGE_SHIFT; - unsigned long flags; - u64 *ptep; - - if (idom->sid < 0) - return 0; - - if (iova < im->iova_offset) - return 0; - iova -= im->iova_offset; - - spin_lock_irqsave(&im->dart_lock, flags); - for (i = 0; i < npg; i++) { - ptep = apple_t8010_dart_get_pte(im, idom->sid, iova, 1, &flags); - if (ptep) - *ptep = 0; - iova += DART_PAGE_SIZE; - } - spin_unlock_irqrestore(&im->dart_lock, flags); - - return size; -} - -static void apple_t8010_dart_iommu_flush_iotlb_all(struct iommu_domain *domain) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - - if (!idom->iommu) - return; - - if (idom->sid >= 
0) - apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); -} - -static void apple_t8010_dart_iommu_iotlb_sync(struct iommu_domain *domain, - struct iommu_iotlb_gather *gather) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - - if (!idom->iommu) - return; - - if (idom->sid >= 0) - apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); -} - -static int apple_t8010_dart_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, - size_t size) -{ - struct apple_t8010_dart_iommu_domain *idom = - to_apple_t8010_dart_iommu_domain(domain); - - if (!idom->iommu) - return 0; - - if (idom->sid >= 0) - apple_t8010_dart_tlb_flush(idom->iommu, 1u << idom->sid, 1); - return 0; -} - -static const struct iommu_ops apple_t8010_dart_iommu_ops; - -static struct iommu_device * -apple_t8010_dart_iommu_probe_device(struct device *dev) -{ - struct apple_t8010_dart_iommu_devdata *idd = dev_iommu_priv_get(dev); - - if (!idd || !idd->iommu) - return ERR_PTR(-ENODEV); - - device_link_add(dev, idd->iommu->dev, - DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); - - return &idd->iommu->iommu; -} - -static void apple_t8010_dart_iommu_release_device(struct device *dev) -{ - struct apple_t8010_dart_iommu_devdata *idd = dev_iommu_priv_get(dev); - - dev_iommu_priv_set(dev, NULL); - kfree(idd); -} - -static struct iommu_group * -apple_t8010_dart_iommu_device_group(struct device *dev) -{ -#ifdef CONFIG_PCI - if (dev_is_pci(dev)) - return pci_device_group(dev); -#endif - - return generic_device_group(dev); -} - -static int apple_t8010_dart_iommu_of_xlate(struct device *dev, - const struct of_phandle_args *args) -{ - struct platform_device *iommu_dev; - struct apple_t8010_dart_iommu_devdata *data; - - data = kzalloc_obj(*data, GFP_KERNEL); - if (!data) - return -ENOMEM; - - iommu_dev = of_find_device_by_node(args->np); - if (!iommu_dev) { - kfree(data); - return -ENODEV; - } - - data->iommu = platform_get_drvdata(iommu_dev); - if 
(!data->iommu) { - platform_device_put(iommu_dev); - kfree(data); - return -ENODEV; - } - - data->sid = args->args[0]; - dev_iommu_priv_set(dev, data); - - platform_device_put(iommu_dev); - - return 0; -} - -static const struct iommu_ops apple_t8010_dart_iommu_ops = { - .capable = apple_t8010_dart_iommu_capable, - .of_xlate = apple_t8010_dart_iommu_of_xlate, - .domain_alloc_paging = apple_t8010_dart_iommu_domain_alloc_paging, - .probe_device = apple_t8010_dart_iommu_probe_device, - .release_device = apple_t8010_dart_iommu_release_device, - .device_group = apple_t8010_dart_iommu_device_group, - .owner = THIS_MODULE, - .default_domain_ops = - &(const struct iommu_domain_ops){ - .attach_dev = apple_t8010_dart_iommu_attach_device, - .map_pages = apple_t8010_dart_iommu_map_pages, - .unmap_pages = apple_t8010_dart_iommu_unmap_pages, - .iova_to_phys = apple_t8010_dart_iommu_iova_to_phys, - .flush_iotlb_all = - apple_t8010_dart_iommu_flush_iotlb_all, - .iotlb_sync = apple_t8010_dart_iommu_iotlb_sync, - .iotlb_sync_map = apple_t8010_dart_iommu_iotlb_sync_map, - .free = apple_t8010_dart_iommu_domain_free, - }, -}; - -static int apple_t8010_dart_iommu_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct device_node *node = dev->of_node; - struct apple_t8010_dart_iommu *im; - struct resource *r; - int ret = 0, irq; - - im = devm_kzalloc(dev, sizeof(struct apple_t8010_dart_iommu), - GFP_KERNEL); - if (!im) - return -ENOMEM; - - im->dev = &pdev->dev; - platform_set_drvdata(pdev, im); - - spin_lock_init(&im->dart_lock); - - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (ret) - return dev_err_probe(dev, ret, "failed to set DMA mask\n"); - - if (of_property_read_bool(pdev->dev.of_node, "pcie-dart")) { - im->is_pcie = 1; - im->iova_offset = 0x80000000ul; - } - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - im->base = devm_ioremap_resource(&pdev->dev, r); - if (IS_ERR(im->base)) - return PTR_ERR(im->base); - - irq = 
platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, apple_t8010_dart_iommu_irq, 0, - dev_name(&pdev->dev), im); - if (ret < 0) - return ret; - - ret = iommu_device_sysfs_add(&im->iommu, dev, NULL, node->name); - if (ret) - return ret; - - ret = iommu_device_register(&im->iommu, &apple_t8010_dart_iommu_ops, - dev); - if (ret) - goto err_sysfs_remove; - - return 0; - -err_sysfs_remove: - iommu_device_sysfs_remove(&im->iommu); - return ret; -} - -static void apple_t8010_dart_iommu_remove(struct platform_device *pdev) -{ - struct apple_t8010_dart_iommu *im = platform_get_drvdata(pdev); - - iommu_device_unregister(&im->iommu); - iommu_device_sysfs_remove(&im->iommu); -} - -static const struct of_device_id apple_t8010_dart_iommu_match[] = { - { .compatible = "apple,t8010-dart" }, - {}, -}; -MODULE_DEVICE_TABLE(of, apple_t8010_dart_iommu_match); - -static struct platform_driver apple_t8010_dart_iommu_driver = { - .probe = apple_t8010_dart_iommu_probe, - .remove = apple_t8010_dart_iommu_remove, - .driver = { - .name = "apple-t8010-dart", - .of_match_table = apple_t8010_dart_iommu_match, - }, -}; -module_platform_driver(apple_t8010_dart_iommu_driver); - -MODULE_DESCRIPTION("Apple T8010 legacy DART IOMMU driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/nvme/host/pci-apple-h9p.c b/drivers/nvme/host/pci-apple-h9p.c new file mode 100644 index 00000000000000..06cc12995f8312 --- /dev/null +++ b/drivers/nvme/host/pci-apple-h9p.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Apple H9P/T8010 PCI NVMe glue. + * + * This file is included by pci.c because the FlatDMA path needs access to + * nvme-pci's private request, queue and controller structs. 
+ */ + +#define PCI_DEVICE_ID_APPLE_H9P_NVME 0x2002 +#define NVME_CMD_APPLE_H9P_FLATDMA BIT(5) + +#define APPLE_H9P_REG_INIT 0x1800 +#define APPLE_H9P_REG_INIT_REGULAR 0 +#define APPLE_H9P_REG_SCRATCH_SIZE_REQ 0x1808 +#define APPLE_H9P_REG_SCRATCH_ALIGN_REQ 0x180c +#define APPLE_H9P_REG_SCRATCH_BASE_LO 0x1810 +#define APPLE_H9P_REG_SCRATCH_BASE_HI 0x1814 +#define APPLE_H9P_REG_SCRATCH_SIZE 0x1818 +#define APPLE_H9P_REG_CORE_MASK 0x1824 +#define APPLE_H9P_REG_LOG_SIZE 0x1828 +#define APPLE_H9P_REG_BOOT_STATE 0x1b18 +#define APPLE_H9P_REG_BOOT_STATE_MAGIC 0xbfbfbfbfu +#define APPLE_H9P_NVME_MAX_SECTORS \ + (APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE >> SECTOR_SHIFT) + +struct apple_h9p_nvme_req { + u64 pages[APPLE_H9P_NVMMU_MAX_PAGES]; + unsigned int npages; +}; + +struct apple_h9p_nvme { + dma_addr_t scratch_dma; + u32 scratch_size; + struct apple_h9p_nvme_req req[APPLE_H9P_NVMMU_MAX_REQS]; + DECLARE_BITMAP(used_req, APPLE_H9P_NVMMU_MAX_REQS); + unsigned int last_req; + /* Protects the FlatDMA request-slot bitmap. */ + spinlock_t req_lock; +}; + +static bool nvme_pci_is_apple_h9p(struct pci_dev *pdev) +{ + return pdev->vendor == PCI_VENDOR_ID_APPLE && + pdev->device == PCI_DEVICE_ID_APPLE_H9P_NVME; +} + +static int nvme_pci_apple_h9p_find_scratch(struct nvme_dev *dev, + u32 scratch_size_req, + u32 scratch_align_req) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + struct pci_dev *pdev = to_pci_dev(dev->dev); + struct pci_host_bridge *bridge; + struct device_node *pcie_np; + struct device_node *mem_np; + struct resource res; + resource_size_t size; + u32 iova; + int ret; + + bridge = pci_find_host_bridge(pdev->bus); + if (!bridge) + return -ENODEV; + + pcie_np = bridge->dev.parent ? 
bridge->dev.parent->of_node : NULL; + if (!pcie_np) + pcie_np = bridge->dev.of_node; + if (!pcie_np) + return -ENODEV; + + mem_np = of_parse_phandle(pcie_np, "memory-region", 0); + if (!mem_np) + return -ENODEV; + + ret = of_address_to_resource(mem_np, 0, &res); + if (ret) + goto out_put_mem; + + ret = of_property_read_u32(pcie_np, "apple,nvmmu-iova", &iova); + if (ret) + goto out_put_mem; + + if (!scratch_size_req || scratch_size_req == U32_MAX) { + ret = -EINVAL; + goto out_put_mem; + } + if (!scratch_align_req || scratch_align_req == U32_MAX) + scratch_align_req = 1; + + size = resource_size(&res); + if (size < scratch_size_req || size > U32_MAX) { + ret = -ENOSPC; + goto out_put_mem; + } + if (!IS_ALIGNED(res.start, scratch_align_req) || + !IS_ALIGNED(iova, scratch_align_req)) { + ret = -EINVAL; + goto out_put_mem; + } + + h9p->scratch_dma = iova; + h9p->scratch_size = size; + dev_dbg(dev->dev, "Apple H9P NVMe scratch %#x@%pa as dma %#llx\n", + h9p->scratch_size, &res.start, (u64)h9p->scratch_dma); + +out_put_mem: + of_node_put(mem_np); + return ret; +} + +static int nvme_pci_apple_h9p_preinit(struct nvme_dev *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + u32 csts, core_mask, log_size; + u32 scratch_size, scratch_align; + int ret; + + if (!dev->apple_h9p) + return 0; + + if (pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x40) != + PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x17c, 0x10081008) != + PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x18c, 0) != PCIBIOS_SUCCESSFUL || + pci_write_config_dword(pdev, 0x188, 0x40550000) != + PCIBIOS_SUCCESSFUL) + return -EIO; + + if (readl(dev->bar + APPLE_H9P_REG_BOOT_STATE) == + APPLE_H9P_REG_BOOT_STATE_MAGIC) + dev_dbg(dev->dev, "Apple H9P NVMe boot-state magic present\n"); + + core_mask = readl(dev->bar + APPLE_H9P_REG_CORE_MASK); + log_size = readl(dev->bar + APPLE_H9P_REG_LOG_SIZE); + scratch_size = readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE_REQ); + scratch_align = readl(dev->bar + 
APPLE_H9P_REG_SCRATCH_ALIGN_REQ); + + writel(0, dev->bar + NVME_REG_CC); + ret = readl_poll_timeout(dev->bar + NVME_REG_CSTS, csts, + !(csts & (NVME_CSTS_RDY | NVME_CSTS_CFS)), + 1000, 2000000); + if (ret) + return ret; + + ret = nvme_pci_apple_h9p_find_scratch(dev, scratch_size, + scratch_align); + if (ret) + return ret; + + dev_dbg(dev->dev, + "Apple H9P NVMe core_mask=%#x log_size=%#x scratch_req=%#x align=%#x\n", + core_mask, log_size, scratch_size, scratch_align); + return 0; +} + +static int nvme_pci_apple_h9p_prepare_enable(struct nvme_dev *dev) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + + if (!h9p) + return 0; + if (!h9p->scratch_size) + return -EINVAL; + + writel(APPLE_H9P_REG_INIT_REGULAR, dev->bar + APPLE_H9P_REG_INIT); + writel(lower_32_bits(h9p->scratch_dma), + dev->bar + APPLE_H9P_REG_SCRATCH_BASE_LO); + writel(upper_32_bits(h9p->scratch_dma), + dev->bar + APPLE_H9P_REG_SCRATCH_BASE_HI); + writel(h9p->scratch_size, dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); + readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); + + return 0; +} + +static blk_status_t nvme_pci_apple_h9p_alloc_req(struct nvme_dev *dev, + struct apple_h9p_nvme_req **req, + unsigned int *tag) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + unsigned long flags; + unsigned int idx; + + spin_lock_irqsave(&h9p->req_lock, flags); + idx = find_next_zero_bit(h9p->used_req, APPLE_H9P_NVMMU_MAX_REQS, + (h9p->last_req + 1) % + APPLE_H9P_NVMMU_MAX_REQS); + if (idx >= APPLE_H9P_NVMMU_MAX_REQS) + idx = find_first_zero_bit(h9p->used_req, + APPLE_H9P_NVMMU_MAX_REQS); + if (idx >= APPLE_H9P_NVMMU_MAX_REQS) { + spin_unlock_irqrestore(&h9p->req_lock, flags); + dev_dbg_ratelimited(dev->dev, + "Apple H9P NVMe FlatDMA slots exhausted\n"); + return BLK_STS_RESOURCE; + } + + h9p->last_req = idx; + __set_bit(idx, h9p->used_req); + spin_unlock_irqrestore(&h9p->req_lock, flags); + + *req = &h9p->req[idx]; + *tag = idx; + (*req)->npages = 0; + memset((*req)->pages, 0, sizeof((*req)->pages)); + return BLK_STS_OK; 
+} + +static void nvme_pci_apple_h9p_free_req(struct nvme_dev *dev, + struct apple_h9p_nvme_req *req) +{ + struct apple_h9p_nvme *h9p = dev->apple_h9p; + unsigned long flags; + unsigned int tag; + + if (!h9p || !req) + return; + + tag = req - h9p->req; + if (tag >= APPLE_H9P_NVMMU_MAX_REQS) + return; + + apple_h9p_pcie_map_nvmmu(dev->dev, tag, NULL, 0, NULL); + req->npages = 0; + + spin_lock_irqsave(&h9p->req_lock, flags); + __clear_bit(tag, h9p->used_req); + spin_unlock_irqrestore(&h9p->req_lock, flags); +} + +static bool nvme_pci_apple_h9p_unmap_data(struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; + unsigned int i; + + if (!iod->apple_h9p_req) + return false; + + for (i = 0; i < iod->nr_dma_vecs; i++) + dma_unmap_page(dev->dev, iod->dma_vecs[i].addr, + iod->dma_vecs[i].len, rq_dma_dir(req)); + if (iod->dma_vecs) { + mempool_free(iod->dma_vecs, dev->dmavec_mempool); + iod->dma_vecs = NULL; + } + iod->nr_dma_vecs = 0; + + nvme_pci_apple_h9p_free_req(dev, iod->apple_h9p_req); + iod->apple_h9p_req = NULL; + iod->cmd.common.flags &= ~NVME_CMD_APPLE_H9P_FLATDMA; + return true; +} + +static blk_status_t nvme_pci_apple_h9p_map_data(struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; + struct apple_h9p_nvme_req *hreq; + struct req_iterator iter; + struct bio_vec bv; + dma_addr_t flatdma; + u64 phys, offs = 0; + unsigned int tag, npages = 0, consumed = 0; + unsigned int total = blk_rq_payload_bytes(req); + blk_status_t status; + int ret; + + if (!dev->apple_h9p) + return BLK_STS_NOTSUPP; + if (total > APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE) + return BLK_STS_IOERR; + + status = nvme_pci_apple_h9p_alloc_req(dev, &hreq, &tag); + if (status) + return status; + + iod->apple_h9p_req = hreq; + iod->dma_vecs = 
mempool_alloc(dev->dmavec_mempool, GFP_ATOMIC); + if (!iod->dma_vecs) { + status = BLK_STS_RESOURCE; + goto out_unmap; + } + + rq_for_each_bvec(bv, req, iter) { + dma_addr_t dma_addr; + unsigned int len = bv.bv_len; + + if (WARN_ON_ONCE(iod->nr_dma_vecs >= + blk_rq_nr_phys_segments(req))) { + status = BLK_STS_IOERR; + goto out_unmap; + } + + dma_addr = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0); + if (dma_mapping_error(dev->dev, dma_addr)) { + status = BLK_STS_RESOURCE; + goto out_unmap; + } + + iod->dma_vecs[iod->nr_dma_vecs].addr = dma_addr; + iod->dma_vecs[iod->nr_dma_vecs].len = len; + iod->nr_dma_vecs++; + + phys = page_to_phys(bv.bv_page) + bv.bv_offset; + if (!consumed) { + offs = phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1); + phys -= offs; + len += offs; + } else if (phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1)) { + dev_err_ratelimited(dev->dev, + "Apple H9P FlatDMA segment is not page-aligned: phys=%#llx\n", + phys); + status = BLK_STS_IOERR; + goto out_unmap; + } + + if (consumed + bv.bv_len != total && + (len & (APPLE_H9P_NVMMU_PAGE_SIZE - 1))) { + dev_err_ratelimited(dev->dev, + "Apple H9P FlatDMA segment length is not page-aligned: len=%#x\n", + len); + status = BLK_STS_IOERR; + goto out_unmap; + } + + while (len) { + if (npages >= APPLE_H9P_NVMMU_MAX_PAGES) { + status = BLK_STS_IOERR; + goto out_unmap; + } + + hreq->pages[npages++] = phys; + phys += APPLE_H9P_NVMMU_PAGE_SIZE; + len = len > APPLE_H9P_NVMMU_PAGE_SIZE ? 
+ len - APPLE_H9P_NVMMU_PAGE_SIZE : 0; + } + + consumed += bv.bv_len; + } + + ret = apple_h9p_pcie_map_nvmmu(dev->dev, tag, hreq->pages, npages, + &flatdma); + if (ret) { + status = errno_to_blk_status(ret); + if (status == BLK_STS_NOTSUPP) + status = BLK_STS_IOERR; + goto out_unmap; + } + + hreq->npages = npages; + iod->total_len = total; + iod->cmd.common.flags |= NVME_CMD_APPLE_H9P_FLATDMA; + iod->cmd.common.dptr.prp1 = cpu_to_le64(flatdma + offs); + iod->cmd.common.dptr.prp2 = 0; + return BLK_STS_OK; + +out_unmap: + nvme_pci_apple_h9p_unmap_data(req); + return status; +} diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2c443efa450c28..3e03b898fbbc20 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -48,22 +48,6 @@ #define NVME_MAX_BYTES SZ_8M #define NVME_MAX_NR_DESCRIPTORS 5 -#define NVME_CMD_APPLE_H9P_FLATDMA BIT(5) - -#define APPLE_H9P_REG_INIT 0x1800 -#define APPLE_H9P_REG_INIT_REGULAR 0 -#define APPLE_H9P_REG_SCRATCH_SIZE_REQ 0x1808 -#define APPLE_H9P_REG_SCRATCH_ALIGN_REQ 0x180c -#define APPLE_H9P_REG_SCRATCH_BASE_LO 0x1810 -#define APPLE_H9P_REG_SCRATCH_BASE_HI 0x1814 -#define APPLE_H9P_REG_SCRATCH_SIZE 0x1818 -#define APPLE_H9P_REG_CORE_MASK 0x1824 -#define APPLE_H9P_REG_LOG_SIZE 0x1828 -#define APPLE_H9P_REG_BOOT_STATE 0x1b18 -#define APPLE_H9P_REG_BOOT_STATE_MAGIC 0xbfbfbfbfu -#define APPLE_H9P_NVME_MAX_SECTORS \ - (APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE >> SECTOR_SHIFT) - /* * For data SGLs we support a single descriptors worth of SGL entries. * For PRPs, segments don't matter at all. 
@@ -910,344 +894,7 @@ static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge, le32_to_cpu(sg_list[i].length), dir, attrs); } -struct apple_h9p_nvme_req { - u64 pages[APPLE_H9P_NVMMU_MAX_PAGES]; - unsigned int npages; -}; - -struct apple_h9p_nvme { - dma_addr_t scratch_dma; - u32 scratch_size; - struct apple_h9p_nvme_req req[APPLE_H9P_NVMMU_MAX_REQS]; - DECLARE_BITMAP(used_req, APPLE_H9P_NVMMU_MAX_REQS); - unsigned int last_req; - /* Protects the FlatDMA request-slot bitmap. */ - spinlock_t req_lock; -}; - -static bool nvme_pci_is_apple_h9p(struct pci_dev *pdev) -{ - return pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2002; -} - -static int nvme_pci_apple_h9p_find_scratch(struct nvme_dev *dev, - u32 scratch_size_req, - u32 scratch_align_req) -{ - struct apple_h9p_nvme *h9p = dev->apple_h9p; - struct pci_dev *pdev = to_pci_dev(dev->dev); - struct pci_host_bridge *bridge; - struct device_node *pcie_np; - struct device_node *mem_np; - struct resource res; - resource_size_t size; - u32 iova; - int ret; - - bridge = pci_find_host_bridge(pdev->bus); - if (!bridge) - return -ENODEV; - - pcie_np = bridge->dev.parent ? 
bridge->dev.parent->of_node : NULL; - if (!pcie_np) - pcie_np = bridge->dev.of_node; - if (!pcie_np) - return -ENODEV; - - mem_np = of_parse_phandle(pcie_np, "memory-region", 0); - if (!mem_np) - return -ENODEV; - - ret = of_address_to_resource(mem_np, 0, &res); - if (ret) - goto out_put_mem; - - ret = of_property_read_u32(pcie_np, "apple,nvmmu-iova", &iova); - if (ret) - goto out_put_mem; - - if (!scratch_size_req || scratch_size_req == U32_MAX) { - ret = -EINVAL; - goto out_put_mem; - } - if (!scratch_align_req || scratch_align_req == U32_MAX) - scratch_align_req = 1; - - size = resource_size(&res); - if (size < scratch_size_req || size > U32_MAX) { - ret = -ENOSPC; - goto out_put_mem; - } - if (!IS_ALIGNED(res.start, scratch_align_req) || - !IS_ALIGNED(iova, scratch_align_req)) { - ret = -EINVAL; - goto out_put_mem; - } - - h9p->scratch_dma = iova; - h9p->scratch_size = size; - dev_dbg(dev->dev, "Apple H9P NVMe scratch %#x@%pa as dma %#llx\n", - h9p->scratch_size, &res.start, (u64)h9p->scratch_dma); - -out_put_mem: - of_node_put(mem_np); - return ret; -} - -static int nvme_pci_apple_h9p_preinit(struct nvme_dev *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev->dev); - u32 csts, core_mask, log_size; - u32 scratch_size, scratch_align; - int ret; - - if (!dev->apple_h9p) - return 0; - - if (pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x40) != - PCIBIOS_SUCCESSFUL || - pci_write_config_dword(pdev, 0x17c, 0x10081008) != - PCIBIOS_SUCCESSFUL || - pci_write_config_dword(pdev, 0x18c, 0) != PCIBIOS_SUCCESSFUL || - pci_write_config_dword(pdev, 0x188, 0x40550000) != - PCIBIOS_SUCCESSFUL) - return -EIO; - - if (readl(dev->bar + APPLE_H9P_REG_BOOT_STATE) == - APPLE_H9P_REG_BOOT_STATE_MAGIC) - dev_dbg(dev->dev, "Apple H9P NVMe boot-state magic present\n"); - - core_mask = readl(dev->bar + APPLE_H9P_REG_CORE_MASK); - log_size = readl(dev->bar + APPLE_H9P_REG_LOG_SIZE); - scratch_size = readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE_REQ); - scratch_align = readl(dev->bar + 
APPLE_H9P_REG_SCRATCH_ALIGN_REQ); - - writel(0, dev->bar + NVME_REG_CC); - ret = readl_poll_timeout(dev->bar + NVME_REG_CSTS, csts, - !(csts & (NVME_CSTS_RDY | NVME_CSTS_CFS)), - 1000, 2000000); - if (ret) - return ret; - - ret = nvme_pci_apple_h9p_find_scratch(dev, scratch_size, - scratch_align); - if (ret) - return ret; - - dev_dbg(dev->dev, - "Apple H9P NVMe core_mask=%#x log_size=%#x scratch_req=%#x align=%#x\n", - core_mask, log_size, scratch_size, scratch_align); - return 0; -} - -static int nvme_pci_apple_h9p_prepare_enable(struct nvme_dev *dev) -{ - struct apple_h9p_nvme *h9p = dev->apple_h9p; - - if (!h9p) - return 0; - if (!h9p->scratch_size) - return -EINVAL; - - writel(APPLE_H9P_REG_INIT_REGULAR, dev->bar + APPLE_H9P_REG_INIT); - writel(lower_32_bits(h9p->scratch_dma), - dev->bar + APPLE_H9P_REG_SCRATCH_BASE_LO); - writel(upper_32_bits(h9p->scratch_dma), - dev->bar + APPLE_H9P_REG_SCRATCH_BASE_HI); - writel(h9p->scratch_size, dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); - readl(dev->bar + APPLE_H9P_REG_SCRATCH_SIZE); - - return 0; -} - -static blk_status_t nvme_pci_apple_h9p_alloc_req(struct nvme_dev *dev, - struct apple_h9p_nvme_req **req, - unsigned int *tag) -{ - struct apple_h9p_nvme *h9p = dev->apple_h9p; - unsigned long flags; - unsigned int idx; - - spin_lock_irqsave(&h9p->req_lock, flags); - idx = find_next_zero_bit(h9p->used_req, APPLE_H9P_NVMMU_MAX_REQS, - (h9p->last_req + 1) % - APPLE_H9P_NVMMU_MAX_REQS); - if (idx >= APPLE_H9P_NVMMU_MAX_REQS) - idx = find_first_zero_bit(h9p->used_req, - APPLE_H9P_NVMMU_MAX_REQS); - if (idx >= APPLE_H9P_NVMMU_MAX_REQS) { - spin_unlock_irqrestore(&h9p->req_lock, flags); - dev_dbg_ratelimited(dev->dev, - "Apple H9P NVMe FlatDMA slots exhausted\n"); - return BLK_STS_RESOURCE; - } - - h9p->last_req = idx; - __set_bit(idx, h9p->used_req); - spin_unlock_irqrestore(&h9p->req_lock, flags); - - *req = &h9p->req[idx]; - *tag = idx; - (*req)->npages = 0; - memset((*req)->pages, 0, sizeof((*req)->pages)); - return BLK_STS_OK; 
-} - -static void nvme_pci_apple_h9p_free_req(struct nvme_dev *dev, - struct apple_h9p_nvme_req *req) -{ - struct apple_h9p_nvme *h9p = dev->apple_h9p; - unsigned long flags; - unsigned int tag; - - if (!h9p || !req) - return; - - tag = req - h9p->req; - if (tag >= APPLE_H9P_NVMMU_MAX_REQS) - return; - - apple_h9p_pcie_map_nvmmu(dev->dev, tag, NULL, 0, NULL); - req->npages = 0; - - spin_lock_irqsave(&h9p->req_lock, flags); - __clear_bit(tag, h9p->used_req); - spin_unlock_irqrestore(&h9p->req_lock, flags); -} - -static bool nvme_pci_apple_h9p_unmap_data(struct request *req) -{ - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = req->mq_hctx->driver_data; - struct nvme_dev *dev = nvmeq->dev; - unsigned int i; - - if (!iod->apple_h9p_req) - return false; - - for (i = 0; i < iod->nr_dma_vecs; i++) - dma_unmap_page(dev->dev, iod->dma_vecs[i].addr, - iod->dma_vecs[i].len, rq_dma_dir(req)); - if (iod->dma_vecs) { - mempool_free(iod->dma_vecs, dev->dmavec_mempool); - iod->dma_vecs = NULL; - } - iod->nr_dma_vecs = 0; - - nvme_pci_apple_h9p_free_req(dev, iod->apple_h9p_req); - iod->apple_h9p_req = NULL; - iod->cmd.common.flags &= ~NVME_CMD_APPLE_H9P_FLATDMA; - return true; -} - -static blk_status_t nvme_pci_apple_h9p_map_data(struct request *req) -{ - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = req->mq_hctx->driver_data; - struct nvme_dev *dev = nvmeq->dev; - struct apple_h9p_nvme_req *hreq; - struct req_iterator iter; - struct bio_vec bv; - dma_addr_t flatdma; - u64 phys, offs = 0; - unsigned int tag, npages = 0, consumed = 0; - unsigned int total = blk_rq_payload_bytes(req); - blk_status_t status; - int ret; - - if (!dev->apple_h9p) - return BLK_STS_NOTSUPP; - if (total > APPLE_H9P_NVMMU_MAX_PAGES * APPLE_H9P_NVMMU_PAGE_SIZE) - return BLK_STS_IOERR; - - status = nvme_pci_apple_h9p_alloc_req(dev, &hreq, &tag); - if (status) - return status; - - iod->apple_h9p_req = hreq; - iod->dma_vecs = 
mempool_alloc(dev->dmavec_mempool, GFP_ATOMIC); - if (!iod->dma_vecs) { - status = BLK_STS_RESOURCE; - goto out_unmap; - } - - rq_for_each_bvec(bv, req, iter) { - dma_addr_t dma_addr; - unsigned int len = bv.bv_len; - - if (WARN_ON_ONCE(iod->nr_dma_vecs >= - blk_rq_nr_phys_segments(req))) { - status = BLK_STS_IOERR; - goto out_unmap; - } - - dma_addr = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0); - if (dma_mapping_error(dev->dev, dma_addr)) { - status = BLK_STS_RESOURCE; - goto out_unmap; - } - - iod->dma_vecs[iod->nr_dma_vecs].addr = dma_addr; - iod->dma_vecs[iod->nr_dma_vecs].len = len; - iod->nr_dma_vecs++; - - phys = page_to_phys(bv.bv_page) + bv.bv_offset; - if (!consumed) { - offs = phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1); - phys -= offs; - len += offs; - } else if (phys & (APPLE_H9P_NVMMU_PAGE_SIZE - 1)) { - dev_err_ratelimited(dev->dev, - "Apple H9P FlatDMA segment is not page-aligned: phys=%#llx\n", - phys); - status = BLK_STS_IOERR; - goto out_unmap; - } - - if (consumed + bv.bv_len != total && - (len & (APPLE_H9P_NVMMU_PAGE_SIZE - 1))) { - dev_err_ratelimited(dev->dev, - "Apple H9P FlatDMA segment length is not page-aligned: len=%#x\n", - len); - status = BLK_STS_IOERR; - goto out_unmap; - } - - while (len) { - if (npages >= APPLE_H9P_NVMMU_MAX_PAGES) { - status = BLK_STS_IOERR; - goto out_unmap; - } - - hreq->pages[npages++] = phys; - phys += APPLE_H9P_NVMMU_PAGE_SIZE; - len = len > APPLE_H9P_NVMMU_PAGE_SIZE ? 
- len - APPLE_H9P_NVMMU_PAGE_SIZE : 0; - } - - consumed += bv.bv_len; - } - - ret = apple_h9p_pcie_map_nvmmu(dev->dev, tag, hreq->pages, npages, - &flatdma); - if (ret) { - status = errno_to_blk_status(ret); - if (status == BLK_STS_NOTSUPP) - status = BLK_STS_IOERR; - goto out_unmap; - } - - hreq->npages = npages; - iod->total_len = total; - iod->cmd.common.flags |= NVME_CMD_APPLE_H9P_FLATDMA; - iod->cmd.common.dptr.prp1 = cpu_to_le64(flatdma + offs); - iod->cmd.common.dptr.prp2 = 0; - return BLK_STS_OK; - -out_unmap: - nvme_pci_apple_h9p_unmap_data(req); - return status; -} +#include "pci-apple-h9p.c" static void nvme_unmap_metadata(struct request *req) { @@ -4690,7 +4337,7 @@ static const struct pci_device_id nvme_id_table[] = { */ .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_QDEPTH_ONE }, - { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2002), + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_H9P_NVME), .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_SHARED_TAGS }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },