path: root/drivers/iommu/arm-smmu-nvidia.c
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019-2020 NVIDIA CORPORATION.  All rights reserved.

#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

#include "arm-smmu.h"

/*
 * Tegra194 has three ARM MMU-500 instances.
 * Two of them are used together and must be programmed identically; they
 * translate accesses from non-isochronous HW devices, with IOVA accesses
 * interleaved across the two instances.
 * The third instance translates accesses from isochronous HW devices.
 * This implementation supports programming of the two instances that must
 * be programmed identically.
 * The third instance is used through the standard arm-smmu driver itself
 * and is out of scope of this implementation.
 */
#define NUM_SMMU_INSTANCES 2

struct nvidia_smmu {
	struct arm_smmu_device	smmu;
	void __iomem		*bases[NUM_SMMU_INSTANCES];
};

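/* Return the I/O address of register page @page of SMMU instance @inst. */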
static inline void __iomem *nvidia_smmu_page(struct arm_smmu_device *smmu,
					     unsigned int inst, int page)
{
	struct nvidia_smmu *nvidia_smmu;

	nvidia_smmu = container_of(smmu, struct nvidia_smmu, smmu);
	return nvidia_smmu->bases[inst] + (page << smmu->pgshift);
}

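/*
 * Register reads are satisfied from instance 0 alone, since both
 * instances are programmed identically.
 */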
static u32 nvidia_smmu_read_reg(struct arm_smmu_device *smmu,
				int page, int offset)
{
	void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;

	return readl_relaxed(reg);
}

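/*
 * Register writes are mirrored to all instances so that they stay
 * programmed identically.
 */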
static void nvidia_smmu_write_reg(struct arm_smmu_device *smmu,
				  int page, int offset, u32 val)
{
	unsigned int i;

	for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
		void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;

		writel_relaxed(val, reg);
	}
}

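/* 64-bit counterparts of the accessors above. */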
static u64 nvidia_smmu_read_reg64(struct arm_smmu_device *smmu,
				  int page, int offset)
{
	void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;

	return readq_relaxed(reg);
}

static void nvidia_smmu_write_reg64(struct arm_smmu_device *smmu,
				    int page, int offset, u64 val)
{
	unsigned int i;

	for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
		void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;

		writeq_relaxed(val, reg);
	}
}

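/*
 * The sync register write below goes through the ->write_reg hook and is
 * therefore mirrored to all instances; completion is then polled on the
 * status register of every instance, backing off exponentially until
 * TLB_LOOP_TIMEOUT is reached.
 */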
static void nvidia_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
				 int sync, int status)
{
	unsigned int delay;

	arm_smmu_writel(smmu, page, sync, 0);

	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		unsigned int spin_cnt;

		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			u32 val = 0;
			unsigned int i;

			for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
				void __iomem *reg;

				reg = nvidia_smmu_page(smmu, i, page) + status;
				val |= readl_relaxed(reg);
			}

			if (!(val & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
				return;

			cpu_relax();
		}

		udelay(delay);
	}

	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}

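/*
 * Clear any stale global faults in each instance by writing the sGFSR
 * value read back to the register.
 */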
static int nvidia_smmu_reset(struct arm_smmu_device *smmu)
{
	unsigned int i;

	for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
		u32 val;
		void __iomem *reg = nvidia_smmu_page(smmu, i, ARM_SMMU_GR0) +
				    ARM_SMMU_GR0_sGFSR;

		/* clear global FSR */
		val = readl_relaxed(reg);
		writel_relaxed(val, reg);
	}

	return 0;
}

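/* NVIDIA-specific hooks plugged into the core arm-smmu driver. */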
static const struct arm_smmu_impl nvidia_smmu_impl = {
	.read_reg = nvidia_smmu_read_reg,
	.write_reg = nvidia_smmu_write_reg,
	.read_reg64 = nvidia_smmu_read_reg64,
	.write_reg64 = nvidia_smmu_write_reg64,
	.reset = nvidia_smmu_reset,
	.tlb_sync = nvidia_smmu_tlb_sync,
};

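/*
 * Wrap the arm_smmu_device allocated by the core driver in a struct
 * nvidia_smmu, map the MMIO region of the second MMU-500 instance and
 * install the NVIDIA-specific implementation hooks.
 */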
struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
{
	struct resource *res;
	struct device *dev = smmu->dev;
	struct nvidia_smmu *nvidia_smmu;
	struct platform_device *pdev = to_platform_device(dev);

	nvidia_smmu = devm_kzalloc(dev, sizeof(*nvidia_smmu), GFP_KERNEL);
	if (!nvidia_smmu)
		return ERR_PTR(-ENOMEM);

	/*
	 * Copy the data from the struct arm_smmu_device *smmu allocated in
	 * arm-smmu.c. The smmu embedded in struct nvidia_smmu replaces the
	 * smmu pointer used in arm-smmu.c once this function returns.
	 * This is necessary so that nvidia_smmu can be derived from the smmu
	 * pointer passed to the arm_smmu_impl callbacks later on.
	 */
	nvidia_smmu->smmu = *smmu;
	/* Instance 0 is ioremapped by arm-smmu.c. */
	nvidia_smmu->bases[0] = smmu->base;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
	if (!res)
		return ERR_PTR(-ENODEV);

	nvidia_smmu->bases[1] = devm_ioremap_resource(dev, res);
	if (IS_ERR(nvidia_smmu->bases[1]))
		return ERR_CAST(nvidia_smmu->bases[1]);

	nvidia_smmu->smmu.impl = &nvidia_smmu_impl;

	/*
	 * Free the struct arm_smmu_device *smmu allocated in arm-smmu.c.
	 * Once this function returns, arm-smmu.c uses the arm_smmu_device
	 * embedded in struct nvidia_smmu instead.
	 */
	devm_kfree(dev, smmu);

	return &nvidia_smmu->smmu;
}