/* coherency.c */
/*
 * Coherency fabric (Aurora) support for Armada 370 and XP platforms.
 *
 * Copyright (C) 2012 Marvell
 *
 * Yehuda Yitschak <yehuday@marvell.com>
 * Gregory Clement <gregory.clement@free-electrons.com>
 * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
 *
 * This file is licensed under the terms of the GNU General Public
 * License version 2. This program is licensed "as is" without any
 * warranty of any kind, whether express or implied.
 *
 * The Armada 370 and Armada XP SOCs have a coherency fabric which is
 * responsible for ensuring hardware coherency between all CPUs and between
 * CPUs and I/O masters. This file initializes the coherency fabric and
 * supplies basic routines for configuring and controlling hardware coherency
 */
#define pr_fmt(fmt) "mvebu-coherency: " fmt

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/of_address.h>
#include <linux/io.h>
#include <linux/err.h>
#include <linux/smp.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/mbus.h>
#include <linux/clk.h>
#include <linux/pci.h>
#include <asm/smp_plat.h>
#include <asm/cacheflush.h>
#include <asm/mach/map.h>
#include "armada-370-xp.h"
#include "coherency.h"
#include "mvebu-soc-id.h"
/*
 * Physical base of the coherency fabric registers; read by secondary
 * CPUs before they join the fabric (see armada_370_coherency_init).
 */
unsigned long coherency_phys_base;
/* Virtual mapping of the coherency fabric registers (DT reg bank 0) */
void __iomem *coherency_base;
/* Virtual mapping of the per-CPU coherency registers (DT reg bank 1) */
static void __iomem *coherency_cpu_base;

/* Coherency fabric registers */
#define COHERENCY_FABRIC_CFG_OFFSET 0x4
#define IO_SYNC_BARRIER_CTL_OFFSET 0x0
/*
 * Coherency fabric variants, one per supported SoC family; selected by
 * DT compatible matching through of_coherency_table.
 */
enum {
	COHERENCY_FABRIC_TYPE_NONE,
	COHERENCY_FABRIC_TYPE_ARMADA_370_XP,
	COHERENCY_FABRIC_TYPE_ARMADA_375,
	COHERENCY_FABRIC_TYPE_ARMADA_380,
};
  49. static struct of_device_id of_coherency_table[] = {
  50. {.compatible = "marvell,coherency-fabric",
  51. .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_370_XP },
  52. {.compatible = "marvell,armada-375-coherency-fabric",
  53. .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_375 },
  54. {.compatible = "marvell,armada-380-coherency-fabric",
  55. .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_380 },
  56. { /* end of list */ },
  57. };
  58. /* Functions defined in coherency_ll.S */
  59. int ll_enable_coherency(void);
  60. void ll_add_cpu_to_smp_group(void);
  61. int set_cpu_coherent(void)
  62. {
  63. if (!coherency_base) {
  64. pr_warn("Can't make current CPU cache coherent.\n");
  65. pr_warn("Coherency fabric is not initialized\n");
  66. return 1;
  67. }
  68. ll_add_cpu_to_smp_group();
  69. return ll_enable_coherency();
  70. }
  71. /*
  72. * The below code implements the I/O coherency workaround on Armada
  73. * 375. This workaround consists in using the two channels of the
  74. * first XOR engine to trigger a XOR transaction that serves as the
  75. * I/O coherency barrier.
  76. */
  77. static void __iomem *xor_base, *xor_high_base;
  78. static dma_addr_t coherency_wa_buf_phys[CONFIG_NR_CPUS];
  79. static void *coherency_wa_buf[CONFIG_NR_CPUS];
  80. static bool coherency_wa_enabled;
  81. #define XOR_CONFIG(chan) (0x10 + (chan * 4))
  82. #define XOR_ACTIVATION(chan) (0x20 + (chan * 4))
  83. #define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2))
  84. #define WINDOW_BASE(w) (0x250 + ((w) << 2))
  85. #define WINDOW_SIZE(w) (0x270 + ((w) << 2))
  86. #define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2))
  87. #define WINDOW_OVERRIDE_CTRL(chan) (0x2A0 + ((chan) << 2))
  88. #define XOR_DEST_POINTER(chan) (0x2B0 + (chan * 4))
  89. #define XOR_BLOCK_SIZE(chan) (0x2C0 + (chan * 4))
  90. #define XOR_INIT_VALUE_LOW 0x2E0
  91. #define XOR_INIT_VALUE_HIGH 0x2E4
/*
 * Armada 375 Z1 I/O coherency barrier: trigger a XOR memset of this
 * CPU's scratch buffer and spin until the engine has cleared it.  Each
 * CPU owns one XOR channel (indexed by smp_processor_id()), configured
 * in armada_375_coherency_init_wa().  The write/poll ordering below is
 * hardware-mandated; do not reorder.
 */
static inline void mvebu_hwcc_armada375_sync_io_barrier_wa(void)
{
	int idx = smp_processor_id();

	/* Write '1' to the first word of the buffer */
	writel(0x1, coherency_wa_buf[idx]);

	/* Wait until the engine is idle */
	while ((readl(xor_base + XOR_ACTIVATION(idx)) >> 4) & 0x3)
		;

	dmb();

	/* Trigger channel */
	writel(0x1, xor_base + XOR_ACTIVATION(idx));

	/* Poll the data until it is cleared by the XOR transaction */
	while (readl(coherency_wa_buf[idx]))
		;
}
  107. static void __init armada_375_coherency_init_wa(void)
  108. {
  109. const struct mbus_dram_target_info *dram;
  110. struct device_node *xor_node;
  111. struct property *xor_status;
  112. struct clk *xor_clk;
  113. u32 win_enable = 0;
  114. int i;
  115. pr_warn("enabling coherency workaround for Armada 375 Z1, one XOR engine disabled\n");
  116. /*
  117. * Since the workaround uses one XOR engine, we grab a
  118. * reference to its Device Tree node first.
  119. */
  120. xor_node = of_find_compatible_node(NULL, NULL, "marvell,orion-xor");
  121. BUG_ON(!xor_node);
  122. /*
  123. * Then we mark it as disabled so that the real XOR driver
  124. * will not use it.
  125. */
  126. xor_status = kzalloc(sizeof(struct property), GFP_KERNEL);
  127. BUG_ON(!xor_status);
  128. xor_status->value = kstrdup("disabled", GFP_KERNEL);
  129. BUG_ON(!xor_status->value);
  130. xor_status->length = 8;
  131. xor_status->name = kstrdup("status", GFP_KERNEL);
  132. BUG_ON(!xor_status->name);
  133. of_update_property(xor_node, xor_status);
  134. /*
  135. * And we remap the registers, get the clock, and do the
  136. * initial configuration of the XOR engine.
  137. */
  138. xor_base = of_iomap(xor_node, 0);
  139. xor_high_base = of_iomap(xor_node, 1);
  140. xor_clk = of_clk_get_by_name(xor_node, NULL);
  141. BUG_ON(!xor_clk);
  142. clk_prepare_enable(xor_clk);
  143. dram = mv_mbus_dram_info();
  144. for (i = 0; i < 8; i++) {
  145. writel(0, xor_base + WINDOW_BASE(i));
  146. writel(0, xor_base + WINDOW_SIZE(i));
  147. if (i < 4)
  148. writel(0, xor_base + WINDOW_REMAP_HIGH(i));
  149. }
  150. for (i = 0; i < dram->num_cs; i++) {
  151. const struct mbus_dram_window *cs = dram->cs + i;
  152. writel((cs->base & 0xffff0000) |
  153. (cs->mbus_attr << 8) |
  154. dram->mbus_dram_target_id, xor_base + WINDOW_BASE(i));
  155. writel((cs->size - 1) & 0xffff0000, xor_base + WINDOW_SIZE(i));
  156. win_enable |= (1 << i);
  157. win_enable |= 3 << (16 + (2 * i));
  158. }
  159. writel(win_enable, xor_base + WINDOW_BAR_ENABLE(0));
  160. writel(win_enable, xor_base + WINDOW_BAR_ENABLE(1));
  161. writel(0, xor_base + WINDOW_OVERRIDE_CTRL(0));
  162. writel(0, xor_base + WINDOW_OVERRIDE_CTRL(1));
  163. for (i = 0; i < CONFIG_NR_CPUS; i++) {
  164. coherency_wa_buf[i] = kzalloc(PAGE_SIZE, GFP_KERNEL);
  165. BUG_ON(!coherency_wa_buf[i]);
  166. /*
  167. * We can't use the DMA mapping API, since we don't
  168. * have a valid 'struct device' pointer
  169. */
  170. coherency_wa_buf_phys[i] =
  171. virt_to_phys(coherency_wa_buf[i]);
  172. BUG_ON(!coherency_wa_buf_phys[i]);
  173. /*
  174. * Configure the XOR engine for memset operation, with
  175. * a 128 bytes block size
  176. */
  177. writel(0x444, xor_base + XOR_CONFIG(i));
  178. writel(128, xor_base + XOR_BLOCK_SIZE(i));
  179. writel(coherency_wa_buf_phys[i],
  180. xor_base + XOR_DEST_POINTER(i));
  181. }
  182. writel(0x0, xor_base + XOR_INIT_VALUE_LOW);
  183. writel(0x0, xor_base + XOR_INIT_VALUE_HIGH);
  184. coherency_wa_enabled = true;
  185. }
  186. static inline void mvebu_hwcc_sync_io_barrier(void)
  187. {
  188. if (coherency_wa_enabled) {
  189. mvebu_hwcc_armada375_sync_io_barrier_wa();
  190. return;
  191. }
  192. writel(0x1, coherency_cpu_base + IO_SYNC_BARRIER_CTL_OFFSET);
  193. while (readl(coherency_cpu_base + IO_SYNC_BARRIER_CTL_OFFSET) & 0x1);
  194. }
  195. static dma_addr_t mvebu_hwcc_dma_map_page(struct device *dev, struct page *page,
  196. unsigned long offset, size_t size,
  197. enum dma_data_direction dir,
  198. struct dma_attrs *attrs)
  199. {
  200. if (dir != DMA_TO_DEVICE)
  201. mvebu_hwcc_sync_io_barrier();
  202. return pfn_to_dma(dev, page_to_pfn(page)) + offset;
  203. }
  204. static void mvebu_hwcc_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
  205. size_t size, enum dma_data_direction dir,
  206. struct dma_attrs *attrs)
  207. {
  208. if (dir != DMA_TO_DEVICE)
  209. mvebu_hwcc_sync_io_barrier();
  210. }
  211. static void mvebu_hwcc_dma_sync(struct device *dev, dma_addr_t dma_handle,
  212. size_t size, enum dma_data_direction dir)
  213. {
  214. if (dir != DMA_TO_DEVICE)
  215. mvebu_hwcc_sync_io_barrier();
  216. }
/*
 * DMA operations for hardware-coherent I/O: reuse the generic ARM DMA
 * helpers for allocation and scatter-gather, but route page map/unmap
 * and single-buffer syncs through the wrappers above so an I/O barrier
 * is inserted where needed.
 */
static struct dma_map_ops mvebu_hwcc_dma_ops = {
	.alloc			= arm_dma_alloc,
	.free			= arm_dma_free,
	.mmap			= arm_dma_mmap,
	.map_page		= mvebu_hwcc_dma_map_page,
	.unmap_page		= mvebu_hwcc_dma_unmap_page,
	.get_sgtable		= arm_dma_get_sgtable,
	.map_sg			= arm_dma_map_sg,
	.unmap_sg		= arm_dma_unmap_sg,
	.sync_single_for_cpu	= mvebu_hwcc_dma_sync,
	.sync_single_for_device	= mvebu_hwcc_dma_sync,
	.sync_sg_for_cpu	= arm_dma_sync_sg_for_cpu,
	.sync_sg_for_device	= arm_dma_sync_sg_for_device,
	.set_dma_mask		= arm_dma_set_mask,
};
  232. static int mvebu_hwcc_notifier(struct notifier_block *nb,
  233. unsigned long event, void *__dev)
  234. {
  235. struct device *dev = __dev;
  236. if (event != BUS_NOTIFY_ADD_DEVICE)
  237. return NOTIFY_DONE;
  238. set_dma_ops(dev, &mvebu_hwcc_dma_ops);
  239. return NOTIFY_OK;
  240. }
/* Registered on the platform bus in coherency_late_init() */
static struct notifier_block mvebu_hwcc_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};

/* Registered on the PCI bus in coherency_pci_init() */
static struct notifier_block mvebu_hwcc_pci_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};
  247. static void __init armada_370_coherency_init(struct device_node *np)
  248. {
  249. struct resource res;
  250. of_address_to_resource(np, 0, &res);
  251. coherency_phys_base = res.start;
  252. /*
  253. * Ensure secondary CPUs will see the updated value,
  254. * which they read before they join the coherency
  255. * fabric, and therefore before they are coherent with
  256. * the boot CPU cache.
  257. */
  258. sync_cache_w(&coherency_phys_base);
  259. coherency_base = of_iomap(np, 0);
  260. coherency_cpu_base = of_iomap(np, 1);
  261. set_cpu_coherent();
  262. }
  263. /*
  264. * This ioremap hook is used on Armada 375/38x to ensure that PCIe
  265. * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
  266. * is needed as a workaround for a deadlock issue between the PCIe
  267. * interface and the cache controller.
  268. */
  269. static void __iomem *
  270. armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
  271. unsigned int mtype, void *caller)
  272. {
  273. struct resource pcie_mem;
  274. mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
  275. if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
  276. mtype = MT_UNCACHED;
  277. return __arm_ioremap_caller(phys_addr, size, mtype, caller);
  278. }
  279. static void __init armada_375_380_coherency_init(struct device_node *np)
  280. {
  281. struct device_node *cache_dn;
  282. coherency_cpu_base = of_iomap(np, 0);
  283. arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
  284. /*
  285. * We should switch the PL310 to I/O coherency mode only if
  286. * I/O coherency is actually enabled.
  287. */
  288. if (!coherency_available())
  289. return;
  290. /*
  291. * Add the PL310 property "arm,io-coherent". This makes sure the
  292. * outer sync operation is not used, which allows to
  293. * workaround the system erratum that causes deadlocks when
  294. * doing PCIe in an SMP situation on Armada 375 and Armada
  295. * 38x.
  296. */
  297. for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
  298. struct property *p;
  299. p = kzalloc(sizeof(*p), GFP_KERNEL);
  300. p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
  301. of_add_property(cache_dn, p);
  302. }
  303. }
  304. static int coherency_type(void)
  305. {
  306. struct device_node *np;
  307. const struct of_device_id *match;
  308. int type;
  309. /*
  310. * The coherency fabric is needed:
  311. * - For coherency between processors on Armada XP, so only
  312. * when SMP is enabled.
  313. * - For coherency between the processor and I/O devices, but
  314. * this coherency requires many pre-requisites (write
  315. * allocate cache policy, shareable pages, SMP bit set) that
  316. * are only meant in SMP situations.
  317. *
  318. * Note that this means that on Armada 370, there is currently
  319. * no way to use hardware I/O coherency, because even when
  320. * CONFIG_SMP is enabled, is_smp() returns false due to the
  321. * Armada 370 being a single-core processor. To lift this
  322. * limitation, we would have to find a way to make the cache
  323. * policy set to write-allocate (on all Armada SoCs), and to
  324. * set the shareable attribute in page tables (on all Armada
  325. * SoCs except the Armada 370). Unfortunately, such decisions
  326. * are taken very early in the kernel boot process, at a point
  327. * where we don't know yet on which SoC we are running.
  328. */
  329. if (!is_smp())
  330. return COHERENCY_FABRIC_TYPE_NONE;
  331. np = of_find_matching_node_and_match(NULL, of_coherency_table, &match);
  332. if (!np)
  333. return COHERENCY_FABRIC_TYPE_NONE;
  334. type = (int) match->data;
  335. of_node_put(np);
  336. return type;
  337. }
  338. /*
  339. * As a precaution, we currently completely disable hardware I/O
  340. * coherency, until enough testing is done with automatic I/O
  341. * synchronization barriers to validate that it is a proper solution.
  342. */
  343. int coherency_available(void)
  344. {
  345. return false;
  346. }
  347. int __init coherency_init(void)
  348. {
  349. int type = coherency_type();
  350. struct device_node *np;
  351. np = of_find_matching_node(NULL, of_coherency_table);
  352. if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP)
  353. armada_370_coherency_init(np);
  354. else if (type == COHERENCY_FABRIC_TYPE_ARMADA_375 ||
  355. type == COHERENCY_FABRIC_TYPE_ARMADA_380)
  356. armada_375_380_coherency_init(np);
  357. of_node_put(np);
  358. return 0;
  359. }
  360. static int __init coherency_late_init(void)
  361. {
  362. int type = coherency_type();
  363. if (type == COHERENCY_FABRIC_TYPE_NONE)
  364. return 0;
  365. if (type == COHERENCY_FABRIC_TYPE_ARMADA_375) {
  366. u32 dev, rev;
  367. if (mvebu_get_soc_id(&dev, &rev) == 0 &&
  368. rev == ARMADA_375_Z1_REV)
  369. armada_375_coherency_init_wa();
  370. }
  371. if (coherency_available())
  372. bus_register_notifier(&platform_bus_type,
  373. &mvebu_hwcc_nb);
  374. return 0;
  375. }
  376. postcore_initcall(coherency_late_init);
  377. #if IS_ENABLED(CONFIG_PCI)
  378. static int __init coherency_pci_init(void)
  379. {
  380. if (coherency_available())
  381. bus_register_notifier(&pci_bus_type,
  382. &mvebu_hwcc_pci_nb);
  383. return 0;
  384. }
  385. arch_initcall(coherency_pci_init);
  386. #endif