i915_gem_gtt.c 57 KB


  1. /*
  2. * Copyright © 2010 Daniel Vetter
  3. * Copyright © 2011-2014 Intel Corporation
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice (including the next
  13. * paragraph) shall be included in all copies or substantial portions of the
  14. * Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22. * IN THE SOFTWARE.
  23. *
  24. */
  25. #include <linux/seq_file.h>
  26. #include <drm/drmP.h>
  27. #include <drm/i915_drm.h>
  28. #include "i915_drv.h"
  29. #include "i915_trace.h"
  30. #include "intel_drv.h"
  31. static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
  32. static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);
  33. static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
  34. {
  35. if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
  36. return 0;
  37. if (enable_ppgtt == 1)
  38. return 1;
  39. if (enable_ppgtt == 2 && HAS_PPGTT(dev))
  40. return 2;
  41. #ifdef CONFIG_INTEL_IOMMU
  42. /* Disable ppgtt on SNB if VT-d is on. */
  43. if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
  44. DRM_INFO("Disabling PPGTT because VT-d is on\n");
  45. return 0;
  46. }
  47. #endif
  48. /* Early VLV doesn't have this */
  49. if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
  50. dev->pdev->revision < 0xb) {
  51. DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
  52. return 0;
  53. }
  54. return HAS_ALIASING_PPGTT(dev) ? 1 : 0;
  55. }
  56. static void ppgtt_bind_vma(struct i915_vma *vma,
  57. enum i915_cache_level cache_level,
  58. u32 flags);
  59. static void ppgtt_unbind_vma(struct i915_vma *vma);
  60. static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
  61. enum i915_cache_level level,
  62. bool valid)
  63. {
  64. gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
  65. pte |= addr;
  66. switch (level) {
  67. case I915_CACHE_NONE:
  68. pte |= PPAT_UNCACHED_INDEX;
  69. break;
  70. case I915_CACHE_WT:
  71. pte |= PPAT_DISPLAY_ELLC_INDEX;
  72. break;
  73. default:
  74. pte |= PPAT_CACHED_INDEX;
  75. break;
  76. }
  77. return pte;
  78. }
  79. static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
  80. dma_addr_t addr,
  81. enum i915_cache_level level)
  82. {
  83. gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
  84. pde |= addr;
  85. if (level != I915_CACHE_NONE)
  86. pde |= PPAT_CACHED_PDE_INDEX;
  87. else
  88. pde |= PPAT_UNCACHED_INDEX;
  89. return pde;
  90. }
  91. static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
  92. enum i915_cache_level level,
  93. bool valid, u32 unused)
  94. {
  95. gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  96. pte |= GEN6_PTE_ADDR_ENCODE(addr);
  97. switch (level) {
  98. case I915_CACHE_L3_LLC:
  99. case I915_CACHE_LLC:
  100. pte |= GEN6_PTE_CACHE_LLC;
  101. break;
  102. case I915_CACHE_NONE:
  103. pte |= GEN6_PTE_UNCACHED;
  104. break;
  105. default:
  106. WARN_ON(1);
  107. }
  108. return pte;
  109. }
  110. static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
  111. enum i915_cache_level level,
  112. bool valid, u32 unused)
  113. {
  114. gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  115. pte |= GEN6_PTE_ADDR_ENCODE(addr);
  116. switch (level) {
  117. case I915_CACHE_L3_LLC:
  118. pte |= GEN7_PTE_CACHE_L3_LLC;
  119. break;
  120. case I915_CACHE_LLC:
  121. pte |= GEN6_PTE_CACHE_LLC;
  122. break;
  123. case I915_CACHE_NONE:
  124. pte |= GEN6_PTE_UNCACHED;
  125. break;
  126. default:
  127. WARN_ON(1);
  128. }
  129. return pte;
  130. }
  131. static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
  132. enum i915_cache_level level,
  133. bool valid, u32 flags)
  134. {
  135. gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  136. pte |= GEN6_PTE_ADDR_ENCODE(addr);
  137. /* Mark the page as writeable. Other platforms don't have a
  138. * setting for read-only/writable, so this matches that behavior.
  139. */
  140. if (!(flags & PTE_READ_ONLY))
  141. pte |= BYT_PTE_WRITEABLE;
  142. if (level != I915_CACHE_NONE)
  143. pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
  144. return pte;
  145. }
  146. static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
  147. enum i915_cache_level level,
  148. bool valid, u32 unused)
  149. {
  150. gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  151. pte |= HSW_PTE_ADDR_ENCODE(addr);
  152. if (level != I915_CACHE_NONE)
  153. pte |= HSW_WB_LLC_AGE3;
  154. return pte;
  155. }
  156. static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
  157. enum i915_cache_level level,
  158. bool valid, u32 unused)
  159. {
  160. gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  161. pte |= HSW_PTE_ADDR_ENCODE(addr);
  162. switch (level) {
  163. case I915_CACHE_NONE:
  164. break;
  165. case I915_CACHE_WT:
  166. pte |= HSW_WT_ELLC_LLC_AGE3;
  167. break;
  168. default:
  169. pte |= HSW_WB_ELLC_LLC_AGE3;
  170. break;
  171. }
  172. return pte;
  173. }
  174. /* Broadwell Page Directory Pointer Descriptors */
  175. static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
  176. uint64_t val)
  177. {
  178. int ret;
  179. BUG_ON(entry >= 4);
  180. ret = intel_ring_begin(ring, 6);
  181. if (ret)
  182. return ret;
  183. intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
  184. intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
  185. intel_ring_emit(ring, (u32)(val >> 32));
  186. intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
  187. intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
  188. intel_ring_emit(ring, (u32)(val));
  189. intel_ring_advance(ring);
  190. return 0;
  191. }
  192. static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
  193. struct intel_engine_cs *ring)
  194. {
  195. int i, ret;
  196. /* bit of a hack to find the actual last used pd */
  197. int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
  198. for (i = used_pd - 1; i >= 0; i--) {
  199. dma_addr_t addr = ppgtt->pd_dma_addr[i];
  200. ret = gen8_write_pdp(ring, i, addr);
  201. if (ret)
  202. return ret;
  203. }
  204. return 0;
  205. }
  206. static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
  207. uint64_t start,
  208. uint64_t length,
  209. bool use_scratch)
  210. {
  211. struct i915_hw_ppgtt *ppgtt =
  212. container_of(vm, struct i915_hw_ppgtt, base);
  213. gen8_gtt_pte_t *pt_vaddr, scratch_pte;
  214. unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
  215. unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
  216. unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
  217. unsigned num_entries = length >> PAGE_SHIFT;
  218. unsigned last_pte, i;
  219. scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
  220. I915_CACHE_LLC, use_scratch);
  221. while (num_entries) {
  222. struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
  223. last_pte = pte + num_entries;
  224. if (last_pte > GEN8_PTES_PER_PAGE)
  225. last_pte = GEN8_PTES_PER_PAGE;
  226. pt_vaddr = kmap_atomic(page_table);
  227. for (i = pte; i < last_pte; i++) {
  228. pt_vaddr[i] = scratch_pte;
  229. num_entries--;
  230. }
  231. if (!HAS_LLC(ppgtt->base.dev))
  232. drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
  233. kunmap_atomic(pt_vaddr);
  234. pte = 0;
  235. if (++pde == GEN8_PDES_PER_PAGE) {
  236. pdpe++;
  237. pde = 0;
  238. }
  239. }
  240. }
  241. static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
  242. struct sg_table *pages,
  243. uint64_t start,
  244. enum i915_cache_level cache_level, u32 unused)
  245. {
  246. struct i915_hw_ppgtt *ppgtt =
  247. container_of(vm, struct i915_hw_ppgtt, base);
  248. gen8_gtt_pte_t *pt_vaddr;
  249. unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
  250. unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
  251. unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
  252. struct sg_page_iter sg_iter;
  253. pt_vaddr = NULL;
  254. for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
  255. if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
  256. break;
  257. if (pt_vaddr == NULL)
  258. pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);
  259. pt_vaddr[pte] =
  260. gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
  261. cache_level, true);
  262. if (++pte == GEN8_PTES_PER_PAGE) {
  263. if (!HAS_LLC(ppgtt->base.dev))
  264. drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
  265. kunmap_atomic(pt_vaddr);
  266. pt_vaddr = NULL;
  267. if (++pde == GEN8_PDES_PER_PAGE) {
  268. pdpe++;
  269. pde = 0;
  270. }
  271. pte = 0;
  272. }
  273. }
  274. if (pt_vaddr) {
  275. if (!HAS_LLC(ppgtt->base.dev))
  276. drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
  277. kunmap_atomic(pt_vaddr);
  278. }
  279. }
  280. static void gen8_free_page_tables(struct page **pt_pages)
  281. {
  282. int i;
  283. if (pt_pages == NULL)
  284. return;
  285. for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
  286. if (pt_pages[i])
  287. __free_pages(pt_pages[i], 0);
  288. }
  289. static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
  290. {
  291. int i;
  292. for (i = 0; i < ppgtt->num_pd_pages; i++) {
  293. gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
  294. kfree(ppgtt->gen8_pt_pages[i]);
  295. kfree(ppgtt->gen8_pt_dma_addr[i]);
  296. }
  297. __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
  298. }
  299. static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
  300. {
  301. struct pci_dev *hwdev = ppgtt->base.dev->pdev;
  302. int i, j;
  303. for (i = 0; i < ppgtt->num_pd_pages; i++) {
  304. /* TODO: In the future we'll support sparse mappings, so this
  305. * will have to change. */
  306. if (!ppgtt->pd_dma_addr[i])
  307. continue;
  308. pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
  309. PCI_DMA_BIDIRECTIONAL);
  310. for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
  311. dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
  312. if (addr)
  313. pci_unmap_page(hwdev, addr, PAGE_SIZE,
  314. PCI_DMA_BIDIRECTIONAL);
  315. }
  316. }
  317. }
  318. static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  319. {
  320. struct i915_hw_ppgtt *ppgtt =
  321. container_of(vm, struct i915_hw_ppgtt, base);
  322. gen8_ppgtt_unmap_pages(ppgtt);
  323. gen8_ppgtt_free(ppgtt);
  324. }
  325. static struct page **__gen8_alloc_page_tables(void)
  326. {
  327. struct page **pt_pages;
  328. int i;
  329. pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
  330. if (!pt_pages)
  331. return ERR_PTR(-ENOMEM);
  332. for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
  333. pt_pages[i] = alloc_page(GFP_KERNEL);
  334. if (!pt_pages[i])
  335. goto bail;
  336. }
  337. return pt_pages;
  338. bail:
  339. gen8_free_page_tables(pt_pages);
  340. kfree(pt_pages);
  341. return ERR_PTR(-ENOMEM);
  342. }
  343. static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
  344. const int max_pdp)
  345. {
  346. struct page **pt_pages[GEN8_LEGACY_PDPS];
  347. int i, ret;
  348. for (i = 0; i < max_pdp; i++) {
  349. pt_pages[i] = __gen8_alloc_page_tables();
  350. if (IS_ERR(pt_pages[i])) {
  351. ret = PTR_ERR(pt_pages[i]);
  352. goto unwind_out;
  353. }
  354. }
  355. /* NB: Avoid touching gen8_pt_pages until last to keep the allocation,
  356. * "atomic" - for cleanup purposes.
  357. */
  358. for (i = 0; i < max_pdp; i++)
  359. ppgtt->gen8_pt_pages[i] = pt_pages[i];
  360. return 0;
  361. unwind_out:
  362. while (i--) {
  363. gen8_free_page_tables(pt_pages[i]);
  364. kfree(pt_pages[i]);
  365. }
  366. return ret;
  367. }
  368. static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
  369. {
  370. int i;
  371. for (i = 0; i < ppgtt->num_pd_pages; i++) {
  372. ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
  373. sizeof(dma_addr_t),
  374. GFP_KERNEL);
  375. if (!ppgtt->gen8_pt_dma_addr[i])
  376. return -ENOMEM;
  377. }
  378. return 0;
  379. }
  380. static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
  381. const int max_pdp)
  382. {
  383. ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
  384. if (!ppgtt->pd_pages)
  385. return -ENOMEM;
  386. ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
  387. BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
  388. return 0;
  389. }
  390. static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
  391. const int max_pdp)
  392. {
  393. int ret;
  394. ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
  395. if (ret)
  396. return ret;
  397. ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
  398. if (ret) {
  399. __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
  400. return ret;
  401. }
  402. ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
  403. ret = gen8_ppgtt_allocate_dma(ppgtt);
  404. if (ret)
  405. gen8_ppgtt_free(ppgtt);
  406. return ret;
  407. }
  408. static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
  409. const int pd)
  410. {
  411. dma_addr_t pd_addr;
  412. int ret;
  413. pd_addr = pci_map_page(ppgtt->base.dev->pdev,
  414. &ppgtt->pd_pages[pd], 0,
  415. PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
  416. ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
  417. if (ret)
  418. return ret;
  419. ppgtt->pd_dma_addr[pd] = pd_addr;
  420. return 0;
  421. }
  422. static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
  423. const int pd,
  424. const int pt)
  425. {
  426. dma_addr_t pt_addr;
  427. struct page *p;
  428. int ret;
  429. p = ppgtt->gen8_pt_pages[pd][pt];
  430. pt_addr = pci_map_page(ppgtt->base.dev->pdev,
  431. p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
  432. ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
  433. if (ret)
  434. return ret;
  435. ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;
  436. return 0;
  437. }
  438. /**
  439. * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  440. * with a net effect resembling a 2-level page table in normal x86 terms. Each
  441. * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
  442. * space.
  443. *
  444. * FIXME: split allocation into smaller pieces. For now we only ever do this
  445. * once, but with full PPGTT, the multiple contiguous allocations will be bad.
  446. * TODO: Do something with the size parameter
  447. */
  448. static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
  449. {
  450. const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
  451. const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
  452. int i, j, ret;
  453. if (size % (1<<30))
  454. DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
  455. /* 1. Do all our allocations for page directories and page tables. */
  456. ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
  457. if (ret)
  458. return ret;
  459. /*
  460. * 2. Create DMA mappings for the page directories and page tables.
  461. */
  462. for (i = 0; i < max_pdp; i++) {
  463. ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
  464. if (ret)
  465. goto bail;
  466. for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
  467. ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
  468. if (ret)
  469. goto bail;
  470. }
  471. }
  472. /*
  473. * 3. Map all the page directory entires to point to the page tables
  474. * we've allocated.
  475. *
  476. * For now, the PPGTT helper functions all require that the PDEs are
  477. * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
  478. * will never need to touch the PDEs again.
  479. */
  480. for (i = 0; i < max_pdp; i++) {
  481. gen8_ppgtt_pde_t *pd_vaddr;
  482. pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
  483. for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
  484. dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
  485. pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
  486. I915_CACHE_LLC);
  487. }
  488. if (!HAS_LLC(ppgtt->base.dev))
  489. drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
  490. kunmap_atomic(pd_vaddr);
  491. }
  492. ppgtt->switch_mm = gen8_mm_switch;
  493. ppgtt->base.clear_range = gen8_ppgtt_clear_range;
  494. ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
  495. ppgtt->base.cleanup = gen8_ppgtt_cleanup;
  496. ppgtt->base.start = 0;
  497. ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
  498. ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
  499. DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
  500. ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
  501. DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
  502. ppgtt->num_pd_entries,
  503. (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
  504. return 0;
  505. bail:
  506. gen8_ppgtt_unmap_pages(ppgtt);
  507. gen8_ppgtt_free(ppgtt);
  508. return ret;
  509. }
  510. static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
  511. {
  512. struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
  513. struct i915_address_space *vm = &ppgtt->base;
  514. gen6_gtt_pte_t __iomem *pd_addr;
  515. gen6_gtt_pte_t scratch_pte;
  516. uint32_t pd_entry;
  517. int pte, pde;
  518. scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
  519. pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
  520. ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
  521. seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm,
  522. ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
  523. for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
  524. u32 expected;
  525. gen6_gtt_pte_t *pt_vaddr;
  526. dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
  527. pd_entry = readl(pd_addr + pde);
  528. expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
  529. if (pd_entry != expected)
  530. seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
  531. pde,
  532. pd_entry,
  533. expected);
  534. seq_printf(m, "\tPDE: %x\n", pd_entry);
  535. pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
  536. for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
  537. unsigned long va =
  538. (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
  539. (pte * PAGE_SIZE);
  540. int i;
  541. bool found = false;
  542. for (i = 0; i < 4; i++)
  543. if (pt_vaddr[pte + i] != scratch_pte)
  544. found = true;
  545. if (!found)
  546. continue;
  547. seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
  548. for (i = 0; i < 4; i++) {
  549. if (pt_vaddr[pte + i] != scratch_pte)
  550. seq_printf(m, " %08x", pt_vaddr[pte + i]);
  551. else
  552. seq_puts(m, " SCRATCH ");
  553. }
  554. seq_puts(m, "\n");
  555. }
  556. kunmap_atomic(pt_vaddr);
  557. }
  558. }
  559. static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
  560. {
  561. struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
  562. gen6_gtt_pte_t __iomem *pd_addr;
  563. uint32_t pd_entry;
  564. int i;
  565. WARN_ON(ppgtt->pd_offset & 0x3f);
  566. pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
  567. ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
  568. for (i = 0; i < ppgtt->num_pd_entries; i++) {
  569. dma_addr_t pt_addr;
  570. pt_addr = ppgtt->pt_dma_addr[i];
  571. pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
  572. pd_entry |= GEN6_PDE_VALID;
  573. writel(pd_entry, pd_addr + i);
  574. }
  575. readl(pd_addr);
  576. }
  577. static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
  578. {
  579. BUG_ON(ppgtt->pd_offset & 0x3f);
  580. return (ppgtt->pd_offset / 64) << 16;
  581. }
  582. static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
  583. struct intel_engine_cs *ring)
  584. {
  585. int ret;
  586. /* NB: TLBs must be flushed and invalidated before a switch */
  587. ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
  588. if (ret)
  589. return ret;
  590. ret = intel_ring_begin(ring, 6);
  591. if (ret)
  592. return ret;
  593. intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
  594. intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
  595. intel_ring_emit(ring, PP_DIR_DCLV_2G);
  596. intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
  597. intel_ring_emit(ring, get_pd_offset(ppgtt));
  598. intel_ring_emit(ring, MI_NOOP);
  599. intel_ring_advance(ring);
  600. return 0;
  601. }
  602. static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
  603. struct intel_engine_cs *ring)
  604. {
  605. int ret;
  606. /* NB: TLBs must be flushed and invalidated before a switch */
  607. ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
  608. if (ret)
  609. return ret;
  610. ret = intel_ring_begin(ring, 6);
  611. if (ret)
  612. return ret;
  613. intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
  614. intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
  615. intel_ring_emit(ring, PP_DIR_DCLV_2G);
  616. intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
  617. intel_ring_emit(ring, get_pd_offset(ppgtt));
  618. intel_ring_emit(ring, MI_NOOP);
  619. intel_ring_advance(ring);
  620. /* XXX: RCS is the only one to auto invalidate the TLBs? */
  621. if (ring->id != RCS) {
  622. ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
  623. if (ret)
  624. return ret;
  625. }
  626. return 0;
  627. }
  628. static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
  629. struct intel_engine_cs *ring)
  630. {
  631. struct drm_device *dev = ppgtt->base.dev;
  632. struct drm_i915_private *dev_priv = dev->dev_private;
  633. I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
  634. I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
  635. POSTING_READ(RING_PP_DIR_DCLV(ring));
  636. return 0;
  637. }
  638. static void gen8_ppgtt_enable(struct drm_device *dev)
  639. {
  640. struct drm_i915_private *dev_priv = dev->dev_private;
  641. struct intel_engine_cs *ring;
  642. int j;
  643. for_each_ring(ring, dev_priv, j) {
  644. I915_WRITE(RING_MODE_GEN7(ring),
  645. _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
  646. }
  647. }
  648. static void gen7_ppgtt_enable(struct drm_device *dev)
  649. {
  650. struct drm_i915_private *dev_priv = dev->dev_private;
  651. struct intel_engine_cs *ring;
  652. uint32_t ecochk, ecobits;
  653. int i;
  654. ecobits = I915_READ(GAC_ECO_BITS);
  655. I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
  656. ecochk = I915_READ(GAM_ECOCHK);
  657. if (IS_HASWELL(dev)) {
  658. ecochk |= ECOCHK_PPGTT_WB_HSW;
  659. } else {
  660. ecochk |= ECOCHK_PPGTT_LLC_IVB;
  661. ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
  662. }
  663. I915_WRITE(GAM_ECOCHK, ecochk);
  664. for_each_ring(ring, dev_priv, i) {
  665. /* GFX_MODE is per-ring on gen7+ */
  666. I915_WRITE(RING_MODE_GEN7(ring),
  667. _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
  668. }
  669. }
  670. static void gen6_ppgtt_enable(struct drm_device *dev)
  671. {
  672. struct drm_i915_private *dev_priv = dev->dev_private;
  673. uint32_t ecochk, gab_ctl, ecobits;
  674. ecobits = I915_READ(GAC_ECO_BITS);
  675. I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
  676. ECOBITS_PPGTT_CACHE64B);
  677. gab_ctl = I915_READ(GAB_CTL);
  678. I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
  679. ecochk = I915_READ(GAM_ECOCHK);
  680. I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
  681. I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
  682. }
  683. /* PPGTT support for Sandybridge/Gen6 and later */
  684. static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
  685. uint64_t start,
  686. uint64_t length,
  687. bool use_scratch)
  688. {
  689. struct i915_hw_ppgtt *ppgtt =
  690. container_of(vm, struct i915_hw_ppgtt, base);
  691. gen6_gtt_pte_t *pt_vaddr, scratch_pte;
  692. unsigned first_entry = start >> PAGE_SHIFT;
  693. unsigned num_entries = length >> PAGE_SHIFT;
  694. unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
  695. unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
  696. unsigned last_pte, i;
  697. scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
  698. while (num_entries) {
  699. last_pte = first_pte + num_entries;
  700. if (last_pte > I915_PPGTT_PT_ENTRIES)
  701. last_pte = I915_PPGTT_PT_ENTRIES;
  702. pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
  703. for (i = first_pte; i < last_pte; i++)
  704. pt_vaddr[i] = scratch_pte;
  705. kunmap_atomic(pt_vaddr);
  706. num_entries -= last_pte - first_pte;
  707. first_pte = 0;
  708. act_pt++;
  709. }
  710. }
  711. static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
  712. struct sg_table *pages,
  713. uint64_t start,
  714. enum i915_cache_level cache_level, u32 flags)
  715. {
  716. struct i915_hw_ppgtt *ppgtt =
  717. container_of(vm, struct i915_hw_ppgtt, base);
  718. gen6_gtt_pte_t *pt_vaddr;
  719. unsigned first_entry = start >> PAGE_SHIFT;
  720. unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
  721. unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
  722. struct sg_page_iter sg_iter;
  723. pt_vaddr = NULL;
  724. for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
  725. if (pt_vaddr == NULL)
  726. pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
  727. pt_vaddr[act_pte] =
  728. vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
  729. cache_level, true, flags);
  730. if (++act_pte == I915_PPGTT_PT_ENTRIES) {
  731. kunmap_atomic(pt_vaddr);
  732. pt_vaddr = NULL;
  733. act_pt++;
  734. act_pte = 0;
  735. }
  736. }
  737. if (pt_vaddr)
  738. kunmap_atomic(pt_vaddr);
  739. }
  740. static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
  741. {
  742. int i;
  743. if (ppgtt->pt_dma_addr) {
  744. for (i = 0; i < ppgtt->num_pd_entries; i++)
  745. pci_unmap_page(ppgtt->base.dev->pdev,
  746. ppgtt->pt_dma_addr[i],
  747. 4096, PCI_DMA_BIDIRECTIONAL);
  748. }
  749. }
  750. static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
  751. {
  752. int i;
  753. kfree(ppgtt->pt_dma_addr);
  754. for (i = 0; i < ppgtt->num_pd_entries; i++)
  755. __free_page(ppgtt->pt_pages[i]);
  756. kfree(ppgtt->pt_pages);
  757. }
  758. static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
  759. {
  760. struct i915_hw_ppgtt *ppgtt =
  761. container_of(vm, struct i915_hw_ppgtt, base);
  762. drm_mm_remove_node(&ppgtt->node);
  763. gen6_ppgtt_unmap_pages(ppgtt);
  764. gen6_ppgtt_free(ppgtt);
  765. }
  766. static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
  767. {
  768. struct drm_device *dev = ppgtt->base.dev;
  769. struct drm_i915_private *dev_priv = dev->dev_private;
  770. bool retried = false;
  771. int ret;
  772. /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
  773. * allocator works in address space sizes, so it's multiplied by page
  774. * size. We allocate at the top of the GTT to avoid fragmentation.
  775. */
  776. BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
  777. alloc:
  778. ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
  779. &ppgtt->node, GEN6_PD_SIZE,
  780. GEN6_PD_ALIGN, 0,
  781. 0, dev_priv->gtt.base.total,
  782. DRM_MM_TOPDOWN);
  783. if (ret == -ENOSPC && !retried) {
  784. ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
  785. GEN6_PD_SIZE, GEN6_PD_ALIGN,
  786. I915_CACHE_NONE,
  787. 0, dev_priv->gtt.base.total,
  788. 0);
  789. if (ret)
  790. return ret;
  791. retried = true;
  792. goto alloc;
  793. }
  794. if (ppgtt->node.start < dev_priv->gtt.mappable_end)
  795. DRM_DEBUG("Forced to use aperture for PDEs\n");
  796. ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
  797. return ret;
  798. }
  799. static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
  800. {
  801. int i;
  802. ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
  803. GFP_KERNEL);
  804. if (!ppgtt->pt_pages)
  805. return -ENOMEM;
  806. for (i = 0; i < ppgtt->num_pd_entries; i++) {
  807. ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
  808. if (!ppgtt->pt_pages[i]) {
  809. gen6_ppgtt_free(ppgtt);
  810. return -ENOMEM;
  811. }
  812. }
  813. return 0;
  814. }
  815. static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
  816. {
  817. int ret;
  818. ret = gen6_ppgtt_allocate_page_directories(ppgtt);
  819. if (ret)
  820. return ret;
  821. ret = gen6_ppgtt_allocate_page_tables(ppgtt);
  822. if (ret) {
  823. drm_mm_remove_node(&ppgtt->node);
  824. return ret;
  825. }
  826. ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
  827. GFP_KERNEL);
  828. if (!ppgtt->pt_dma_addr) {
  829. drm_mm_remove_node(&ppgtt->node);
  830. gen6_ppgtt_free(ppgtt);
  831. return -ENOMEM;
  832. }
  833. return 0;
  834. }
  835. static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
  836. {
  837. struct drm_device *dev = ppgtt->base.dev;
  838. int i;
  839. for (i = 0; i < ppgtt->num_pd_entries; i++) {
  840. dma_addr_t pt_addr;
  841. pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
  842. PCI_DMA_BIDIRECTIONAL);
  843. if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
  844. gen6_ppgtt_unmap_pages(ppgtt);
  845. return -EIO;
  846. }
  847. ppgtt->pt_dma_addr[i] = pt_addr;
  848. }
  849. return 0;
  850. }
  851. static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
  852. {
  853. struct drm_device *dev = ppgtt->base.dev;
  854. struct drm_i915_private *dev_priv = dev->dev_private;
  855. int ret;
  856. ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
  857. if (IS_GEN6(dev)) {
  858. ppgtt->switch_mm = gen6_mm_switch;
  859. } else if (IS_HASWELL(dev)) {
  860. ppgtt->switch_mm = hsw_mm_switch;
  861. } else if (IS_GEN7(dev)) {
  862. ppgtt->switch_mm = gen7_mm_switch;
  863. } else
  864. BUG();
  865. ret = gen6_ppgtt_alloc(ppgtt);
  866. if (ret)
  867. return ret;
  868. ret = gen6_ppgtt_setup_page_tables(ppgtt);
  869. if (ret) {
  870. gen6_ppgtt_free(ppgtt);
  871. return ret;
  872. }
  873. ppgtt->base.clear_range = gen6_ppgtt_clear_range;
  874. ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
  875. ppgtt->base.cleanup = gen6_ppgtt_cleanup;
  876. ppgtt->base.start = 0;
  877. ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
  878. ppgtt->debug_dump = gen6_dump_ppgtt;
  879. ppgtt->pd_offset =
  880. ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
  881. ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
  882. DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
  883. ppgtt->node.size >> 20,
  884. ppgtt->node.start / PAGE_SIZE);
  885. gen6_write_pdes(ppgtt);
  886. DRM_DEBUG("Adding PPGTT at offset %x\n",
  887. ppgtt->pd_offset << 10);
  888. return 0;
  889. }
  890. static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
  891. {
  892. struct drm_i915_private *dev_priv = dev->dev_private;
  893. ppgtt->base.dev = dev;
  894. ppgtt->base.scratch = dev_priv->gtt.base.scratch;
  895. if (INTEL_INFO(dev)->gen < 8)
  896. return gen6_ppgtt_init(ppgtt);
  897. else if (IS_GEN8(dev))
  898. return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
  899. else
  900. BUG();
  901. }
  902. int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
  903. {
  904. struct drm_i915_private *dev_priv = dev->dev_private;
  905. int ret = 0;
  906. ret = __hw_ppgtt_init(dev, ppgtt);
  907. if (ret == 0) {
  908. kref_init(&ppgtt->ref);
  909. drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
  910. ppgtt->base.total);
  911. i915_init_vm(dev_priv, &ppgtt->base);
  912. }
  913. return ret;
  914. }
  915. int i915_ppgtt_init_hw(struct drm_device *dev)
  916. {
  917. struct drm_i915_private *dev_priv = dev->dev_private;
  918. struct intel_engine_cs *ring;
  919. struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
  920. int i, ret = 0;
  921. /* In the case of execlists, PPGTT is enabled by the context descriptor
  922. * and the PDPs are contained within the context itself. We don't
  923. * need to do anything here. */
  924. if (i915.enable_execlists)
  925. return 0;
  926. if (!USES_PPGTT(dev))
  927. return 0;
  928. if (IS_GEN6(dev))
  929. gen6_ppgtt_enable(dev);
  930. else if (IS_GEN7(dev))
  931. gen7_ppgtt_enable(dev);
  932. else if (INTEL_INFO(dev)->gen >= 8)
  933. gen8_ppgtt_enable(dev);
  934. else
  935. WARN_ON(1);
  936. if (ppgtt) {
  937. for_each_ring(ring, dev_priv, i) {
  938. ret = ppgtt->switch_mm(ppgtt, ring);
  939. if (ret != 0)
  940. return ret;
  941. }
  942. }
  943. return ret;
  944. }
  945. struct i915_hw_ppgtt *
  946. i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
  947. {
  948. struct i915_hw_ppgtt *ppgtt;
  949. int ret;
  950. ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
  951. if (!ppgtt)
  952. return ERR_PTR(-ENOMEM);
  953. ret = i915_ppgtt_init(dev, ppgtt);
  954. if (ret) {
  955. kfree(ppgtt);
  956. return ERR_PTR(ret);
  957. }
  958. ppgtt->file_priv = fpriv;
  959. return ppgtt;
  960. }
  961. void i915_ppgtt_release(struct kref *kref)
  962. {
  963. struct i915_hw_ppgtt *ppgtt =
  964. container_of(kref, struct i915_hw_ppgtt, ref);
  965. /* vmas should already be unbound */
  966. WARN_ON(!list_empty(&ppgtt->base.active_list));
  967. WARN_ON(!list_empty(&ppgtt->base.inactive_list));
  968. list_del(&ppgtt->base.global_link);
  969. drm_mm_takedown(&ppgtt->base.mm);
  970. ppgtt->base.cleanup(&ppgtt->base);
  971. kfree(ppgtt);
  972. }
  973. static void
  974. ppgtt_bind_vma(struct i915_vma *vma,
  975. enum i915_cache_level cache_level,
  976. u32 flags)
  977. {
  978. /* Currently applicable only to VLV */
  979. if (vma->obj->gt_ro)
  980. flags |= PTE_READ_ONLY;
  981. vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
  982. cache_level, flags);
  983. }
  984. static void ppgtt_unbind_vma(struct i915_vma *vma)
  985. {
  986. vma->vm->clear_range(vma->vm,
  987. vma->node.start,
  988. vma->obj->base.size,
  989. true);
  990. }
  991. extern int intel_iommu_gfx_mapped;
  992. /* Certain Gen5 chipsets require require idling the GPU before
  993. * unmapping anything from the GTT when VT-d is enabled.
  994. */
  995. static inline bool needs_idle_maps(struct drm_device *dev)
  996. {
  997. #ifdef CONFIG_INTEL_IOMMU
  998. /* Query intel_iommu to see if we need the workaround. Presumably that
  999. * was loaded first.
  1000. */
  1001. if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
  1002. return true;
  1003. #endif
  1004. return false;
  1005. }
  1006. static bool do_idling(struct drm_i915_private *dev_priv)
  1007. {
  1008. bool ret = dev_priv->mm.interruptible;
  1009. if (unlikely(dev_priv->gtt.do_idle_maps)) {
  1010. dev_priv->mm.interruptible = false;
  1011. if (i915_gpu_idle(dev_priv->dev)) {
  1012. DRM_ERROR("Couldn't idle GPU\n");
  1013. /* Wait a bit, in hopes it avoids the hang */
  1014. udelay(10);
  1015. }
  1016. }
  1017. return ret;
  1018. }
  1019. static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
  1020. {
  1021. if (unlikely(dev_priv->gtt.do_idle_maps))
  1022. dev_priv->mm.interruptible = interruptible;
  1023. }
  1024. void i915_check_and_clear_faults(struct drm_device *dev)
  1025. {
  1026. struct drm_i915_private *dev_priv = dev->dev_private;
  1027. struct intel_engine_cs *ring;
  1028. int i;
  1029. if (INTEL_INFO(dev)->gen < 6)
  1030. return;
  1031. for_each_ring(ring, dev_priv, i) {
  1032. u32 fault_reg;
  1033. fault_reg = I915_READ(RING_FAULT_REG(ring));
  1034. if (fault_reg & RING_FAULT_VALID) {
  1035. DRM_DEBUG_DRIVER("Unexpected fault\n"
  1036. "\tAddr: 0x%08lx\\n"
  1037. "\tAddress space: %s\n"
  1038. "\tSource ID: %d\n"
  1039. "\tType: %d\n",
  1040. fault_reg & PAGE_MASK,
  1041. fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
  1042. RING_FAULT_SRCID(fault_reg),
  1043. RING_FAULT_FAULT_TYPE(fault_reg));
  1044. I915_WRITE(RING_FAULT_REG(ring),
  1045. fault_reg & ~RING_FAULT_VALID);
  1046. }
  1047. }
  1048. POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
  1049. }
  1050. static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
  1051. {
  1052. if (INTEL_INFO(dev_priv->dev)->gen < 6) {
  1053. intel_gtt_chipset_flush();
  1054. } else {
  1055. I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  1056. POSTING_READ(GFX_FLSH_CNTL_GEN6);
  1057. }
  1058. }
  1059. void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
  1060. {
  1061. struct drm_i915_private *dev_priv = dev->dev_private;
  1062. /* Don't bother messing with faults pre GEN6 as we have little
  1063. * documentation supporting that it's a good idea.
  1064. */
  1065. if (INTEL_INFO(dev)->gen < 6)
  1066. return;
  1067. i915_check_and_clear_faults(dev);
  1068. dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
  1069. dev_priv->gtt.base.start,
  1070. dev_priv->gtt.base.total,
  1071. true);
  1072. i915_ggtt_flush(dev_priv);
  1073. }
  1074. void i915_gem_restore_gtt_mappings(struct drm_device *dev)
  1075. {
  1076. struct drm_i915_private *dev_priv = dev->dev_private;
  1077. struct drm_i915_gem_object *obj;
  1078. struct i915_address_space *vm;
  1079. i915_check_and_clear_faults(dev);
  1080. /* First fill our portion of the GTT with scratch pages */
  1081. dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
  1082. dev_priv->gtt.base.start,
  1083. dev_priv->gtt.base.total,
  1084. true);
  1085. list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
  1086. struct i915_vma *vma = i915_gem_obj_to_vma(obj,
  1087. &dev_priv->gtt.base);
  1088. if (!vma)
  1089. continue;
  1090. i915_gem_clflush_object(obj, obj->pin_display);
  1091. /* The bind_vma code tries to be smart about tracking mappings.
  1092. * Unfortunately above, we've just wiped out the mappings
  1093. * without telling our object about it. So we need to fake it.
  1094. */
  1095. obj->has_global_gtt_mapping = 0;
  1096. vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
  1097. }
  1098. if (INTEL_INFO(dev)->gen >= 8) {
  1099. if (IS_CHERRYVIEW(dev))
  1100. chv_setup_private_ppat(dev_priv);
  1101. else
  1102. bdw_setup_private_ppat(dev_priv);
  1103. return;
  1104. }
  1105. list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
  1106. /* TODO: Perhaps it shouldn't be gen6 specific */
  1107. if (i915_is_ggtt(vm)) {
  1108. if (dev_priv->mm.aliasing_ppgtt)
  1109. gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
  1110. continue;
  1111. }
  1112. gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
  1113. }
  1114. i915_ggtt_flush(dev_priv);
  1115. }
  1116. int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
  1117. {
  1118. if (obj->has_dma_mapping)
  1119. return 0;
  1120. if (!dma_map_sg(&obj->base.dev->pdev->dev,
  1121. obj->pages->sgl, obj->pages->nents,
  1122. PCI_DMA_BIDIRECTIONAL))
  1123. return -ENOSPC;
  1124. return 0;
  1125. }
  1126. static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
  1127. {
  1128. #ifdef writeq
  1129. writeq(pte, addr);
  1130. #else
  1131. iowrite32((u32)pte, addr);
  1132. iowrite32(pte >> 32, addr + 4);
  1133. #endif
  1134. }
  1135. static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
  1136. struct sg_table *st,
  1137. uint64_t start,
  1138. enum i915_cache_level level, u32 unused)
  1139. {
  1140. struct drm_i915_private *dev_priv = vm->dev->dev_private;
  1141. unsigned first_entry = start >> PAGE_SHIFT;
  1142. gen8_gtt_pte_t __iomem *gtt_entries =
  1143. (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
  1144. int i = 0;
  1145. struct sg_page_iter sg_iter;
  1146. dma_addr_t addr = 0; /* shut up gcc */
  1147. for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
  1148. addr = sg_dma_address(sg_iter.sg) +
  1149. (sg_iter.sg_pgoffset << PAGE_SHIFT);
  1150. gen8_set_pte(&gtt_entries[i],
  1151. gen8_pte_encode(addr, level, true));
  1152. i++;
  1153. }
  1154. /*
  1155. * XXX: This serves as a posting read to make sure that the PTE has
  1156. * actually been updated. There is some concern that even though
  1157. * registers and PTEs are within the same BAR that they are potentially
  1158. * of NUMA access patterns. Therefore, even with the way we assume
  1159. * hardware should work, we must keep this posting read for paranoia.
  1160. */
  1161. if (i != 0)
  1162. WARN_ON(readq(&gtt_entries[i-1])
  1163. != gen8_pte_encode(addr, level, true));
  1164. /* This next bit makes the above posting read even more important. We
  1165. * want to flush the TLBs only after we're certain all the PTE updates
  1166. * have finished.
  1167. */
  1168. I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  1169. POSTING_READ(GFX_FLSH_CNTL_GEN6);
  1170. }
  1171. /*
  1172. * Binds an object into the global gtt with the specified cache level. The object
  1173. * will be accessible to the GPU via commands whose operands reference offsets
  1174. * within the global GTT as well as accessible by the GPU through the GMADR
  1175. * mapped BAR (dev_priv->mm.gtt->gtt).
  1176. */
  1177. static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
  1178. struct sg_table *st,
  1179. uint64_t start,
  1180. enum i915_cache_level level, u32 flags)
  1181. {
  1182. struct drm_i915_private *dev_priv = vm->dev->dev_private;
  1183. unsigned first_entry = start >> PAGE_SHIFT;
  1184. gen6_gtt_pte_t __iomem *gtt_entries =
  1185. (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
  1186. int i = 0;
  1187. struct sg_page_iter sg_iter;
  1188. dma_addr_t addr = 0;
  1189. for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
  1190. addr = sg_page_iter_dma_address(&sg_iter);
  1191. iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
  1192. i++;
  1193. }
  1194. /* XXX: This serves as a posting read to make sure that the PTE has
  1195. * actually been updated. There is some concern that even though
  1196. * registers and PTEs are within the same BAR that they are potentially
  1197. * of NUMA access patterns. Therefore, even with the way we assume
  1198. * hardware should work, we must keep this posting read for paranoia.
  1199. */
  1200. if (i != 0) {
  1201. unsigned long gtt = readl(&gtt_entries[i-1]);
  1202. WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
  1203. }
  1204. /* This next bit makes the above posting read even more important. We
  1205. * want to flush the TLBs only after we're certain all the PTE updates
  1206. * have finished.
  1207. */
  1208. I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  1209. POSTING_READ(GFX_FLSH_CNTL_GEN6);
  1210. }
  1211. static void gen8_ggtt_clear_range(struct i915_address_space *vm,
  1212. uint64_t start,
  1213. uint64_t length,
  1214. bool use_scratch)
  1215. {
  1216. struct drm_i915_private *dev_priv = vm->dev->dev_private;
  1217. unsigned first_entry = start >> PAGE_SHIFT;
  1218. unsigned num_entries = length >> PAGE_SHIFT;
  1219. gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
  1220. (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
  1221. const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
  1222. int i;
  1223. if (WARN(num_entries > max_entries,
  1224. "First entry = %d; Num entries = %d (max=%d)\n",
  1225. first_entry, num_entries, max_entries))
  1226. num_entries = max_entries;
  1227. scratch_pte = gen8_pte_encode(vm->scratch.addr,
  1228. I915_CACHE_LLC,
  1229. use_scratch);
  1230. for (i = 0; i < num_entries; i++)
  1231. gen8_set_pte(&gtt_base[i], scratch_pte);
  1232. readl(gtt_base);
  1233. }
  1234. static void gen6_ggtt_clear_range(struct i915_address_space *vm,
  1235. uint64_t start,
  1236. uint64_t length,
  1237. bool use_scratch)
  1238. {
  1239. struct drm_i915_private *dev_priv = vm->dev->dev_private;
  1240. unsigned first_entry = start >> PAGE_SHIFT;
  1241. unsigned num_entries = length >> PAGE_SHIFT;
  1242. gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
  1243. (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
  1244. const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
  1245. int i;
  1246. if (WARN(num_entries > max_entries,
  1247. "First entry = %d; Num entries = %d (max=%d)\n",
  1248. first_entry, num_entries, max_entries))
  1249. num_entries = max_entries;
  1250. scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
  1251. for (i = 0; i < num_entries; i++)
  1252. iowrite32(scratch_pte, &gtt_base[i]);
  1253. readl(gtt_base);
  1254. }
  1255. static void i915_ggtt_bind_vma(struct i915_vma *vma,
  1256. enum i915_cache_level cache_level,
  1257. u32 unused)
  1258. {
  1259. const unsigned long entry = vma->node.start >> PAGE_SHIFT;
  1260. unsigned int flags = (cache_level == I915_CACHE_NONE) ?
  1261. AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
  1262. BUG_ON(!i915_is_ggtt(vma->vm));
  1263. intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
  1264. vma->obj->has_global_gtt_mapping = 1;
  1265. }
  1266. static void i915_ggtt_clear_range(struct i915_address_space *vm,
  1267. uint64_t start,
  1268. uint64_t length,
  1269. bool unused)
  1270. {
  1271. unsigned first_entry = start >> PAGE_SHIFT;
  1272. unsigned num_entries = length >> PAGE_SHIFT;
  1273. intel_gtt_clear_range(first_entry, num_entries);
  1274. }
  1275. static void i915_ggtt_unbind_vma(struct i915_vma *vma)
  1276. {
  1277. const unsigned int first = vma->node.start >> PAGE_SHIFT;
  1278. const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
  1279. BUG_ON(!i915_is_ggtt(vma->vm));
  1280. vma->obj->has_global_gtt_mapping = 0;
  1281. intel_gtt_clear_range(first, size);
  1282. }
  1283. static void ggtt_bind_vma(struct i915_vma *vma,
  1284. enum i915_cache_level cache_level,
  1285. u32 flags)
  1286. {
  1287. struct drm_device *dev = vma->vm->dev;
  1288. struct drm_i915_private *dev_priv = dev->dev_private;
  1289. struct drm_i915_gem_object *obj = vma->obj;
  1290. /* Currently applicable only to VLV */
  1291. if (obj->gt_ro)
  1292. flags |= PTE_READ_ONLY;
  1293. /* If there is no aliasing PPGTT, or the caller needs a global mapping,
  1294. * or we have a global mapping already but the cacheability flags have
  1295. * changed, set the global PTEs.
  1296. *
  1297. * If there is an aliasing PPGTT it is anecdotally faster, so use that
  1298. * instead if none of the above hold true.
  1299. *
  1300. * NB: A global mapping should only be needed for special regions like
  1301. * "gtt mappable", SNB errata, or if specified via special execbuf
  1302. * flags. At all other times, the GPU will use the aliasing PPGTT.
  1303. */
  1304. if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
  1305. if (!obj->has_global_gtt_mapping ||
  1306. (cache_level != obj->cache_level)) {
  1307. vma->vm->insert_entries(vma->vm, obj->pages,
  1308. vma->node.start,
  1309. cache_level, flags);
  1310. obj->has_global_gtt_mapping = 1;
  1311. }
  1312. }
  1313. if (dev_priv->mm.aliasing_ppgtt &&
  1314. (!obj->has_aliasing_ppgtt_mapping ||
  1315. (cache_level != obj->cache_level))) {
  1316. struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
  1317. appgtt->base.insert_entries(&appgtt->base,
  1318. vma->obj->pages,
  1319. vma->node.start,
  1320. cache_level, flags);
  1321. vma->obj->has_aliasing_ppgtt_mapping = 1;
  1322. }
  1323. }
  1324. static void ggtt_unbind_vma(struct i915_vma *vma)
  1325. {
  1326. struct drm_device *dev = vma->vm->dev;
  1327. struct drm_i915_private *dev_priv = dev->dev_private;
  1328. struct drm_i915_gem_object *obj = vma->obj;
  1329. if (obj->has_global_gtt_mapping) {
  1330. vma->vm->clear_range(vma->vm,
  1331. vma->node.start,
  1332. obj->base.size,
  1333. true);
  1334. obj->has_global_gtt_mapping = 0;
  1335. }
  1336. if (obj->has_aliasing_ppgtt_mapping) {
  1337. struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
  1338. appgtt->base.clear_range(&appgtt->base,
  1339. vma->node.start,
  1340. obj->base.size,
  1341. true);
  1342. obj->has_aliasing_ppgtt_mapping = 0;
  1343. }
  1344. }
  1345. void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
  1346. {
  1347. struct drm_device *dev = obj->base.dev;
  1348. struct drm_i915_private *dev_priv = dev->dev_private;
  1349. bool interruptible;
  1350. interruptible = do_idling(dev_priv);
  1351. if (!obj->has_dma_mapping)
  1352. dma_unmap_sg(&dev->pdev->dev,
  1353. obj->pages->sgl, obj->pages->nents,
  1354. PCI_DMA_BIDIRECTIONAL);
  1355. undo_idling(dev_priv, interruptible);
  1356. }
  1357. static void i915_gtt_color_adjust(struct drm_mm_node *node,
  1358. unsigned long color,
  1359. unsigned long *start,
  1360. unsigned long *end)
  1361. {
  1362. if (node->color != color)
  1363. *start += 4096;
  1364. if (!list_empty(&node->node_list)) {
  1365. node = list_entry(node->node_list.next,
  1366. struct drm_mm_node,
  1367. node_list);
  1368. if (node->allocated && node->color != color)
  1369. *end -= 4096;
  1370. }
  1371. }
  1372. int i915_gem_setup_global_gtt(struct drm_device *dev,
  1373. unsigned long start,
  1374. unsigned long mappable_end,
  1375. unsigned long end)
  1376. {
  1377. /* Let GEM Manage all of the aperture.
  1378. *
  1379. * However, leave one page at the end still bound to the scratch page.
  1380. * There are a number of places where the hardware apparently prefetches
  1381. * past the end of the object, and we've seen multiple hangs with the
  1382. * GPU head pointer stuck in a batchbuffer bound at the last page of the
  1383. * aperture. One page should be enough to keep any prefetching inside
  1384. * of the aperture.
  1385. */
  1386. struct drm_i915_private *dev_priv = dev->dev_private;
  1387. struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
  1388. struct drm_mm_node *entry;
  1389. struct drm_i915_gem_object *obj;
  1390. unsigned long hole_start, hole_end;
  1391. int ret;
  1392. BUG_ON(mappable_end > end);
  1393. /* Subtract the guard page ... */
  1394. drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
  1395. if (!HAS_LLC(dev))
  1396. dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
  1397. /* Mark any preallocated objects as occupied */
  1398. list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
  1399. struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
  1400. DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
  1401. i915_gem_obj_ggtt_offset(obj), obj->base.size);
  1402. WARN_ON(i915_gem_obj_ggtt_bound(obj));
  1403. ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
  1404. if (ret) {
  1405. DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
  1406. return ret;
  1407. }
  1408. obj->has_global_gtt_mapping = 1;
  1409. }
  1410. dev_priv->gtt.base.start = start;
  1411. dev_priv->gtt.base.total = end - start;
  1412. /* Clear any non-preallocated blocks */
  1413. drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
  1414. DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
  1415. hole_start, hole_end);
  1416. ggtt_vm->clear_range(ggtt_vm, hole_start,
  1417. hole_end - hole_start, true);
  1418. }
  1419. /* And finally clear the reserved guard page */
  1420. ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
  1421. if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
  1422. struct i915_hw_ppgtt *ppgtt;
  1423. ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
  1424. if (!ppgtt)
  1425. return -ENOMEM;
  1426. ret = __hw_ppgtt_init(dev, ppgtt);
  1427. if (ret != 0)
  1428. return ret;
  1429. dev_priv->mm.aliasing_ppgtt = ppgtt;
  1430. }
  1431. return 0;
  1432. }
  1433. void i915_gem_init_global_gtt(struct drm_device *dev)
  1434. {
  1435. struct drm_i915_private *dev_priv = dev->dev_private;
  1436. unsigned long gtt_size, mappable_size;
  1437. gtt_size = dev_priv->gtt.base.total;
  1438. mappable_size = dev_priv->gtt.mappable_end;
  1439. i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
  1440. }
  1441. void i915_global_gtt_cleanup(struct drm_device *dev)
  1442. {
  1443. struct drm_i915_private *dev_priv = dev->dev_private;
  1444. struct i915_address_space *vm = &dev_priv->gtt.base;
  1445. if (dev_priv->mm.aliasing_ppgtt) {
  1446. struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
  1447. ppgtt->base.cleanup(&ppgtt->base);
  1448. }
  1449. if (drm_mm_initialized(&vm->mm)) {
  1450. drm_mm_takedown(&vm->mm);
  1451. list_del(&vm->global_link);
  1452. }
  1453. vm->cleanup(vm);
  1454. }
  1455. static int setup_scratch_page(struct drm_device *dev)
  1456. {
  1457. struct drm_i915_private *dev_priv = dev->dev_private;
  1458. struct page *page;
  1459. dma_addr_t dma_addr;
  1460. page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
  1461. if (page == NULL)
  1462. return -ENOMEM;
  1463. get_page(page);
  1464. set_pages_uc(page, 1);
  1465. #ifdef CONFIG_INTEL_IOMMU
  1466. dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
  1467. PCI_DMA_BIDIRECTIONAL);
  1468. if (pci_dma_mapping_error(dev->pdev, dma_addr))
  1469. return -EINVAL;
  1470. #else
  1471. dma_addr = page_to_phys(page);
  1472. #endif
  1473. dev_priv->gtt.base.scratch.page = page;
  1474. dev_priv->gtt.base.scratch.addr = dma_addr;
  1475. return 0;
  1476. }
  1477. static void teardown_scratch_page(struct drm_device *dev)
  1478. {
  1479. struct drm_i915_private *dev_priv = dev->dev_private;
  1480. struct page *page = dev_priv->gtt.base.scratch.page;
  1481. set_pages_wb(page, 1);
  1482. pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
  1483. PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
  1484. put_page(page);
  1485. __free_page(page);
  1486. }
  1487. static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
  1488. {
  1489. snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
  1490. snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
  1491. return snb_gmch_ctl << 20;
  1492. }
  1493. static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
  1494. {
  1495. bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
  1496. bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
  1497. if (bdw_gmch_ctl)
  1498. bdw_gmch_ctl = 1 << bdw_gmch_ctl;
  1499. #ifdef CONFIG_X86_32
  1500. /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
  1501. if (bdw_gmch_ctl > 4)
  1502. bdw_gmch_ctl = 4;
  1503. #endif
  1504. return bdw_gmch_ctl << 20;
  1505. }
  1506. static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
  1507. {
  1508. gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
  1509. gmch_ctrl &= SNB_GMCH_GGMS_MASK;
  1510. if (gmch_ctrl)
  1511. return 1 << (20 + gmch_ctrl);
  1512. return 0;
  1513. }
  1514. static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
  1515. {
  1516. snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
  1517. snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
  1518. return snb_gmch_ctl << 25; /* 32 MB units */
  1519. }
  1520. static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
  1521. {
  1522. bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
  1523. bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
  1524. return bdw_gmch_ctl << 25; /* 32 MB units */
  1525. }
  1526. static size_t chv_get_stolen_size(u16 gmch_ctrl)
  1527. {
  1528. gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
  1529. gmch_ctrl &= SNB_GMCH_GMS_MASK;
  1530. /*
  1531. * 0x0 to 0x10: 32MB increments starting at 0MB
  1532. * 0x11 to 0x16: 4MB increments starting at 8MB
  1533. * 0x17 to 0x1d: 4MB increments start at 36MB
  1534. */
  1535. if (gmch_ctrl < 0x11)
  1536. return gmch_ctrl << 25;
  1537. else if (gmch_ctrl < 0x17)
  1538. return (gmch_ctrl - 0x11 + 2) << 22;
  1539. else
  1540. return (gmch_ctrl - 0x17 + 9) << 22;
  1541. }
  1542. static int ggtt_probe_common(struct drm_device *dev,
  1543. size_t gtt_size)
  1544. {
  1545. struct drm_i915_private *dev_priv = dev->dev_private;
  1546. phys_addr_t gtt_phys_addr;
  1547. int ret;
  1548. /* For Modern GENs the PTEs and register space are split in the BAR */
  1549. gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
  1550. (pci_resource_len(dev->pdev, 0) / 2);
  1551. dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
  1552. if (!dev_priv->gtt.gsm) {
  1553. DRM_ERROR("Failed to map the gtt page table\n");
  1554. return -ENOMEM;
  1555. }
  1556. ret = setup_scratch_page(dev);
  1557. if (ret) {
  1558. DRM_ERROR("Scratch setup failed\n");
  1559. /* iounmap will also get called at remove, but meh */
  1560. iounmap(dev_priv->gtt.gsm);
  1561. }
  1562. return ret;
  1563. }
  1564. /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
  1565. * bits. When using advanced contexts each context stores its own PAT, but
  1566. * writing this data shouldn't be harmful even in those cases. */
  1567. static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
  1568. {
  1569. uint64_t pat;
  1570. pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
  1571. GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
  1572. GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
  1573. GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
  1574. GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
  1575. GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
  1576. GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
  1577. GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
  1578. if (!USES_PPGTT(dev_priv->dev))
  1579. /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
  1580. * so RTL will always use the value corresponding to
  1581. * pat_sel = 000".
  1582. * So let's disable cache for GGTT to avoid screen corruptions.
  1583. * MOCS still can be used though.
  1584. * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
  1585. * before this patch, i.e. the same uncached + snooping access
  1586. * like on gen6/7 seems to be in effect.
  1587. * - So this just fixes blitter/render access. Again it looks
  1588. * like it's not just uncached access, but uncached + snooping.
  1589. * So we can still hold onto all our assumptions wrt cpu
  1590. * clflushing on LLC machines.
  1591. */
  1592. pat = GEN8_PPAT(0, GEN8_PPAT_UC);
  1593. /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
  1594. * write would work. */
  1595. I915_WRITE(GEN8_PRIVATE_PAT, pat);
  1596. I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
  1597. }
  1598. static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
  1599. {
  1600. uint64_t pat;
  1601. /*
  1602. * Map WB on BDW to snooped on CHV.
  1603. *
  1604. * Only the snoop bit has meaning for CHV, the rest is
  1605. * ignored.
  1606. *
  1607. * Note that the harware enforces snooping for all page
  1608. * table accesses. The snoop bit is actually ignored for
  1609. * PDEs.
  1610. */
  1611. pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
  1612. GEN8_PPAT(1, 0) |
  1613. GEN8_PPAT(2, 0) |
  1614. GEN8_PPAT(3, 0) |
  1615. GEN8_PPAT(4, CHV_PPAT_SNOOP) |
  1616. GEN8_PPAT(5, CHV_PPAT_SNOOP) |
  1617. GEN8_PPAT(6, CHV_PPAT_SNOOP) |
  1618. GEN8_PPAT(7, CHV_PPAT_SNOOP);
  1619. I915_WRITE(GEN8_PRIVATE_PAT, pat);
  1620. I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
  1621. }
  1622. static int gen8_gmch_probe(struct drm_device *dev,
  1623. size_t *gtt_total,
  1624. size_t *stolen,
  1625. phys_addr_t *mappable_base,
  1626. unsigned long *mappable_end)
  1627. {
  1628. struct drm_i915_private *dev_priv = dev->dev_private;
  1629. unsigned int gtt_size;
  1630. u16 snb_gmch_ctl;
  1631. int ret;
  1632. /* TODO: We're not aware of mappable constraints on gen8 yet */
  1633. *mappable_base = pci_resource_start(dev->pdev, 2);
  1634. *mappable_end = pci_resource_len(dev->pdev, 2);
  1635. if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
  1636. pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
  1637. pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
  1638. if (IS_CHERRYVIEW(dev)) {
  1639. *stolen = chv_get_stolen_size(snb_gmch_ctl);
  1640. gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
  1641. } else {
  1642. *stolen = gen8_get_stolen_size(snb_gmch_ctl);
  1643. gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
  1644. }
  1645. *gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
  1646. if (IS_CHERRYVIEW(dev))
  1647. chv_setup_private_ppat(dev_priv);
  1648. else
  1649. bdw_setup_private_ppat(dev_priv);
  1650. ret = ggtt_probe_common(dev, gtt_size);
  1651. dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
  1652. dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
  1653. return ret;
  1654. }
  1655. static int gen6_gmch_probe(struct drm_device *dev,
  1656. size_t *gtt_total,
  1657. size_t *stolen,
  1658. phys_addr_t *mappable_base,
  1659. unsigned long *mappable_end)
  1660. {
  1661. struct drm_i915_private *dev_priv = dev->dev_private;
  1662. unsigned int gtt_size;
  1663. u16 snb_gmch_ctl;
  1664. int ret;
  1665. *mappable_base = pci_resource_start(dev->pdev, 2);
  1666. *mappable_end = pci_resource_len(dev->pdev, 2);
  1667. /* 64/512MB is the current min/max we actually know of, but this is just
  1668. * a coarse sanity check.
  1669. */
  1670. if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
  1671. DRM_ERROR("Unknown GMADR size (%lx)\n",
  1672. dev_priv->gtt.mappable_end);
  1673. return -ENXIO;
  1674. }
  1675. if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
  1676. pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
  1677. pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
  1678. *stolen = gen6_get_stolen_size(snb_gmch_ctl);
  1679. gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
  1680. *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
  1681. ret = ggtt_probe_common(dev, gtt_size);
  1682. dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
  1683. dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
  1684. return ret;
  1685. }
  1686. static void gen6_gmch_remove(struct i915_address_space *vm)
  1687. {
  1688. struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
  1689. iounmap(gtt->gsm);
  1690. teardown_scratch_page(vm->dev);
  1691. }
  1692. static int i915_gmch_probe(struct drm_device *dev,
  1693. size_t *gtt_total,
  1694. size_t *stolen,
  1695. phys_addr_t *mappable_base,
  1696. unsigned long *mappable_end)
  1697. {
  1698. struct drm_i915_private *dev_priv = dev->dev_private;
  1699. int ret;
  1700. ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
  1701. if (!ret) {
  1702. DRM_ERROR("failed to set up gmch\n");
  1703. return -EIO;
  1704. }
  1705. intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
  1706. dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
  1707. dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
  1708. if (unlikely(dev_priv->gtt.do_idle_maps))
  1709. DRM_INFO("applying Ironlake quirks for intel_iommu\n");
  1710. return 0;
  1711. }
  /*
   * Cleanup hook paired with i915_gmch_probe().
   *
   * @vm: unused; present only to match the address-space cleanup signature.
   *
   * Simply forwards to intel_gmch_remove().
   */
  static void i915_gmch_remove(struct i915_address_space *vm)
  {
  	intel_gmch_remove();
  }
  1716. int i915_gem_gtt_init(struct drm_device *dev)
  1717. {
  1718. struct drm_i915_private *dev_priv = dev->dev_private;
  1719. struct i915_gtt *gtt = &dev_priv->gtt;
  1720. int ret;
  1721. if (INTEL_INFO(dev)->gen <= 5) {
  1722. gtt->gtt_probe = i915_gmch_probe;
  1723. gtt->base.cleanup = i915_gmch_remove;
  1724. } else if (INTEL_INFO(dev)->gen < 8) {
  1725. gtt->gtt_probe = gen6_gmch_probe;
  1726. gtt->base.cleanup = gen6_gmch_remove;
  1727. if (IS_HASWELL(dev) && dev_priv->ellc_size)
  1728. gtt->base.pte_encode = iris_pte_encode;
  1729. else if (IS_HASWELL(dev))
  1730. gtt->base.pte_encode = hsw_pte_encode;
  1731. else if (IS_VALLEYVIEW(dev))
  1732. gtt->base.pte_encode = byt_pte_encode;
  1733. else if (INTEL_INFO(dev)->gen >= 7)
  1734. gtt->base.pte_encode = ivb_pte_encode;
  1735. else
  1736. gtt->base.pte_encode = snb_pte_encode;
  1737. } else {
  1738. dev_priv->gtt.gtt_probe = gen8_gmch_probe;
  1739. dev_priv->gtt.base.cleanup = gen6_gmch_remove;
  1740. }
  1741. ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
  1742. &gtt->mappable_base, &gtt->mappable_end);
  1743. if (ret)
  1744. return ret;
  1745. gtt->base.dev = dev;
  1746. /* GMADR is the PCI mmio aperture into the global GTT. */
  1747. DRM_INFO("Memory usable by graphics device = %zdM\n",
  1748. gtt->base.total >> 20);
  1749. DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
  1750. DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
  1751. #ifdef CONFIG_INTEL_IOMMU
  1752. if (intel_iommu_gfx_mapped)
  1753. DRM_INFO("VT-d active for gfx access\n");
  1754. #endif
  1755. /*
  1756. * i915.enable_ppgtt is read-only, so do an early pass to validate the
  1757. * user's requested state against the hardware/driver capabilities. We
  1758. * do this now so that we can print out any log messages once rather
  1759. * than every time we check intel_enable_ppgtt().
  1760. */
  1761. i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
  1762. DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
  1763. return 0;
  1764. }
  1765. static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
  1766. struct i915_address_space *vm)
  1767. {
  1768. struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
  1769. if (vma == NULL)
  1770. return ERR_PTR(-ENOMEM);
  1771. INIT_LIST_HEAD(&vma->vma_link);
  1772. INIT_LIST_HEAD(&vma->mm_list);
  1773. INIT_LIST_HEAD(&vma->exec_list);
  1774. vma->vm = vm;
  1775. vma->obj = obj;
  1776. switch (INTEL_INFO(vm->dev)->gen) {
  1777. case 8:
  1778. case 7:
  1779. case 6:
  1780. if (i915_is_ggtt(vm)) {
  1781. vma->unbind_vma = ggtt_unbind_vma;
  1782. vma->bind_vma = ggtt_bind_vma;
  1783. } else {
  1784. vma->unbind_vma = ppgtt_unbind_vma;
  1785. vma->bind_vma = ppgtt_bind_vma;
  1786. }
  1787. break;
  1788. case 5:
  1789. case 4:
  1790. case 3:
  1791. case 2:
  1792. BUG_ON(!i915_is_ggtt(vm));
  1793. vma->unbind_vma = i915_ggtt_unbind_vma;
  1794. vma->bind_vma = i915_ggtt_bind_vma;
  1795. break;
  1796. default:
  1797. BUG();
  1798. }
  1799. /* Keep GGTT vmas first to make debug easier */
  1800. if (i915_is_ggtt(vm))
  1801. list_add(&vma->vma_link, &obj->vma_list);
  1802. else {
  1803. list_add_tail(&vma->vma_link, &obj->vma_list);
  1804. i915_ppgtt_get(i915_vm_to_ppgtt(vm));
  1805. }
  1806. return vma;
  1807. }
  /*
   * Return the VMA of @obj in @vm, creating one if none exists yet.
   *
   * @obj: GEM object to look up
   * @vm:  address space to look in
   *
   * Returns whatever i915_gem_obj_to_vma() finds when that is non-NULL;
   * otherwise returns the result of __i915_gem_vma_create(), which may be
   * an ERR_PTR() value.
   */
  struct i915_vma *
  i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
  				  struct i915_address_space *vm)
  {
  	struct i915_vma *existing = i915_gem_obj_to_vma(obj, vm);

  	if (existing)
  		return existing;

  	return __i915_gem_vma_create(obj, vm);
  }