kvm_book3s_64.h

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * Copyright SUSE Linux Products GmbH 2010
 *
 * Authors: Alexander Graf <agraf@suse.de>
 */

#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__

#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
{
        preempt_disable();
        return &get_paca()->shadow_vcpu;
}

static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
{
        preempt_enable();
}
#endif
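
/*
 * Illustrative sketch (not part of the original header): svcpu_get() and
 * svcpu_put() must always be paired, since the shadow vcpu lives in the
 * PACA and is only stable while preemption is disabled:
 *
 *      struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 *      ... access svcpu fields; sleeping is not allowed here ...
 *      svcpu_put(svcpu);
 */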

#define SPAPR_TCE_SHIFT         12

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER   24      /* 16MB HPT by default */
extern unsigned long kvm_rma_pages;
#endif

#define VRMA_VSID       0x1ffffffUL     /* 1TB VSID reserved for VRMA */

/*
 * We use a lock bit in HPTE dword 0 to synchronize updates and
 * accesses to each HPTE, and another bit to indicate non-present
 * HPTEs.
 */
#define HPTE_V_HVLOCK   0x40UL
#define HPTE_V_ABSENT   0x20UL

/*
 * We use this bit in the guest_rpte field of the revmap entry
 * to indicate a modified HPTE.
 */
#define HPTE_GR_MODIFIED        (1ul << 62)

/* These bits are reserved in the guest view of the HPTE */
#define HPTE_GR_RESERVED        HPTE_GR_MODIFIED

static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
{
        unsigned long tmp, old;
        __be64 be_lockbit, be_bits;

        /*
         * We load/store in native endian, but the HTAB is in big endian.
         * If we byte-swap everything we apply to the HPTE, we are
         * implicitly correct again.
         */
        be_lockbit = cpu_to_be64(HPTE_V_HVLOCK);
        be_bits = cpu_to_be64(bits);

        asm volatile("  ldarx   %0,0,%2\n"
                     "  and.    %1,%0,%3\n"
                     "  bne     2f\n"
                     "  or      %0,%0,%4\n"
                     "  stdcx.  %0,0,%2\n"
                     "  beq+    2f\n"
                     "  mr      %1,%3\n"
                     "2: isync"
                     : "=&r" (tmp), "=&r" (old)
                     : "r" (hpte), "r" (be_bits), "r" (be_lockbit)
                     : "cc", "memory");
        return old == 0;
}
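
/*
 * Illustrative sketch (an assumption, not from this header): callers that
 * must take the lock typically spin on try_lock_hpte() and later unlock
 * by clearing HPTE_V_HVLOCK from the first HPTE doubleword, e.g.:
 *
 *      while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
 *              cpu_relax();
 *      ... inspect or update the HPTE ...
 *      hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 *
 * (Simplified: the real unlock also needs a release barrier before the
 * store.)
 */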

static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
        int i, shift;
        unsigned int mask;

        /* start from 1 ignoring MMU_PAGE_4K */
        for (i = 1; i < MMU_PAGE_COUNT; i++) {

                /* invalid penc */
                if (mmu_psize_defs[psize].penc[i] == -1)
                        continue;
                /*
                 * encoding bits per actual page size
                 *        PTE LP     actual page size
                 *    rrrr rrrz      >=8KB
                 *    rrrr rrzz      >=16KB
                 *    rrrr rzzz      >=32KB
                 *    rrrr zzzz      >=64KB
                 * .......
                 */
                shift = mmu_psize_defs[i].shift - LP_SHIFT;
                if (shift > LP_BITS)
                        shift = LP_BITS;
                mask = (1 << shift) - 1;
                if ((lp & mask) == mmu_psize_defs[psize].penc[i])
                        return i;
        }
        return -1;
}
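
/*
 * Worked example (illustrative; real penc values come from the MMU setup
 * code): assuming the usual LP_SHIFT == 12 and LP_BITS == 8, a 64K actual
 * page (shift 16) compares the low 16 - 12 = 4 bits of lp against
 * penc[MMU_PAGE_64K].  If that penc were 1, any lp of the form rrrr 0001
 * would decode to MMU_PAGE_64K, the high "r" bits being ignored.
 */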

static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
                                             unsigned long pte_index)
{
        int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
        unsigned int penc;
        unsigned long rb = 0, va_low, sllp;
        unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

        if (v & HPTE_V_LARGE) {
                for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {

                        /* valid entries have a shift value */
                        if (!mmu_psize_defs[b_psize].shift)
                                continue;

                        a_psize = __hpte_actual_psize(lp, b_psize);
                        if (a_psize != -1)
                                break;
                }
        }
        /*
         * Ignore the top 14 bits of va.
         * v has its top two bits covering segment size, hence move
         * by 16 bits.  Also clear the lower HPTE_V_AVPN_SHIFT (7) bits;
         * the AVA field in v additionally has its lower 23 bits ignored.
         * For base page size 4K we need bits 14..65 (so we must collect
         * an extra 11 bits); for other sizes we need bits 14..14+i.
         */
        /* This covers bits 14..54 of va */
        rb = (v & ~0x7fUL) << 16;               /* AVA field */
        /*
         * AVA in v had its lower 23 bits cleared. We need to derive
         * those from the pteg index.
         */
        va_low = pte_index >> 3;
        if (v & HPTE_V_SECONDARY)
                va_low = ~va_low;
        /*
         * Get the vpn bits from va_low using the reverse of hashing.
         * In v we have va with 23 bits dropped and then left shifted
         * by HPTE_V_AVPN_SHIFT (7) bits. Now to find the vsid we need
         * to right shift it by (SID_SHIFT - (23 - 7)).
         */
        if (!(v & HPTE_V_1TB_SEG))
                va_low ^= v >> (SID_SHIFT - 16);
        else
                va_low ^= v >> (SID_SHIFT_1T - 16);
        va_low &= 0x7ff;

        switch (b_psize) {
        case MMU_PAGE_4K:
                sllp = ((mmu_psize_defs[a_psize].sllp & SLB_VSID_L) >> 6) |
                        ((mmu_psize_defs[a_psize].sllp & SLB_VSID_LP) >> 4);
                rb |= sllp << 5;                /* AP field */
                rb |= (va_low & 0x7ff) << 12;   /* remaining 11 bits of AVA */
                break;
        default:
        {
                int aval_shift;
                /*
                 * remaining bits of AVA/LP fields
                 * Also contain the rr bits of LP
                 */
                rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000;
                /*
                 * Now clear unneeded LP bits based on the actual psize
                 */
                rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
                /*
                 * AVAL field: bits 58..77 - base_page_shift of va.
                 * We only have space for bits 58..64; missing bits must
                 * be zero filled. +1 is to take care of the L bit shift.
                 */
                aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
                rb |= ((va_low << aval_shift) & 0xfe);

                rb |= 1;                /* L field */
                penc = mmu_psize_defs[b_psize].penc[a_psize];
                rb |= penc << 12;       /* LP field */
                break;
        }
        }
        /*
         * B field, set once with proper masking: an unmasked
         * v >> (62 - 8) would also smear AVPN bits into the low byte
         * of rb (the AP and L fields).
         */
        rb |= (v >> 54) & 0x300;                /* B field */
        return rb;
}
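
/*
 * Illustrative sketch (hypothetical caller; the real invalidation path
 * lives elsewhere in KVM): the value built here is the RB operand of a
 * tlbie used to flush the guest translation, very roughly:
 *
 *      rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
 *                            pte_index);
 *      asm volatile("ptesync" : : : "memory");
 *      // tlbie rb, lpid -- exact form and barriers vary by CPU
 *      asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 */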

static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
                                             bool is_base_size)
{
        int size, a_psize;
        /* Look at the 8 bit LP value */
        unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);

        /* only handle 4k, 64k and 16M pages for now */
        if (!(h & HPTE_V_LARGE))
                return 1ul << 12;
        else {
                for (size = 0; size < MMU_PAGE_COUNT; size++) {
                        /* valid entries have a shift value */
                        if (!mmu_psize_defs[size].shift)
                                continue;

                        a_psize = __hpte_actual_psize(lp, size);
                        if (a_psize != -1) {
                                if (is_base_size)
                                        return 1ul << mmu_psize_defs[size].shift;
                                return 1ul << mmu_psize_defs[a_psize].shift;
                        }
                }
        }
        return 0;
}

static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
{
        return __hpte_page_size(h, l, 0);
}

static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
{
        return __hpte_page_size(h, l, 1);
}
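
/*
 * Example (illustrative): an HPTE mapping a 16M actual page inside a 64K
 * base-page-size segment (mixed page size support) makes hpte_page_size()
 * return 1ul << 24 while hpte_base_page_size() returns 1ul << 16; when
 * base and actual sizes match the two agree, and a non-HPTE_V_LARGE entry
 * always yields 1ul << 12.
 */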

static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
{
        return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
}

static inline int hpte_is_writable(unsigned long ptel)
{
        unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP);

        return pp != PP_RXRX && pp != PP_RXXX;
}

static inline unsigned long hpte_make_readonly(unsigned long ptel)
{
        if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX)
                ptel = (ptel & ~HPTE_R_PP) | PP_RXXX;
        else
                ptel |= PP_RXRX;
        return ptel;
}

static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
{
        unsigned int wimg = ptel & HPTE_R_WIMG;

        /* Handle SAO */
        if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
            cpu_has_feature(CPU_FTR_ARCH_206))
                wimg = HPTE_R_M;

        if (!io_type)
                return wimg == HPTE_R_M;

        return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
}
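
/*
 * Example (illustrative): for ordinary RAM, io_type is 0 and only plain
 * cacheable WIMG (M set, W and I clear) is accepted; for MMIO, io_type
 * carries the required W/I combination, e.g. HPTE_R_I for cache-inhibited
 * mappings.  WIMG == W|I|M is the Strong Access Ordering (SAO) encoding
 * and is treated as plain cacheable on ISA 2.06 CPUs.
 */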

/*
 * If it's present and writable, atomically set the dirty and referenced
 * bits and return the PTE, otherwise return 0.  If we find a transparent
 * hugepage and it is marked splitting, we also return 0.
 */
static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
                                                 unsigned int hugepage)
{
        pte_t old_pte, new_pte = __pte(0);

        while (1) {
                old_pte = pte_val(*ptep);
                /*
                 * wait until _PAGE_BUSY is clear, then update atomically
                 */
                if (unlikely(old_pte & _PAGE_BUSY)) {
                        cpu_relax();
                        continue;
                }
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
                /* If hugepage and is trans splitting return None */
                if (unlikely(hugepage &&
                             pmd_trans_splitting(pte_pmd(old_pte))))
                        return __pte(0);
#endif
                /* If pte is not present return None */
                if (unlikely(!(old_pte & _PAGE_PRESENT)))
                        return __pte(0);

                new_pte = pte_mkyoung(old_pte);
                if (writing && pte_write(old_pte))
                        new_pte = pte_mkdirty(new_pte);

                if (old_pte == __cmpxchg_u64((unsigned long *)ptep, old_pte,
                                             new_pte))
                        break;
        }
        return new_pte;
}
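
/*
 * Illustrative note (assumed typical usage): a fault handler would call
 * this with the Linux page tables protected against teardown, e.g.
 *
 *      pte = kvmppc_read_update_linux_pte(ptep, writing, hugepage);
 *      if (!(pte_val(pte) & _PAGE_PRESENT))
 *              return -EFAULT;         // hypothetical error path
 *
 * The __cmpxchg_u64() loop retries whenever another CPU changes the PTE
 * between the read and the write-back.
 */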

/* Return HPTE cache control bits corresponding to Linux pte bits */
static inline unsigned long hpte_cache_bits(unsigned long pte_val)
{
#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
        return pte_val & (HPTE_R_W | HPTE_R_I);
#else
        return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
                ((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
#endif
}

static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
{
        if (key)
                return PP_RWRX <= pp && pp <= PP_RXRX;
        return 1;
}

static inline bool hpte_write_permission(unsigned long pp, unsigned long key)
{
        if (key)
                return pp == PP_RWRW;
        return pp <= PP_RWRW;
}

static inline int hpte_get_skey_perm(unsigned long hpte_r, unsigned long amr)
{
        unsigned long skey;

        skey = ((hpte_r & HPTE_R_KEY_HI) >> 57) |
                ((hpte_r & HPTE_R_KEY_LO) >> 9);
        return (amr >> (62 - 2 * skey)) & 3;
}
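
/*
 * Worked example (illustrative): each storage key gets a 2-bit field in
 * the AMR, with key 0 in the top two bits.  For skey == 0 the shift is
 * 62 - 0 = 62, so we return (amr >> 62) & 3; for skey == 31 we return the
 * bottom two bits.  In the returned pair, 0x2 denies stores and 0x1
 * denies loads (PowerISA AMR encoding).
 */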

static inline void lock_rmap(unsigned long *rmap)
{
        do {
                while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
                        cpu_relax();
        } while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
}

static inline void unlock_rmap(unsigned long *rmap)
{
        __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);
}
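
/*
 * Illustrative sketch (hypothetical caller): an rmap chain must be held
 * locked around traversal or modification, e.g.
 *
 *      lock_rmap(rmapp);
 *      ... walk or modify the rmap chain ...
 *      unlock_rmap(rmapp);
 *
 * test_and_set_bit_lock()/__clear_bit_unlock() provide acquire/release
 * semantics, so chain updates cannot leak outside the critical section.
 */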

static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
                                   unsigned long pagesize)
{
        unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;

        if (pagesize <= PAGE_SIZE)
                return 1;
        return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
}
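
/*
 * Worked example (illustration only): with a 4K PAGE_SIZE and a 16M
 * pagesize, mask = (16M >> 12) - 1 = 4095, so the memslot is aligned
 * only if both base_gfn and npages are multiples of 4096.
 */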

/*
 * This works for 4k, 64k and 16M pages on POWER7,
 * and 4k and 16M pages on PPC970.
 */
static inline unsigned long slb_pgsize_encoding(unsigned long psize)
{
        unsigned long senc = 0;

        if (psize > 0x1000) {
                senc = SLB_VSID_L;
                if (psize == 0x10000)
                        senc |= SLB_VSID_LP_01;
        }
        return senc;
}
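
/*
 * Example (read off the code above, for illustration): psize 0x1000 (4K)
 * encodes as 0 (L clear); psize 0x10000 (64K) as SLB_VSID_L |
 * SLB_VSID_LP_01; any other large size, e.g. 0x1000000 (16M), as
 * SLB_VSID_L alone.
 */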

static inline int is_vrma_hpte(unsigned long hpte_v)
{
        return (hpte_v & ~0xffffffUL) ==
                (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * Note modification of an HPTE; set the HPTE modified bit
 * if anyone is interested.
 */
static inline void note_hpte_modification(struct kvm *kvm,
                                          struct revmap_entry *rev)
{
        if (atomic_read(&kvm->arch.hpte_mod_interest))
                rev->guest_rpte |= HPTE_GR_MODIFIED;
}

/*
 * Like kvm_memslots(), but for use in real mode when we can't do
 * any RCU stuff (since the secondary threads are offline from the
 * kernel's point of view), and we can't print anything.
 * Thus we use rcu_dereference_raw() rather than rcu_dereference_check().
 */
static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
{
        return rcu_dereference_raw_notrace(kvm->memslots);
}
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

#endif /* __ASM_KVM_BOOK3S_64_H__ */