/* include/linux/mm_types.h */
#ifndef _LINUX_MM_TYPES_H
#define _LINUX_MM_TYPES_H

#include <linux/auxvec.h>
#include <linux/types.h>
#include <linux/threads.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/page-debug-flags.h>
#include <linux/uprobes.h>
#include <linux/page-flags-layout.h>
#include <asm/page.h>
#include <asm/mmu.h>

#ifndef AT_VECTOR_SIZE_ARCH
#define AT_VECTOR_SIZE_ARCH 0
#endif
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
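
/*
 * Worked example (illustrative; the exact value depends on the build):
 * with AT_VECTOR_SIZE_BASE == 20 from <linux/auxvec.h> and no
 * arch-specific entries (AT_VECTOR_SIZE_ARCH == 0), this gives
 * 2 * (0 + 20 + 1) = 42 unsigned longs, i.e. room for 21 (key, value)
 * auxv pairs, including the terminating AT_NULL pair, in
 * mm->saved_auxv[] below.
 */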
struct address_space;

#define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
#define USE_SPLIT_PMD_PTLOCKS	(USE_SPLIT_PTE_PTLOCKS && \
		IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK))
#define ALLOC_SPLIT_PTLOCKS	(SPINLOCK_SIZE > BITS_PER_LONG/8)
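
/*
 * Illustrative evaluation (config-dependent, not part of the upstream
 * header): on a 64-bit build with NR_CPUS == 64 and the common
 * CONFIG_SPLIT_PTLOCK_CPUS == 4,
 *
 *	USE_SPLIT_PTE_PTLOCKS = (64 >= 4)			-> true
 *	USE_SPLIT_PMD_PTLOCKS = true && IS_ENABLED(...)		-> arch-dependent
 *	ALLOC_SPLIT_PTLOCKS   = (sizeof(spinlock_t) > 8)	-> usually false
 *
 * so each page-table page gets its own lock embedded in struct page
 * (the ptl field below).  ALLOC_SPLIT_PTLOCKS only becomes true when
 * lock debugging bloats spinlock_t past one word, in which case
 * page->ptl is a pointer to a separately allocated lock.
 */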
/*
 * Each physical page in the system has a struct page associated with
 * it to keep track of whatever it is we are using the page for at the
 * moment. Note that we have no way to track which tasks are using
 * a page, though if it is a pagecache page, rmap structures can tell us
 * who is mapping it.
 *
 * The objects in struct page are organized in double word blocks in
 * order to allow us to use atomic double word operations on portions
 * of struct page. That is currently only used by slub but the arrangement
 * allows the use of atomic double word operations on the flags/mapping
 * and lru list pointers also.
 */
struct page {
	/* First double word block */
	unsigned long flags;		/* Atomic flags, some possibly
					 * updated asynchronously */
	union {
		struct address_space *mapping;	/* If low bit clear, points to
						 * inode address_space, or NULL.
						 * If page mapped as anonymous
						 * memory, low bit is set, and
						 * it points to anon_vma object:
						 * see PAGE_MAPPING_ANON below.
						 */
		void *s_mem;			/* slab first object */
	};
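
	/*
	 * Illustrative sketch (assumed helper; the real test is PageAnon()
	 * with PAGE_MAPPING_ANON in <linux/mm.h>): callers decide how to
	 * interpret ->mapping by looking at its low bit, roughly:
	 *
	 *	static inline bool page_mapping_is_anon(struct page *page)
	 *	{
	 *		return (unsigned long)page->mapping & 1;
	 *	}
	 *
	 * For anonymous pages the bit is masked off to recover the
	 * struct anon_vma pointer; for pagecache pages ->mapping is the
	 * inode's address_space and is used directly.
	 */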
	/* Second double word */
	struct {
		union {
			pgoff_t index;		/* Our offset within mapping. */
			void *freelist;		/* sl[aou]b first free object */
			bool pfmemalloc;	/* If set by the page allocator,
						 * ALLOC_NO_WATERMARKS was set
						 * and the low watermark was not
						 * met implying that the system
						 * is under some pressure. The
						 * caller should try to ensure
						 * this page is only used to
						 * free other pages.
						 */
		};
		union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
			/* Used for cmpxchg_double in slub */
			unsigned long counters;
#else
			/*
			 * Keep _count separate from slub cmpxchg_double data.
			 * As the rest of the double word is protected by
			 * slab_lock but _count is not.
			 */
			unsigned counters;
#endif

			struct {
				union {
					/*
					 * Count of ptes mapped in
					 * mms, to show when page is
					 * mapped & limit reverse map
					 * searches.
					 *
					 * Used also for tail pages
					 * refcounting instead of
					 * _count. Tail pages cannot
					 * be mapped and keeping the
					 * tail page _count zero at
					 * all times guarantees
					 * get_page_unless_zero() will
					 * never succeed on tail
					 * pages.
					 */
					atomic_t _mapcount;

					struct { /* SLUB */
						unsigned inuse:16;
						unsigned objects:15;
						unsigned frozen:1;
					};
					int units;	/* SLOB */
				};
				atomic_t _count;	/* Usage count, see below. */
			};
			unsigned int active;	/* SLAB */
		};
	};
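
	/*
	 * Illustrative sketch (assumed example_ helpers; the real accessors
	 * live in <linux/mm.h>): _count is the page reference count, and
	 * _mapcount starts at -1, so "number of ptes mapping the page" is
	 * the stored value plus one:
	 *
	 *	static inline int example_page_count(struct page *page)
	 *	{
	 *		return atomic_read(&page->_count);
	 *	}
	 *
	 *	static inline int example_page_mapcount(struct page *page)
	 *	{
	 *		return atomic_read(&page->_mapcount) + 1;
	 *	}
	 */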
	/* Third double word block */
	union {
		struct list_head lru;	/* Pageout list, eg. active_list
					 * protected by zone->lru_lock !
					 * Can be used as a generic list
					 * by the page owner.
					 */
		struct {		/* slub per cpu partial pages */
			struct page *next;	/* Next partial slab */
#ifdef CONFIG_64BIT
			int pages;	/* Nr of partial slabs left */
			int pobjects;	/* Approximate # of objects */
#else
			short int pages;
			short int pobjects;
#endif
		};

		struct slab *slab_page; /* slab fields */
		struct rcu_head rcu_head;	/* Used by SLAB
						 * when destroying via RCU
						 */
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
		pgtable_t pmd_huge_pte; /* protected by page->ptl */
#endif
	};

	/* Remainder is not double word aligned */
	union {
		unsigned long private;		/* Mapping-private opaque data:
						 * usually used for buffer_heads
						 * if PagePrivate set; used for
						 * swp_entry_t if PageSwapCache;
						 * indicates order in the buddy
						 * system if PG_buddy is set.
						 */
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
		spinlock_t *ptl;
#else
		spinlock_t ptl;
#endif
#endif
		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
		struct page *first_page;	/* Compound tail pages */
	};

	/*
	 * On machines where all RAM is mapped into kernel address space,
	 * we can simply calculate the virtual address. On machines with
	 * highmem some memory is mapped into kernel virtual memory
	 * dynamically, so we need a place to store that address.
	 * Note that this field could be 16 bits on x86 ... ;)
	 *
	 * Architectures with slow multiplication can define
	 * WANT_PAGE_VIRTUAL in asm/page.h
	 */
#if defined(WANT_PAGE_VIRTUAL)
	void *virtual;			/* Kernel virtual address (NULL if
					   not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
	unsigned long debug_flags;	/* Use atomic bitops on this */
#endif

#ifdef CONFIG_KMEMCHECK
	/*
	 * kmemcheck wants to track the status of each byte in a page; this
	 * is a pointer to such a status block. NULL if not tracked.
	 */
	void *shadow;
#endif

#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
	int _last_cpupid;
#endif
}
/*
 * The struct page can be forced to be double word aligned so that atomic ops
 * on double words work. The SLUB allocator can make use of such a feature.
 */
#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
	__aligned(2 * sizeof(unsigned long))
#endif
;
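
/*
 * Illustrative sketch (not part of this header; names follow mm/slub.c
 * loosely): the double-word layout above is what lets SLUB swap a slab's
 * freelist and counters in one atomic operation, roughly:
 *
 *	cmpxchg_double(&page->freelist, &page->counters,
 *		       freelist_old, counters_old,
 *		       freelist_new, counters_new);
 *
 * This only works because freelist and counters occupy adjacent words
 * and struct page is 2 * sizeof(unsigned long) aligned when
 * CONFIG_HAVE_ALIGNED_STRUCT_PAGE is set.
 */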
struct page_frag {
	struct page *page;
#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
	__u32 offset;
	__u32 size;
#else
	__u16 offset;
	__u16 size;
#endif
};

typedef unsigned long __nocast vm_flags_t;

/*
 * A region containing a mapping of a non-memory backed file under NOMMU
 * conditions. These are held in a global tree and are pinned by the VMAs that
 * map parts of them.
 */
struct vm_region {
	struct rb_node	vm_rb;		/* link in global region tree */
	vm_flags_t	vm_flags;	/* VMA vm_flags */
	unsigned long	vm_start;	/* start address of region */
	unsigned long	vm_end;		/* region initialised to here */
	unsigned long	vm_top;		/* region allocated to here */
	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
	struct file	*vm_file;	/* the backing file or NULL */

	int		vm_usage;	/* region usage count (access under nommu_region_sem) */
	bool		vm_icache_flushed : 1; /* true if the icache has been
						* flushed for this region */
};
/*
 * This struct describes a virtual memory area. There is one of these
 * per VM-area/task. A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
	/* The first cache line has the info for VMA tree walking. */

	unsigned long vm_start;		/* Our start address within vm_mm. */
	unsigned long vm_end;		/* The first byte after our end address
					   within vm_mm. */

	/* linked list of VM areas per task, sorted by address */
	struct vm_area_struct *vm_next, *vm_prev;

	struct rb_node vm_rb;

	/*
	 * Largest free memory gap in bytes to the left of this VMA.
	 * Either between this VMA and vma->vm_prev, or between one of the
	 * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
	 * get_unmapped_area find a free area of the right size.
	 */
	unsigned long rb_subtree_gap;
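
	/*
	 * Worked example (illustrative): if vma->vm_prev ends at 0x8000 and
	 * this VMA starts at 0xa000, then
	 *
	 *	gap            = vm_start - vm_prev->vm_end = 0x2000
	 *	rb_subtree_gap = max(gap, children's rb_subtree_gap)
	 *
	 * so get_unmapped_area() can skip a whole subtree as soon as its
	 * rb_subtree_gap is smaller than the length being searched for.
	 */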
	/* Second cache line starts here. */

	struct mm_struct *vm_mm;	/* The address space we belong to. */
	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
	unsigned long vm_flags;		/* Flags, see mm.h. */

	/*
	 * For areas with an address space and backing store,
	 * linkage into the address_space->i_mmap interval tree, or
	 * linkage of vma in the address_space->i_mmap_nonlinear list.
	 *
	 * For private anonymous mappings, a pointer to a null terminated string
	 * in the user process containing the name given to the vma, or NULL
	 * if unnamed.
	 */
	union {
		struct {
			struct rb_node rb;
			unsigned long rb_subtree_last;
		} linear;
		struct list_head nonlinear;
		const char __user *anon_name;
	} shared;

	/*
	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
	 * list, after a COW of one of the file pages.  A MAP_SHARED vma
	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
	 * or brk vma (with NULL file) can only be in an anon_vma list.
	 */
	struct list_head anon_vma_chain; /* Serialized by mmap_sem &
					  * page_table_lock */
	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */

	/* Function pointers to deal with this struct. */
	const struct vm_operations_struct *vm_ops;

	/* Information about our backing store: */
	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
					   units, *not* PAGE_CACHE_SIZE */
	struct file * vm_file;		/* File we map to (can be NULL). */
	void * vm_private_data;		/* was vm_pte (shared mem) */

#ifndef CONFIG_MMU
	struct vm_region *vm_region;	/* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
#endif
};
struct core_thread {
	struct task_struct *task;
	struct core_thread *next;
};

struct core_state {
	atomic_t nr_threads;
	struct core_thread dumper;
	struct completion startup;
};

enum {
	MM_FILEPAGES,
	MM_ANONPAGES,
	MM_SWAPENTS,
	NR_MM_COUNTERS
};
#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU)
#define SPLIT_RSS_COUNTING
/* per-thread cached information */
struct task_rss_stat {
	int events;	/* for synchronization threshold */
	int count[NR_MM_COUNTERS];
};
#endif /* USE_SPLIT_PTE_PTLOCKS */

struct mm_rss_stat {
	atomic_long_t count[NR_MM_COUNTERS];
};
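
/*
 * Illustrative sketch (assumed shape; the real code and the exact
 * threshold live in mm/memory.c): with SPLIT_RSS_COUNTING each thread
 * batches updates in current->rss_stat and folds them into the shared
 * atomics only once enough events have accumulated, roughly:
 *
 *	static void example_add_mm_counter(struct mm_struct *mm,
 *					   int member, int val)
 *	{
 *		current->rss_stat.count[member] += val;
 *		if (++current->rss_stat.events >= 64)
 *			sync_mm_rss(mm);
 *	}
 *
 * This keeps hot page-fault paths off the atomic_long_t counters at the
 * cost of slightly stale per-mm totals.
 */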
struct kioctx_table;
struct mm_struct {
	struct vm_area_struct *mmap;		/* list of VMAs */
	struct rb_root mm_rb;
	u32 vmacache_seqnum;			/* per-thread vmacache */
#ifdef CONFIG_MMU
	unsigned long (*get_unmapped_area) (struct file *filp,
				unsigned long addr, unsigned long len,
				unsigned long pgoff, unsigned long flags);
#endif
	unsigned long mmap_base;		/* base of mmap area */
	unsigned long mmap_legacy_base;		/* base of mmap area in bottom-up allocations */
	unsigned long task_size;		/* size of task vm space */
	unsigned long highest_vm_end;		/* highest vma end address */
	pgd_t * pgd;
	atomic_t mm_users;			/* How many users with user space? */
	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
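
	/*
	 * Illustrative usage (assumed pattern): mm_users counts users of the
	 * address space (threads, get_task_mm() callers) and is dropped with
	 * mmput(); mm_count pins struct mm_struct itself, with all live
	 * mm_users collectively holding a single mm_count reference.
	 * Lazy-TLB style code that only needs the struct does roughly:
	 *
	 *	atomic_inc(&mm->mm_count);
	 *	...
	 *	mmdrop(mm);
	 *
	 * and the mm is freed only when mm_count reaches zero.
	 */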
	atomic_long_t nr_ptes;			/* Page table pages */
	int map_count;				/* number of VMAs */

	spinlock_t page_table_lock;		/* Protects page tables and some counters */
	struct rw_semaphore mmap_sem;

	struct list_head mmlist;		/* List of maybe swapped mm's.  These are globally strung
						 * together off init_mm.mmlist, and are protected
						 * by mmlist_lock
						 */

	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
	unsigned long hiwater_vm;	/* High-water virtual memory usage */

	unsigned long total_vm;		/* Total pages mapped */
	unsigned long locked_vm;	/* Pages that have PG_mlocked set */
	unsigned long pinned_vm;	/* Refcount permanently increased */
	unsigned long shared_vm;	/* Shared pages (files) */
	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
	unsigned long def_flags;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long start_brk, brk, start_stack;
	unsigned long arg_start, arg_end, env_start, env_end;

	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

	/*
	 * Special counters, in some configurations protected by the
	 * page_table_lock, in other configurations by being atomic.
	 */
	struct mm_rss_stat rss_stat;

	struct linux_binfmt *binfmt;

	cpumask_var_t cpu_vm_mask_var;

	/* Architecture-specific MM context */
	mm_context_t context;

	unsigned long flags; /* Must use atomic bitops to access the bits */

	struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_AIO
	spinlock_t			ioctx_lock;
	struct kioctx_table __rcu	*ioctx_table;
#endif
#ifdef CONFIG_MEMCG
	/*
	 * "owner" points to a task that is regarded as the canonical
	 * user/owner of this mm. All of the following must be true in
	 * order for it to be changed:
	 *
	 * current == mm->owner
	 * current->mm != mm
	 * new_owner->mm == mm
	 * new_owner->alloc_lock is held
	 */
	struct task_struct __rcu *owner;
#endif

	/* store ref to file /proc/<pid>/exe symlink points to */
	struct file *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
	struct mmu_notifier_mm *mmu_notifier_mm;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
	struct cpumask cpumask_allocation;
#endif
#ifdef CONFIG_NUMA_BALANCING
	/*
	 * numa_next_scan is the next time that the PTEs will be marked
	 * pte_numa. NUMA hinting faults will gather statistics and migrate
	 * pages to new nodes if necessary.
	 */
	unsigned long numa_next_scan;

	/* Restart point for scanning and setting pte_numa */
	unsigned long numa_scan_offset;

	/* numa_scan_seq prevents two threads setting pte_numa */
	int numa_scan_seq;
#endif
#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
	/*
	 * An operation with batched TLB flushing is going on. Anything that
	 * can move process memory needs to flush the TLB when moving a
	 * PROT_NONE or PROT_NUMA mapped page.
	 */
	bool tlb_flush_pending;
#endif
	struct uprobes_state uprobes_state;
};
static inline void mm_init_cpumask(struct mm_struct *mm)
{
#ifdef CONFIG_CPUMASK_OFFSTACK
	mm->cpu_vm_mask_var = &mm->cpumask_allocation;
#endif
	cpumask_clear(mm->cpu_vm_mask_var);
}

/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
{
	return mm->cpu_vm_mask_var;
}
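
/*
 * Illustrative usage (assumed caller; arch context-switch code does
 * something close to this): the accessor hides whether the mask is
 * inline or allocated off-stack, so callers simply do
 *
 *	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next_mm));
 *
 * when switching to next_mm, and later test/clear bits in the same mask
 * when deciding which CPUs need a TLB shootdown.
 */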
#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
/*
 * Memory barriers to keep this state in sync are graciously provided by
 * the page table locks, outside of which no page table modifications happen.
 * The barriers below prevent the compiler from re-ordering the instructions
 * around the memory barriers that are already present in the code.
 */
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
{
	barrier();
	return mm->tlb_flush_pending;
}
static inline void set_tlb_flush_pending(struct mm_struct *mm)
{
	mm->tlb_flush_pending = true;

	/*
	 * Guarantee that the tlb_flush_pending store does not leak into the
	 * critical section updating the page tables
	 */
	smp_mb__before_spinlock();
}
/* Clearing is done after a TLB flush, which also provides a barrier. */
static inline void clear_tlb_flush_pending(struct mm_struct *mm)
{
	barrier();
	mm->tlb_flush_pending = false;
}
#else
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
{
	return false;
}
static inline void set_tlb_flush_pending(struct mm_struct *mm)
{
}
static inline void clear_tlb_flush_pending(struct mm_struct *mm)
{
}
#endif
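
/*
 * Illustrative usage pattern (assumed caller, modelled loosely on
 * mprotect-style code): the helpers above bracket a batched PTE update
 * so rmap walkers can tell that a flush is still outstanding:
 *
 *	set_tlb_flush_pending(mm);
 *	... modify PTEs under the page table lock ...
 *	flush_tlb_range(vma, start, end);
 *	clear_tlb_flush_pending(mm);
 *
 * Readers call mm_tlb_flush_pending() under the same page table lock
 * and, if it returns true, must assume other CPUs may still hold stale
 * TLB entries for the range.
 */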
struct vm_special_mapping
{
	const char *name;
	struct page **pages;
};

enum tlb_flush_reason {
	TLB_FLUSH_ON_TASK_SWITCH,
	TLB_REMOTE_SHOOTDOWN,
	TLB_LOCAL_SHOOTDOWN,
	TLB_LOCAL_MM_SHOOTDOWN,
	NR_TLB_FLUSH_REASONS,
};
/* Return the name for an anonymous mapping or NULL for a file-backed mapping */
static inline const char __user *vma_get_anon_name(struct vm_area_struct *vma)
{
	if (vma->vm_file)
		return NULL;

	return vma->shared.anon_name;
}
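
/*
 * Illustrative note (Android-specific extension; interface details
 * assumed): the name returned here is the string a task attached to its
 * own anonymous mapping, e.g. via
 *
 *	prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, start, len,
 *	      (unsigned long)"myheap");
 *
 * and /proc/<pid>/maps then shows the region as "[anon:myheap]".
 * Because the pointer is __user memory, callers must copy the string
 * with the usual user-access helpers before printing it.
 */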
#endif /* _LINUX_MM_TYPES_H */