barrier.h 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. #ifndef _ASM_X86_BARRIER_H
  2. #define _ASM_X86_BARRIER_H
  3. #include <asm/alternative.h>
  4. #include <asm/nops.h>
  5. /*
  6. * Force strict CPU ordering.
  7. * And yes, this is required on UP too when we're talking
  8. * to devices.
  9. */
  10. #ifdef CONFIG_X86_32
  11. /*
  12. * Some non-Intel clones support out of order store. wmb() ceases to be a
  13. * nop for these.
  14. */
  15. #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
  16. #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
  17. #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
  18. #else
  19. #define mb() asm volatile("mfence":::"memory")
  20. #define rmb() asm volatile("lfence":::"memory")
  21. #define wmb() asm volatile("sfence" ::: "memory")
  22. #endif
  23. /**
  24. * read_barrier_depends - Flush all pending reads that subsequents reads
  25. * depend on.
  26. *
  27. * No data-dependent reads from memory-like regions are ever reordered
  28. * over this barrier. All reads preceding this primitive are guaranteed
  29. * to access memory (but not necessarily other CPUs' caches) before any
  30. * reads following this primitive that depend on the data return by
  31. * any of the preceding reads. This primitive is much lighter weight than
  32. * rmb() on most CPUs, and is never heavier weight than is
  33. * rmb().
  34. *
  35. * These ordering constraints are respected by both the local CPU
  36. * and the compiler.
  37. *
  38. * Ordering is not guaranteed by anything other than these primitives,
  39. * not even by data dependencies. See the documentation for
  40. * memory_barrier() for examples and URLs to more information.
  41. *
  42. * For example, the following code would force ordering (the initial
  43. * value of "a" is zero, "b" is one, and "p" is "&a"):
  44. *
  45. * <programlisting>
  46. * CPU 0 CPU 1
  47. *
  48. * b = 2;
  49. * memory_barrier();
  50. * p = &b; q = p;
  51. * read_barrier_depends();
  52. * d = *q;
  53. * </programlisting>
  54. *
  55. * because the read of "*q" depends on the read of "p" and these
  56. * two reads are separated by a read_barrier_depends(). However,
  57. * the following code, with the same initial values for "a" and "b":
  58. *
  59. * <programlisting>
  60. * CPU 0 CPU 1
  61. *
  62. * a = 2;
  63. * memory_barrier();
  64. * b = 3; y = b;
  65. * read_barrier_depends();
  66. * x = a;
  67. * </programlisting>
  68. *
  69. * does not enforce ordering, since there is no data dependency between
  70. * the read of "a" and the read of "b". Therefore, on some CPUs, such
  71. * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
  72. * in cases like this where there are no data dependencies.
  73. **/
  74. #define read_barrier_depends() do { } while (0)
  75. #ifdef CONFIG_SMP
  76. #define smp_mb() mb()
  77. #ifdef CONFIG_X86_PPRO_FENCE
  78. # define smp_rmb() rmb()
  79. #else
  80. # define smp_rmb() barrier()
  81. #endif
  82. #define smp_wmb() barrier()
  83. #define smp_read_barrier_depends() read_barrier_depends()
  84. #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
  85. #else /* !SMP */
  86. #define smp_mb() barrier()
  87. #define smp_rmb() barrier()
  88. #define smp_wmb() barrier()
  89. #define smp_read_barrier_depends() do { } while (0)
  90. #define set_mb(var, value) do { var = value; barrier(); } while (0)
  91. #endif /* SMP */
  92. #if defined(CONFIG_X86_PPRO_FENCE)
  93. /*
  94. * For this option x86 doesn't have a strong TSO memory
  95. * model and we should fall back to full barriers.
  96. */
  97. #define smp_store_release(p, v) \
  98. do { \
  99. compiletime_assert_atomic_type(*p); \
  100. smp_mb(); \
  101. ACCESS_ONCE(*p) = (v); \
  102. } while (0)
  103. #define smp_load_acquire(p) \
  104. ({ \
  105. typeof(*p) ___p1 = ACCESS_ONCE(*p); \
  106. compiletime_assert_atomic_type(*p); \
  107. smp_mb(); \
  108. ___p1; \
  109. })
  110. #else /* regular x86 TSO memory ordering */
  111. #define smp_store_release(p, v) \
  112. do { \
  113. compiletime_assert_atomic_type(*p); \
  114. barrier(); \
  115. ACCESS_ONCE(*p) = (v); \
  116. } while (0)
  117. #define smp_load_acquire(p) \
  118. ({ \
  119. typeof(*p) ___p1 = ACCESS_ONCE(*p); \
  120. compiletime_assert_atomic_type(*p); \
  121. barrier(); \
  122. ___p1; \
  123. })
  124. #endif
  125. /* Atomic operations are already serializing on x86 */
  126. #define smp_mb__before_atomic() barrier()
  127. #define smp_mb__after_atomic() barrier()
  128. /*
  129. * Stop RDTSC speculation. This is needed when you need to use RDTSC
  130. * (or get_cycles or vread that possibly accesses the TSC) in a defined
  131. * code region.
  132. *
  133. * (Could use an alternative three way for this if there was one.)
  134. */
  135. static __always_inline void rdtsc_barrier(void)
  136. {
  137. alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
  138. alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
  139. }
  140. #endif /* _ASM_X86_BARRIER_H */