@@ -170,60 +170,6 @@ native_write_msr_safe(u32 msr, u32 low, u32 high)
extern int rdmsr_safe_regs(u32 regs[8]);
extern int wrmsr_safe_regs(u32 regs[8]);
-/**
- * rdtsc() - returns the current TSC without ordering constraints
- *
- * rdtsc() returns the result of RDTSC as a 64-bit integer. The
- * only ordering constraint it supplies is the ordering implied by
- * "asm volatile": it will put the RDTSC in the place you expect. The
- * CPU can and will speculatively execute that RDTSC, though, so the
- * results can be non-monotonic if compared on different CPUs.
- */
-static __always_inline u64 rdtsc(void)
-{
- DECLARE_ARGS(val, low, high);
-
- asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
-
- return EAX_EDX_VAL(val, low, high);
-}
-
-/**
- * rdtsc_ordered() - read the current TSC in program order
- *
- * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
- * It is ordered like a load to a global in-memory counter. It should
- * be impossible to observe non-monotonic rdtsc_unordered() behavior
- * across multiple CPUs as long as the TSC is synced.
- */
-static __always_inline u64 rdtsc_ordered(void)
-{
- DECLARE_ARGS(val, low, high);
-
- /*
- * The RDTSC instruction is not ordered relative to memory
- * access. The Intel SDM and the AMD APM are both vague on this
- * point, but empirically an RDTSC instruction can be
- * speculatively executed before prior loads. An RDTSC
- * immediately after an appropriate barrier appears to be
- * ordered as a normal load, that is, it provides the same
- * ordering guarantees as reading from a global memory location
- * that some other imaginary CPU is updating continuously with a
- * time stamp.
- *
- * Thus, use the preferred barrier on the respective CPU, aiming for
- * RDTSCP as the default.
- */
- asm volatile(ALTERNATIVE_2("rdtsc",
- "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC,
- "rdtscp", X86_FEATURE_RDTSCP)
- : EAX_EDX_RET(val, low, high)
- /* RDTSCP clobbers ECX with MSR_TSC_AUX. */
- :: "ecx");
-
- return EAX_EDX_VAL(val, low, high);
-}
-
static inline u64 native_read_pmc(int counter)
{
DECLARE_ARGS(val, low, high);
@@ -7,7 +7,81 @@
#include <asm/cpufeature.h>
#include <asm/processor.h>
-#include <asm/msr.h>
+
+/*
+ * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A"
+ * constraint has different meanings. For i386, "A" means exactly
+ * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead,
+ * it means rax *or* rdx.
+ */
+#ifdef CONFIG_X86_64
+/* Using 64-bit values saves one instruction clearing the high half of low */
+#define DECLARE_ARGS(val, low, high) unsigned long low, high
+#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32)
+#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
+#else
+#define DECLARE_ARGS(val, low, high) u64 val
+#define EAX_EDX_VAL(val, low, high) (val)
+#define EAX_EDX_RET(val, low, high) "=A" (val)
+#endif
+
+/**
+ * rdtsc() - returns the current TSC without ordering constraints
+ *
+ * rdtsc() returns the result of RDTSC as a 64-bit integer. The
+ * only ordering constraint it supplies is the ordering implied by
+ * "asm volatile": it will put the RDTSC in the place you expect. The
+ * CPU can and will speculatively execute that RDTSC, though, so the
+ * results can be non-monotonic if compared on different CPUs.
+ */
+static __always_inline u64 rdtsc(void)
+{
+ DECLARE_ARGS(val, low, high);
+
+ asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
+
+ return EAX_EDX_VAL(val, low, high);
+}
+
+/**
+ * rdtsc_ordered() - read the current TSC in program order
+ *
+ * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
+ * It is ordered like a load to a global in-memory counter. It should
+ * be impossible to observe non-monotonic rdtsc_unordered() behavior
+ * across multiple CPUs as long as the TSC is synced.
+ */
+static __always_inline u64 rdtsc_ordered(void)
+{
+ DECLARE_ARGS(val, low, high);
+
+ /*
+ * The RDTSC instruction is not ordered relative to memory
+ * access. The Intel SDM and the AMD APM are both vague on this
+ * point, but empirically an RDTSC instruction can be
+ * speculatively executed before prior loads. An RDTSC
+ * immediately after an appropriate barrier appears to be
+ * ordered as a normal load, that is, it provides the same
+ * ordering guarantees as reading from a global memory location
+ * that some other imaginary CPU is updating continuously with a
+ * time stamp.
+ *
+ * Thus, use the preferred barrier on the respective CPU, aiming for
+ * RDTSCP as the default.
+ */
+ asm volatile(ALTERNATIVE_2("rdtsc",
+ "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC,
+ "rdtscp", X86_FEATURE_RDTSCP)
+ : EAX_EDX_RET(val, low, high)
+ /* RDTSCP clobbers ECX with MSR_TSC_AUX. */
+ :: "ecx");
+
+ return EAX_EDX_VAL(val, low, high);
+}
+
+#undef DECLARE_ARGS
+#undef EAX_EDX_VAL
+#undef EAX_EDX_RET
/*
* Standard way to access the cycle counter.