@@ -27,8 +27,14 @@ SOFTWARE.
27
27
#include < type_traits>
28
28
#include < vector>
29
29
#include < chrono>
30
+ #include < atomic>
31
+ #include < thread>
30
32
#include < memory>
31
33
34
+ #ifdef _MSC_VER
35
+ #include < intrin.h>
36
+ #endif
37
+
32
38
#ifdef _WIN32
33
39
#define FAST_THREAD_LOCAL thread_local
34
40
#else
@@ -91,11 +97,6 @@ class fmtlogT
91
97
OFF
92
98
};
93
99
94
- // If you know the exact tsc frequency(in ghz) in the os, tell fmtlog!
95
- // But how can I know the frequency? Check below link(for Linux only):
96
- // https://github.com/MengRao/tscns#i-dont-wanna-wait-a-long-time-for-calibration-can-i-cheat
97
- static void setTscGhz (double tscGhz) FMT_NOEXCEPT;
98
-
99
100
// Preallocate thread queue for current thread
100
101
static void preallocate () FMT_NOEXCEPT;
101
102
@@ -244,31 +245,31 @@ class fmtlogT
244
245
class TSCNS
245
246
{
246
247
public:
247
- double init (double tsc_ghz = 0.0 ) {
248
+ static const int64_t NsPerSec = 1000000000 ;
249
+
250
+ void init (int64_t init_calibrate_ns = 20000000 , int64_t calibrate_interval_ns_ = 3 * NsPerSec) {
251
+ calibate_interval_ns = calibrate_interval_ns_;
252
+ int64_t base_tsc, base_ns;
248
253
syncTime (base_tsc, base_ns);
249
- if (tsc_ghz > 0 ) {
250
- tsc_ghz_inv = 1.0 / tsc_ghz;
251
- adjustOffset ();
252
- return tsc_ghz;
253
- }
254
- else {
255
- #ifdef _WIN32
256
- return calibrate (1000000 *
257
- 100 ); // wait more time as Windows' system time is in 100ns precision
258
- #else
259
- return calibrate (1000000 * 10 ); //
260
- #endif
261
- }
254
+ int64_t expire_ns = base_ns + init_calibrate_ns;
255
+ while (rdsysns () < expire_ns) std::this_thread::yield ();
256
+ int64_t delayed_tsc, delayed_ns;
257
+ syncTime (delayed_tsc, delayed_ns);
258
+ double init_ns_per_tsc = (double )(delayed_ns - base_ns) / (delayed_tsc - base_tsc);
259
+ saveParam (base_tsc, base_ns, base_ns, init_ns_per_tsc);
262
260
}
263
261
264
- double calibrate (int64_t min_wait_ns) {
265
- int64_t delayed_tsc, delayed_ns;
266
- do {
267
- syncTime (delayed_tsc, delayed_ns);
268
- } while ((delayed_ns - base_ns) < min_wait_ns);
269
- tsc_ghz_inv = (double )(delayed_ns - base_ns) / (delayed_tsc - base_tsc);
270
- adjustOffset ();
271
- return 1.0 / tsc_ghz_inv;
262
+ void calibrate () {
263
+ if (rdtsc () < next_calibrate_tsc) return ;
264
+ int64_t tsc, ns;
265
+ syncTime (tsc, ns);
266
+ int64_t calulated_ns = tsc2ns (tsc);
267
+ double ns_err = calulated_ns - ns;
268
+ double expected_err_at_next_calibration =
269
+ ns_err + (ns_err - last_ns_err) / (ns - last_ns) * calibate_interval_ns;
270
+ double new_ns_per_tsc =
271
+ ns_per_tsc * (1.0 - expected_err_at_next_calibration / calibate_interval_ns);
272
+ saveParam (tsc, calulated_ns, ns, new_ns_per_tsc);
272
273
}
273
274
274
275
static inline int64_t rdtsc () {
@@ -281,46 +282,81 @@ class fmtlogT
281
282
#endif
282
283
}
283
284
284
- inline int64_t tsc2ns (int64_t tsc) const { return ns_offset + (int64_t )(tsc * tsc_ghz_inv); }
285
+ inline int64_t tsc2ns (int64_t tsc) const {
286
+ while (true ) {
287
+ uint32_t before_seq = param_seq.load (std::memory_order_acquire) & ~1 ;
288
+ int64_t ns = ns_offset + (int64_t )(tsc * ns_per_tsc);
289
+ uint32_t after_seq = param_seq.load (std::memory_order_acquire);
290
+ if (before_seq == after_seq) return ns;
291
+ }
292
+ }
285
293
286
294
// Current time in nanoseconds: read the tsc and convert it with tsc2ns().
inline int64_t rdns() const {
  const int64_t now_tsc = rdtsc();
  return tsc2ns(now_tsc);
}
287
295
288
// Read std::chrono::system_clock and return the time since its epoch in nanoseconds.
static inline int64_t rdsysns() {
  using namespace std::chrono;
  return duration_cast<nanoseconds>(system_clock::now().time_since_epoch()).count();
}
292
300
293
- // For checking purposes, see test.cc
294
- int64_t rdoffset () const { return ns_offset; }
301
+ double getTscGhz () const { return 1.0 / ns_per_tsc; }
295
302
296
- private:
297
- // Linux kernel sync time by finding the first try with tsc diff < 50000
298
- // We do better: we find the try with the mininum tsc diff
299
- void syncTime (int64_t & tsc, int64_t & ns) {
300
- const int N = 10 ;
301
- int64_t tscs[N + 1 ];
302
- int64_t nses[N + 1 ];
303
-
304
- tscs[0 ] = rdtsc ();
303
+ // Linux kernel sync time by finding the first trial with tsc diff < 50000
304
+ // We try several times and return the one with the mininum tsc diff.
305
+ // Note that MSVC has a 100ns resolution clock, so we need to combine those ns with the same
306
+ // value, and drop the first and the last value as they may not scan a full 100ns range
307
+ static void syncTime (int64_t & tsc_out, int64_t & ns_out) {
308
+ #ifdef _MSC_VER
309
+ const int N = 15 ;
310
+ #else
311
+ const int N = 3 ;
312
+ #endif
313
+ int64_t tsc[N + 1 ];
314
+ int64_t ns[N + 1 ];
315
+
316
+ tsc[0 ] = rdtsc ();
305
317
for (int i = 1 ; i <= N; i++) {
306
- nses [i] = rdsysns ();
307
- tscs [i] = rdtsc ();
318
+ ns [i] = rdsysns ();
319
+ tsc [i] = rdtsc ();
308
320
}
309
321
310
- int best = 1 ;
322
+ #ifdef _MSC_VER
323
+ int j = 1 ;
311
324
for (int i = 2 ; i <= N; i++) {
312
- if (tscs[i] - tscs[i - 1 ] < tscs[best] - tscs[best - 1 ]) best = i;
325
+ if (ns[i] == ns[i - 1 ]) continue ;
326
+ tsc[j - 1 ] = tsc[i - 1 ];
327
+ ns[j++] = ns[i];
313
328
}
314
- tsc = (tscs[best] + tscs[best - 1 ]) >> 1 ;
315
- ns = nses[best];
329
+ j--;
330
+ #else
331
+ int j = N + 1 ;
332
+ #endif
333
+
334
+ int best = 1 ;
335
+ for (int i = 2 ; i < j; i++) {
336
+ if (tsc[i] - tsc[i - 1 ] < tsc[best] - tsc[best - 1 ]) best = i;
337
+ }
338
+ tsc_out = (tsc[best] + tsc[best - 1 ]) >> 1 ;
339
+ ns_out = ns[best];
316
340
}
317
341
318
- void adjustOffset () { ns_offset = base_ns - (int64_t )(base_tsc * tsc_ghz_inv); }
342
+ void saveParam (int64_t base_tsc, int64_t base_ns, int64_t sys_ns, double new_ns_per_tsc) {
343
+ last_ns = sys_ns;
344
+ last_ns_err = base_ns - sys_ns;
345
+ next_calibrate_tsc = base_tsc + (int64_t )(calibate_interval_ns / new_ns_per_tsc);
346
+ uint32_t seq = param_seq.load (std::memory_order_relaxed);
347
+ param_seq.store (++seq, std::memory_order_release);
348
+ ns_per_tsc = new_ns_per_tsc;
349
+ ns_offset = base_ns - (int64_t )(base_tsc * ns_per_tsc);
350
+ param_seq.store (++seq, std::memory_order_release);
351
+ }
319
352
320
// Sequence counter guarding ns_per_tsc / ns_offset: saveParam() makes it odd while the
// pair is being rewritten and even again afterwards. Brace-initialised because
// `std::atomic<T> x = 0` is copy-initialisation, which is ill-formed before C++17.
alignas(64) std::atomic<uint32_t> param_seq{0};
double ns_per_tsc = 1.0;         // conversion slope: nanoseconds per tsc tick
int64_t ns_offset = 0;           // conversion intercept: ns = ns_offset + tsc * ns_per_tsc
// The members below are assigned by init()/saveParam(); they are zero-initialised so
// that a calibrate() call made before init() does not read indeterminate values.
int64_t calibate_interval_ns = 0;  // interval used to schedule the next calibration
int64_t last_ns = 0;               // system-clock time of the last saved sample
double last_ns_err = 0.0;          // base_ns - sys_ns recorded by the last saveParam()
int64_t next_calibrate_tsc = 0;    // calibrate() does nothing while rdtsc() is below this
324
360
};
325
361
326
362
void init () {
0 commit comments