Skip to content

Commit e45d13a

Browse files
author
Rao Meng
committed
upgrade TSCNS to 2.0: now tsc clock will be synchronized with system clock
1 parent 5c07952 commit e45d13a

File tree

2 files changed

+88
-56
lines changed

2 files changed

+88
-56
lines changed

fmtlog-inl.h

+1-5
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,7 @@ class fmtlogDetailT
420420
}
421421

422422
void poll(bool forceFlush) {
423+
fmtlogWrapper<>::impl.tscns.calibrate();
423424
int64_t tsc = fmtlogWrapper<>::impl.tscns.rdtsc();
424425
if (logInfos.size()) {
425426
std::unique_lock<std::mutex> lock(logInfoMutex);
@@ -630,10 +631,5 @@ void fmtlogT<_>::stopPollingThread() FMT_NOEXCEPT {
630631
fmtlogDetailWrapper<>::impl.stopPollingThread();
631632
}
632633

633-
template<int _>
634-
void fmtlogT<_>::setTscGhz(double tscGhz) FMT_NOEXCEPT {
635-
fmtlogWrapper<>::impl.tscns.init(tscGhz);
636-
}
637-
638634
template class fmtlogT<0>;
639635

fmtlog.h

+87-51
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,14 @@ SOFTWARE.
2727
#include <type_traits>
2828
#include <vector>
2929
#include <chrono>
30+
#include <atomic>
31+
#include <thread>
3032
#include <memory>
3133

34+
#ifdef _MSC_VER
35+
#include <intrin.h>
36+
#endif
37+
3238
#ifdef _WIN32
3339
#define FAST_THREAD_LOCAL thread_local
3440
#else
@@ -91,11 +97,6 @@ class fmtlogT
9197
OFF
9298
};
9399

94-
// If you know the exact tsc frequency(in ghz) in the os, tell fmtlog!
95-
// But how can I know the frequency? Check below link(for Linux only):
96-
// https://github.com/MengRao/tscns#i-dont-wanna-wait-a-long-time-for-calibration-can-i-cheat
97-
static void setTscGhz(double tscGhz) FMT_NOEXCEPT;
98-
99100
// Preallocate thread queue for current thread
100101
static void preallocate() FMT_NOEXCEPT;
101102

@@ -244,31 +245,31 @@ class fmtlogT
244245
class TSCNS
245246
{
246247
public:
247-
double init(double tsc_ghz = 0.0) {
248+
static const int64_t NsPerSec = 1000000000;
249+
250+
// Perform the initial calibration of the tsc -> ns conversion.
//   init_calibrate_ns      how long (in system-clock ns) to wait between the two
//                          sync samples used for the first estimate; default 20000000.
//   calibrate_interval_ns_ stored in calibate_interval_ns and used by saveParam() to
//                          schedule the next calibration; default 3 * NsPerSec.
// The first sample is passed to saveParam() with base_ns == sys_ns, so the recorded
// last_ns_err starts at zero.
void init(int64_t init_calibrate_ns = 20000000, int64_t calibrate_interval_ns_ = 3 * NsPerSec) {
251+
calibate_interval_ns = calibrate_interval_ns_;
252+
int64_t base_tsc, base_ns;
248253
syncTime(base_tsc, base_ns);
249-
if (tsc_ghz > 0) {
250-
tsc_ghz_inv = 1.0 / tsc_ghz;
251-
adjustOffset();
252-
return tsc_ghz;
253-
}
254-
else {
255-
#ifdef _WIN32
256-
return calibrate(1000000 *
257-
100); // wait more time as Windows' system time is in 100ns precision
258-
#else
259-
return calibrate(1000000 * 10); //
260-
#endif
261-
}
254+
int64_t expire_ns = base_ns + init_calibrate_ns;
255+
// Yield to other threads until the system clock has advanced by init_calibrate_ns.
while (rdsysns() < expire_ns) std::this_thread::yield();
256+
int64_t delayed_tsc, delayed_ns;
257+
syncTime(delayed_tsc, delayed_ns);
258+
// Initial slope: elapsed system-clock ns divided by elapsed tsc ticks between the two samples.
double init_ns_per_tsc = (double)(delayed_ns - base_ns) / (delayed_tsc - base_tsc);
259+
saveParam(base_tsc, base_ns, base_ns, init_ns_per_tsc);
262260
}
263261

264-
double calibrate(int64_t min_wait_ns) {
265-
int64_t delayed_tsc, delayed_ns;
266-
do {
267-
syncTime(delayed_tsc, delayed_ns);
268-
} while ((delayed_ns - base_ns) < min_wait_ns);
269-
tsc_ghz_inv = (double)(delayed_ns - base_ns) / (delayed_tsc - base_tsc);
270-
adjustOffset();
271-
return 1.0 / tsc_ghz_inv;
262+
// Re-tune ns_per_tsc against the system clock. Does nothing until the tsc counter
// has reached next_calibrate_tsc (scheduled by saveParam()).
void calibrate() {
263+
if (rdtsc() < next_calibrate_tsc) return;
264+
int64_t tsc, ns;
265+
syncTime(tsc, ns);
266+
// What the current parameters say the time is at this tsc sample.
int64_t calulated_ns = tsc2ns(tsc);
267+
// Positive when the converted clock is ahead of the system clock.
double ns_err = calulated_ns - ns;
268+
// Linear extrapolation: current error plus the error growth rate since the last
// calibration (per system-clock ns) times one calibration interval.
// NOTE(review): if ns == last_ns the divisor is zero and the result becomes inf/NaN,
// which would then be stored as the new ns_per_tsc - confirm this cannot happen.
double expected_err_at_next_calibration =
269+
ns_err + (ns_err - last_ns_err) / (ns - last_ns) * calibate_interval_ns;
270+
// Scale the slope so the extrapolated error would be cancelled over one interval.
double new_ns_per_tsc =
271+
ns_per_tsc * (1.0 - expected_err_at_next_calibration / calibate_interval_ns);
272+
// The new base point is (tsc, calulated_ns): the converted time, not the system time,
// is used as base_ns, while ns is recorded as the system-clock reading.
saveParam(tsc, calulated_ns, ns, new_ns_per_tsc);
272273
}
273274

274275
static inline int64_t rdtsc() {
@@ -281,46 +282,81 @@ class fmtlogT
281282
#endif
282283
}
283284

284-
inline int64_t tsc2ns(int64_t tsc) const { return ns_offset + (int64_t)(tsc * tsc_ghz_inv); }
285+
inline int64_t tsc2ns(int64_t tsc) const {
286+
while (true) {
287+
uint32_t before_seq = param_seq.load(std::memory_order_acquire) & ~1;
288+
int64_t ns = ns_offset + (int64_t)(tsc * ns_per_tsc);
289+
uint32_t after_seq = param_seq.load(std::memory_order_acquire);
290+
if (before_seq == after_seq) return ns;
291+
}
292+
}
285293

286294
// Current time in nanoseconds: read the tsc counter and convert it with tsc2ns().
inline int64_t rdns() const {
  const int64_t now_tsc = rdtsc();
  return tsc2ns(now_tsc);
}
287295

288-
static int64_t rdsysns() {
296+
// Read std::chrono::system_clock and return the time since its epoch in nanoseconds.
static inline int64_t rdsysns() {
289297
using namespace std::chrono;
290298
return duration_cast<nanoseconds>(system_clock::now().time_since_epoch()).count();
291299
}
292300

293-
// For checking purposes, see test.cc
294-
int64_t rdoffset() const { return ns_offset; }
301+
// Returns the reciprocal of the current ns_per_tsc factor (tsc ticks per nanosecond).
double getTscGhz() const {
  const double ticks_per_ns = 1.0 / ns_per_tsc;
  return ticks_per_ns;
}
295302

296-
private:
297-
// Linux kernel sync time by finding the first try with tsc diff < 50000
298-
// We do better: we find the try with the mininum tsc diff
299-
void syncTime(int64_t& tsc, int64_t& ns) {
300-
const int N = 10;
301-
int64_t tscs[N + 1];
302-
int64_t nses[N + 1];
303-
304-
tscs[0] = rdtsc();
303+
// Linux kernel sync time by finding the first trial with tsc diff < 50000
304+
// We try several times and return the one with the minimum tsc diff.
305+
// Note that MSVC has a 100ns resolution clock, so we need to combine those ns with the same
306+
// value, and drop the first and the last value as they may not scan a full 100ns range
307+
// Take a paired (tsc, system-clock ns) sample.
// Reads rdsysns() N times, each read bracketed by rdtsc() readings, then picks the
// read whose bracketing tsc values are closest together.
//   tsc_out  receives the midpoint of the two tsc readings around the chosen sample.
//   ns_out   receives the system-clock ns of the chosen sample.
static void syncTime(int64_t& tsc_out, int64_t& ns_out) {
308+
#ifdef _MSC_VER
309+
const int N = 15;
310+
#else
311+
const int N = 3;
312+
#endif
313+
int64_t tsc[N + 1];
314+
int64_t ns[N + 1];
315+
316+
tsc[0] = rdtsc();
305317
for (int i = 1; i <= N; i++) {
306-
nses[i] = rdsysns();
307-
tscs[i] = rdtsc();
318+
ns[i] = rdsysns();
319+
tsc[i] = rdtsc();
308320
}
309321

310-
int best = 1;
322+
// MSVC only: compact the arrays in place so that consecutive reads returning the
// same ns value are merged into one entry; j ends up as the loop bound used below.
#ifdef _MSC_VER
323+
int j = 1;
311324
for (int i = 2; i <= N; i++) {
312-
if (tscs[i] - tscs[i - 1] < tscs[best] - tscs[best - 1]) best = i;
325+
if (ns[i] == ns[i - 1]) continue;
326+
tsc[j - 1] = tsc[i - 1];
327+
ns[j++] = ns[i];
313328
}
314-
tsc = (tscs[best] + tscs[best - 1]) >> 1;
315-
ns = nses[best];
329+
j--;
330+
#else
331+
int j = N + 1;
332+
#endif
333+
334+
// Select the entry with the smallest gap between its bracketing tsc readings.
int best = 1;
335+
for (int i = 2; i < j; i++) {
336+
if (tsc[i] - tsc[i - 1] < tsc[best] - tsc[best - 1]) best = i;
337+
}
338+
tsc_out = (tsc[best] + tsc[best - 1]) >> 1;
339+
ns_out = ns[best];
316340
}
317341

318-
void adjustOffset() { ns_offset = base_ns - (int64_t)(base_tsc * tsc_ghz_inv); }
342+
// Publish a new set of calibration parameters.
//   base_tsc, base_ns  the point the new conversion line passes through:
//                      ns_offset is chosen so that base_tsc maps to base_ns.
//   sys_ns             the system-clock reading taken at base_tsc; stored in last_ns,
//                      and base_ns - sys_ns is stored in last_ns_err.
//   new_ns_per_tsc     the new slope (nanoseconds per tsc tick).
// Also schedules the next calibration one calibate_interval_ns (converted to tsc
// ticks with the new slope) after base_tsc.
//
// ns_per_tsc and ns_offset are read by tsc2ns() under a sequence-number check, so
// they are written between two increments of param_seq: the value is odd while the
// update is in progress and even again once it is complete. The other fields are
// written outside that window and are not read by tsc2ns().
void saveParam(int64_t base_tsc, int64_t base_ns, int64_t sys_ns, double new_ns_per_tsc) {
  last_ns = sys_ns;
  last_ns_err = base_ns - sys_ns;
  next_calibrate_tsc = base_tsc + static_cast<int64_t>(calibate_interval_ns / new_ns_per_tsc);

  uint32_t seq = param_seq.load(std::memory_order_relaxed);
  param_seq.store(++seq, std::memory_order_release);
  // A release store only orders the accesses that precede it. This fence keeps the
  // parameter writes below from becoming visible before the odd sequence value,
  // which would let a reader accept a half-updated pair.
  std::atomic_thread_fence(std::memory_order_release);
  ns_per_tsc = new_ns_per_tsc;
  ns_offset = base_ns - static_cast<int64_t>(base_tsc * ns_per_tsc);
  // Release ordering here makes both parameter writes visible before the even value.
  param_seq.store(++seq, std::memory_order_release);
}
319352

320-
alignas(64) double tsc_ghz_inv;
321-
int64_t ns_offset;
322-
int64_t base_tsc;
323-
int64_t base_ns;
353+
alignas(64) std::atomic<uint32_t> param_seq = 0;
354+
double ns_per_tsc = 1.0;
355+
int64_t ns_offset = 0;
356+
int64_t calibate_interval_ns;
357+
int64_t last_ns;
358+
double last_ns_err;
359+
int64_t next_calibrate_tsc;
324360
};
325361

326362
void init() {

0 commit comments

Comments
 (0)