Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf fix Time #142

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 89 additions & 62 deletions src/eckit/types/Time.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,64 @@
* does it submit to any jurisdiction.
*/

#include <regex>
#include <cctype>
#include <cmath>
#include <cstddef>
#include <regex>
#include <string>
#include <string_view>

#include "eckit/eckit.h"

#include "eckit/exception/Exceptions.h"
#include "eckit/persist/DumpLoad.h"
#include "eckit/types/Time.h"
#include "eckit/utils/Hash.h"
#include "eckit/utils/Tokenizer.h"

namespace {
static thread_local std::regex digits_time_("^-?[0-9]+$");
static thread_local std::regex float_hours_("^-?[0-9]*\\.[0-9]+$");
static thread_local std::regex hhmmss_("^([0-9]+):([0-5]?[0-9])(:[0-5]?[0-9])?$");
static thread_local std::regex ddhhmmss_("^-?([0-9]+[dD])?([0-9]+[hH])?([0-9]+[mM])?([0-9]+[sS])?$");
}

namespace eckit {

//----------------------------------------------------------------------------------------------------------------------

inline void printTime(std::ostream& s, long n) {
const std::regex hhmmss_("^([0-9]+):([0-5]?[0-9])(:[0-5]?[0-9])?$");
const std::regex ddhhmmss_("^-?([0-9]+[dD])?([0-9]+[hH])?([0-9]+[mM])?([0-9]+[sS])?$");

// DIGITS: "^-?[0-9]+$"
// FLOAT: "^-?[0-9]*\\.[0-9]+$"
enum class TimeFormat { UNKOWN, OTHER, DIGITS, DECIMAL };
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UNKNOWN is treated as erroneous input, it go stronger in the name and call it INVALID


TimeFormat checkTimeFormat(const std::string_view time) {
bool hasDigit = false;
bool hasDecimal = false;

const std::size_t start = (time[0] == '-') ? 1 : 0;

for (auto i = start; i < time.length(); i++) {
if (time[i] == '.') {
if (hasDecimal || i == time.length() - 1) { return TimeFormat::UNKOWN; }
hasDecimal = true;
} else if (isdigit(time[i]) == 0) {
return TimeFormat::OTHER;
} else {
hasDigit = true;
}
}
Comment on lines +40 to +49
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can be done with a single pass over the input string and then return the typeid of the found variant (pretty similar to what you are doing already) plus up to 5 tokens, 1 token for the sign and 4 for positive integers (day, hour, minute, second).

Subsequent code can then validate on this result, e.g. was the extended flag passed.

I am thinking along the lines of:

struct tokenized_time {
    FormatType type;
    std::string_view sign;
    std::string_view integers[4];
};


if (!hasDigit) { return TimeFormat::UNKOWN; }

return hasDecimal ? TimeFormat::DECIMAL : TimeFormat::DIGITS;
}

void printTime(std::ostream& s, long n) {
if (n < 10) {
s << '0';
}
s << n;
}

} // namespace

//----------------------------------------------------------------------------------------------------------------------

namespace eckit {

Time::Time(long seconds, bool extended) :
seconds_(static_cast<Second>(seconds)) {
if ((seconds >= 86400 && !extended) || seconds < 0) {
Expand All @@ -50,9 +80,10 @@ Time::Time(const std::string& s, bool extended) {
long mm = 0;
long hh = 0;
long dd = 0;
std::smatch m;

if (std::regex_match (s, m, digits_time_)) {

const auto format = checkTimeFormat(s);

if (format == TimeFormat::DIGITS) {
long t = std::stol(s);
int sign = (s[0] == '-' ? 1 : 0);
if (extended || s.length() <= 2+sign) { // cases: h, hh, (or hhh..h for step parsing)
Expand All @@ -67,60 +98,56 @@ Time::Time(const std::string& s, bool extended) {
ss = t % 100;
}
}
}
else {
if (std::regex_match (s, m, float_hours_)) {
long sec = std::round(std::stod(s)*3600);
hh = sec/3600;
sec -= hh*3600;
mm = sec/60;
sec -= mm*60;
ss = sec;
}
else {
if (std::regex_match (s, m, hhmmss_)) {
for (int i=1; i<m.size(); i++) {
if (m[i].matched) {
switch (i) {
case 1: hh = std::stol(m[i].str()); break;
case 2: mm = std::stol(m[i].str()); break;
case 3: std::string aux = m[i].str();
aux.erase(0,1);
ss = std::stol(aux); break;
}
} else if (format == TimeFormat::DECIMAL) {
long sec = std::round(std::stod(s) * 3600);
hh = sec / 3600;
sec -= hh * 3600;
mm = sec / 60;
sec -= mm * 60;
ss = sec;
} else if (format == TimeFormat::OTHER) {
std::smatch m;
if (std::regex_match(s, m, hhmmss_)) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still using a regex.

for (int i = 1; i < m.size(); i++) {
if (m[i].matched) {
switch (i) {
case 1: hh = std::stol(m[i].str()); break;
case 2: mm = std::stol(m[i].str()); break;
case 3:
std::string aux = m[i].str();
aux.erase(0, 1);
ss = std::stol(aux);
break;
}
}
}
else {
if (std::regex_match (s, m, ddhhmmss_)) {
for (int i=1; i<m.size(); i++) {
if (m[i].matched) {
std::string aux = m[i].str();
aux.pop_back();
long t = std::stol(aux);
switch (i) {
case 1: dd = t; break;
case 2: hh = t; break;
case 3: mm = t; break;
case 4: ss = t;
}
}
}
ss += 60 * (mm + 60 * (hh + 24 * dd));
if (s[0] == '-') {
ss = -ss;
} else if (std::regex_match(s, m, ddhhmmss_)) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still using a regex.

for (int i = 1; i < m.size(); i++) {
if (m[i].matched) {
std::string aux = m[i].str();
aux.pop_back();
long t = std::stol(aux);
switch (i) {
case 1: dd = t; break;
case 2: hh = t; break;
case 3: mm = t; break;
case 4: ss = t;
}
dd = ss / 86400;
hh = (ss / 3600) % 24;
mm = (ss / 60) % 60;
ss = ss % 60;
} else {
std::string msg = "Wrong input for time: ";
msg += s;
throw BadTime(msg);
}
}
ss += 60 * (mm + 60 * (hh + 24 * dd));
if (s[0] == '-') { ss = -ss; }
dd = ss / 86400;
hh = (ss / 3600) % 24;
mm = (ss / 60) % 60;
ss = ss % 60;
} else {
throw BadTime("Wrong input for time: " + s);
}
} else if (format == TimeFormat::UNKOWN) {
throw BadTime("Unkown format for time: " + s);
} else {
throw SeriousBug("Unhandled time format!");
Comment on lines +148 to +150
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is there a need for two different exceptions? I understand the intent of those exception types as follows:

  • BadTime is best described as a usage error, as in it was called as Time("ABCD")
  • SeriousBug represents an implementation error such as Time("20:11:24") not getting parsed properly.

Is that second throw giving any additional benefits?

}

if (mm >= 60 || ss >= 60 || (!extended && (hh >= 24 || dd > 0 || hh < 0 || mm < 0 || ss < 0))) {
Expand Down
Loading