Skip to content

Commit 6b855f6

Browse files
committed
initial datetime support
1 parent 9255cd3 commit 6b855f6

File tree

2 files changed

+49
-21
lines changed

2 files changed

+49
-21
lines changed

src/shogun/io/ARFFFile.cpp

+46-13
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include <shogun/io/ARFFFile.h>
88
#include <shogun/mathematics/linalg/LinalgNamespace.h>
99

10-
#include <iostream>
10+
#include <ctime>
1111

1212
using namespace shogun;
1313
using namespace shogun::arff_detail;
@@ -55,7 +55,8 @@ void ARFFDeserializer::read()
5555
// store attribute name and type
5656
std::string name;
5757
std::string type;
58-
auto inner_string = m_current_line.substr(strlen(m_attribute_string));
58+
auto inner_string =
59+
m_current_line.substr(strlen(m_attribute_string));
5960
left_trim(inner_string);
6061
auto it = inner_string.begin();
6162
while (it != inner_string.end())
@@ -71,14 +72,16 @@ void ARFFDeserializer::read()
7172
}
7273
if (it == inner_string.end())
7374
SG_SERROR(
74-
"Could not split attibute name and type on line %d: \"%s\".\n",
75+
"Could not split attibute name and type on line %d: "
76+
"\"%s\".\n",
7577
m_line_number, m_current_line.c_str())
7678
// check if it is nominal
7779
if (type[0] == '{')
7880
{
7981
std::vector<std::string> attributes;
8082
// split nominal values: "{A, B, C}" to vector{A, B, C}
81-
split(type.substr(1, type.size() - 2), ", ", true,
83+
split(
84+
type.substr(1, type.size() - 2), ", ", true,
8285
std::back_inserter(attributes));
8386
m_nominal_attributes.emplace_back(
8487
std::make_pair(name, attributes));
@@ -91,23 +94,24 @@ void ARFFDeserializer::read()
9194
{
9295
std::vector<std::string> date_elements;
9396
// split "date [[date-format]]" or "name date [[date-format]]"
94-
split(type, " ", true,
95-
std::back_inserter(date_elements));
96-
if (date_elements[0]=="date" && date_elements.size() < 3)
97+
split(type, " ", true, std::back_inserter(date_elements));
98+
if (date_elements[0] == "date" && date_elements.size() < 3)
9799
{
98100
// @attribute date [[date-format]]
99101
if (type.size() == 1)
100102
m_date_formats.emplace_back(m_default_date_format);
101103
else
102-
m_date_formats.push_back(javatime_to_cpptime(date_elements[1]));
104+
m_date_formats.push_back(
105+
javatime_to_cpptime(date_elements[1]));
103106
}
104-
else if (date_elements[1]=="date" && date_elements.size() < 4)
107+
else if (date_elements[1] == "date" && date_elements.size() < 4)
105108
{
106109
// @attribute name date [[date-format]]
107110
if (date_elements.size() == 2)
108111
m_date_formats.emplace_back(m_default_date_format);
109112
else
110-
m_date_formats.push_back(javatime_to_cpptime(date_elements[2]));
113+
m_date_formats.push_back(
114+
javatime_to_cpptime(date_elements[2]));
111115
}
112116
else
113117
{
@@ -172,12 +176,13 @@ void ARFFDeserializer::read()
172176
{
173177
return;
174178
}
175-
// assumes that until EOF we should expect tabular data to be parsed
179+
// assumes that every remaining line until EOF holds comma-delimited values
176180
else
177181
{
178182
std::vector<std::string> elems;
179183
split(m_current_line, ",", true, std::back_inserter(elems));
180184
auto nominal_pos = m_nominal_attributes.begin();
185+
auto date_pos = m_date_formats.begin();
181186
for (int i = 0; i < elems.size(); ++i)
182187
{
183188
Attribute type = m_attributes[i];
@@ -194,7 +199,8 @@ void ARFFDeserializer::read()
194199
catch (const std::invalid_argument&)
195200
{
196201
SG_SERROR(
197-
"Failed to covert \"%s\" to numeric.\n", elems[i].c_str())
202+
"Failed to covert \"%s\" to numeric.\n",
203+
elems[i].c_str())
198204
}
199205
}
200206
break;
@@ -219,7 +225,34 @@ void ARFFDeserializer::read()
219225
}
220226
break;
221227
case (Attribute::Date):
222-
SG_SERROR("Date parsing not implemented.\n")
228+
{
229+
tm t{};
230+
if (date_pos == m_date_formats.end())
231+
SG_SERROR(
232+
"Unexpected date value \"%s\" on line "
233+
"%d.\n",
234+
elems[i].c_str(), m_line_number);
235+
if (strptime(elems[i].c_str(), (*date_pos).c_str(), &t))
236+
{
237+
auto value_timestamp = std::mktime(&t);
238+
if (value_timestamp == -1)
239+
SG_SERROR(
240+
"Error creating timestamp with \"%s\" with "
241+
"date format \"%s\" on line %d.\n",
242+
elems[i].c_str(), (*date_pos).c_str(),
243+
m_line_number)
244+
else
245+
m_data.emplace_back(value_timestamp);
246+
}
247+
else
248+
SG_SERROR(
249+
"Error parsing date \"%s\" with date format \"%s\" "
250+
"on line %d.\n",
251+
elems[i].c_str(), (*date_pos).c_str(),
252+
m_line_number)
253+
++date_pos;
254+
}
255+
break;
223256
case (Attribute::String):
224257
SG_SERROR("String parsing not implemented.\n")
225258
}

src/shogun/io/ARFFFile.h

+3-8
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ namespace shogun
329329
"have the right permissions to open it.\n",
330330
filename.c_str())
331331
}
332-
m_stream = static_cast<std::istream*>(file_stream);
332+
m_stream = std::unique_ptr<std::istream>(static_cast<std::istream*>(file_stream));
333333
}
334334

335335
/**
@@ -341,12 +341,7 @@ namespace shogun
341341
*/
342342
explicit ARFFDeserializer(std::istream stream)
343343
{
344-
m_stream = &stream;
345-
}
346-
347-
~ARFFDeserializer()
348-
{
349-
delete m_stream;
344+
m_stream = std::unique_ptr<std::istream>(&stream);
350345
}
351346

352347
/**
@@ -449,7 +444,7 @@ namespace shogun
449444
/** the string after m_relation_string*/
450445
std::string m_relation;
451446
/** the input stream */
452-
std::istream* m_stream;
447+
std::unique_ptr<std::istream> m_stream;
453448
/** the string where comments are stored */
454449
std::vector<std::string> m_comments;
455450
/** the string representing the current line being parsed */

0 commit comments

Comments
 (0)