7
7
#include < shogun/io/ARFFFile.h>
8
8
#include < shogun/mathematics/linalg/LinalgNamespace.h>
9
9
10
- #include < iostream >
10
+ #include < ctime >
11
11
12
12
using namespace shogun ;
13
13
using namespace shogun ::arff_detail;
@@ -55,7 +55,8 @@ void ARFFDeserializer::read()
55
55
// store attribute name and type
56
56
std::string name;
57
57
std::string type;
58
- auto inner_string = m_current_line.substr (strlen (m_attribute_string));
58
+ auto inner_string =
59
+ m_current_line.substr (strlen (m_attribute_string));
59
60
left_trim (inner_string);
60
61
auto it = inner_string.begin ();
61
62
while (it != inner_string.end ())
@@ -71,14 +72,16 @@ void ARFFDeserializer::read()
71
72
}
72
73
if (it == inner_string.end ())
73
74
SG_SERROR (
74
- " Could not split attibute name and type on line %d: \" %s\" .\n " ,
75
+ " Could not split attibute name and type on line %d: "
76
+ " \" %s\" .\n " ,
75
77
m_line_number, m_current_line.c_str ())
76
78
// check if it is nominal
77
79
if (type[0 ] == ' {' )
78
80
{
79
81
std::vector<std::string> attributes;
80
82
// split nominal values: "{A, B, C}" to vector{A, B, C}
81
- split (type.substr (1 , type.size () - 2 ), " , " , true ,
83
+ split (
84
+ type.substr (1 , type.size () - 2 ), " , " , true ,
82
85
std::back_inserter (attributes));
83
86
m_nominal_attributes.emplace_back (
84
87
std::make_pair (name, attributes));
@@ -91,23 +94,24 @@ void ARFFDeserializer::read()
91
94
{
92
95
std::vector<std::string> date_elements;
93
96
// split "date [[date-format]]" or "name date [[date-format]]"
94
- split (type, " " , true ,
95
- std::back_inserter (date_elements));
96
- if (date_elements[0 ]==" date" && date_elements.size () < 3 )
97
+ split (type, " " , true , std::back_inserter (date_elements));
98
+ if (date_elements[0 ] == " date" && date_elements.size () < 3 )
97
99
{
98
100
// @attribute date [[date-format]]
99
101
if (type.size () == 1 )
100
102
m_date_formats.emplace_back (m_default_date_format);
101
103
else
102
- m_date_formats.push_back (javatime_to_cpptime (date_elements[1 ]));
104
+ m_date_formats.push_back (
105
+ javatime_to_cpptime (date_elements[1 ]));
103
106
}
104
- else if (date_elements[1 ]== " date" && date_elements.size () < 4 )
107
+ else if (date_elements[1 ] == " date" && date_elements.size () < 4 )
105
108
{
106
109
// @attribute name date [[date-format]]
107
110
if (date_elements.size () == 2 )
108
111
m_date_formats.emplace_back (m_default_date_format);
109
112
else
110
- m_date_formats.push_back (javatime_to_cpptime (date_elements[2 ]));
113
+ m_date_formats.push_back (
114
+ javatime_to_cpptime (date_elements[2 ]));
111
115
}
112
116
else
113
117
{
@@ -172,12 +176,13 @@ void ARFFDeserializer::read()
172
176
{
173
177
return ;
174
178
}
175
- // assumes that until EOF we should expect tabular data to be parsed
179
+ // assumes that until EOF we should expect comma delimited values
176
180
else
177
181
{
178
182
std::vector<std::string> elems;
179
183
split (m_current_line, " ," , true , std::back_inserter (elems));
180
184
auto nominal_pos = m_nominal_attributes.begin ();
185
+ auto date_pos = m_date_formats.begin ();
181
186
for (int i = 0 ; i < elems.size (); ++i)
182
187
{
183
188
Attribute type = m_attributes[i];
@@ -194,7 +199,8 @@ void ARFFDeserializer::read()
194
199
catch (const std::invalid_argument&)
195
200
{
196
201
SG_SERROR (
197
- " Failed to covert \" %s\" to numeric.\n " , elems[i].c_str ())
202
+ " Failed to covert \" %s\" to numeric.\n " ,
203
+ elems[i].c_str ())
198
204
}
199
205
}
200
206
break ;
@@ -219,7 +225,34 @@ void ARFFDeserializer::read()
219
225
}
220
226
break ;
221
227
case (Attribute::Date):
222
- SG_SERROR (" Date parsing not implemented.\n " )
228
+ {
229
+ tm t{};
230
+ if (date_pos == m_date_formats.end ())
231
+ SG_SERROR (
232
+ " Unexpected date value \" %s\" on line "
233
+ " %d.\n " ,
234
+ elems[i].c_str (), m_line_number);
235
+ if (strptime (elems[i].c_str (), (*date_pos).c_str (), &t))
236
+ {
237
+ auto value_timestamp = std::mktime (&t);
238
+ if (value_timestamp == -1 )
239
+ SG_SERROR (
240
+ " Error creating timestamp with \" %s\" with "
241
+ " date format \" %s\" on line %d.\n " ,
242
+ elems[i].c_str (), (*date_pos).c_str (),
243
+ m_line_number)
244
+ else
245
+ m_data.emplace_back (value_timestamp);
246
+ }
247
+ else
248
+ SG_SERROR (
249
+ " Error parsing date \" %s\" with date format \" %s\" "
250
+ " on line %d.\n " ,
251
+ elems[i].c_str (), (*date_pos).c_str (),
252
+ m_line_number)
253
+ ++date_pos;
254
+ }
255
+ break ;
223
256
case (Attribute::String):
224
257
SG_SERROR (" String parsing not implemented.\n " )
225
258
}
0 commit comments