3939import com .google .common .base .Joiner ;
4040import com .typesafe .config .Config ;
4141
42+ import javafx .util .Pair ;
43+
4244/**
4345 * Command that converts the timestamps in a given field from one of a set of input date formats (in
4446 * an input timezone) to an output date format (in an output timezone), while respecting daylight
@@ -60,12 +62,14 @@ public Command build(Config config, Command parent, Command child, MorphlineCont
6062 ///////////////////////////////////////////////////////////////////////////////
6163 // Nested classes:
6264 ///////////////////////////////////////////////////////////////////////////////
63- private static final class ConvertTimestamp extends AbstractCommand {
65+ public static final class ConvertTimestamp extends AbstractCommand {
6466
6567 private final String fieldName ;
66- private final List <SimpleDateFormat > inputFormats = new ArrayList <SimpleDateFormat >();
68+ private final List <Pair < SimpleDateFormat , Boolean >> inputFormats = new ArrayList <Pair < SimpleDateFormat , Boolean > >();
6769 private final SimpleDateFormat outputFormat ;
6870 private final String inputFormatsDebugString ; // cached
71+ private final int insertYearMonthOffset ;
72+ private final int insertYearOffset ;
6973
7074 private static final String NATIVE_SOLR_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'" ; // e.g. 2007-04-26T08:05:04.789Z
7175 private static final SimpleDateFormat UNIX_TIME_IN_MILLIS = new SimpleDateFormat ("'unixTimeInMillis'" );
@@ -75,20 +79,31 @@ private static final class ConvertTimestamp extends AbstractCommand {
7579 DateUtil .DEFAULT_DATE_FORMATS .add (0 , NATIVE_SOLR_FORMAT );
7680 }
7781
78- public ConvertTimestamp (CommandBuilder builder , Config config , Command parent , Command child , MorphlineContext context ) {
82+ ConvertTimestamp (CommandBuilder builder , Config config , Command parent , Command child , MorphlineContext context ) {
7983 super (builder , config , parent , child , context );
8084
8185 this .fieldName = getConfigs ().getString (config , "field" , Fields .TIMESTAMP );
8286 TimeZone inputTimeZone = getTimeZone (getConfigs ().getString (config , "inputTimezone" , "UTC" ));
8387 Locale inputLocale = getLocale (getConfigs ().getString (config , "inputLocale" , "" ));
88+
89+ boolean insertYear = getConfigs ().getBoolean (config , "insertMissingYear" , false );
90+ //Defaults to -5 which gives a rolling -11 through +1 month offset (assumes historic messages)
91+ insertYearMonthOffset = getConfigs ().getInt (config , "insertMissingYearMonthOffset" , -5 );
92+ insertYearOffset = getConfigs ().getInt (config ,"insertMissingYearOffset" , 0 );
93+
8494 for (String inputFormat : getConfigs ().getStringList (config , "inputFormats" , DateUtil .DEFAULT_DATE_FORMATS )) {
8595 SimpleDateFormat dateFormat = getUnixTimeFormat (inputFormat , inputTimeZone );
96+ boolean yearRequired = false ;
8697 if (dateFormat == null ) {
98+ if (insertYear && !inputFormat .contains ("yy" )) {
99+ inputFormat = "yyyy" + inputFormat ;
100+ yearRequired =true ;
101+ }
87102 dateFormat = new SimpleDateFormat (inputFormat , inputLocale );
88103 dateFormat .setTimeZone (inputTimeZone );
89104 dateFormat .set2DigitYearStart (DateUtil .DEFAULT_TWO_DIGIT_YEAR_START );
90105 }
91- this .inputFormats .add (dateFormat );
106+ this .inputFormats .add (new Pair < SimpleDateFormat , Boolean >( dateFormat , yearRequired ) );
92107 }
93108 TimeZone outputTimeZone = getTimeZone (getConfigs ().getString (config , "outputTimezone" , "UTC" ));
94109 Locale outputLocale = getLocale (getConfigs ().getString (config , "outputLocale" , "" ));
@@ -99,19 +114,23 @@ public ConvertTimestamp(CommandBuilder builder, Config config, Command parent, C
99114 dateFormat .setTimeZone (outputTimeZone );
100115 }
101116 this .outputFormat = dateFormat ;
102- validateArguments ();
103-
117+
104118 List <String > inputFormatsStringList = new ArrayList <String >();
105- for (SimpleDateFormat inputFormat : inputFormats ) {
119+ for (Pair < SimpleDateFormat , Boolean > inputFormat : inputFormats ) {
106120 // SimpleDateFormat.toString() doesn't print anything useful
107- inputFormatsStringList .add (inputFormat .toPattern ());
121+ inputFormatsStringList .add (inputFormat .getKey (). toPattern ());
108122 }
109123 this .inputFormatsDebugString = inputFormatsStringList .toString ();
110124
125+
126+ validateArguments ();
127+
128+
111129 if (LOG .isTraceEnabled ()) {
112130 LOG .trace ("inputFormatsDebugString: {}" , inputFormatsDebugString );
113131 LOG .trace ("availableTimeZoneIDs: {}" , Joiner .on ("\n " ).join (TimeZone .getAvailableIDs ()));
114132 LOG .trace ("availableLocales: {}" , Joiner .on ("\n " ).join (Locale .getAvailableLocales ()));
133+ LOG .trace ("insertMissingYear: {}" , insertYear );
115134 }
116135 }
117136
@@ -123,7 +142,9 @@ protected boolean doProcess(Record record) {
123142 while (iter .hasNext ()) {
124143 String timestamp = iter .next ().toString ();
125144 boolean foundMatchingFormat = false ;
126- for (SimpleDateFormat inputFormat : inputFormats ) {
145+ for (Pair <SimpleDateFormat ,Boolean > inputFormatPair : inputFormats ) {
146+ SimpleDateFormat inputFormat = inputFormatPair .getKey ();
147+ boolean yearRequired = inputFormatPair .getValue ();
127148 Date date ;
128149 boolean isUnixTime ;
129150 if (inputFormat == UNIX_TIME_IN_MILLIS ) {
@@ -135,7 +156,15 @@ protected boolean doProcess(Record record) {
135156 } else {
136157 isUnixTime = false ;
137158 pos .setIndex (0 );
138- date = inputFormat .parse (timestamp , pos );
159+ if (yearRequired ) {
160+ Calendar cal = Calendar .getInstance ();
161+ int targetYear = cal .get (Calendar .YEAR ) + insertYearOffset ;
162+ timestamp = targetYear + timestamp ;
163+ date = inputFormat .parse (timestamp , pos );
164+ date = DateUtil .insertYear (date , new Date (), insertYearMonthOffset , targetYear , inputFormat .getTimeZone ());
165+ } else {
166+ date = inputFormat .parse (timestamp , pos );
167+ }
139168 }
140169 if (date != null && (isUnixTime || pos .getIndex () == timestamp .length ())) {
141170 String result ;
@@ -208,6 +237,7 @@ private Locale getLocale(String name) {
208237 }
209238
210239
240+
211241 ///////////////////////////////////////////////////////////////////////////////
212242 // Nested classes:
213243 ///////////////////////////////////////////////////////////////////////////////
@@ -230,7 +260,7 @@ private Locale getLocale(String name) {
230260 /**
231261 * This class has some code from HttpClient DateUtil and Solrj DateUtil.
232262 */
233- private static final class DateUtil {
263+ public static final class DateUtil {
234264 //start HttpClient
235265 /**
236266 * Date format pattern used to parse HTTP date headers in RFC 1123 format.
@@ -259,8 +289,6 @@ private static final class DateUtil {
259289 DEFAULT_TWO_DIGIT_YEAR_START = calendar .getTime ();
260290 }
261291
262- // private static final TimeZone GMT = TimeZone.getTimeZone("GMT");
263-
264292 //end HttpClient
265293
266294 //---------------------------------------------------------------------------------------
@@ -280,7 +308,56 @@ private static final class DateUtil {
280308 DEFAULT_DATE_FORMATS .addAll (DateUtil .DEFAULT_HTTP_CLIENT_PATTERNS );
281309 }
282310
311+ //work around the fact that SimpleDateFormat doesn't handle missing year.
312+ //Code inspired by Flume SyslogParser.java
313+ //https://github.com/apache/flume/blob/trunk/flume-ng-core/src/main/java/org/apache/flume/source/SyslogParser.java
314+ public static Date insertYear (Date inputDate , Date currentDate , int monthOffset , int targetYear , TimeZone tz ) {
315+ Calendar cal = Calendar .getInstance ();
316+ cal .setTimeZone (tz );
317+ cal .setTime (inputDate );
318+
319+ //There are 12 months in a year. We offer a sliding window, for working out whether the parsed date falls within
320+ //the window (for dealing with year rollover issues).
321+ //Compute the upper and lower bound by moving +6 and -6 by the offset.
322+ int upperBound = monthOffset + 6 ;
323+ int lowerBound = monthOffset - 6 ;
324+
325+ //We're now going to check to see whether the date falls outside of the
326+ //upper or lower bounds by intentionally creating the wrong date and seeing
327+ //whether that falls in the past (or future)
328+ Calendar calMinusUpperBMonths = Calendar .getInstance ();
329+ calMinusUpperBMonths .setTime (inputDate );
330+ calMinusUpperBMonths .set (Calendar .YEAR , targetYear );
331+ calMinusUpperBMonths .add (Calendar .MONTH , upperBound * -1 );
332+
333+ Calendar calPlusLowerBMonths = Calendar .getInstance ();
334+ calPlusLowerBMonths .setTime (inputDate );
335+ calPlusLowerBMonths .set (Calendar .YEAR , targetYear );
336+ calPlusLowerBMonths .add (Calendar .MONTH , lowerBound * -1 );
337+
338+ Calendar calReferencePoint = Calendar .getInstance ();
339+ calReferencePoint .setTime (currentDate );
340+ calReferencePoint .setTimeZone (tz );
341+ calReferencePoint .set (Calendar .YEAR , targetYear );
342+
343+ if (cal .getTimeInMillis () > calReferencePoint .getTimeInMillis () &&
344+ calMinusUpperBMonths .getTimeInMillis () > calReferencePoint .getTimeInMillis ()) {
345+ //Date as is stands is in the future and also more than (upper bound) months in the future, therefore rolling back a year.
346+ //Need to roll back a year
347+ cal .add (Calendar .YEAR , -1 );
348+ } else if (cal .getTimeInMillis () < calReferencePoint .getTimeInMillis () &&
349+ calPlusLowerBMonths .getTimeInMillis () < calReferencePoint .getTimeInMillis () ) {
350+ //Date as it stands is in the past and indeed more than (lower bound) months in the past
351+ //Need to roll forward a year
352+ cal .add (Calendar .YEAR , -1 );
353+ }
354+ // Else it's in the middle and no modification required
355+
356+ return cal .getTime ();
357+
358+ }
359+
283360 }
284361 }
285-
286- }
362+
363+ }
0 commit comments