Skip to content

Commit 670f400

Browse files
authored
Merge pull request #57 from RelistenNet/codex/fix-incorrect-year-display-due-to-bug-in-e6cbd6d
Fix Archive.org importer date update
2 parents e6cbd6d + 10eeaee commit 670f400

File tree

3 files changed

+156
-90
lines changed

3 files changed

+156
-90
lines changed

RelistenApi/Services/Importers/ArchiveOrgImporter.cs

Lines changed: 3 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
using System.Globalization;
44
using System.Linq;
55
using System.Net.Http;
6-
using System.Text.RegularExpressions;
76
using System.Threading.Tasks;
87
using System.Transactions;
98
using Hangfire.Console;
@@ -22,7 +21,6 @@ public class ArchiveOrgImporter : ImporterBase
2221
{
2322
public const string DataSourceName = "archive.org";
2423

25-
private static readonly Regex ExtractDateFromIdentifier = new(@"(\d{4}-\d{2}-\d{2})");
2624

2725
private readonly LinkService linkService;
2826

@@ -142,8 +140,9 @@ await root.response.docs.AsyncForEachWithProgress(prog, async doc =>
142140
var isNew = dbShow == null;
143141
var needsToUpdateReviews = maxSourceInformation != null &&
144142
doc._iguana_index_date > maxSourceInformation.review_max_updated_at;
143+
// Re-pull shows whose stored display date still contains the "XX" unknown-part placeholder.
// A string literal is required here ('XX' is not a valid char literal), and `== true`
// collapses the bool? produced by the null-conditional chain (no show / no date) to false
// so the value can be used directly in the `||` condition below.
var needsDateUpdate = dbShow?.display_date?.Contains("XX") == true;
145144

146-
if (currentIsTargetedShow || isNew || needsToUpdateReviews)
145+
if (currentIsTargetedShow || isNew || needsToUpdateReviews || needsDateUpdate)
147146
{
148147
ctx?.WriteLine("Pulling https://archive.org/metadata/{0}", doc.identifier);
149148

@@ -181,7 +180,7 @@ await root.response.docs.AsyncForEachWithProgress(prog, async doc =>
181180
return;
182181
}
183182

184-
var properDate = FixDisplayDate(detailsRoot.metadata);
183+
var properDate = ArchiveOrgImporterUtils.FixDisplayDate(detailsRoot.metadata);
185184

186185
if (properDate == null)
187186
{
@@ -493,92 +492,6 @@ private Source CreateSourceForMetadata(
493492
};
494493
}
495494

496-
// thanks to this trouble child: https://archive.org/metadata/lotus2011-16-07.lotus2011-16-07_Neumann
497-
private string FixDisplayDate(Metadata meta)
498-
{
499-
if (meta.date.Contains("00"))
500-
{
501-
// XX is the preferred unknown date identifier
502-
return meta.date.Replace("00", "XX");
503-
}
504-
505-
// 1970-03-XX or 1970-XX-XX which is okay because it is handled by the rebuild
506-
if (meta.date.Contains('X'))
507-
{
508-
return meta.date;
509-
}
510-
511-
if (meta.date == "2013-14-02")
512-
{
513-
// this date from The Werks always gives us issues and TryFlippingMonthAndDate doesn't work...I suspect
514-
// some sort cultural issue because I cannot reproduce this locally
515-
return "2013-02-14";
516-
}
517-
518-
// happy case
519-
if (TestDate(meta.date))
520-
{
521-
return meta.date;
522-
}
523-
524-
var d = TryFlippingMonthAndDate(meta.date);
525-
526-
if (d != null)
527-
{
528-
return d;
529-
}
530-
531-
// try to parse it out of the identifier
532-
var matches = ExtractDateFromIdentifier.Match(meta.identifier);
533-
534-
if (matches.Success)
535-
{
536-
var tdate = matches.Groups[1].Value;
537-
538-
if (TestDate(tdate))
539-
{
540-
return tdate;
541-
}
542-
543-
var flipped = TryFlippingMonthAndDate(tdate);
544-
545-
if (flipped != null)
546-
{
547-
return flipped;
548-
}
549-
}
550-
551-
return null;
552-
}
553-
554-
private bool TestDate(string date)
555-
{
556-
return DateTime.TryParseExact(date, "yyyy-MM-dd",
557-
DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AssumeUniversal, out var _);
558-
}
559-
560-
private string TryFlippingMonthAndDate(string date)
561-
{
562-
// not a valid date
563-
var parts = date.Split('-');
564-
565-
// try to see if it is YYYY-DD-MM instead
566-
if (parts.Length > 2 && int.TryParse(parts[1], out var month))
567-
{
568-
if (month > 12)
569-
{
570-
// rearrange to YYYY-MM-DD
571-
var dateStr = parts[0] + "-" + parts[2] + "-" + parts[1];
572-
573-
if (TestDate(dateStr))
574-
{
575-
return dateStr;
576-
}
577-
}
578-
}
579-
580-
return null;
581-
}
582495

583496
private IEnumerable<SourceTrack> CreateSourceTracksForFiles(
584497
Artist artist,
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
using System;
2+
using System.Globalization;
3+
using System.Text.RegularExpressions;
4+
using Relisten.Vendor.ArchiveOrg.Metadata;
5+
6+
namespace Relisten.Import;
7+
8+
public static class ArchiveOrgImporterUtils
9+
{
10+
private static readonly Regex ExtractDateFromIdentifier = new(@"(\d{4}-\d{2}-\d{2})");
11+
12+
// thanks to this trouble child: https://archive.org/metadata/lotus2011-16-07.lotus2011-16-07_Neumann
13+
public static string FixDisplayDate(Metadata meta)
14+
{
15+
if (meta == null || string.IsNullOrEmpty(meta.date))
16+
{
17+
return null;
18+
}
19+
20+
var parts = meta.date.Split('-');
21+
22+
if (parts.Length == 3)
23+
{
24+
var year = parts[0];
25+
var month = parts[1];
26+
var day = parts[2];
27+
28+
var changed = false;
29+
30+
if (month == "00")
31+
{
32+
month = "XX";
33+
changed = true;
34+
}
35+
36+
if (day == "00")
37+
{
38+
day = "XX";
39+
changed = true;
40+
}
41+
42+
if (changed)
43+
{
44+
return string.Join('-', year, month, day);
45+
}
46+
}
47+
48+
// 1970-03-XX or 1970-XX-XX which is okay because it is handled by the rebuild
49+
if (meta.date.Contains('X'))
50+
{
51+
return meta.date;
52+
}
53+
54+
if (meta.date == "2013-14-02")
55+
{
56+
// this date from The Werks always gives us issues and TryFlippingMonthAndDate doesn't work...I suspect
57+
// some sort cultural issue because I cannot reproduce this locally
58+
return "2013-02-14";
59+
}
60+
61+
// happy case
62+
if (TestDate(meta.date))
63+
{
64+
return meta.date;
65+
}
66+
67+
var d = TryFlippingMonthAndDate(meta.date);
68+
69+
if (d != null)
70+
{
71+
return d;
72+
}
73+
74+
// try to parse it out of the identifier
75+
var matches = ExtractDateFromIdentifier.Match(meta.identifier);
76+
77+
if (matches.Success)
78+
{
79+
var tdate = matches.Groups[1].Value;
80+
81+
if (TestDate(tdate))
82+
{
83+
return tdate;
84+
}
85+
86+
var flipped = TryFlippingMonthAndDate(tdate);
87+
88+
if (flipped != null)
89+
{
90+
return flipped;
91+
}
92+
}
93+
94+
return null;
95+
}
96+
97+
private static bool TestDate(string date)
98+
{
99+
return DateTime.TryParseExact(date, "yyyy-MM-dd",
100+
DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AssumeUniversal, out _);
101+
}
102+
103+
private static string TryFlippingMonthAndDate(string date)
104+
{
105+
// not a valid date
106+
var parts = date.Split('-');
107+
108+
// try to see if it is YYYY-DD-MM instead
109+
if (parts.Length > 2 && int.TryParse(parts[1], out var month))
110+
{
111+
if (month > 12)
112+
{
113+
// rearrange to YYYY-MM-DD
114+
var dateStr = parts[0] + "-" + parts[2] + "-" + parts[1];
115+
116+
if (TestDate(dateStr))
117+
{
118+
return dateStr;
119+
}
120+
}
121+
}
122+
123+
return null;
124+
}
125+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
using FluentAssertions;
2+
using NUnit.Framework;
3+
using Relisten.Import;
4+
using Relisten.Vendor.ArchiveOrg.Metadata;
5+
6+
namespace RelistenApiTests.Importers.ArchiveOrg;
7+
8+
[TestFixture]
public class TestArchiveOrgFixDisplayDate
{
    // Runs FixDisplayDate on metadata carrying the given date and a placeholder identifier.
    private static string InvokeFixDisplayDate(string date) =>
        ArchiveOrgImporterUtils.FixDisplayDate(new Metadata { date = date, identifier = "id" });

    // For every year from 1950 through 2050: a complete date comes back unchanged, and a "00"
    // month and/or day is reported as "XX" while the year is left alone.
    [Test]
    public void FixDisplayDate_ShouldHandleZeroMonthOrDayAcrossYears()
    {
        static void ExpectDisplayDate(string rawDate, string expected)
        {
            var actual = InvokeFixDisplayDate(rawDate);
            actual.Should().Be(expected);
        }

        var year = 1950;

        while (year <= 2050)
        {
            ExpectDisplayDate($"{year}-05-05", $"{year}-05-05");
            ExpectDisplayDate($"{year}-00-05", $"{year}-XX-05");
            ExpectDisplayDate($"{year}-05-00", $"{year}-05-XX");
            ExpectDisplayDate($"{year}-00-00", $"{year}-XX-XX");

            year++;
        }
    }
}

0 commit comments

Comments
 (0)