Skip to content

Commit a24533e

Browse files
committed
KPopLyricsProvider: added a check for the "page not found" case and extended the log messages.
1 parent 4636e06 commit a24533e

File tree

1 file changed

+18
-16
lines changed

1 file changed

+18
-16
lines changed

LyricsScraperNET/Providers/KPopLyrics/KPopLyricsProvider.cs

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public sealed class KPopLyricsProvider : ExternalProviderBase
2121
private readonly IExternalUriConverter _uriConverter;
2222

2323
private const string LyricsContainerNodesXPath = "//*[contains(@class, 'entry-content') and contains(@class, 'mh-clearfix')]";
24-
24+
2525
#region Constructors
2626

2727
public KPopLyricsProvider()
@@ -107,6 +107,7 @@ public override void WithLogger(ILoggerFactory loggerFactory)
107107
{
108108
_logger = loggerFactory.CreateLogger<KPopLyricsProvider>();
109109
}
110+
110111
private SearchResult PostProcessLyric(Uri uri, string text)
111112
{
112113
if (string.IsNullOrEmpty(text))
@@ -122,34 +123,42 @@ private SearchResult PostProcessLyric(Uri uri, string text)
122123

123124
if (mainNode is null)
124125
{
125-
_logger?.LogWarning($"KPopLyrics. Can't parse lyric from the page. Uri: {uri}");
126+
_logger?.LogWarning($"KPopLyrics. Can't parse lyric from the page. Couldn't find lyrics container. Uri: {uri}");
127+
return new SearchResult(ExternalProviderType.KPopLyrics, ResponseStatusCode.NoDataFound);
128+
}
129+
130+
if (mainNode.OuterHtml.Contains("no-content-found"))
131+
{
132+
_logger?.LogInformation($"KPopLyrics. Page not found (404). Uri: {uri}");
126133
return new SearchResult(ExternalProviderType.KPopLyrics, ResponseStatusCode.NoDataFound);
127134
}
128135

129136
var h2Nodes = htmlDoc.DocumentNode.SelectNodes("//h2");
130137

131138
if (h2Nodes is null || !h2Nodes.Any())
132139
{
133-
_logger?.LogWarning($"KPopLyrics. Can't parse lyric from the page. Uri: {uri}");
140+
_logger?.LogWarning($"KPopLyrics. Can't parse lyric from the page. Couldn't find header nodes. Uri: {uri}");
134141
return new SearchResult(ExternalProviderType.KPopLyrics, ResponseStatusCode.NoDataFound);
135142
}
136143

137-
// sometimes lyrics have eng translation but sometimes its only romanized version.
144+
// Sometimes the lyrics include an English translation, but sometimes there is only a romanized version.
138145
var h2Node = h2Nodes
139-
.FirstOrDefault(x => x.OuterHtml.Contains("Official English Translation") || x.OuterHtml.Contains("English Translation Lyrics"))
140-
?? h2Nodes.FirstOrDefault(x => x.OuterHtml.Contains("Romanized"));
146+
.FirstOrDefault(x => x.OuterHtml.Contains("Official English Translation")
147+
|| x.OuterHtml.Contains("English Translation Lyrics"))
148+
?? h2Nodes
149+
.FirstOrDefault(x => x.OuterHtml.Contains("Romanized"));
141150

142151
if (h2Node is null)
143152
{
144-
_logger?.LogWarning($"KPopLyrics. Can't parse lyric from the page. Uri: {uri}");
153+
_logger?.LogWarning($"KPopLyrics. Can't parse lyric from the page. Couldn't find a valid lyrics header node. Uri: {uri}");
145154
return new SearchResult(ExternalProviderType.KPopLyrics, ResponseStatusCode.NoDataFound);
146155
}
147156

148157
var rawHtmlLyrics = TakeParagraphsUntilHeader(h2Node);
149158

150159
if (string.IsNullOrEmpty(rawHtmlLyrics))
151160
{
152-
_logger?.LogWarning($"KPopLyrics. Can't parse lyric from the page. Uri: {uri}");
161+
_logger?.LogWarning($"KPopLyrics. Can't parse lyric from the page. Couldn't extract lyrics content from paragraphs. Uri: {uri}");
153162
return new SearchResult(ExternalProviderType.KPopLyrics, ResponseStatusCode.NoDataFound);
154163
}
155164

@@ -166,19 +175,12 @@ private string TakeParagraphsUntilHeader(HtmlNode startNode)
166175
while (sibling != null)
167176
{
168177
if (sibling.Name == "h2")
169-
{
170178
break;
171-
}
172179

173180
if (sibling.Name == "p")
174-
{
175181
paragraphs.Add(sibling.OuterHtml);
176-
}
177-
178-
if (sibling.Name != "h2" && sibling.Name != "p")
179-
{
182+
else
180183
break;
181-
}
182184

183185
sibling = sibling.NextSibling;
184186
}

0 commit comments

Comments
 (0)