Skip to content

Commit e40eb19

Browse files
committed
Handle full names found in aliases
Handle "Name (Name in romaji)" and "Name in romaji (Name)" pairs found in aliases
1 parent 2e0a6f1 commit e40eb19

File tree

4 files changed

+212
-114
lines changed

4 files changed

+212
-114
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
namespace VndbCharacterNames;
2+
3+
/// <summary>
/// Extension helpers shared by the name-conversion code.
/// </summary>
internal static class ExtensionMethods
{
    /// <summary>
    /// Records <paramref name="element"/> under <paramref name="key"/> in a dictionary of lists,
    /// skipping the insert when the list for that key already contains the element.
    /// </summary>
    /// <typeparam name="TKey">Dictionary key type.</typeparam>
    /// <typeparam name="TElement">Type of the items stored in each list.</typeparam>
    /// <param name="dict">Dictionary to update in place.</param>
    /// <param name="key">Key whose list should contain <paramref name="element"/>.</param>
    /// <param name="element">Item to add if it is not already present for the key.</param>
    public static void AddIfNotExists<TKey, TElement>(this Dictionary<TKey, List<TElement>> dict, TKey key, TElement element) where TKey : notnull
    {
        // First time this key is seen: start a new single-item list.
        if (!dict.TryGetValue(key, out List<TElement>? existingElements))
        {
            dict[key] = [element];
            return;
        }

        // Key already known: nothing to do when the element is a duplicate.
        if (existingElements.Contains(element))
        {
            return;
        }

        existingElements.Add(element);
    }
}

VndbCharacterNames/Program.cs

Lines changed: 127 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,10 @@
1-
using System.Text.Encodings.Web;
21
using System.Text.Json;
32
using System.Text.Json.Nodes;
4-
using System.Text.RegularExpressions;
53

64
namespace VndbCharacterNames;
75

8-
internal static partial class Program
6+
file static class Program
97
{
10-
private const string SurnameNameType = "Surname";
11-
private const string OtherNameType = "other";
12-
13-
private static readonly JsonSerializerOptions s_jso = new()
14-
{
15-
RespectNullableAnnotations = true,
16-
RespectRequiredConstructorParameters = true,
17-
Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping
18-
};
19-
20-
[GeneratedRegex(@"^[\u30A0-\u30FF]+$", RegexOptions.CultureInvariant)]
21-
private static partial Regex KatakanaRegex { get; }
22-
23-
[GeneratedRegex(@"^Full name:.*\n((Sex:.*\n)|)", RegexOptions.CultureInvariant)]
24-
private static partial Regex FullNameAndSexRegex { get; }
25-
26-
[GeneratedRegex(@"[\u00D7\u2000-\u206F\u25A0-\u25FF\u2E80-\u319F\u31C0-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF\uFE30-\uFE4F\uFF00-\uFFEF]|\uD82C[\uDC00-\uDD6F]|\uD83C[\uDE00-\uDEFF]|\uD840[\uDC00-\uDFFF]|[\uD841-\uD868][\uDC00-\uDFFF]|\uD869[\uDC00-\uDEDF]|\uD869[\uDF00-\uDFFF]|[\uD86A-\uD87A][\uDC00-\uDFFF]|\uD87B[\uDC00-\uDE5F]|\uD87E[\uDC00-\uDE1F]|\uD880[\uDC00-\uDFFF]|[\uD881-\uD887][\uDC00-\uDFFF]|\uD888[\uDC00-\uDFAF]", RegexOptions.CultureInvariant)]
27-
public static partial Regex JapaneseRegex { get; }
28-
29-
[GeneratedRegex(@"[\u0000-\u024F\u1E00-\u1EFF\u2C60-\u2C7F]", RegexOptions.CultureInvariant)]
30-
public static partial Regex LatinRegex { get; }
31-
32-
private static void AddItemToDictionary(Dictionary<NameRecord, List<string>> dict, NameRecord nameRecord, string nameType)
33-
{
34-
if (dict.TryGetValue(nameRecord, out List<string>? nameTypes))
35-
{
36-
if (!nameTypes.Contains(nameType))
37-
{
38-
nameTypes.Add(nameType);
39-
}
40-
}
41-
else
42-
{
43-
dict[nameRecord] = [nameType];
44-
}
45-
}
46-
478
public static void Main(string[] args)
489
{
4910
string? outputFilePath = null;
@@ -132,7 +93,7 @@ public static void Main(string[] args)
13293
List<VndbNameRecord>? vndbNameRecords;
13394
try
13495
{
135-
vndbNameRecords = JsonSerializer.Deserialize<List<VndbNameRecord>>(fileStream, s_jso)!;
96+
vndbNameRecords = JsonSerializer.Deserialize<List<VndbNameRecord>>(fileStream, Utils.Jso)!;
13697
++validJsonFileCount;
13798
totalVndbNameRecordCount += vndbNameRecords.Count;
13899
}
@@ -149,87 +110,62 @@ public static void Main(string[] args)
149110

150111
if (vndbNameRecord.Sex is not null)
151112
{
152-
AddItemToDictionary(nameTypesDict, new NameRecord(fullNameWithoutAnyWhiteSpace, vndbNameRecord.FullNameInRomaji), vndbNameRecord.Sex);
113+
nameTypesDict.AddIfNotExists(new NameRecord(fullNameWithoutAnyWhiteSpace, vndbNameRecord.FullNameInRomaji), vndbNameRecord.Sex);
153114
}
154115

155116
_ = convertedRecords.Add(new ConvertedNameRecord(fullNameWithoutAnyWhiteSpace, vndbNameRecord.FullNameInRomaji, vndbNameRecord.Sex, definition));
156-
string[] splitRomajiParts = vndbNameRecord.FullNameInRomaji.Split((string[]?)null, StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
157-
158-
string? surname = null;
159-
string? surnameInRomaji = null;
160-
string? givenName = null;
161-
string? givenNameInRomaji = null;
162-
if (splitRomajiParts.Length > 1)
117+
(NameRecord surnameRecord, NameRecord givenNameRecord)? surnameAndNameRecords = GetSurnameAndNameRecords(vndbNameRecord.FullName, vndbNameRecord.FullNameInRomaji);
118+
if (surnameAndNameRecords is not null)
163119
{
164-
string[] splitFullNameParts = vndbNameRecord.FullName.Split((string[]?)null, StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
165-
if (splitFullNameParts.Length is 1)
166-
{
167-
splitFullNameParts = vndbNameRecord.FullName.Split(['=', '='], StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
168-
}
169-
170-
if (splitFullNameParts.Length > 1)
171-
{
172-
if (splitRomajiParts.Length == splitFullNameParts.Length)
173-
{
174-
if (splitFullNameParts.Length is 2)
175-
{
176-
surname = splitFullNameParts[0];
177-
surnameInRomaji = splitRomajiParts[0];
178-
givenName = splitFullNameParts[1];
179-
givenNameInRomaji = splitRomajiParts[1];
120+
// Unpack both halves of the split name. The given-name record must be read from the
// tuple's givenNameRecord element; reading surnameRecord twice would register the
// surname as a given name and drop the real given name entirely.
NameRecord surnameRecord = surnameAndNameRecords.Value.surnameRecord;
NameRecord givenNameRecord = surnameAndNameRecords.Value.givenNameRecord;
180122

181-
if (KatakanaRegex.IsMatch(surname))
182-
{
183-
(surname, givenName) = (givenName, surname);
184-
(surnameInRomaji, givenNameInRomaji) = (givenNameInRomaji, surnameInRomaji);
185-
}
186-
}
187-
}
188-
}
189-
else
190-
{
191-
splitFullNameParts = vndbNameRecord.FullName.Split(['・', '・', '・'], StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
192-
if (splitRomajiParts.Length == splitFullNameParts.Length)
193-
{
194-
if (splitFullNameParts.Length is 2)
195-
{
196-
givenName = splitFullNameParts[0];
197-
givenNameInRomaji = splitRomajiParts[0];
198-
surname = splitFullNameParts[1];
199-
surnameInRomaji = splitRomajiParts[1];
200-
201-
if (!KatakanaRegex.IsMatch(surname) && !KatakanaRegex.IsMatch(givenName))
202-
{
203-
(givenName, surname) = (surname, givenName);
204-
(givenNameInRomaji, surnameInRomaji) = (surnameInRomaji, givenNameInRomaji);
205-
}
206-
}
207-
}
208-
}
209-
}
210-
211-
if (givenName is not null && givenNameInRomaji is not null && surname is not null && surnameInRomaji is not null)
212-
{
213-
NameRecord surnameAndRomaji = new(surname, surnameInRomaji);
214-
AddItemToDictionary(nameTypesDict, surnameAndRomaji, SurnameNameType);
123+
nameTypesDict.AddIfNotExists(surnameRecord, Utils.SurnameNameType);
215124

216125
if (vndbNameRecord.Sex is not null)
217126
{
218-
NameRecord givenNameAndRomajiRecord = new(givenName, givenNameInRomaji);
219-
AddItemToDictionary(nameTypesDict, givenNameAndRomajiRecord, vndbNameRecord.Sex);
127+
nameTypesDict.AddIfNotExists(givenNameRecord, vndbNameRecord.Sex);
220128
}
221129

222-
_ = convertedRecords.Add(new ConvertedNameRecord(surname, surnameInRomaji));
223-
_ = convertedRecords.Add(new ConvertedNameRecord(givenName, givenNameInRomaji));
130+
_ = convertedRecords.Add(new ConvertedNameRecord(surnameRecord.Name, surnameRecord.NameInRomaji));
131+
_ = convertedRecords.Add(new ConvertedNameRecord(givenNameRecord.Name, givenNameRecord.NameInRomaji));
224132

225133
List<NameRecord>? aliasRecords = vndbNameRecord.GetAliasPairs();
226134
if (aliasRecords is not null)
227135
{
228136
foreach (NameRecord aliasRecord in aliasRecords)
229137
{
230-
if (surname != aliasRecord.Name && givenName != aliasRecord.Name)
138+
if (surnameRecord.Name != aliasRecord.Name && givenNameRecord.Name != aliasRecord.Name)
231139
{
232-
_ = convertedRecords.Add(new ConvertedNameRecord(aliasRecord.Name, aliasRecord.NameInRomaji, vndbNameRecord.Sex, definition));
140+
string fullAliasWithoutAnyWhiteSpace = string.Join("", aliasRecord.Name.Split());
141+
if (vndbNameRecord.Sex is not null)
142+
{
143+
nameTypesDict.AddIfNotExists(new NameRecord(fullAliasWithoutAnyWhiteSpace, aliasRecord.NameInRomaji), vndbNameRecord.Sex);
144+
}
145+
146+
_ = convertedRecords.Add(new ConvertedNameRecord(fullAliasWithoutAnyWhiteSpace, aliasRecord.NameInRomaji, vndbNameRecord.Sex, definition));
147+
(NameRecord surnameRecord, NameRecord givenNameRecord)? aliasSurnameAndNameRecords = GetSurnameAndNameRecords(aliasRecord.Name, aliasRecord.NameInRomaji);
148+
if (aliasSurnameAndNameRecords is not null)
149+
{
150+
// Unpack both halves of the split alias. The given-name record must be read from the
// tuple's givenNameRecord element, otherwise the alias surname is processed twice and
// the alias given name is never recorded.
NameRecord aliasSurnameRecord = aliasSurnameAndNameRecords.Value.surnameRecord;
NameRecord aliasGivenNameRecord = aliasSurnameAndNameRecords.Value.givenNameRecord;
152+
153+
if (aliasSurnameRecord.Name != surnameRecord.Name)
154+
{
155+
nameTypesDict.AddIfNotExists(aliasSurnameRecord, Utils.SurnameNameType);
156+
_ = convertedRecords.Add(new ConvertedNameRecord(aliasSurnameRecord.Name, aliasSurnameRecord.NameInRomaji));
157+
}
158+
159+
if (aliasGivenNameRecord.Name != givenNameRecord.Name)
160+
{
161+
if (vndbNameRecord.Sex is not null)
162+
{
163+
nameTypesDict.AddIfNotExists(aliasGivenNameRecord, vndbNameRecord.Sex);
164+
}
165+
166+
_ = convertedRecords.Add(new ConvertedNameRecord(aliasGivenNameRecord.Name, aliasGivenNameRecord.NameInRomaji));
167+
}
168+
}
233169
}
234170
}
235171
}
@@ -241,7 +177,28 @@ public static void Main(string[] args)
241177
{
242178
foreach (NameRecord aliasRecord in aliasRecords)
243179
{
244-
_ = convertedRecords.Add(new ConvertedNameRecord(aliasRecord.Name, aliasRecord.NameInRomaji, vndbNameRecord.Sex, definition));
180+
string fullAliasWithoutAnyWhiteSpace = string.Join("", aliasRecord.Name.Split());
181+
if (vndbNameRecord.Sex is not null)
182+
{
183+
nameTypesDict.AddIfNotExists(new NameRecord(fullAliasWithoutAnyWhiteSpace, aliasRecord.NameInRomaji), vndbNameRecord.Sex);
184+
}
185+
186+
_ = convertedRecords.Add(new ConvertedNameRecord(fullAliasWithoutAnyWhiteSpace, aliasRecord.NameInRomaji, vndbNameRecord.Sex, definition));
187+
(NameRecord surnameRecord, NameRecord givenNameRecord)? aliasSurnameAndNameRecords = GetSurnameAndNameRecords(aliasRecord.Name, aliasRecord.NameInRomaji);
188+
if (aliasSurnameAndNameRecords is not null)
189+
{
190+
// Unpack both halves of the split alias. The given-name record must be read from the
// tuple's givenNameRecord element, otherwise the alias surname is processed twice and
// the alias given name is never recorded.
NameRecord aliasSurnameRecord = aliasSurnameAndNameRecords.Value.surnameRecord;
NameRecord aliasGivenNameRecord = aliasSurnameAndNameRecords.Value.givenNameRecord;
192+
193+
nameTypesDict.AddIfNotExists(aliasSurnameRecord, Utils.SurnameNameType);
194+
_ = convertedRecords.Add(new ConvertedNameRecord(aliasSurnameRecord.Name, aliasSurnameRecord.NameInRomaji));
195+
196+
if (vndbNameRecord.Sex is not null)
197+
{
198+
nameTypesDict.AddIfNotExists(aliasGivenNameRecord, vndbNameRecord.Sex);
199+
}
200+
_ = convertedRecords.Add(new ConvertedNameRecord(aliasGivenNameRecord.Name, aliasGivenNameRecord.NameInRomaji));
201+
}
245202
}
246203
}
247204
}
@@ -263,10 +220,10 @@ public static void Main(string[] args)
263220
#pragma warning restore CA1308
264221

265222
string? definitionForCustomNameFile = record.Definition is not null
266-
? FullNameAndSexRegex.Replace(record.Definition, "").Replace("\t", " ", StringComparison.Ordinal).ReplaceLineEndings("\\n")
223+
? Utils.FullNameAndSexRegex.Replace(record.Definition, "").Replace("\t", " ", StringComparison.Ordinal).ReplaceLineEndings("\\n")
267224
: null;
268225

269-
string line = $"{record.PrimarySpelling}\t{record.Reading}\t{nameType ?? OtherNameType}\t{definitionForCustomNameFile}";
226+
string line = $"{record.PrimarySpelling}\t{record.Reading}\t{nameType ?? Utils.OtherNameType}\t{definitionForCustomNameFile}";
270227
lines.Add(line);
271228

272229
string definitionForNazeka = record.Definition ?? (nameType is not null
@@ -284,7 +241,7 @@ public static void Main(string[] args)
284241
}
285242

286243
File.WriteAllLines(customNamePath, lines);
287-
File.WriteAllText(outputFilePath!, JsonSerializer.Serialize(nazekaJsonArray, s_jso));
244+
File.WriteAllText(outputFilePath!, JsonSerializer.Serialize(nazekaJsonArray, Utils.Jso));
288245
Console.WriteLine($"Successfully created {outputFilePath} and {customNamePath}!");
289246

290247
if (validJsonFileCount is 1 && totalVndbNameRecordCount is 100_000)
@@ -301,4 +258,65 @@ public static void Main(string[] args)
301258
Main(args);
302259
}
303260
}
261+
262+
/// <summary>
/// Tries to split a full name and its romaji reading into a surname record and a given-name record.
/// </summary>
/// <param name="fullName">Full name in its original script.</param>
/// <param name="fullNameInRomaji">Romaji reading of <paramref name="fullName"/>.</param>
/// <returns>
/// The surname and given-name records when both the romaji and the name split into exactly two parts;
/// otherwise <see langword="null"/>.
/// </returns>
private static (NameRecord surnameRecord, NameRecord givenNameRecord)? GetSurnameAndNameRecords(string fullName, string fullNameInRomaji)
{
    const StringSplitOptions splitOptions = StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries;

    // Every successful path needs the romaji to pair one-to-one with a two-part name,
    // so anything other than exactly two whitespace-separated romaji parts cannot be split.
    string[] romajiParts = fullNameInRomaji.Split((string[]?)null, splitOptions);
    if (romajiParts.Length is not 2)
    {
        return null;
    }

    // Split the name on whitespace first; a single chunk gets a second chance with '=' separators.
    string[] nameParts = fullName.Split((string[]?)null, splitOptions);
    if (nameParts.Length is 1)
    {
        nameParts = fullName.Split(['=', '='], splitOptions);
    }

    bool surnameIsFirstPart;
    if (nameParts.Length > 1)
    {
        if (nameParts.Length is not 2)
        {
            return null;
        }

        // Whitespace/'=' separated: surname-first by default, flipped when the first part is all katakana
        // (presumably a given-name-first foreign name).
        surnameIsFirstPart = !Utils.KatakanaRegex.IsMatch(nameParts[0]);
    }
    else
    {
        // Last resort: middle-dot separators.
        nameParts = fullName.Split(['・', '・', '・'], splitOptions);
        if (nameParts.Length is not 2)
        {
            return null;
        }

        // Dot separated: given-name-first by default, flipped only when neither part is all katakana.
        surnameIsFirstPart = !Utils.KatakanaRegex.IsMatch(nameParts[1]) && !Utils.KatakanaRegex.IsMatch(nameParts[0]);
    }

    // Name parts and romaji parts share positions, so one index selects both spellings.
    int surnameIndex = surnameIsFirstPart ? 0 : 1;
    int givenNameIndex = 1 - surnameIndex;

    return (surnameRecord: new NameRecord(nameParts[surnameIndex], romajiParts[surnameIndex]),
        givenNameRecord: new NameRecord(nameParts[givenNameIndex], romajiParts[givenNameIndex]));
}
304322
}

VndbCharacterNames/Utils.cs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
using System.Text.Encodings.Web;
2+
using System.Text.Json;
3+
using System.Text.RegularExpressions;
4+
5+
namespace VndbCharacterNames;
6+
/// <summary>
/// Shared constants, JSON options and source-generated regular expressions.
/// </summary>
internal static partial class Utils
{
    /// <summary>Name type label used for surname entries.</summary>
    public const string SurnameNameType = "Surname";

    /// <summary>Fallback name type label.</summary>
    public const string OtherNameType = "other";

    /// <summary>Matches input made up entirely of characters in U+30A0–U+30FF (one or more).</summary>
    [GeneratedRegex(@"^[\u30A0-\u30FF]+$", RegexOptions.CultureInvariant)]
    public static partial Regex KatakanaRegex { get; }

    /// <summary>
    /// Matches a "Full name:" line at the start of the input, plus a directly following "Sex:" line when present.
    /// </summary>
    [GeneratedRegex(@"^Full name:.*\n((Sex:.*\n)|)", RegexOptions.CultureInvariant)]
    public static partial Regex FullNameAndSexRegex { get; }

    /// <summary>
    /// Matches one character from the listed BMP ranges (including U+4E00–U+9FFF and U+FF00–U+FFEF)
    /// or one of the listed supplementary-plane ranges written as surrogate pairs.
    /// </summary>
    [GeneratedRegex(@"[\u00D7\u2000-\u206F\u25A0-\u25FF\u2E80-\u319F\u31C0-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF\uFE30-\uFE4F\uFF00-\uFFEF]|\uD82C[\uDC00-\uDD6F]|\uD83C[\uDE00-\uDEFF]|\uD840[\uDC00-\uDFFF]|[\uD841-\uD868][\uDC00-\uDFFF]|\uD869[\uDC00-\uDEDF]|\uD869[\uDF00-\uDFFF]|[\uD86A-\uD87A][\uDC00-\uDFFF]|\uD87B[\uDC00-\uDE5F]|\uD87E[\uDC00-\uDE1F]|\uD880[\uDC00-\uDFFF]|[\uD881-\uD887][\uDC00-\uDFFF]|\uD888[\uDC00-\uDFAF]", RegexOptions.CultureInvariant)]
    public static partial Regex JapaneseRegex { get; }

    /// <summary>Matches one character in U+0000–U+024F, U+1E00–U+1EFF or U+2C60–U+2C7F.</summary>
    [GeneratedRegex(@"[\u0000-\u024F\u1E00-\u1EFF\u2C60-\u2C7F]", RegexOptions.CultureInvariant)]
    public static partial Regex LatinRegex { get; }

    /// <summary>
    /// Matches a run of non-whitespace characters followed by a comma-free parenthesised group,
    /// capturing the run and the parenthesised text. Presumably used for "Name (Name in romaji)" alias pairs.
    /// </summary>
    [GeneratedRegex(@"(\S+)\s*\(([^,]+?)\)", RegexOptions.CultureInvariant)]
    public static partial Regex NameInParentheses { get; }

    /// <summary>
    /// JSON serializer options: relaxed escaping, nullable annotations and required constructor parameters respected.
    /// </summary>
    public static readonly JsonSerializerOptions Jso = new JsonSerializerOptions
    {
        Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
        RespectRequiredConstructorParameters = true,
        RespectNullableAnnotations = true
    };
}

0 commit comments

Comments
 (0)