diff --git a/src/backend/tests/listenbrainz/listenbrainz.test.ts b/src/backend/tests/listenbrainz/listenbrainz.test.ts index 7f702421..e735b218 100644 --- a/src/backend/tests/listenbrainz/listenbrainz.test.ts +++ b/src/backend/tests/listenbrainz/listenbrainz.test.ts @@ -9,18 +9,18 @@ import { UpstreamError } from "../../common/errors/UpstreamError.js"; import { ListenbrainzApiClient, ListenResponse } from "../../common/vendor/ListenbrainzApiClient.js"; import { ExpectedResults } from "../utils/interfaces.js"; import { withRequestInterception } from "../utils/networking.js"; -import artistWithProperJoiner from './correctlyMapped/artistProperHasJoinerInName.json'; +import artistWithProperJoiner from './correctlyMapped/artistProperHasJoinerInName.json' with { type: "json" }; // correct mappings -import multiArtistInArtistName from './correctlyMapped/multiArtistInArtistName.json'; -import multiArtistsInTrackName from './correctlyMapped/multiArtistInTrackName.json'; -import multiMappedArtistsWithSingleUserArtist from './correctlyMapped/multiArtistMappingWithSingleRecordedArtist.json'; -import noArtistMapping from './correctlyMapped/noArtistMapping.json'; -import normalizedValues from './correctlyMapped/normalizedName.json'; -import slightlyDifferentNames from './correctlyMapped/trackNameSlightlyDifferent.json'; +import multiArtistInArtistName from './correctlyMapped/multiArtistInArtistName.json' with { type: "json" }; +import multiArtistsInTrackName from './correctlyMapped/multiArtistInTrackName.json' with { type: "json" }; +import multiMappedArtistsWithSingleUserArtist from './correctlyMapped/multiArtistMappingWithSingleRecordedArtist.json' with { type: "json" }; +import noArtistMapping from './correctlyMapped/noArtistMapping.json' with { type: "json" }; +import normalizedValues from './correctlyMapped/normalizedName.json' with { type: "json" }; +import slightlyDifferentNames from './correctlyMapped/trackNameSlightlyDifferent.json' with { type: "json" }; // incorrect mappings -import incorrectMultiArtistsTrackName from './incorrectlyMapped/multiArtistsInTrackName.json'; -import veryWrong from './incorrectlyMapped/veryWrong.json'; +import incorrectMultiArtistsTrackName from './incorrectlyMapped/multiArtistsInTrackName.json' with { type: "json" }; +import veryWrong from './incorrectlyMapped/veryWrong.json' with { type: "json" }; interface LZTestFixture { data: ListenResponse diff --git a/src/backend/tests/plays/playParsing.test.ts b/src/backend/tests/plays/playParsing.test.ts new file mode 100644 index 00000000..39d5dd19 --- /dev/null +++ b/src/backend/tests/plays/playParsing.test.ts @@ -0,0 +1,114 @@ +import { loggerTest, loggerDebug, childLogger } from "@foxxmd/logging"; +import chai, { assert, expect } from 'chai'; +import asPromised from 'chai-as-promised'; +import { after, before, describe, it } from 'mocha'; + +import { asPlays, generateArtistsStr, generatePlay, normalizePlays } from "../utils/PlayTestUtils.js"; +import { parseArtistCredits, parseContextAwareStringList, parseCredits } from "../../utils/StringUtils.js"; + +describe('Parsing Artists from String', function() { + + it('Parses Artists from an Artist-like string', function () { + for(const i of Array(20)) { + const [str, primaries, secondaries] = generateArtistsStr(); + const credits = parseArtistCredits(str); + const allArtists = primaries.concat(secondaries); + const parsed = [credits.primary].concat(credits.secondary ?? []) + expect(primaries.concat(secondaries),` +'${str}' +Expected => ${allArtists.join(' || ')} +Found => ${parsed.join(' || ')}`) + +.eql(parsed) + } + }); + + it('Parses & as "local" joiner when other delimiters present', function () { + + const data = [{ + str: `Melendi \\ Ryan Lewis \\ The Righteous Brothers (featuring Joan Jett & The Blackhearts \\ Robin Schulz)`, + expected: ['Melendi', 'Ryan Lewis', 'The Righteous Brothers', 'Joan Jett & The Blackhearts', 'Robin Schulz'] + }, { + str: `Gigi D'Agostino \\ YOASOBI (vs Sam Hunt, Lisa Loeb & Booba)`, + expected: [`Gigi D'Agostino`, 'YOASOBI', 'Sam Hunt', 'Lisa Loeb', 'Booba'] + }]; + + for(const d of data) { + const credits = parseArtistCredits(d.str); + const parsed = [credits.primary].concat(credits.secondary ?? []) + expect(d.expected).eql(parsed) + } + + }); + + it('Only parses & as "global" joiner when no other delimiters present', function () { + + const data = [{ + str: `Melendi & Ryan Lewis & The Righteous Brothers (featuring The Blackhearts \\ Robin Schulz)`, + expected: ['Melendi', 'Ryan Lewis', 'The Righteous Brothers', 'The Blackhearts', 'Robin Schulz'] + }]; + + for(const d of data) { + const credits = parseArtistCredits(d.str); + const parsed = [credits.primary].concat(credits.secondary ?? []) + expect(d.expected).eql(parsed) + } + }); + + it('Parses secondary free regex', function () { + + const data = [{ + str: `Diddy & Grand Funk Railroad feat. Daya & (G)I-DLE`, + expected: ['Diddy', 'Grand Funk Railroad', 'Daya', '(G)I-DLE'] + }]; + + for(const d of data) { + const credits = parseArtistCredits(d.str); + const parsed = [credits.primary].concat(credits.secondary ?? []) + expect(d.expected).eql(parsed) + } + }); + + it('Parses singlar Artist with wrapped vs multiple', function () { + const [str, primaries, secondaries] = generateArtistsStr({primary: 1, secondary: {num: 2, ft: 'vs', joiner: '/', ftWrap: true}}); + const credits = parseArtistCredits(str); + const moreCredits = parseCredits(str); + expect(true).eq(true); + }); + + describe('When joiner is known', function () { + + it('Parses many primary artists', function () { + for(const i of Array(10)) { + const [str, primaries, secondaries] = generateArtistsStr({primary: {max: 3, joiner: '/'}, secondary: 0}); + const credits = parseArtistCredits(str, ['/']); + const allArtists = primaries.concat(secondaries); + const parsed = [credits.primary].concat(credits.secondary ?? []) + expect(primaries.concat(secondaries),` +'${str}' +Expected => ${allArtists.join(' || ')} +Found => ${parsed.join(' || ')}`) + .eql(parsed) + } + }); + + it('Parses many secondary artists', function () { + // fails on -- Peso Pluma / Lil Baby / R. Kelly (featuring TOMORROW X TOGETHER / AC/DC / DaVido) + for(const i of Array(10)) { + const [str, primaries, secondaries] = generateArtistsStr({primary: {max: 3, joiner: '/'}, secondary: {joiner: '/', finalJoiner: false}}); + const credits = parseArtistCredits(str, ['/']); + const allArtists = primaries.concat(secondaries); + const parsed = [credits.primary].concat(credits.secondary ?? []) + expect(primaries.concat(secondaries),` +'${str}' +Expected => ${allArtists.join(' || ')} +Found => ${parsed.join(' || ')}`) + .eql(parsed) + } + }); + + + }); + + +}); \ No newline at end of file diff --git a/src/backend/tests/utils/PlayTestUtils.ts b/src/backend/tests/utils/PlayTestUtils.ts index cf9f4eea..d00a65ed 100644 --- a/src/backend/tests/utils/PlayTestUtils.ts +++ b/src/backend/tests/utils/PlayTestUtils.ts @@ -5,9 +5,11 @@ import isBetween from "dayjs/plugin/isBetween.js"; import relativeTime from "dayjs/plugin/relativeTime.js"; import timezone from "dayjs/plugin/timezone.js"; import utc from "dayjs/plugin/utc.js"; -import { JsonPlayObject, ObjectPlayData, PlayMeta, PlayObject } from "../../../core/Atomic.js"; +import { FEAT, JOINERS, JOINERS_FINAL, JsonPlayObject, ObjectPlayData, PlayMeta, PlayObject } from "../../../core/Atomic.js"; import { sortByNewestPlayDate } from "../../utils.js"; import { NO_DEVICE, NO_USER, PlayerStateDataMaybePlay, PlayPlatformId, ReportedPlayerStatus } from '../../common/infrastructure/Atomic.js'; +import { arrayListAnd } from '../../../core/StringUtils.js'; +import { findDelimiters } from '../../utils/StringUtils.js'; dayjs.extend(utc) dayjs.extend(isBetween); @@ -170,3 +172,115 @@ export const generatePlay = (data: ObjectPlayData = {}, meta: PlayMeta = {}): Pl export const generatePlays = (numberOfPlays: number, data: ObjectPlayData = {}, meta: PlayMeta = {}): PlayObject[] => { return Array.from(Array(numberOfPlays), () => generatePlay(data, meta)); } + +export const generateArtist = () => faker.music.artist; + +export const generateArtists = (num?: number, max: number = 3, opts: {ambiguousJoinedNames?: boolean, trailingAmpersand?: boolean} = {}) => { + if(num === 0 || max === 0) { + return []; + } + let artists = faker.helpers.multiple(faker.music.artist, {count: {min: num ?? 1, max: num ?? max}}); + + const { + trailingAmpersand = false, + ambiguousJoinedNames = false + } = opts; + + if(!trailingAmpersand) { + // its really hard to parse an artist name that contains an '&' when it comes at the end of a list + // because its ambigious if the list is joining the list with & or if & is part of the artist name + // so by default don't generate these (we test for specific scenarios in playParsing.test.ts) + while(artists[artists.length - 1].includes('&')) { + artists = artists.slice(0, artists.length - 1).concat(faker.music.artist()); + } + } + if(!ambiguousJoinedNames) { + artists = artists.map(x => { + let a = x; + let foundDelims = findDelimiters(a); + while(foundDelims !== undefined && foundDelims.length > 0 && !(foundDelims.length === 1 && foundDelims[0] === '&')) { + a = faker.music.artist(); + foundDelims = findDelimiters(a); + } + return a; + }); + } + return artists; +} + +export interface ArtistGenerateOptions { + num?: number + max?: number + joiner?: string + finalJoiner?: false | string + spacedJoiners?: boolean +} + +export interface SecondaryArtistGenerateOptions extends ArtistGenerateOptions { + ft?: string + ftWrap?: boolean +} + +export interface CompoundArtistGenerateOptions { + primary?: number | ArtistGenerateOptions + secondary?: number | SecondaryArtistGenerateOptions +} + +export const generateArtistsStr = (options: CompoundArtistGenerateOptions = {}): [string, string[], string[]] => { + + const {primary = {}, secondary = {}} = options; + + const primaryOpts: ArtistGenerateOptions = typeof primary === 'number' ? {num: primary} : primary; + const secondaryOpts: SecondaryArtistGenerateOptions = typeof secondary === 'number' ? {num: secondary} : secondary; + + const primaryArt = generateArtists(primaryOpts.num, primaryOpts.max) + const secondaryArt = generateArtists(secondaryOpts.num, secondaryOpts.max); + + + const joinerPrimary: string = primaryOpts.joiner ?? faker.helpers.arrayElement(JOINERS); + let finalJoinerPrimary: string = joinerPrimary; + if(primaryOpts.finalJoiner !== false) { + if(primaryOpts.finalJoiner === undefined) { + if(joinerPrimary === ',' && !primaryArt.some(x => x.includes('&'))) { + finalJoinerPrimary = faker.helpers.arrayElement(JOINERS_FINAL); + } + + } else { + finalJoinerPrimary = primaryOpts.finalJoiner; + } + } + + const primaryStr = arrayListAnd(primaryArt, joinerPrimary, finalJoinerPrimary, primaryOpts.spacedJoiners); + + if(secondaryArt.length === 0) { + return [primaryStr, primaryArt, []]; + } + + const joinerSecondary: string = secondaryOpts.joiner ?? faker.helpers.arrayElement(JOINERS); + let finalJoinerSecondary: string = joinerSecondary; + if(secondaryOpts.finalJoiner !== false) { + if(secondaryOpts.finalJoiner === undefined) { + if(joinerSecondary === ',' && !secondaryArt.some(x => x.includes('&'))) { + finalJoinerSecondary = faker.helpers.arrayElement(JOINERS_FINAL); + } + } else { + finalJoinerSecondary = secondaryOpts.finalJoiner; + } + } + + const secondaryStr = arrayListAnd(secondaryArt, joinerSecondary, finalJoinerSecondary, secondaryOpts.spacedJoiners); + const ft = secondaryOpts.ft ?? faker.helpers.arrayElement(FEAT); + let sec = `${ft} ${secondaryStr}`; + let wrap: boolean; + if(secondaryOpts.ftWrap !== undefined) { + wrap = secondaryOpts.ftWrap; + } else { + wrap = faker.datatype.boolean(); + } + if(wrap) { + sec = `(${sec})`; + } + const artistStr = `${primaryStr} ${sec}`; + + return [artistStr, primaryArt, secondaryArt]; +} \ No newline at end of file diff --git a/src/backend/utils/StringUtils.ts b/src/backend/utils/StringUtils.ts index 16bac70b..c311ea96 100644 --- a/src/backend/utils/StringUtils.ts +++ b/src/backend/utils/StringUtils.ts @@ -1,7 +1,7 @@ import { strategies, stringSameness, StringSamenessResult } from "@foxxmd/string-sameness"; import { PlayObject } from "../../core/Atomic.js"; import { asPlayerStateData, DELIMITERS, PlayerStateDataMaybePlay } from "../common/infrastructure/Atomic.js"; -import { genGroupIdStr, getPlatformIdFromData, parseRegexSingleOrFail } from "../utils.js"; +import { genGroupIdStr, getPlatformIdFromData, intersect, parseRegexSingleOrFail } from "../utils.js"; import { buildTrackString } from "../../core/StringUtils.js"; const {levenStrategy, diceStrategy} = strategies; @@ -61,7 +61,7 @@ export const SECONDARY_CAPTURED_REGEX = new RegExp(/[([]\s*(?ft\.?\W|fea * !!!! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ******* * * */ -export const SECONDARY_FREE_REGEX = new RegExp(/^\s*(?ft\.?\W|feat\.?\W|featuring|vs\.?\W)\s*(?(?:.+?(?= - |\s*[([]))|(?:.*))(?.*)/i); +export const SECONDARY_FREE_REGEX = new RegExp(/^\s*(?ft\.?\W|feat\.?\W|featuring|vs\.?\W)\s*(?(?:.+?(?= - |\s*[([].+[)\]]$))|(?:.*))(?.*)/i); const SECONDARY_REGEX_STRATS: RegExp[] = [SECONDARY_CAPTURED_REGEX, SECONDARY_FREE_REGEX]; @@ -116,7 +116,7 @@ export const parseCredits = (str: string, delimiters?: boolean | string[]): Play for(const strat of SECONDARY_REGEX_STRATS) { const secCredits = parseRegexSingleOrFail(strat, results.named.secondary); if(secCredits !== undefined) { - secondary = parseStringList(secCredits.named.credits as string, delims) + secondary = parseContextAwareStringList(secCredits.named.credits as string, delims) suffix = secCredits.named.creditsSuffix; break; } @@ -148,7 +148,7 @@ export const parseArtistCredits = (str: string, delimiters?: boolean | string[]) if (withJoiner !== undefined) { // all this does is make sure and "ft" or parenthesis/brackets are separated -- // it doesn't also separate primary artists so do that now - const primaries = parseStringList(withJoiner.primary, delims); + const primaries = parseContextAwareStringList(withJoiner.primary, delims); if (primaries.length > 1) { return { primary: primaries[0], @@ -182,6 +182,50 @@ export const parseStringList = (str: string, delimiters: string[] = [',', '&', ' return explodedStrings.flat(1); }, [str]).map(x => x.trim()); } +export const parseContextAwareStringList = (str: string, delimiters: string[] = [',', '/', '\\'], opts: {ignoreGlobalAmpersand?: boolean} = {}): string[] => { + if (delimiters.length === 0) { + return [str]; + } + // bypass tokens using slashes without spaces + const cleanStr = bypassJoiners(str); + const nonAmpersandDelims = delimiters.some(x => cleanStr.includes(x)); + const shouldIgnoreGlobalAmpersand = opts.ignoreGlobalAmpersand ?? nonAmpersandDelims; + + let awareList: string[] = []; + + const list = parseStringList(cleanStr, nonAmpersandDelims === false && shouldIgnoreGlobalAmpersand === false ? ['&'] : delimiters); + if(shouldIgnoreGlobalAmpersand && list.length > 1 && list[list.length - 1].includes('&') && nonAmpersandDelims) { //&& !list[list.length - 1].includes('& the') + awareList = list.slice(0, list.length - 1).concat(list[list.length - 1].split('&') ); + } else { + awareList = list; + } + return awareList.map(x =>rejoinBypassed(x.trim())); +} + +const bypassJoinerMap = [ + { + rejoin: str => str.replaceAll(/(.*?\S)(\^\^\^)(\S.*?)/g, '$1/$3'), + bypass: str => str.replaceAll(/(.*?\S)(\/)(\S.*?)/g, '$1^^^$3') + }, + { + rejoin: str => str.replaceAll(/(.*)(###)(.*)/g, '$1\\$3'), + bypass: str => str.replaceAll(/(.*\S)(\\)(.*\S)/g, '$1###$3') + } +]; +export const bypassJoiners = (str: string): string => { + let bypassed: string = str; + for(const b of bypassJoinerMap) { + bypassed = b.bypass(bypassed) + } + return bypassed; +} +export const rejoinBypassed = (str: string): string => { + let bypassed: string = str; + for(const b of bypassJoinerMap) { + bypassed = b.rejoin(bypassed) + } + return bypassed; +} export const containsDelimiters = (str: string) => null !== str.match(/[,&/\\]+/i) export const findDelimiters = (str: string) => { const found: string[] = []; diff --git a/src/core/Atomic.ts b/src/core/Atomic.ts index 44ae219e..a4c85b43 100644 --- a/src/core/Atomic.ts +++ b/src/core/Atomic.ts @@ -275,4 +275,13 @@ export interface URLData { url: URL normal: string port: number -} \ No newline at end of file +} + +export type Joiner = ',' | '&' | '/' | '\\' | string; +export const JOINERS: Joiner[] = [',','/','\\']; + +export type FinalJoiners = '&'; +export const JOINERS_FINAL: FinalJoiners[] = ['&']; + +export type Feat = 'ft' | 'feat' | 'vs' | 'ft.' | 'feat.' | 'vs.' | 'featuring' +export const FEAT: Feat[] = ['ft','feat','vs','ft.','feat.','vs.','featuring']; \ No newline at end of file diff --git a/src/core/StringUtils.ts b/src/core/StringUtils.ts index ca958481..1aeeb233 100644 --- a/src/core/StringUtils.ts +++ b/src/core/StringUtils.ts @@ -196,3 +196,29 @@ export const combinePartsToString = (parts: any[], glue: string = '-'): string | } return undefined; } + +export const arrayListOxfordAnd = (list: string[], joiner: string, finalJoiner: string, spaced: boolean = true): string => { + if(list.length === 1) { + return list[0]; + } + const start = list.slice(0, list.length - 1); + const end = list.slice(list.length - 1); + + const joinerProper = joiner === ',' ? ', ' : (spaced ? ` ${joiner} ` : joiner); + const finalProper = spaced ? ` ${finalJoiner} ` : finalJoiner; + + return [start.join(joinerProper), end].join(joiner === ',' && spaced ? `,${finalProper}` : finalProper); +} + +export const arrayListAnd = (list: string[], joiner: string, finalJoiner: string, spaced: boolean = true): string => { + if(list.length === 1) { + return list[0]; + } + const start = list.slice(0, list.length - 1); + const end = list.slice(list.length - 1); + + const joinerProper = joiner === ',' ? ', ' : (spaced ? ` ${joiner} ` : joiner); + const finalProper = spaced ? ` ${finalJoiner} ` : finalJoiner; + + return [start.join(joinerProper), end].join(finalProper); +} \ No newline at end of file