From a4d17c1395c5942e96e980fdda1f1489874f59d2 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Sat, 17 Aug 2024 20:05:33 +0200 Subject: [PATCH] feat(ltData): fetch and extract structured data on Commons --- app/api/ltData.ts | 46 ++++++++++++++++++++++++--- app/model/index.ts | 11 +++++++ app/model/mediainfo.ts | 72 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 app/model/mediainfo.ts diff --git a/app/api/ltData.ts b/app/api/ltData.ts index 70bc165..17e6321 100644 --- a/app/api/ltData.ts +++ b/app/api/ltData.ts @@ -1,7 +1,8 @@ import deepmerge from 'deepmerge'; import getFilePath from 'wikimedia-commons-file-path'; -import {CommonsFile, CommonsTitle, LatLng} from '../model'; +import {CommonsFile, CommonsTitle, LatLng, WikidataProperty} from '../model'; +import {MediaInfo, Statement} from '../model/mediainfo.ts'; export const API_URL = 'https://commons.wikimedia.org/w/api.php'; const NS_FILE = 6; @@ -83,6 +84,7 @@ interface Revision { export interface Slots { main: MainSlot; + mediainfo: MainSlot; } export interface MainSlot { @@ -148,12 +150,13 @@ export interface FileDetails { author: string; timestamp: string; url?: string; - objectLocation: LatLng; + coordinates?: LatLng; + objectLocation?: LatLng; } export async function getFileDetails( pageid: number, - prop = 'categories|imageinfo|revisions', + prop = 'categories|imageinfo|revisions|wbentityusage', iiprop = 'url|extmetadata' ): Promise { const params = { @@ -163,7 +166,7 @@ export async function getFileDetails( iiextmetadatafilter: 'ImageDescription|Artist|DateTimeOriginal', iiextmetadatalanguage: document.body.parentElement!.lang, ...(prop.includes('categories') ? {clshow: '!hidden'} : {}), - ...(prop.includes('revisions') ? {rvslots: 'main', rvprop: 'content'} : {}) + ...(prop.includes('revisions') ? {rvslots: '*', rvprop: 'content'} : {}) }; const data = await $query>(params); const page: DetailsPage | undefined = data?.query?.pages?.[pageid]; @@ -177,9 +180,42 @@ export async function getFileDetails( author: extmetadata?.Artist?.value, timestamp: extmetadata?.DateTimeOriginal?.value, ...(iiprop.includes('url') ? {url: page?.imageinfo[0]?.descriptionurl} : {}), - objectLocation: extractObjectLocation(page) + objectLocation: extractObjectLocation(page), + ...extractMediaInfo(page) }; + function extractMediaInfo(page: DetailsPage): Partial { + const json = page?.revisions?.[0]?.slots?.mediainfo['*']; + if (!json) return {}; + const mediainfo: MediaInfo = JSON.parse(json); + const coordinates = extractMediaInfoLocation( + 'Location', + mediainfo.statements[WikidataProperty['Location']]?.[0] + ); + const objectLocation = extractMediaInfoLocation( + 'Object location', + mediainfo.statements[WikidataProperty['Object location']]?.[0] + ); + return { + ...(coordinates ? {coordinates} : {}), + ...(objectLocation ? {objectLocation} : {}) + }; + } + + function extractMediaInfoLocation( + type: LatLng['type'], + statement: Statement | undefined + ): LatLng | undefined { + if (statement?.mainsnak.datavalue?.type !== 'globecoordinate') { + return; + } + return new LatLng( + type, + statement?.mainsnak.datavalue.value.latitude, + statement?.mainsnak.datavalue.value.longitude + ); + } + function extractObjectLocation(page: DetailsPage) { try { const wikitext: string = page?.revisions?.[0]?.slots?.main['*'] || ''; diff --git a/app/model/index.ts b/app/model/index.ts index 614fc59..8e3cfa3 100644 --- a/app/model/index.ts +++ b/app/model/index.ts @@ -1,4 +1,5 @@ import {LatLng} from './LatLng'; + export {LatLng} from './LatLng'; export interface CommonsFile { @@ -7,9 +8,19 @@ export interface CommonsFile { url: string; coordinates: LatLng; objectLocation: LatLng; + imageUrl(width?: number): string; } export type CommonsTitle = string; export type User = string; + +export const WikidataProperty = { + // coordinate location (P625) + '*': 'P625', + // coordinates of the point of view (P1259) + Location: 'P1259', + // coordinates of depicted place (P9149) + 'Object location': 'P9149' +}; diff --git a/app/model/mediainfo.ts b/app/model/mediainfo.ts new file mode 100644 index 0000000..09d0dc5 --- /dev/null +++ b/app/model/mediainfo.ts @@ -0,0 +1,72 @@ +export interface MediaInfo { + type: 'mediainfo'; + id: string; + labels: unknown[]; + descriptions: unknown[]; + statements: Record; +} + +export interface Statement { + type: 'statement'; + mainsnak: Mainsnak; + id: string; + rank: string; + qualifiers: Record; +} + +export interface Mainsnak { + snaktype: 'value' | 'somevalue'; + property: string; + hash: string; + datavalue?: Datavalue; +} + +export type Datavalue = + | { + type: 'string'; + value: string; + } + | { + type: 'wikibase-entityid'; + value: EntityID; + } + | { + type: 'globecoordinate'; + value: GlobeCoordinate; + } + | { + type: 'quantity'; + value: Quanity; + } + | { + type: 'time'; + value: TimeValue; + }; + +export interface EntityID { + 'entity-type': string; + 'numeric-id': number; + id: string; +} + +export interface Quanity { + amount: string; + unit: string; +} + +export interface TimeValue { + time: string; + timezone: number; + before: number; + after: number; + precision: number; + calendarmodel: string; +} + +export interface GlobeCoordinate { + latitude: number; + longitude: number; + altitude: number | null; + precision: number; + globe: 'http://www.wikidata.org/entity/Q2'; +}