Skip to content

Commit bbc26ce

Browse files
authored
[FINNA-3498] MARC: Add support for ignoring authors by relator/role. (#177)
1 parent b5a4812 commit bbc26ce

File tree

4 files changed

+263
-21
lines changed

4 files changed

+263
-21
lines changed

conf/recordmanager.ini.sample

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,11 @@ disable_proxy_hosts[] = "::1"
260260
; MARCXML - MARCXML
261261
; ISO2709 - ISO 2709 with MARCXML for too long records
262262
;solr_serialization = JSON
263-
; The following section can be used to override the default linking id fields.
263+
; The following setting can be used to override the default linking id fields.
264264
; Fields included will be transformed into either id or datasource.id format if applicable
265265
;linking_id_fields = "760,762,765,767,770,772,773,774,775,776,777,780,785,786,787"
266+
; Authors with any of the following relators/roles (comma-separated, no spaces) are left out of the author fields entirely.
267+
;hidden_author_relators = "publisher,distributor"
266268

267269
[ForwardRecord]
268270
; The following settings can be used to override the default relator codes that

src/RecordManager/Base/Record/Marc.php

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* PHP version 8
77
*
8-
* Copyright (C) The National Library of Finland 2011-2023.
8+
* Copyright (C) The National Library of Finland 2011-2025.
99
*
1010
* This program is free software; you can redistribute it and/or modify
1111
* it under the terms of the GNU General Public License version 2,
@@ -70,6 +70,13 @@ class Marc extends AbstractRecord
7070
'adp', 'aut', 'cmp', 'cre', 'dub', 'inv',
7171
];
7272

73+
/**
74+
* Author relators whose fields are skipped when collecting authors
75+
*
76+
* @var array
77+
*/
78+
protected $hiddenAuthorRelators = [];
79+
7380
/**
7481
* Strings in field 300 that signify that the work is illustrated.
7582
*
@@ -214,19 +221,14 @@ public function __construct(
214221
$this->createRecordCallback = $recordCallback;
215222
$this->formatCalculator = $formatCalculator;
216223

217-
if (isset($config['MarcRecord']['primary_author_relators'])) {
218-
$this->primaryAuthorRelators = explode(
219-
',',
220-
$config['MarcRecord']['primary_author_relators']
221-
);
224+
if ($relators = $config['MarcRecord']['primary_author_relators'] ?? null) {
225+
$this->primaryAuthorRelators = explode(',', $relators);
222226
}
223-
if (isset($config['MarcRecord']['linking_id_fields'])) {
224-
$this->linkingIdFields = array_filter(
225-
explode(
226-
',',
227-
$config['MarcRecord']['linking_id_fields']
228-
)
229-
);
227+
if ($relators = $config['MarcRecord']['hidden_author_relators'] ?? null) {
228+
$this->hiddenAuthorRelators = explode(',', $relators);
229+
}
230+
if (null !== ($fields = $config['MarcRecord']['linking_id_fields'] ?? null)) {
231+
$this->linkingIdFields = array_filter(explode(',', $fields));
230232
}
231233
}
232234

@@ -1210,6 +1212,14 @@ public function getWorkIdentificationData()
12101212
foreach ($authorFields as $tag => $subfields) {
12111213
$tag = (string)$tag;
12121214
foreach ($this->record->getFields($tag) as $field) {
1215+
$fieldRelators = $this->normalizeRelators(
1216+
$this->getSubfieldsArray($field, ['4', 'e'])
1217+
);
1218+
1219+
if ($this->hiddenAuthorRelators && array_intersect($this->hiddenAuthorRelators, $fieldRelators)) {
1220+
continue;
1221+
}
1222+
12131223
// Check for analytical entries to be processed later:
12141224
if (
12151225
in_array($tag, ['700', '710', '711'])
@@ -2270,6 +2280,10 @@ protected function getAuthorsByRelator(
22702280
$this->getSubfieldsArray($field, ['4', 'e'])
22712281
);
22722282

2283+
if ($this->hiddenAuthorRelators && array_intersect($this->hiddenAuthorRelators, $fieldRelators)) {
2284+
continue;
2285+
}
2286+
22732287
$match = empty($relators);
22742288
if (!$match) {
22752289
$match = empty($fieldRelators)

tests/RecordManagerTest/Base/Record/MarcTest.php

Lines changed: 229 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* PHP version 8
77
*
8-
* Copyright (C) The National Library of Finland 2020-2023.
8+
* Copyright (C) The National Library of Finland 2020-2025.
99
*
1010
* This program is free software; you can redistribute it and/or modify
1111
* it under the terms of the GNU General Public License version 2,
@@ -98,6 +98,8 @@ public function testMarc1()
9898
'tutkimus',
9999
'Remes, Pirkko',
100100
'Sajavaara, Paula',
101+
'Example Distributor',
102+
'distributor',
101103
'urn:doi:doi2',
102104
'urn:doif:not-doi',
103105
'http://doi.org/doi%3a3',
@@ -130,8 +132,12 @@ public function testMarc1()
130132
'',
131133
'',
132134
],
133-
'author_corporate' => [],
134-
'author_corporate_role' => [],
135+
'author_corporate' => [
136+
'Example Distributor',
137+
],
138+
'author_corporate_role' => [
139+
'distributor',
140+
],
135141
'author_additional' => [],
136142
'title' => 'Tutki ja kirjoita',
137143
'title_sub' => '',
@@ -232,6 +238,10 @@ public function testMarc1()
232238
'type' => 'author',
233239
'value' => 'Sajavaara, Paula.',
234240
],
241+
[
242+
'type' => 'author',
243+
'value' => 'Example Distributor',
244+
],
235245
],
236246
'authorsAltScript' => [],
237247
'titles' => [
@@ -803,13 +813,13 @@ public static function getTestMarcLinkingData(): Generator
803813
*
804814
* @param array $firstExpects First expected links
805815
* @param array $secondExpects Second expected links
806-
* @param array $conf Main configuration
816+
* @param array $config Main configuration
807817
* @param int $searchCount Record db search expect
808818
*
809819
* @return void
810820
* @dataProvider getTestMarcLinkingData
811821
*/
812-
public function testMarcLinking(array $firstExpects, array $secondExpects, array $conf, int $searchCount = 5)
822+
public function testMarcLinking(array $firstExpects, array $secondExpects, array $config, int $searchCount = 5)
813823
{
814824
$db = $this->createMock(Database::class);
815825
$map = [
@@ -855,7 +865,7 @@ public function testMarcLinking(array $firstExpects, array $secondExpects, array
855865
->method('findRecord')
856866
->will($this->returnValueMap($map));
857867

858-
$record = $this->createMarcRecord(Marc::class, 'marc_links.xml', config: $conf);
868+
$record = $this->createMarcRecord(Marc::class, 'marc_links.xml', config: $config);
859869
$record->toSolrArray($db);
860870
$marc = new \VuFind\Marc\MarcReader($record->serialize());
861871
$marc776 = $marc->getFields('776');
@@ -873,7 +883,7 @@ public function testMarcLinking(array $firstExpects, array $secondExpects, array
873883
'driverParams' => ['003InLinkingID=true'],
874884
],
875885
],
876-
config: $conf
886+
config: $config
877887
);
878888
$record->toSolrArray($db);
879889
$marc = new \VuFind\Marc\MarcReader($record->serialize());
@@ -992,4 +1002,216 @@ public function testGetWorkIdentificationData()
9921002
];
9931003
$this->compareArray($expected, $keys, 'getWorkIdentificationData');
9941004
}
1005+
1006+
/**
1007+
* Test that authors with a hidden relator are left out of author fields and work identification data
1008+
*
1009+
* @return void
1010+
*/
1011+
public function testHiddenRelator()
1012+
{
1013+
$config = [
1014+
'MarcRecord' => [
1015+
'hidden_author_relators' => 'distributor',
1016+
],
1017+
];
1018+
$record = $this->createMarcRecord(Marc::class, 'marc1.xml', config: $config);
1019+
$fields = $record->toSolrArray();
1020+
unset($fields['fullrecord']);
1021+
1022+
$expected = [
1023+
'record_format' => 'marc',
1024+
'building' => [
1025+
'150',
1026+
'150',
1027+
],
1028+
'lccn' => '',
1029+
'ctrlnum' => [
1030+
'FCC005246184',
1031+
'378890',
1032+
'401416',
1033+
],
1034+
'allfields' => [
1035+
'Hirsjärvi, Sirkka',
1036+
'Tutki ja kirjoita',
1037+
'Sirkka Hirsjärvi, Pirkko Remes, Paula Sajavaara',
1038+
'17. uud. p.',
1039+
'Helsinki',
1040+
'Tammi',
1041+
'2345 [2013?]',
1042+
'teksti',
1043+
'txt',
1044+
'rdacontent',
1045+
'käytettävissä ilman laitetta',
1046+
'n',
1047+
'rdamedia',
1048+
'nide',
1049+
'nc',
1050+
'rdacarrier',
1051+
'18. p. 2013',
1052+
'Summary field',
1053+
'oppaat',
1054+
'ft: kirjoittaminen',
1055+
'apurahat',
1056+
'tutkimusrahoitus',
1057+
'tutkimuspolitiikka',
1058+
'opinnäytteet',
1059+
'tiedonhaku',
1060+
'kielioppaat',
1061+
'tutkimustyö',
1062+
'tutkimus',
1063+
'Remes, Pirkko',
1064+
'Sajavaara, Paula',
1065+
'Example Distributor',
1066+
'distributor',
1067+
'urn:doi:doi2',
1068+
'urn:doif:not-doi',
1069+
'http://doi.org/doi%3a3',
1070+
'https://dx.doi.org/doi4',
1071+
],
1072+
'language' => [
1073+
'fin',
1074+
'fin',
1075+
],
1076+
'format' => ['Book'],
1077+
'author' => [
1078+
'Hirsjärvi, Sirkka',
1079+
],
1080+
'author_variant' => [
1081+
's h sh',
1082+
],
1083+
'author_role' => [
1084+
'',
1085+
],
1086+
'author_sort' => 'Hirsjärvi, Sirkka',
1087+
'author2' => [
1088+
'Remes, Pirkko',
1089+
'Sajavaara, Paula',
1090+
],
1091+
'author2_variant' => [
1092+
'p r pr',
1093+
'p s ps',
1094+
],
1095+
'author2_role' => [
1096+
'',
1097+
'',
1098+
],
1099+
'author_corporate' => [],
1100+
'author_corporate_role' => [],
1101+
'author_additional' => [],
1102+
'title' => 'Tutki ja kirjoita',
1103+
'title_sub' => '',
1104+
'title_short' => 'Tutki ja kirjoita',
1105+
'title_full' => 'Tutki ja kirjoita / Sirkka Hirsjärvi, Pirkko Remes,'
1106+
. ' Paula Sajavaara',
1107+
'title_alt' => [],
1108+
'title_old' => [],
1109+
'title_new' => [],
1110+
'title_sort' => 'tutki ja kirjoita sirkka hirsjärvi pirkko remes'
1111+
. ' paula sajavaara',
1112+
'series' => [],
1113+
'publisher' => [
1114+
'Tammi',
1115+
],
1116+
'publishDateSort' => '2013',
1117+
'publishDate' => [
1118+
'2013',
1119+
],
1120+
'physical' => [],
1121+
'dateSpan' => [],
1122+
'edition' => '17. uud. p.',
1123+
'contents' => [],
1124+
'isbn' => [
1125+
'9789513148362',
1126+
],
1127+
'issn' => [],
1128+
'doi_str_mv' => [
1129+
'doi1',
1130+
'doi2',
1131+
'doi:3',
1132+
'doi4',
1133+
],
1134+
'callnumber-first' => 'QC861.2',
1135+
'callnumber-raw' => [
1136+
'38.04',
1137+
'38.03',
1138+
'QC861.2 .B36',
1139+
],
1140+
'callnumber-subject' => 'QC',
1141+
'callnumber-label' => 'QC861',
1142+
'callnumber-sort' => 'QC 3861.2 B236',
1143+
'topic' => [
1144+
'oppaat',
1145+
'ft: kirjoittaminen',
1146+
'apurahat',
1147+
'tutkimusrahoitus',
1148+
'tutkimuspolitiikka',
1149+
'opinnäytteet',
1150+
'tiedonhaku',
1151+
'kielioppaat',
1152+
'tutkimustyö',
1153+
'tutkimus',
1154+
],
1155+
'genre' => [],
1156+
'geographic' => [],
1157+
'era' => [],
1158+
'topic_facet' => [
1159+
'oppaat',
1160+
'ft: kirjoittaminen',
1161+
'apurahat',
1162+
'tutkimusrahoitus',
1163+
'tutkimuspolitiikka',
1164+
'opinnäytteet',
1165+
'tiedonhaku',
1166+
'kielioppaat',
1167+
'tutkimustyö',
1168+
'tutkimus',
1169+
],
1170+
'genre_facet' => [],
1171+
'geographic_facet' => [],
1172+
'era_facet' => [],
1173+
'url' => [
1174+
'urn:doi:doi2',
1175+
'urn:doif:not-doi',
1176+
'http://doi.org/doi%3a3',
1177+
'https://dx.doi.org/doi4',
1178+
],
1179+
'illustrated' => 'Not Illustrated',
1180+
];
1181+
1182+
$this->compareArray($expected, $fields, 'toSolrArray');
1183+
1184+
$keys = $record->getWorkIdentificationData();
1185+
1186+
$expected = [
1187+
[
1188+
'authors' => [
1189+
[
1190+
'type' => 'author',
1191+
'value' => 'Hirsjärvi, Sirkka.',
1192+
],
1193+
[
1194+
'type' => 'author',
1195+
'value' => 'Remes, Pirkko.',
1196+
],
1197+
[
1198+
'type' => 'author',
1199+
'value' => 'Sajavaara, Paula.',
1200+
],
1201+
],
1202+
'authorsAltScript' => [],
1203+
'titles' => [
1204+
[
1205+
'type' => 'title',
1206+
'value' => 'Tutki ja kirjoita /',
1207+
],
1208+
],
1209+
'titlesAltScript' => [],
1210+
],
1211+
];
1212+
1213+
$this->compareArray($expected, $keys, 'getWorkIdentificationData');
1214+
1215+
$this->assertEquals(['(FOO)2345'], $record->getUniqueIDs());
1216+
}
9951217
}

tests/fixtures/Base/record/marc1.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@
125125
<subfield code="a">Sajavaara, Paula.</subfield>
126126
<subfield code="0">(TEST)3</subfield>
127127
</datafield>
128+
<datafield tag="710" ind1="1" ind2=" ">
129+
<subfield code="a">Example Distributor</subfield>
130+
<subfield code="e">distributor</subfield>
131+
</datafield>
128132
<datafield tag="852" ind1="8" ind2=" ">
129133
<subfield code="a">E</subfield>
130134
<subfield code="b">150</subfield>

0 commit comments

Comments
 (0)