From 38dc63b8c19008b46d61383ca8655dd4b8b48c4e Mon Sep 17 00:00:00 2001 From: Chris MacDonald <31731869+chrismacdonaldw@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:44:19 -0300 Subject: [PATCH 1/4] Refactor AssembleDate plugin to handle multiple single dates --- .../migrations/dgis_nodes.yml | 103 ++++++++++++++++-- src/Plugin/migrate/process/AssembleDate.php | 30 +++-- 2 files changed, 115 insertions(+), 18 deletions(-) diff --git a/modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml b/modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml index 1529ad8..daacb8b 100644 --- a/modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml +++ b/modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml @@ -55,6 +55,9 @@ destination: #translations: true validate: &validate true process: + _unspecified_term: + - plugin: default_value + default_value: 'Unspecified' _node_foxml_parsed: - plugin: dgi_migrate.load_entity source: fid @@ -236,6 +239,9 @@ process: - '@_sequence_number' - '@_constituent_sequence' - plugin: null_coalesce + # Prevent leading zeroes from crashing the migration. + - plugin: callback + callable: intval - plugin: skip_on_empty method: process _mods_xpath: @@ -1724,13 +1730,15 @@ process: default_value: genre <<: *generic_term_after - <<: *generic_term_extract - _resource_type: + _resource_type_query: - << : *base_mods_node query: 'mods:typeOfResource' - plugin: callback callable: iterator_to_array - plugin: skip_on_empty method: process + _resource_type: + - << : *_resource_type_query - plugin: multiple_values - plugin: dgi_migrate.sub_process process_values: true @@ -1741,8 +1749,9 @@ process: <<: *generic_term_after - <<: *generic_term_extract _unspecified_resource_type: - - plugin: default_value - default_value: Unspecified + - <<: *_resource_type_query + - plugin: get + source: '@_unspecified_term' - plugin: gate use_as_key: '@_resource_type' valid_keys: @@ -1998,7 +2007,7 @@ process: validate: *validate field_use_and_reproduction: - << : *base_mods_node - query: 'mods:accessCondition[@type="use and reproduction"]' + query: 'mods:accessCondition[@type="use and reproduction" or @type="Use and Reproduction"]' - plugin: callback callable: iterator_to_array - plugin: multiple_values @@ -2036,13 +2045,15 @@ process: default_value: degree_names <<: *generic_term_after - <<: *generic_term_extract - _degree_level: + _degree_level_query: - <<: *base_mods_node query: 'mods:extension/etd:degree/etd:level' - plugin: callback callable: iterator_to_array - plugin: skip_on_empty method: process + _degree_level: + - <<: *_degree_level_query - plugin: multiple_values - plugin: dgi_migrate.sub_process process_values: true @@ -2053,8 +2064,9 @@ process: <<: *generic_term_after - <<: *generic_term_extract _unspecified_degree_level: - - plugin: default_value - default_value: Unspecified + - <<: *_degree_level_query + - plugin: get + source: '@_unspecified_term' - plugin: gate use_as_key: '@_degree_level' valid_keys: @@ -2533,13 +2545,15 @@ process: - plugin: single_value - plugin: callback callable: array_filter - _use_license: + _use_license_query: - <<: *base_mods_node - query: 'mods:accessCondition[@type="use and reproduction"][@displayLabel="Creative Commons" or @displayLabe="Creative Commons"]' + query: 'mods:accessCondition[@type="use and reproduction" or @type="Use and Reproduction"][@displayLabel="Creative Commons" or @displayLabe="Creative Commons"]' - plugin: callback callable: iterator_to_array - plugin: skip_on_empty method: process + _use_license: + - <<: *_use_license_query - plugin: multiple_values - plugin: dgi_migrate.sub_process process_values: true @@ -2550,8 +2564,9 @@ process: <<: *generic_term_after - <<: *generic_term_extract _unspecified_use_license: - - plugin: default_value - default_value: Unspecified + - <<: *_use_license_query + - plugin: get + source: '@_unspecified_term' - plugin: gate use_as_key: '@_use_license' valid_keys: @@ -2592,6 +2607,72 @@ process: - '@_use_license' - '@_unspecified_use_license' - plugin: flatten + _rights_statement_query: + - <<: *base_mods_node + query: 'mods:accessCondition[@type="rights statement"]' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: process + _rights_statement: + - <<: *_rights_statement_query + - plugin: multiple_values + - plugin: dgi_migrate.sub_process + process_values: true + values: + _vid: + - plugin: default_value + default_value: rights_statements + <<: *generic_term_after + - <<: *generic_term_extract + - plugin: single_value + - plugin: callback + callable: array_filter + - plugin: null_coalesce + _unspecified_rights_statement: + - <<: *_rights_statement_query + - plugin: get + source: '@_unspecified_term' + - plugin: gate + use_as_key: '@_rights_statement' + valid_keys: + - null + key_direction: unlock + - plugin: dgi_migrate.sub_process + process_values: true + values: + _vid: + - plugin: default_value + default_value: rights_statements + _auth_value_uri: + - plugin: default_value + default_value: '' + _auth_source: + - plugin: default_value + default_value: '' + _value: + - plugin: get + source: parent_value + actual: + - plugin: get + source: + - '@_auth_source' + - '@_auth_value_uri' + - '@_value' + # XXX: Needs to be provided wherever this is used, corresponding + # to the vocab in which to do the things. + - '@_vid' + - plugin: flatten + - plugin: migration_lookup + migration: dgis_stub_terms_generic + stub_id: dgis_stub_terms_generic + - <<: *generic_term_extract + field_rights_statement: + - plugin: get + source: + - '@_rights_statement' + - '@_unspecified_rights_statement' + - plugin: null_coalesce nid: - plugin: migration_lookup source: '@field_pid' diff --git a/src/Plugin/migrate/process/AssembleDate.php b/src/Plugin/migrate/process/AssembleDate.php index 86a6078..17126f0 100644 --- a/src/Plugin/migrate/process/AssembleDate.php +++ b/src/Plugin/migrate/process/AssembleDate.php @@ -18,7 +18,7 @@ * The output is handled thus: * - If a range_start, or a range_end, or both, are provided and not empty, an * EDTF-style date range will be assembled, and any results from single_date - * will be ignored. + * will be combined. * - If neither a range_start nor a range_end are provided or are empty, but the * single_date is provided and has a value, it is returned. * - If no provided property has a value, null will be returned. @@ -37,7 +37,8 @@ * @code * process: * - plugin: dgi_migrate.process.assemble_date - * single_date: 2001-01-01 + * single_date: + * - 2001-01-01 * range_start: 2002-02-02 * range_end: 2003-03-03 * indicate_open: false @@ -96,7 +97,7 @@ public function __construct(array $configuration, $plugin_id, $plugin_definition 'range_start' => $this->configuration['range_start'] ?? NULL, 'range_end' => $this->configuration['range_end'] ?? NULL, ]; - if (!array_filter($this->dates)) { + if (empty($this->dates['single_date']) && !$this->dates['range_start'] && !$this->dates['range_end']) { throw new MigrateException('Plugin dgi_migrate.process.assemble_date requires at least one of the three properties, "single_date", "range_start", or "range_end" to be provided.'); } $indicate_open = $this->configuration['indicate_open'] ?? FALSE; @@ -108,11 +109,26 @@ public function __construct(array $configuration, $plugin_id, $plugin_definition * {@inheritdoc} */ public function transform($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) { - $return_value = $this->getDateRange($value, $migrate_executable, $row); - if (!$return_value) { - $return_value = $this->getValues ? $row->get($this->dates['single_date']) : $this->dates['single_date']; + $return_dates = []; + + $date_range = $this->getDateRange($value, $migrate_executable, $row); + if ($date_range !== NULL) { + $return_dates[] = $date_range; + } + + // Get single dates and add them to return_dates + $single_dates = $this->getValues ? $row->get($this->dates['single_date']) : $this->dates['single_date']; + if (is_array($single_dates)) { + $return_dates = array_merge($return_dates, $single_dates); + } elseif ($single_dates !== NULL) { + $return_dates[] = $single_dates; } - return $return_value; + + if (count($return_dates) === 1) { + return $return_dates[0]; + } + + return $return_dates; } /** From c7b9a52f7b2c77f0cf6b49ddf92e3364d7f002cd Mon Sep 17 00:00:00 2001 From: Chris MacDonald <31731869+chrismacdonaldw@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:43:25 -0300 Subject: [PATCH 2/4] Refactor queries to not use anchors --- .../migrations/dgis_nodes.yml | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml b/modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml index daacb8b..ed09902 100644 --- a/modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml +++ b/modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml @@ -1735,10 +1735,11 @@ process: query: 'mods:typeOfResource' - plugin: callback callable: iterator_to_array + _resource_type: + - plugin: get + source: '@_resource_type_query' - plugin: skip_on_empty method: process - _resource_type: - - << : *_resource_type_query - plugin: multiple_values - plugin: dgi_migrate.sub_process process_values: true @@ -1749,7 +1750,10 @@ process: <<: *generic_term_after - <<: *generic_term_extract _unspecified_resource_type: - - <<: *_resource_type_query + - plugin: get + source: '@_resource_type_query' + - plugin: skip_on_empty + method: process - plugin: get source: '@_unspecified_term' - plugin: gate @@ -2050,10 +2054,11 @@ process: query: 'mods:extension/etd:degree/etd:level' - plugin: callback callable: iterator_to_array + _degree_level: + - plugin: get + source: '@_degree_level_query' - plugin: skip_on_empty method: process - _degree_level: - - <<: *_degree_level_query - plugin: multiple_values - plugin: dgi_migrate.sub_process process_values: true @@ -2064,7 +2069,10 @@ process: <<: *generic_term_after - <<: *generic_term_extract _unspecified_degree_level: - - <<: *_degree_level_query + - plugin: get + source: '@_degree_level_query' + - plugin: skip_on_empty + method: process - plugin: get source: '@_unspecified_term' - plugin: gate @@ -2550,10 +2558,11 @@ process: query: 'mods:accessCondition[@type="use and reproduction" or @type="Use and Reproduction"][@displayLabel="Creative Commons" or @displayLabe="Creative Commons"]' - plugin: callback callable: iterator_to_array + _use_license: + - plugin: get + source: '@_use_license_query' - plugin: skip_on_empty method: process - _use_license: - - <<: *_use_license_query - plugin: multiple_values - plugin: dgi_migrate.sub_process process_values: true @@ -2564,7 +2573,10 @@ process: <<: *generic_term_after - <<: *generic_term_extract _unspecified_use_license: - - <<: *_use_license_query + - plugin: get + source: '@_use_license_query' + - plugin: skip_on_empty + method: process - plugin: get source: '@_unspecified_term' - plugin: gate @@ -2612,10 +2624,11 @@ process: query: 'mods:accessCondition[@type="rights statement"]' - plugin: callback callable: iterator_to_array + _rights_statement: + - plugin: get + source: '@_rights_statement_query' - plugin: skip_on_empty method: process - _rights_statement: - - <<: *_rights_statement_query - plugin: multiple_values - plugin: dgi_migrate.sub_process process_values: true @@ -2630,7 +2643,10 @@ process: callable: array_filter - plugin: null_coalesce _unspecified_rights_statement: - - <<: *_rights_statement_query + - plugin: get + source: '@_rights_statement_query' + - plugin: skip_on_empty + method: process - plugin: get source: '@_unspecified_term' - plugin: gate From 666581e4b397285327e0c5b2d6211cdb23a134d7 Mon Sep 17 00:00:00 2001 From: Chris MacDonald <31731869+chrismacdonaldw@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:58:12 -0300 Subject: [PATCH 3/4] Adjust for sniffer errors --- src/Plugin/migrate/process/AssembleDate.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Plugin/migrate/process/AssembleDate.php b/src/Plugin/migrate/process/AssembleDate.php index 17126f0..8b7becb 100644 --- a/src/Plugin/migrate/process/AssembleDate.php +++ b/src/Plugin/migrate/process/AssembleDate.php @@ -116,11 +116,12 @@ public function transform($value, MigrateExecutableInterface $migrate_executable $return_dates[] = $date_range; } - // Get single dates and add them to return_dates + // Get single dates and add them to return_dates. $single_dates = $this->getValues ? $row->get($this->dates['single_date']) : $this->dates['single_date']; if (is_array($single_dates)) { $return_dates = array_merge($return_dates, $single_dates); - } elseif ($single_dates !== NULL) { + } + elseif ($single_dates !== NULL) { $return_dates[] = $single_dates; } From 95771391c383388fb0baa54db5f25a7517b17c34 Mon Sep 17 00:00:00 2001 From: Chris MacDonald <31731869+chrismacdonaldw@users.noreply.github.com> Date: Mon, 26 Aug 2024 08:54:06 -0300 Subject: [PATCH 4/4] Update according to Adam's feedback. --- src/Plugin/migrate/process/AssembleDate.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Plugin/migrate/process/AssembleDate.php b/src/Plugin/migrate/process/AssembleDate.php index 8b7becb..ee80600 100644 --- a/src/Plugin/migrate/process/AssembleDate.php +++ b/src/Plugin/migrate/process/AssembleDate.php @@ -97,7 +97,7 @@ public function __construct(array $configuration, $plugin_id, $plugin_definition 'range_start' => $this->configuration['range_start'] ?? NULL, 'range_end' => $this->configuration['range_end'] ?? NULL, ]; - if (empty($this->dates['single_date']) && !$this->dates['range_start'] && !$this->dates['range_end']) { + if (!array_filter($this->dates)) { throw new MigrateException('Plugin dgi_migrate.process.assemble_date requires at least one of the three properties, "single_date", "range_start", or "range_end" to be provided.'); } $indicate_open = $this->configuration['indicate_open'] ?? FALSE; @@ -126,7 +126,7 @@ public function transform($value, MigrateExecutableInterface $migrate_executable } if (count($return_dates) === 1) { - return $return_dates[0]; + return reset($return_dates); } return $return_dates;