|
9 | 9 |
|
10 | 10 | import datetime |
11 | 11 | import logging |
| 12 | +from pathlib import Path |
12 | 13 |
|
13 | 14 | from invenio_rdm_migrator.streams.records.transform import ( |
14 | 15 | RDMRecordEntry, |
@@ -84,7 +85,8 @@ def _pids(self, json_entry): |
84 | 85 |
|
85 | 86 | def _files(self, record_dump): |
86 | 87 | """Transform the files of a record.""" |
87 | | - files = record_dump.prepare_files() |
| 88 | + record_dump.prepare_files() |
| 89 | + files = record_dump.files |
88 | 90 | return {"enabled": True if files else False} |
89 | 91 |
|
90 | 92 | def _communities(self, json_entry): |
@@ -158,6 +160,11 @@ def transform(self, entry): |
158 | 160 | class CDSToRDMRecordTransform(RDMRecordTransform): |
159 | 161 | """CDSToRDMRecordTransform.""" |
160 | 162 |
|
| 163 | + def __init__(self, workers=None, throw=False, files_dump_dir=None): |
| 164 | + """Constructor.""" |
| 165 | + self.files_dump_dir = Path(files_dump_dir).absolute().as_posix() |
| 166 | + super().__init__(workers, throw) |
| 167 | + |
161 | 168 | def _community_id(self, entry, record): |
162 | 169 | communities = record.get("communities") |
163 | 170 | if communities: |
@@ -201,26 +208,64 @@ def _transform(self, entry): |
201 | 208 | } |
202 | 209 |
|
203 | 210 | def _record(self, entry): |
| 211 | + # could be in draft as well, depends on how we decide to publish |
204 | 212 | return CDSToRDMRecordEntry().transform(entry) |
205 | 213 |
|
206 | 214 | def _draft(self, entry): |
207 | 215 | return None |
208 | 216 |
|
209 | 217 | def _draft_files(self, entry): |
210 | | - return None |
| 218 | + """Point to the temporary EOS storage to import files from.""" |
| 219 | + _files = entry["files"] |
| 220 | + draft_files = [] |
| 221 | + legacy_path_root = Path("/opt/cdsweb/var/data/files/") |
| 222 | + tmp_eos_root = Path(self.files_dump_dir) |
| 223 | + |
| 224 | + for file in _files: |
| 225 | + full_path = Path(file["full_path"]) |
| 226 | + draft_files.append({ |
| 227 | + "eos_tmp_path": tmp_eos_root / full_path.relative_to(legacy_path_root), |
| 228 | + "key": file["full_name"], |
| 229 | + "metadata": {}, |
| 230 | + "mimetype": file["mime"], |
| 231 | + "checksum": file["checksum"] |
| 232 | + }) |
| 233 | + return draft_files |
211 | 234 |
|
212 | 235 | def _record_files(self, entry, record): |
213 | | - # files = entry["json"].get("_files", []) |
214 | | - # return [ |
215 | | - # { |
216 | | - # "key": f["key"], |
217 | | - # "object_version": { |
218 | | - # "file": { |
219 | | - # "size": f["size"], |
220 | | - # "checksum": f["checksum"], |
221 | | - # }, |
222 | | - # }, |
223 | | - # } |
224 | | - # for f in files |
225 | | - # ] |
| 236 | + """Record files entries transform.""" |
| 237 | + # TODO: implement if we decide not to go via draft publish |
226 | 238 | return [] |
| 239 | + |
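| 240 | + # Example "files" list (one entry) of the shape _draft_files reads; it uses |
| 241 | + # the "full_path", "full_name", "mime" and "checksum" keys of each entry: |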
| 242 | + # "files": [ |
| 243 | + # { |
| 244 | + # "comment": null, |
| 245 | + # "status": "firerole: allow group \"council-full [CERN]\"\ndeny until \"1996-02-01\"\nallow all", |
| 246 | + # "version": 1, |
| 247 | + # "encoding": null, |
| 248 | + # "creation_date": "2009-11-03T12:29:06+00:00", |
| 249 | + # "bibdocid": 502379, |
| 250 | + # "mime": "application/pdf", |
| 251 | + # "full_name": "CM-P00080632-e.pdf", |
| 252 | + # "superformat": ".pdf", |
| 253 | + # "recids_doctype": [[32097, "Main", "CM-P00080632-e.pdf"]], |
| 254 | + # "path": "/opt/cdsweb/var/data/files/g50/502379/CM-P00080632-e.pdf;1", |
| 255 | + # "size": 5033532, |
| 256 | + # "license": {}, |
| 257 | + # "modification_date": "2009-11-03T12:29:06+00:00", |
| 258 | + # "copyright": {}, |
| 259 | + # "url": "http://cds.cern.ch/record/32097/files/CM-P00080632-e.pdf", |
| 260 | + # "checksum": "ed797ce5d024dcff0040db79c3396da9", |
| 261 | + # "description": "English", |
| 262 | + # "format": ".pdf", |
| 263 | + # "name": "CM-P00080632-e", |
| 264 | + # "subformat": "", |
| 265 | + # "etag": "\"502379.pdf1\"", |
| 266 | + # "recid": 32097, |
| 267 | + # "flags": [], |
| 268 | + # "hidden": false, |
| 269 | + # "type": "Main", |
| 270 | + # "full_path": "/opt/cdsweb/var/data/files/g50/502379/CM-P00080632-e.pdf;1" |
| 271 | + # },] |
0 commit comments