Skip to content

Commit c5edd81

Browse files
videos: submitter migration
1 parent aa4b3d4 commit c5edd81

File tree

26 files changed

+1098
-151
lines changed

26 files changed

+1098
-151
lines changed

cds_migrator_kit/rdm/users/api.py

Lines changed: 6 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -10,48 +10,24 @@
1010
import json
1111
from copy import deepcopy
1212

13-
from flask import current_app
13+
from cds_migrator_kit.users.api import MigrationUserAPI
14+
15+
from invenio_db import db
1416
from invenio_accounts.models import User, UserIdentity
1517
from invenio_cern_sync.sso import cern_remote_app_name
16-
from invenio_db import db
17-
from invenio_oauthclient.models import RemoteAccount
18-
from invenio_rdm_migrator.transform.base import Entry, Transform
19-
from invenio_userprofiles import UserProfile
2018
from psycopg.errors import UniqueViolation
2119
from sqlalchemy.exc import IntegrityError
2220

2321
from cds_migrator_kit.transform.dumper import CDSRecordDump
2422

2523

26-
class CDSMigrationUserAPI:
24+
25+
class CDSMigrationUserAPI(MigrationUserAPI):
2726
"""CDS missing user load class."""
2827

2928
def __init__(self, remote_account_client_id=None):
3029
"""Constructor."""
31-
self.client_id = current_app.config["CERN_APP_CREDENTIALS"]["consumer_key"]
32-
33-
def check_person_id_exists(self, person_id):
34-
"""Check if uer identity already exists."""
35-
return UserIdentity.query.filter_by(id=person_id).one_or_none()
36-
37-
def create_invenio_user(self, email, username):
38-
"""Commit new user in db."""
39-
try:
40-
user = User(email=email, username=username, active=False)
41-
db.session.add(user)
42-
db.session.commit()
43-
return user
44-
except IntegrityError as e:
45-
db.session.rollback()
46-
email_username = email.split("@")[0]
47-
user = User(
48-
email=email,
49-
username=f"duplicated_{username}_{email_username}",
50-
active=False,
51-
)
52-
db.session.add(user)
53-
db.session.commit()
54-
return user
30+
super().__init__(remote_account_client_id)
5531

5632
def create_invenio_user_identity(self, user_id, person_id):
5733
"""Return new user identity entry."""
@@ -72,40 +48,3 @@ def create_invenio_user_identity(self, user_id, person_id):
7248
db.session.commit()
7349
return user_identity
7450

75-
def create_invenio_user_profile(self, user, name):
76-
"""Return new user profile."""
77-
user_profile = UserProfile(user=user)
78-
user_profile.full_name = name
79-
return user_profile
80-
81-
def create_invenio_remote_account(self, user_id, extra_data=None):
82-
"""Return new user entry."""
83-
if extra_data is None:
84-
extra_data = {}
85-
return RemoteAccount.create(
86-
client_id=self.client_id, user_id=user_id, extra_data=extra_data
87-
)
88-
89-
def create_user(self, email, name, person_id, username, extra_data=None):
90-
"""Create an invenio user."""
91-
user = self.create_invenio_user(email, username)
92-
user_id = user.id
93-
profile_data = {}
94-
if person_id:
95-
identity = self.create_invenio_user_identity(user_id, person_id)
96-
db.session.add(identity)
97-
profile_data = {
98-
"person_id": person_id,
99-
}
100-
if name:
101-
if "department" in extra_data:
102-
profile_data.update({"department": extra_data["department"]})
103-
profile = deepcopy(user.user_profile)
104-
profile.update(profile_data)
105-
user.user_profile = profile
106-
db.session.add(user)
107-
108-
remote_account = self.create_invenio_remote_account(user_id, extra_data)
109-
db.session.add(remote_account)
110-
111-
return user

cds_migrator_kit/users/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (C) 2025 CERN.
4+
#
5+
# cds-migrator-kit is free software; you can redistribute it and/or modify it under
6+
# the terms of the MIT License; see LICENSE file for more details.
7+
8+
"""cds-migrator-kit users migration module."""

cds_migrator_kit/users/api.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (C) 2024-2025 CERN.
4+
#
5+
# Ccds-migrator-kit is free software; you can redistribute it and/or modify it under
6+
# the terms of the MIT License; see LICENSE file for more details.
7+
8+
"""cds-migrator-kit user api."""
9+
import csv
10+
import json
11+
from copy import deepcopy
12+
13+
from flask import current_app
14+
from invenio_accounts.models import User, UserIdentity
15+
from invenio_db import db
16+
from invenio_oauthclient.models import RemoteAccount
17+
from invenio_userprofiles import UserProfile
18+
from psycopg.errors import UniqueViolation
19+
from sqlalchemy.exc import IntegrityError
20+
21+
from cds_migrator_kit.transform.dumper import CDSRecordDump
22+
23+
cern_remote_app_name = "cern"
24+
25+
class MigrationUserAPI:
26+
"""CDS missing user load class."""
27+
28+
def __init__(self, remote_account_client_id=None):
29+
"""Constructor."""
30+
self.client_id = current_app.config["CERN_APP_CREDENTIALS"]["consumer_key"]
31+
32+
def check_person_id_exists(self, person_id):
33+
"""Check if uer identity already exists."""
34+
return UserIdentity.query.filter_by(id=person_id).one_or_none()
35+
36+
def create_invenio_user(self, email, username):
37+
"""Commit new user in db."""
38+
try:
39+
user = User(email=email, username=username, active=False)
40+
db.session.add(user)
41+
db.session.commit()
42+
return user
43+
except IntegrityError as e:
44+
db.session.rollback()
45+
email_username = email.split("@")[0]
46+
user = User(
47+
email=email,
48+
username=f"duplicated_{username}_{email_username}",
49+
active=False,
50+
)
51+
db.session.add(user)
52+
db.session.commit()
53+
return user
54+
55+
def create_invenio_user_identity(self, user_id, person_id):
56+
"""Return new user identity entry."""
57+
try:
58+
return UserIdentity(
59+
id=person_id,
60+
method=cern_remote_app_name,
61+
id_user=user_id,
62+
)
63+
except (IntegrityError, UniqueViolation) as e:
64+
db.session.rollback()
65+
user_identity = UserIdentity(
66+
id=f"duplicate{person_id}",
67+
method=cern_remote_app_name,
68+
id_user=user_id,
69+
)
70+
db.session.add(user_identity)
71+
db.session.commit()
72+
return user_identity
73+
74+
def create_invenio_user_profile(self, user, name):
75+
"""Return new user profile."""
76+
user_profile = UserProfile(user=user)
77+
user_profile.full_name = name
78+
return user_profile
79+
80+
def create_invenio_remote_account(self, user_id, extra_data=None):
81+
"""Return new remote account entry."""
82+
if extra_data is None:
83+
extra_data = {}
84+
return RemoteAccount.create(
85+
client_id=self.client_id, user_id=user_id, extra_data=extra_data
86+
)
87+
88+
def create_user(self, email, name, person_id, username, extra_data=None):
89+
"""Create an invenio user."""
90+
user = self.create_invenio_user(email, username)
91+
user_id = user.id
92+
profile_data = {}
93+
if person_id:
94+
identity = self.create_invenio_user_identity(user_id, person_id)
95+
db.session.add(identity)
96+
profile_data = {
97+
"person_id": person_id,
98+
}
99+
if name:
100+
if "department" in extra_data:
101+
profile_data.update({"department": extra_data["department"]})
102+
profile = deepcopy(user.user_profile)
103+
profile.update(profile_data)
104+
user.user_profile = profile
105+
db.session.add(user)
106+
107+
remote_account = self.create_invenio_remote_account(user_id, extra_data)
108+
db.session.add(remote_account)
109+
110+
return user

cds_migrator_kit/videos/README.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Run the following command to dump a subset of records:
1010

1111
inveniomigrator dump records -q '8567_x:"Absolute master path" 8567_d:/mnt/master_share* -980__C:MIGRATED -980__c:DELETED -5831_a:digitized' --file-prefix lectures --chunk-size=1000
1212

13-
Place your dumps into the `cds_migrator_kit/videos/weblecture_migration/data/weblectures/dump/` folder, or update the `extract: / dirpath:` in `cds_migrator_kit/videos/weblecture_migration/streams.yaml`.
13+
Place your dumps into the `cds_migrator_kit/videos/weblecture_migration/data/weblectures/dump/` folder, or update the `records/weblectures/extract/dirpath:` in `cds_migrator_kit/videos/weblecture_migration/streams.yaml`.
1414

1515
For the files, modify the `migration_config.py` file located at `cds_migrator_kit/videos/migration_config.py`, specifically updating the following variables:
1616

@@ -22,6 +22,26 @@ For the files, modify the `migration_config.py` file located at `cds_migrator_ki
2222
- Composite videos will always be named as `<id-composite-...p-quality.mp4>`, and frames of the composite will be stored in `MOUNTED_MEDIA_CEPH_PATH/frames`.
2323
- If no composite exists (i.e., the master contains only one video), subformats and frames will be obtained using `data.v2.json`.
2424

25+
## Missing Users Migration
26+
27+
If you need to migrate missing users, you need the `missing_users.json` file, which you can find in the folder `eos\...\CDS Videos\Projects\Weblectures migration\`. Place the file in `cds_migrator_kit/videos/weblecture_migration/data/users/` or update the `submitters/data_dir` in `cds_migrator_kit/videos/weblecture_migration/streams.yaml`. User migration uses the same record dumps as record migration, so make sure your dumps are in the `cds_migrator_kit/videos/weblecture_migration/data/weblectures/dump/` folder.
28+
29+
To migrate the missing users, run:
30+
31+
.. code-block:: bash
32+
33+
invenio migration videos submitters run
34+
35+
## Creating System User
36+
37+
If you need to use a system user for the migration, you can configure the username by updating the `WEBLECTURES_MIGRATION_SYSTEM_USER` variable in the `migration_config.py` file.
38+
39+
To create the system user (if it doesn't already exist), run:
40+
41+
.. code-block:: bash
42+
43+
invenio migration videos submitters create-system-user
44+
2545
## Running the Migration
2646

2747
Once you have the dump and files, you can proceed with the migration.

cds_migrator_kit/videos/migration_config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,5 @@ def _(x): # needed to avoid start time failure with lazy strings
5050

5151
# TODO CHANGE THEM
5252
MOUNTED_MEDIA_CEPH_PATH = "/cephfs/media_data"
53+
54+
WEBLECTURES_MIGRATION_SYSTEM_USER = "[email protected]"

cds_migrator_kit/videos/weblecture_migration/cli.py

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,24 @@
88
"""CDS-Videos command line module."""
99
import logging
1010
from pathlib import Path
11+
import re
1112

13+
from cds_migrator_kit.videos.weblecture_migration.users.api import CDSVideosMigrationUserAPI
1214
import click
1315
from flask import current_app
1416
from flask.cli import with_appcontext
17+
from invenio_accounts.models import User
18+
from sqlalchemy.exc import NoResultFound
1519

1620
from cds_migrator_kit.runner.runner import Runner
17-
from cds_migrator_kit.videos.weblecture_migration.streams import RecordStreamDefinition
1821
from cds_migrator_kit.videos.weblecture_migration.logger import VideosJsonLogger
22+
from cds_migrator_kit.videos.weblecture_migration.streams import RecordStreamDefinition
23+
from cds_migrator_kit.videos.weblecture_migration.users.runner import (
24+
VideosSubmitterRunner,
25+
)
26+
from cds_migrator_kit.videos.weblecture_migration.users.streams import (
27+
SubmitterStreamDefinition,
28+
)
1929

2030
cli_logger = logging.getLogger("migrator")
2131

@@ -40,13 +50,69 @@ def weblectures():
4050
@with_appcontext
4151
def run(dry_run=False):
4252
"""Run."""
43-
# TODO temporary, it'll change
44-
VideosJsonLogger.initialize(Path("cds_migrator_kit/videos/weblecture_migration/log"))
4553
stream_config = current_app.config["CDS_MIGRATOR_KIT_VIDEOS_STREAM_CONFIG"]
4654
runner = Runner(
4755
stream_definitions=[RecordStreamDefinition],
4856
config_filepath=Path(stream_config).absolute(),
4957
dry_run=dry_run,
5058
collection="weblectures",
5159
)
60+
VideosJsonLogger.initialize(runner.log_dir)
61+
runner.run()
62+
63+
64+
@videos.group()
65+
def submitters():
66+
"""Migration CLI for submitters (users)."""
67+
pass
68+
69+
70+
@submitters.command()
71+
@click.option(
72+
"--dry-run",
73+
is_flag=True,
74+
)
75+
@with_appcontext
76+
def run(dry_run=False):
77+
"""Migrate the users(submitters) if missing."""
78+
stream_config = current_app.config["CDS_MIGRATOR_KIT_VIDEOS_STREAM_CONFIG"]
79+
runner = VideosSubmitterRunner(
80+
stream_definition=SubmitterStreamDefinition,
81+
config_filepath=Path(stream_config).absolute(),
82+
dry_run=dry_run,
83+
)
5284
runner.run()
85+
86+
87+
@submitters.command()
88+
@with_appcontext
89+
def create_system_user():
90+
"""Create the system user if missing."""
91+
email = current_app.config["WEBLECTURES_MIGRATION_SYSTEM_USER"]
92+
93+
if not email:
94+
cli_logger.error("System user email is not configured.")
95+
return
96+
97+
try:
98+
user = User.query.filter_by(email=email).one()
99+
cli_logger.info(f"User {email} exists.")
100+
return
101+
except NoResultFound:
102+
username = email.split("@")[0].replace(".", "")
103+
username = re.sub(r"\W+", "", username).lower()
104+
extra_data = {"migration": {"note": "System user for migration"}}
105+
user_api = CDSVideosMigrationUserAPI()
106+
107+
try:
108+
user = user_api.create_user(
109+
email,
110+
name="Weblectures System User",
111+
username=username,
112+
person_id=None,
113+
extra_data=extra_data,
114+
)
115+
except Exception as exc:
116+
cli_logger.error(
117+
f"System user creation failed: {email}, {username}\n {exc}"
118+
)

0 commit comments

Comments
 (0)