Skip to content

Commit

Permalink
sso: do not expect the person_id for external accounts
Browse files Browse the repository at this point in the history
  • Loading branch information
ntarocco committed Sep 27, 2024
1 parent 1e0c6f4 commit 3db6984
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 30 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,13 @@ def sync_groups_task():

### LDAP

You can use LDAP instead. Define the LDAP url:
You can use LDAP instead. Install this module with the ldap extra dependency:

```shell
pip install invenio-cern-sync[ldap]
```

Define the LDAP url:

```python
CERN_SYNC_LDAP_URL = <url>
Expand Down
32 changes: 20 additions & 12 deletions invenio_cern_sync/authz/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,35 +132,43 @@ def _fetch_all(self, url, headers):
yield resp.json()["data"]

def get_identities(self, fields=IDENTITY_FIELDS):
"""Get all identities."""
query_params = [("field", value) for value in IDENTITY_FIELDS]
query_params += [
("limit", self.limit),
("filter", "type:Person"),
("filter", "blocked:false"),
]
query_string = urlencode(query_params)
"""Get all identities.
It will retrieve all user identities (type:Person), with a primary account
(source:cern) and actively at CERN (activeUser:true).
If you also need to get external users with an EduGain account, use
source:edugain and omit the activeUser filter, as external accounts don't have it."""
token = self.keycloak_service.get_authz_token()
headers = {
"Authorization": f"Bearer {token}",
"accept": "application/json",
}
url_without_offset = f"{self.base_url}/api/v1.0/Identity?{query_string}"
return self._fetch_all(url_without_offset, headers)

def get_groups(self, fields=GROUPS_FIELDS):
"""Get all groups."""
query_params = [("field", value) for value in fields]
query_params += [
("limit", self.limit),
("filter", "type:Person"),
("filter", "source:cern"),
("filter", "activeUser:true"),
]
query_string = urlencode(query_params)

url_without_offset = f"{self.base_url}/api/v1.0/Identity?{query_string}"
return self._fetch_all(url_without_offset, headers)

def get_groups(self, fields=GROUPS_FIELDS):
    """Fetch all groups from the ``Groups`` endpoint.

    :param fields: names of the group fields to request; each name is sent
        as its own ``field`` query parameter.
    :return: the result of ``self._fetch_all`` for the built URL and headers.
    """
    access_token = self.keycloak_service.get_authz_token()
    request_headers = {
        "Authorization": f"Bearer {access_token}",
        "accept": "application/json",
    }

    # Requested fields first, then the page size.
    params = [("field", name) for name in fields] + [("limit", self.limit)]
    endpoint = f"{self.base_url}/api/v1.0/Groups?{urlencode(params)}"
    return self._fetch_all(endpoint, request_headers)
3 changes: 2 additions & 1 deletion invenio_cern_sync/authz/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ def serialize_cern_identity(cern_identity):
userprofile_mapper = current_app.config["CERN_SYNC_AUTHZ_USERPROFILE_MAPPER"]
extra_data_mapper = current_app.config["CERN_SYNC_AUTHZ_USER_EXTRADATA_MAPPER"]
try:
# this should always exist
# The assumption here is that we only sync CERN primary accounts.
# The personId does not exist for external accounts (EduGain, social logins or guest accounts)
person_id = cern_identity["personId"]
except KeyError:
raise InvalidCERNIdentity("personId", "unknown")
Expand Down
3 changes: 2 additions & 1 deletion invenio_cern_sync/ldap/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def serialize_ldap_user(ldap_user, userprofile_mapper=None, extra_data_mapper=No
extra_data_mapper or current_app.config["CERN_SYNC_LDAP_USER_EXTRADATA_MAPPER"]
)
try:
# this should always exist
# The assumption here is that we only sync CERN primary accounts.
# The employeeID (CERN person id) does not exist for external accounts (EduGain, social logins or guest accounts)
person_id = first_or_raise(ldap_user, "employeeID")
except KeyError:
raise InvalidLdapUser("employeeID", "unknown")
Expand Down
15 changes: 8 additions & 7 deletions invenio_cern_sync/sso/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,22 @@ class _Form(Form):
######################################################################################
# User handler


def cern_setup_handler(remote, token, resp):
"""Perform additional setup after the user has been logged in."""
token_user_info, _ = get_user_info(remote, resp)

with db.session.begin_nested():
username = token_user_info["sub"]
person_id = token_user_info["cern_person_id"]
# cern_person_id is not set for non-CERN users (EduGain)
external_id = token_user_info.get("cern_person_id", username)
extra_data = {
"keycloak_id": username,
"person_id": person_id,
"person_id": external_id,
}
token.remote_account.extra_data = extra_data

user = token.remote_account.user
user_identity = {"id": person_id, "method": remote.name}
user_identity = {"id": external_id, "method": remote.name}

# link User with UserIdentity
oauth_link_external_id(user, user_identity)
Expand All @@ -72,8 +72,10 @@ def cern_info_serializer(remote, resp, token_user_info, user_info):
"""Info serializer."""
user_info = user_info or {}

username = token_user_info["sub"]
email = token_user_info["email"]
external_id = token_user_info["cern_person_id"]
# cern_person_id might be missing for non-CERN users (EduGain)
external_id = token_user_info.get("cern_person_id", username)
preferred_language = user_info.get("cern_preferred_language", "en").lower()
return {
"user": {
Expand All @@ -82,7 +84,7 @@ def cern_info_serializer(remote, resp, token_user_info, user_info):
"profile": {
"affiliations": user_info.get("home_institute", ""),
"full_name": user_info["name"],
"username": token_user_info["sub"],
"username": username,
},
"prefs": {
"visibility": "public",
Expand All @@ -98,7 +100,6 @@ def cern_info_serializer(remote, resp, token_user_info, user_info):
######################################################################################
# Groups handler


def cern_groups_handler(remote, resp):
"""Retrieves groups from remote account.
Expand Down
30 changes: 24 additions & 6 deletions invenio_cern_sync/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,41 @@

"""Invenio-CERN-sync tasks."""

from celery import current_app, shared_task
from flask import current_app
from celery import shared_task
from invenio_db import db

from .users.sync import sync
from .users.sync import sync as users_sync
from .groups.sync import sync as groups_sync


@shared_task
def sync_users():
"""Task to sync users with LDAP."""
def sync_users(*args, **kwargs):
"""Task to sync users with CERN database."""
if current_app.config.get("DEBUG", True):
current_app.logger.warning(
"Users sync with CERN LDAP disabled, the DEBUG env var is True."
"Users sync disabled, the DEBUG env var is True."
)
return

try:
sync()
users_sync(*args, **kwargs)
except Exception as e:
db.session.rollback()
current_app.logger.exception(e)


@shared_task
def sync_groups(*args, **kwargs):
    """Run the groups synchronisation as a task.

    All positional and keyword arguments are forwarded unchanged to
    ``groups_sync``. When the ``DEBUG`` config value is truthy (or absent,
    since it defaults to ``True`` here) nothing is synced and a warning is
    logged instead. Any exception raised by the sync is logged and the
    database session is rolled back; the exception is not re-raised.
    """
    debug_enabled = current_app.config.get("DEBUG", True)
    if not debug_enabled:
        try:
            groups_sync(*args, **kwargs)
        except Exception as err:
            # Task boundary: roll back any partial changes and record the error.
            db.session.rollback()
            current_app.logger.exception(err)
    else:
        current_app.logger.warning(
            "Groups sync disabled, the DEBUG env var is True."
        )
8 changes: 6 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ python_requires = >=3.9
zip_safe = False
install_requires =
invenio-accounts>=5.0.0,<6.0.0
invenio-celery>=1.0.0,<2.0.0
invenio-oauthclient>=4.0.0,<5.0.0
invenio_userprofiles>=3.0.0,<4.0.0
python-ldap>=3.4.0
invenio-userprofiles>=3.0.0,<4.0.0

[options.extras_require]
tests =
Expand All @@ -39,10 +39,14 @@ tests =
pytest-black-ng>=0.4.0
opensearch2 =
invenio-search[opensearch2]>=2.1.0,<3.0.0
ldap =
python-ldap>=3.4.0

[options.entry_points]
invenio_base.apps =
invenio_cern_sync = invenio_cern_sync:InvenioCERNSync
invenio_celery.tasks =
invenio_cern_sync = invenio_cern_sync.tasks

[bdist_wheel]
universal = 1
Expand Down

0 comments on commit 3db6984

Please sign in to comment.