-
Notifications
You must be signed in to change notification settings - Fork 13
Modifications for Classifier Pipeline #189
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 14 commits
bea8797
efef121
865e7e3
51d5edb
8f785d7
fd7bd09
f8eebb4
a1ab2c5
c3dce3a
94573f5
a55c99a
1b88b6f
3a092c6
8f9c8b0
26fe6d6
0fca178
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,9 +35,12 @@ def task_update_record(msg): | |
- bibcode | ||
- and specific payload | ||
""" | ||
logger.debug('Updating record: %s', msg) | ||
# logger.debug('Updating record: %s', msg) | ||
logger.info('Updating record: %s', msg) | ||
status = app.get_msg_status(msg) | ||
logger.info(f'Message status: {status}') | ||
Review comment: These could probably become debug statements long term so we aren't flooding the logs. Reply: Changes made and committed. |
||
type = app.get_msg_type(msg) | ||
logger.info(f'Message type: {type}') | ||
bibcodes = [] | ||
|
||
if status == 'deleted': | ||
|
@@ -84,7 +87,14 @@ def task_update_record(msg): | |
msg.toJSON(including_default_value_fields=True)) | ||
if record: | ||
logger.debug('Saved augment message: %s', msg) | ||
|
||
elif type == 'classify': | ||
bibcodes.append(msg.bibcode) | ||
logger.info(f'message to JSON: {msg.toJSON(including_default_value_fields=True)}') | ||
payload = msg.toJSON(including_default_value_fields=True) | ||
payload = payload['collections'] | ||
record = app.update_storage(msg.bibcode, 'classify',payload) | ||
if record: | ||
logger.debug('Saved classify message: %s', msg) | ||
else: | ||
# here when record has a single bibcode | ||
bibcodes.append(msg.bibcode) | ||
|
Review comment: Do you know why this file changed? I am just a bit concerned because this alembic upgrade not matching the one that was used to upgrade the DB previously could pose an issue. Reply: Yeah, I was having an issue at one point, so I added the if statement to check the database. I can revert it so it matches. Follow-up: Reverted file committed. |
Review comment: What is the difference between the [inline code identifiers lost in extraction]? Reply: We decided to use the name [inline code identifier lost in extraction]. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
"""add_classifications_column | ||
|
||
Revision ID: 6e98dcc397e6 | ||
Revises: 2d2af8a9c996 | ||
Create Date: 2025-02-28 08:52:00.341542 | ||
|
||
""" | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = '6e98dcc397e6' | ||
down_revision = '2d2af8a9c996' | ||
|
||
from alembic import op | ||
import sqlalchemy as sa | ||
|
||
|
||
|
||
def upgrade():
    """Add the ``classifications`` and ``classifications_updated`` columns to ``records``."""
    new_columns = (
        sa.Column('classifications', sa.Text),
        sa.Column('classifications_updated', sa.TIMESTAMP),
    )
    engine_name = op.get_context().connection.engine.name

    if 'sqlite' not in engine_name:
        # Any other backend: add the columns directly.
        for column in new_columns:
            op.add_column('records', column)
        return

    # SQLite's ALTER TABLE support is limited, so the change is routed
    # through Alembic's batch mode.
    with op.batch_alter_table("records") as batch_op:
        for column in new_columns:
            batch_op.add_column(column)
|
||
|
||
def downgrade():
    """Drop the ``classifications`` and ``classifications_updated`` columns from ``records``."""
    column_names = ('classifications', 'classifications_updated')
    engine_name = op.get_context().connection.engine.name

    if 'sqlite' in engine_name:
        # On SQLite the drops go through Alembic's batch mode.
        with op.batch_alter_table("records") as batch_op:
            for column_name in column_names:
                batch_op.drop_column(column_name)
    else:
        for column_name in column_names:
            op.drop_column('records', column_name)
|
Review comment: It might make more sense to move this to `run.py` so that this code isn't being pulled into the celery workers.