[airflow] Update AIR302 to check for deprecated context keys (#15144)

**Summary**

Airflow 3.0 removes a set of context variables that were deprecated during the
2.x series. This PR extends the `AIR302` rule to detect usage of these removed
variables in several patterns, helping identify incompatibilities before
upgrading to Airflow 3.0. The removed context variables are:

```
conf
execution_date
next_ds
next_ds_nodash
next_execution_date
prev_ds
prev_ds_nodash
prev_execution_date
prev_execution_date_success
tomorrow_ds
yesterday_ds
yesterday_ds_nodash
```

**Detected Patterns and Examples**

The linter now flags the use of removed context variables in the
following scenarios (a note on suppressing intentional uses follows the list):

1. **Direct Subscript Access**  
   ```python
   execution_date = context["execution_date"]  # Flagged
   ```
   
2. **`.get("key")` Method Calls**  
   ```python
   print(context.get("execution_date"))  # Flagged
   ```
   
3. **Variables Assigned from `get_current_context()`**  
If a variable is assigned from `get_current_context()` and then used to
access a removed key:
   ```python
   c = get_current_context()
   print(c.get("execution_date"))  # Flagged
   ```
   
4. **Function Parameters in `@task`-Decorated Functions**  
Parameters named after removed context variables in functions decorated
with `@task` are flagged:
   ```python
   from airflow.decorators import task
   
   @task
   def my_task(execution_date, **kwargs):  # Parameter 'execution_date' flagged
       pass
   ```
   
5. **Removed Keys in Task Decorator `kwargs` and Other Scenarios**  
Other similar patterns where removed context variables appear (e.g., as
part of `kwargs` in a `@task` function) are also detected.
   ```python
   from airflow.decorators import task

   @task
   def process_with_execution_date(**context):
       execution_date = lambda: context["execution_date"]  # Flagged
       print(execution_date)

   @task(kwargs={"execution_date": "2021-01-01"})  # Flagged
   def task_with_kwargs(**context):
       pass
   ```
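
A note for users rather than part of this PR: when a flagged usage is intentional (for example, a DAG that must keep running on Airflow 2.x for now), Ruff's standard per-line suppression applies. A minimal sketch, assuming `AIR302` is enabled in the project's Ruff configuration (the AIR rules are preview rules at the time of writing, so preview mode may also need to be enabled):

```python
from airflow.decorators import task


@task
def legacy_task(**context):
    # Intentional: this DAG still targets Airflow 2.x, so keep the removed key
    # and suppress the AIR302 diagnostic on this line only.
    execution_date = context["execution_date"]  # noqa: AIR302
    print(execution_date)
```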

**Test Plan**

Test fixtures covering various patterns of deprecated context usage are
included in this PR. For example:

```python
from airflow.decorators import task, dag
from airflow.models import DAG
from airflow.models.baseoperator import BaseOperator
from airflow.operators.dummy import DummyOperator
from airflow.utils.context import get_current_context
import pendulum
from datetime import datetime

@task
def access_invalid_key_task(**context):
    print(context.get("conf"))  # 'conf' flagged

@task
def print_config(**context):
    execution_date = context["execution_date"]  # Flagged
    prev_ds = context["prev_ds"]                # Flagged

@task
def from_current_context():
    context = get_current_context()
    print(context["execution_date"])            # Flagged

# Usage outside of a task decorated function
c = get_current_context()
print(c.get("execution_date"))                 # Flagged

@task
def some_task(execution_date, **kwargs):
    print("execution date", execution_date)     # Parameter flagged

@dag(
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC")
)
def my_dag():
    task1 = DummyOperator(
        task_id="task1",
        params={
            "execution_date": "{{ execution_date }}",  # Flagged in template context
        },
    )

    access_invalid_key_task()
    print_config()
    from_current_context()
    
dag = my_dag()

class CustomOperator(BaseOperator):
    def execute(self, context):
        execution_date = context.get("execution_date")         # Flagged
        next_ds = context.get("next_ds")                        # Flagged
        next_execution_date = context["next_execution_date"]   # Flagged
```

Ruff will emit `AIR302` diagnostics for each deprecated usage, with
suggestions when applicable, aiding in code migration to Airflow 3.0.
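
For context rather than as part of this PR, a hedged sketch of what migration typically looks like: Airflow 2.2+ exposes `logical_date`, `data_interval_start`, `data_interval_end`, and `prev_data_interval_start_success` in the task context, which cover most uses of the removed keys. The right replacement depends on the DAG's schedule semantics, so treat the mapping below as an illustration, not a mechanical rewrite.

```python
from airflow.decorators import task


@task
def print_dates(**context):
    # Before (flagged by AIR302 under Airflow 3.0):
    #   execution_date = context["execution_date"]
    #   next_execution_date = context["next_execution_date"]
    #   prev_execution_date_success = context["prev_execution_date_success"]

    # After: assumed replacements using the data-interval-aware context keys.
    logical_date = context["logical_date"]
    interval_start = context["data_interval_start"]
    interval_end = context["data_interval_end"]
    prev_success_start = context["prev_data_interval_start_success"]
    print(logical_date, interval_start, interval_end, prev_success_start)
```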

related: apache/airflow#44409,
apache/airflow#41641

---------

Co-authored-by: Wei Lee <[email protected]>
sunank200 and Lee-W authored Jan 24, 2025
1 parent 9384ba4 commit 34cc3ca
Showing 6 changed files with 941 additions and 5 deletions.
127 changes: 127 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/airflow/AIR302_context.py
@@ -0,0 +1,127 @@
from datetime import datetime

import pendulum

from airflow.decorators import dag, task
from airflow.models import DAG
from airflow.models.baseoperator import BaseOperator
from airflow.operators.dummy import DummyOperator
from airflow.plugins_manager import AirflowPlugin
from airflow.providers.standard.operators.python import PythonOperator
from airflow.utils.context import get_current_context


def access_invalid_key_in_context(**context):
    print("access invalid key", context["conf"])

@task
def access_invalid_key_task_out_of_dag(**context):
    print("access invalid key", context.get("conf"))

@dag(
    schedule=None,
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=[""],
)
def invalid_dag():
    @task()
    def access_invalid_key_task(**context):
        print("access invalid key", context.get("conf"))

    task1 = PythonOperator(
        task_id="task1",
        python_callable=access_invalid_key_in_context,
    )
    access_invalid_key_task() >> task1
    access_invalid_key_task_out_of_dag()

invalid_dag()

@task
def print_config(**context):
    # This should not throw an error as logical_date is part of airflow context.
    logical_date = context["logical_date"]

    # Removed usage - should trigger violations
    execution_date = context["execution_date"]
    next_ds = context["next_ds"]
    next_ds_nodash = context["next_ds_nodash"]
    next_execution_date = context["next_execution_date"]
    prev_ds = context["prev_ds"]
    prev_ds_nodash = context["prev_ds_nodash"]
    prev_execution_date = context["prev_execution_date"]
    prev_execution_date_success = context["prev_execution_date_success"]
    tomorrow_ds = context["tomorrow_ds"]
    yesterday_ds = context["yesterday_ds"]
    yesterday_ds_nodash = context["yesterday_ds_nodash"]

with DAG(
    dag_id="example_dag",
    schedule_interval="@daily",
    start_date=datetime(2023, 1, 1),
    template_searchpath=["/templates"],
) as dag:
    task1 = DummyOperator(
        task_id="task1",
        params={
            # Removed variables in template
            "execution_date": "{{ execution_date }}",
            "next_ds": "{{ next_ds }}",
            "prev_ds": "{{ prev_ds }}"
        },
    )

class CustomMacrosPlugin(AirflowPlugin):
    name = "custom_macros"
    macros = {
        "execution_date_macro": lambda context: context["execution_date"],
        "next_ds_macro": lambda context: context["next_ds"]
    }

@task
def print_config():
    context = get_current_context()
    execution_date = context["execution_date"]
    next_ds = context["next_ds"]
    next_ds_nodash = context["next_ds_nodash"]
    next_execution_date = context["next_execution_date"]
    prev_ds = context["prev_ds"]
    prev_ds_nodash = context["prev_ds_nodash"]
    prev_execution_date = context["prev_execution_date"]
    prev_execution_date_success = context["prev_execution_date_success"]
    tomorrow_ds = context["tomorrow_ds"]
    yesterday_ds = context["yesterday_ds"]
    yesterday_ds_nodash = context["yesterday_ds_nodash"]

class CustomOperator(BaseOperator):
    def execute(self, context):
        execution_date = context["execution_date"]
        next_ds = context["next_ds"]
        next_ds_nodash = context["next_ds_nodash"]
        next_execution_date = context["next_execution_date"]
        prev_ds = context["prev_ds"]
        prev_ds_nodash = context["prev_ds_nodash"]
        prev_execution_date = context["prev_execution_date"]
        prev_execution_date_success = context["prev_execution_date_success"]
        tomorrow_ds = context["tomorrow_ds"]
        yesterday_ds = context["yesterday_ds"]
        yesterday_ds_nodash = context["yesterday_ds_nodash"]

@task
def access_invalid_argument_task_out_of_dag(execution_date, tomorrow_ds, logical_date, **context):
    print("execution date", execution_date)
    print("access invalid key", context.get("conf"))

@task(task_id="print_the_context")
def print_context(ds=None, **kwargs):
    """Print the Airflow context and ds variable from the context."""
    print(ds)
    print(kwargs.get("tomorrow_ds"))
    c = get_current_context()
    c.get("execution_date")

class CustomOperatorNew(BaseOperator):
    def execute(self, context):
        execution_date = context.get("execution_date")
        next_ds = context.get("next_ds")
4 changes: 3 additions & 1 deletion crates/ruff_linter/src/checkers/ast/analyze/expression.rs
@@ -175,7 +175,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
            if checker.enabled(Rule::NonPEP646Unpack) {
                pyupgrade::rules::use_pep646_unpack(checker, subscript);
            }

            if checker.enabled(Rule::Airflow3Removal) {
                airflow::rules::removed_in_3(checker, expr);
            }
            pandas_vet::rules::subscript(checker, value, expr);
        }
        Expr::Tuple(ast::ExprTuple {
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/statement.rs
@@ -376,6 +376,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
            if checker.enabled(Rule::PytestParameterWithDefaultArgument) {
                flake8_pytest_style::rules::parameter_with_default_argument(checker, function_def);
            }
            if checker.enabled(Rule::Airflow3Removal) {
                airflow::rules::removed_in_3_function_def(checker, function_def);
            }
            if checker.enabled(Rule::NonPEP695GenericFunction) {
                pyupgrade::rules::non_pep695_generic_function(checker, function_def);
            }
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/airflow/mod.rs
@@ -18,6 +18,7 @@ mod tests {
    #[test_case(Rule::Airflow3Removal, Path::new("AIR302_names.py"))]
    #[test_case(Rule::Airflow3Removal, Path::new("AIR302_class_attribute.py"))]
    #[test_case(Rule::Airflow3Removal, Path::new("AIR302_airflow_plugin.py"))]
    #[test_case(Rule::Airflow3Removal, Path::new("AIR302_context.py"))]
    #[test_case(Rule::Airflow3MovedToProvider, Path::new("AIR303.py"))]
    fn rules(rule_code: Rule, path: &Path) -> Result<()> {
        let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());