-
Notifications
You must be signed in to change notification settings - Fork 14.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move DAG bundle config into config, not db #44924
base: main
Are you sure you want to change the base?
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
|
||
from airflow.configuration import conf | ||
from airflow.exceptions import AirflowConfigException | ||
from airflow.models.dagbundle import DagBundleModel | ||
from airflow.utils.log.logging_mixin import LoggingMixin | ||
from airflow.utils.module_loading import import_string | ||
from airflow.utils.session import NEW_SESSION, provide_session | ||
|
||
if TYPE_CHECKING: | ||
from sqlalchemy.orm import Session | ||
|
||
from airflow.dag_processing.bundles.base import BaseDagBundle | ||
|
||
|
||
class DagBundlesManager(LoggingMixin):
    """
    Manager for DAG bundles.

    Bundles are read from the ``[dag_bundles]`` config section, one option per bundle, where each
    option value is a JSON object. The database only records which bundle names exist and whether
    they are currently enabled.
    """

    @property
    def bundle_configs(self) -> dict[str, dict]:
        """
        Get all DAG bundle configurations.

        The config is re-read and re-parsed on every access, so callers that need it more than
        once should keep the result in a local variable.

        :return: Mapping of bundle name to its parsed JSON configuration. Empty if the
            ``[dag_bundles]`` section is missing or empty.
        :raises AirflowConfigException: If a bundle's configuration does not parse to a dict.
        """
        configured_bundles = conf.getsection("dag_bundles")

        if not configured_bundles:
            return {}

        # If dags_folder is empty string, we remove it. This allows the default dags_folder bundle
        # to be disabled. ``get``/``pop`` also tolerate the option being absent altogether.
        if not configured_bundles.get("dags_folder"):
            configured_bundles.pop("dags_folder", None)

        dict_bundles: dict[str, dict] = {}
        for key in configured_bundles:
            config = conf.getjson("dag_bundles", key)

            if not isinstance(config, dict):
                raise AirflowConfigException(f"Bundle config for {key} is not a dict: {config}")
            dict_bundles[key] = config

        return dict_bundles

    @provide_session
    def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None:
        """
        Make the ``DagBundleModel`` rows reflect the bundles currently in config.

        Configured bundles that already have a row are marked enabled, configured bundles without
        a row get one added, and rows whose bundle is no longer configured are marked disabled.

        :param session: A database session.
        """
        # Parse the config exactly once; ``bundle_configs`` re-reads it on every access.
        configured_names = set(self.bundle_configs)
        known_bundles = {b.name: b for b in session.query(DagBundleModel).all()}

        for name in configured_names:
            if bundle := known_bundles.get(name):
                bundle.enabled = True
            else:
                session.add(DagBundleModel(name=name))
                self.log.info("Added new DAG bundle %s to the database", name)

        for name, bundle in known_bundles.items():
            if name not in configured_names:
                bundle.enabled = False
                self.log.warning("DAG bundle %s is no longer found in config and has been disabled", name)

    def get_all_dag_bundles(self) -> list[BaseDagBundle]:
        """
        Get all DAG bundles.

        Every configured bundle is instantiated without a pinned version.

        :return: list of DAG bundles.
        """
        return [self.get_bundle(name, version=None) for name in self.bundle_configs]

    def get_bundle(self, name: str, version: str | None = None) -> BaseDagBundle:
        """
        Get a DAG bundle by name.

        The bundle's config must provide ``classpath`` (import path of the bundle class) and may
        provide ``kwargs`` (extra keyword arguments passed to that class).

        :param name: The name of the DAG bundle.
        :param version: The version of the DAG bundle you need (optional). If not provided,
            ``tracking_ref`` will be used instead.

        :return: The DAG bundle.
        :raises KeyError: If ``name`` is not a configured bundle or its config has no ``classpath``.
        """
        # TODO: proper validation of the bundle configuration so we have better error messages
        bundle_config = self.bundle_configs[name]
        bundle_class = import_string(bundle_config["classpath"])
        # ``kwargs`` is optional: a bundle class may need nothing beyond name and version.
        return bundle_class(name=name, version=version, **bundle_config.get("kwargs", {}))
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,7 +66,6 @@ | |
from sqlalchemy.ext.hybrid import hybrid_property | ||
from sqlalchemy.orm import backref, relationship | ||
from sqlalchemy.sql import Select, expression | ||
from sqlalchemy_utils import UUIDType | ||
|
||
from airflow import settings, utils | ||
from airflow.configuration import conf as airflow_conf, secrets_backend_list | ||
|
@@ -2028,7 +2027,7 @@ class DagModel(Base): | |
fileloc = Column(String(2000)) | ||
# The base directory used by Dag Processor that parsed this dag. | ||
processor_subdir = Column(String(2000), nullable=True) | ||
bundle_id = Column(UUIDType(binary=False), ForeignKey("dag_bundle.id"), nullable=True) | ||
bundle_name = Column(StringID(), ForeignKey("dag_bundle.name"), nullable=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that, to preserve history, we should use an association table so that when a DAG is assigned to a new bundle object, the previous assignment is preserved. Example: DAG 'A' is in dag-bundle 'DA', and 'DA' is no longer configured or its name was changed, which triggers a new dag-bundle object, say 'DB', which now has DAG 'A' in it. The DAG's bundle_name will be updated to the new dag-bundle object 'DB', causing us to lose the previous bundle name. With an association table, we could have an is_active column that tells whether the bundle has been removed. However, the queries would become more complex. Another option I thought of is using a history table like in TIH, but DAGs change more often. |
||
# The version of the bundle the last time the DAG was parsed | ||
latest_bundle_version = Column(String(200), nullable=True) | ||
# String representing the owners | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
ccb8ef5583b2a6b3ee3ab4212139c112b92953675655010a6775fffb4945b206 | ||
dc1ed8fb08456efddbcfcb0a1665b90091b5157432f11654fc4d0744baa90cdb |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Describe that the section is important and that Airflow will consume any new option you add to it.
Add examples of how to define them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@dstandish updated