Airflow 1.10.11 high-availability (HA) installation and configuration:
Three machines with three roles in total: master1, master2, worker1
# Install Airflow on all nodes
mkdir -p ~/airflow
export AIRFLOW_HOME=~/airflow
cd ~/airflow
python3.6 -m pip install virtualenv
/usr/local/bin/virtualenv .venv
source .venv/bin/activate
pip3.6 install 'apache-airflow[crypto,password,async,devel,mysql,redis]==1.10.11' celery redis
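# Optional sanity check that the pinned version landed in the virtualenv:
airflow version   # should print 1.10.11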
# Install the failover controller on master2
pip3.6 install git+https://github.com/teamclairvoyant/airflow-scheduler-failover-controller.git@v1.0.5
# Change the SSH port used by the failover controller from the default 22 to the custom port
vim ./.venv/lib/python3.6/site-packages/scheduler_failover_controller/command_runner/command_runner.py
# Around lines 34 and 36, change:
#   command_split = ["ssh", "-tt", host, base_command]
#   command_split = ["ssh", host, base_command]
# to:
#   command_split = ["ssh", "-tt", "-p", "12321", host, base_command]
#   command_split = ["ssh", "-p", "12321", host, base_command]
# Initialize the default configuration on master1
airflow initdb
# Edit the airflow.cfg configuration; a sketch of the HA-relevant entries follows
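# A minimal sketch of the entries this setup needs in airflow.cfg, assuming
# MySQL as the metadata DB and Redis as the Celery broker on master1
# (10.0.4.13); credentials and hosts are placeholders -- adjust to your environment:
[core]
executor = CeleryExecutor
sql_alchemy_conn = mysql://airflow:airflow_pass@10.0.4.13:3306/airflow
[webserver]
authenticate = True
auth_backend = airflow.contrib.auth.backends.password_auth
[celery]
broker_url = redis://10.0.4.13:6379/0
result_backend = db+mysql://airflow:airflow_pass@10.0.4.13:3306/airflow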
# Reset Airflow (rebuilds the metadata tables against the database configured above)
airflow resetdb
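# resetdb prompts for confirmation; in 1.10 the -y flag skips the prompt for scripted runs:
airflow resetdb -y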
# Add a web UI login user on master1 (requires the password_auth backend configured above)
cd ~/airflow
source .venv/bin/activate
python3
from airflow import models, settings
from airflow.contrib.auth.backends.password_auth import PasswordUser
user = PasswordUser(models.User())
user.username = 'admin'
user.email = 'admin@test.com'
user.password = 'admin'  # the PasswordUser setter stores a hashed value, not plaintext
session = settings.Session()
session.add(user)
session.commit()
session.close()
exit()
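# The account created above can now sign in to the web UI (admin / admin as set here).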
# Distribute airflow.cfg to all other nodes (hostnames follow this example's
# roles; -P matches the custom SSH port 12321 -- adjust to your environment)
scp -P 12321 ~/airflow/airflow.cfg root@master2:~/airflow/
scp -P 12321 ~/airflow/airflow.cfg root@worker1:~/airflow/
# Initialize failover on master2 (the venv path matches the install layout above)
scheduler_failover_controller -venv /root/airflow/.venv/bin/activate init
# Update the failover configuration: init appends a [scheduler_failover]
# section to airflow.cfg; list both scheduler hosts there
scheduler_nodes_in_cluster = 10.0.4.13,10.0.4.10
# Note: the current host's name can be obtained with the following command
scheduler_failover_controller get_current_host
# Configure passwordless SSH login between the machines running the failover
# controller (a setup sketch follows below); once configured, verify it with:
scheduler_failover_controller test_connection
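# A minimal sketch of the key setup, run on each failover host (IPs per the
# scheduler_nodes_in_cluster list above; -p matches the 12321 port change):
ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
ssh-copy-id -p 12321 root@10.0.4.13
ssh-copy-id -p 12321 root@10.0.4.10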
# Note: only one scheduler daemon may run at a time. With more than one scheduler running, a task could be executed multiple times.
If two schedulers cannot run at once, doesn't the cluster stop running tasks entirely as soon as the scheduler node fails?
That is a fair question, and there is a solution: deploy the scheduler on two machines but run the daemon on only one of them; as soon as that machine fails, start the scheduler on the other one. The third-party component airflow-scheduler-failover-controller automates exactly this, which is how this setup achieves scheduler high availability.
# Start the services
source ~/airflow/.venv/bin/activate
# Start on master1
# start the web server, default port is 8080
airflow webserver -D
# start the scheduler
airflow scheduler -D
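# Optional liveness check on master1 (the /health endpoint in 1.10 reports
# metadatabase and scheduler status; assumes curl is installed):
curl http://localhost:8080/health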
# Start on master2
# start the web server, default port is 8080
airflow webserver -D
# start the scheduler_failover_controller
nohup scheduler_failover_controller start > /dev/null 2>&1 &
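# Once running, the controller's own subcommands (listed in the reference
# below) can confirm the state it sees:
scheduler_failover_controller is_scheduler_running
scheduler_failover_controller metadata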
# Reference: scheduler_failover_controller subcommands
version               Prints out the version of the Scheduler Failover Controller
init                  Initialize configurations to allow the Scheduler Failover Controller to run
test_connection       Tests if you can connect to all the necessary machines listed in the 'scheduler_nodes_in_cluster' config
is_scheduler_running  Checks if the Scheduler is running on the machines listed in the 'scheduler_nodes_in_cluster' config
clear_metadata        Clear the metadata in the metastore
metadata              Get the metadata from the metastore
send_test_email       Send a test email
get_current_host      Get the current hostname
start                 Start the Airflow Scheduler Failover Controller
# Start on the worker node
# Start the worker daemon
airflow worker -D
# Start Flower (a daemon that monitors the Celery message queue) -- optional; this example does not configure or use it
airflow flower -D
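# If started, Flower serves its monitoring UI on port 5555 by default.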
# print the list of active DAGs
airflow list_dags
# prints the list of tasks in the "tutorial" DAG
airflow list_tasks tutorial
# prints the hierarchy of tasks in the tutorial DAG
airflow list_tasks tutorial --tree
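# To exercise a single task without involving the scheduler, `airflow test`
# runs one task instance for a given execution date (print_date is a task in
# the bundled tutorial example DAG; assumes example DAGs are loaded):
airflow test tutorial print_date 2020-07-01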