From 18aba06f55fc7b66a6b5518e2b5d192e3bab6947 Mon Sep 17 00:00:00 2001 From: Willy Morin Date: Fri, 12 Jun 2015 11:43:26 +0200 Subject: [PATCH 001/163] Fix docker run command --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4f0fa74dce..53b1826295 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,6 @@ Alternatively, you can build an image from [Dockerfile](https://github.com/pucke ```bash docker run -d \ --name airflow \ - -p 8080:8080 + -p 8080:8080 \ puckel/docker-airflow ``` From 8310bd482454e26ddb3d9b1f6a360789bbc5485b Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 18 Jun 2015 14:41:08 +0200 Subject: [PATCH 002/163] Configuration to test a complete architecture with MySQL and RabbitMQ + docker-compose.yml file. --- Dockerfile | 19 ++++++++++++++---- README.md | 28 +++++++++++++++++++++----- config/airflow.cfg | 38 +++++++++++++++++++++++++++++++++++ docker-compose.yml | 48 ++++++++++++++++++++++++++++++++++++++++++++ script/entrypoint.sh | 24 ++++++++++++++++++++++ 5 files changed, 148 insertions(+), 9 deletions(-) create mode 100644 config/airflow.cfg create mode 100644 docker-compose.yml create mode 100644 script/entrypoint.sh diff --git a/Dockerfile b/Dockerfile index f59b8d0cb6..d89891e8df 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,15 +14,20 @@ ENV TERM linux ENV INITRD No ENV AIRFLOW_HOME /usr/local/airflow +ENV C_FORCE_ROOT true +ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages RUN apt-get update -yqq \ && apt-get install -yqq --no-install-recommends \ + netcat \ python-pip \ python-dev \ + libmysqlclient-dev \ build-essential \ - && mkdir /usr/local/airflow \ - && pip install airflow \ - && airflow initdb \ + && mkdir -p $AIRFLOW_HOME/logs \ + && mkdir $AIRFLOW_HOME/dags \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[mysql] \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* 
\ @@ -32,6 +37,12 @@ RUN apt-get update -yqq \ /usr/share/doc \ /usr/share/doc-base +ADD config/airflow.cfg $AIRFLOW_HOME/airflow.cfg +ADD script/entrypoint.sh /root/entrypoint.sh +RUN chmod +x /root/entrypoint.sh + EXPOSE 8080 +EXPOSE 5555 +EXPOSE 8793 -CMD ["airflow","webserver","-p","8080"] +ENTRYPOINT ["/root/entrypoint.sh"] diff --git a/README.md b/README.md index 53b1826295..b5067fa661 100644 --- a/README.md +++ b/README.md @@ -13,16 +13,34 @@ This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/a 1. Install [Docker](https://www.docker.com/). -2. Download [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) from public [Docker Hub Registry](https://registry.hub.docker.com/): `docker pull puckel/docker-airflow` +2. Install [Docker-compose](https://docs.docker.com/compose/install/). + +3. Download [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) from public [Docker Hub Registry](https://registry.hub.docker.com/): `docker pull puckel/docker-airflow` Alternatively, you can build an image from [Dockerfile](https://github.com/puckel/docker-airflow) ### Usage +Start the stack (mariadb, rabbitmq, airflow-webserver, airflow-flower & airflow-worker) : + +```bash + docker-compose up +``` + +UI Interface : + +Airflow: http://container-ip:8080/ +Flower (Celery): http://container-ip:5555/ +RabbitMQ: http://container-ip:15672/ + +To scale the number of workers : + +```bash + docker-compose scale worker=5 +``` + +Then you can run the "tutorial" : ```bash - docker run -d \ - --name airflow \ - -p 8080:8080 \ - puckel/docker-airflow + docker exec dockerairflow_webserver_1 airflow backfill tutorial -s 2015-05-01 -e 2015-06-01 ``` diff --git a/config/airflow.cfg b/config/airflow.cfg new file mode 100644 index 0000000000..9e441071b7 --- /dev/null +++ b/config/airflow.cfg @@ -0,0 +1,38 @@ +[core] +airflow_home = /usr/local/airflow +dags_folder = /usr/local/airflow/dags +base_log_folder = 
/usr/local/airflow/logs +executor = CeleryExecutor +sql_alchemy_conn = mysql://airflow:airflow@mariadb/airflow +parallelism = 32 +load_examples = True + +[webserver] +base_url = http://localhost:8080 +web_server_host = 0.0.0.0 +web_server_port = 8080 + +[smtp] +smtp_host = localhost +smtp_user = airflow +smtp_port = 25 +smtp_password = airflow +smtp_mail_from = airflow@airflow.com + +[celery] +celery_app_name = airflow.executors.celery_executor +celeryd_concurrency = 16 +worker_log_server_port = 8793 +broker_url = amqp://airflow:airflow@rabbitmq:5672/airflow +celery_result_backend = amqp://airflow:airflow@rabbitmq:5672/airflow +flower_port = 8383 +default_queue = default + +[scheduler] +job_heartbeat_sec = 5 +scheduler_heartbeat_sec = 5 +# Statsd (https://github.com/etsy/statsd) integration settings +# statsd_on = False +# statsd_host = localhost +# statsd_port = 8125 +# statsd_prefix = airflow diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000..1f91ae0951 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,48 @@ +mariadb: + image: mariadb:10 + restart: always + ports: + - "3306:3306" + environment: + - MYSQL_USER=airflow + - MYSQL_PASSWORD=airflow + - MYSQL_DATABASE=airflow + - MYSQL_ROOT_PASSWORD=wootwoot + +rabbitmq: + image: puckel/docker-rabbitmq + restart: always + ports: + - "15672:15672" + - "5672:5672" + environment: + - RABBITMQ_USER=airflow + - RABBITMQ_PASSWORD=airflow + - RABBITMQ_VHOST=airflow + +webserver: + image: puckel/docker-airflow + restart: always + ports: + - "8080:8080" + links: + - mariadb:mariadb + - rabbitmq:rabbitmq + command: webserver + +flower: + image: puckel/docker-airflow + restart: always + ports: + - "5555:5555" + links: + - rabbitmq:rabbitmq + command: flower + +worker: + image: puckel/docker-airflow + restart: always + links: + - mariadb:mariadb + - rabbitmq:rabbitmq + command: worker diff --git a/script/entrypoint.sh b/script/entrypoint.sh new file mode 100644 index 0000000000..c233071299 
--- /dev/null +++ b/script/entrypoint.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +CMD="airflow" + +if [ "$@" = "webserver" ]; then + #wait for mariadb + DB_LOOPS="20" + MYSQL_HOST="mariadb" + MYSQL_PORT="3306" + i=0 + while ! nc $MYSQL_HOST $MYSQL_PORT >/dev/null 2>&1 < /dev/null; do + i=`expr $i + 1` + if [ $i -ge $DB_LOOPS ]; then + echo "$(date) - ${MYSQL_HOST}:${MYSQL_PORT} still not reachable, giving up" + exit 1 + fi + echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}..." + sleep 1 + done + sleep 15 + $CMD initdb +fi + +exec $CMD "$@" From 8a020b9e27df1d15d7902a58c2ee24bf64cc61b3 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 4 Aug 2015 17:32:48 +0200 Subject: [PATCH 003/163] Fix issue Error connected to database #2 --- Dockerfile | 7 +++++-- README.md | 2 +- config/airflow.cfg | 2 +- docker-compose.yml | 13 ++++++------- script/entrypoint.sh | 4 ++-- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index d89891e8df..011ff55ead 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,6 +13,7 @@ ENV TERM linux # Work around initramfs-tools running on kernel 'upgrade': ENV INITRD No +ENV AIRFLOW_VERSION 1.3.0 ENV AIRFLOW_HOME /usr/local/airflow ENV C_FORCE_ROOT true ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages @@ -23,11 +24,13 @@ RUN apt-get update -yqq \ python-pip \ python-dev \ libmysqlclient-dev \ + libkrb5-dev \ + libsasl2-dev \ build-essential \ && mkdir -p $AIRFLOW_HOME/logs \ && mkdir $AIRFLOW_HOME/dags \ - && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow \ - && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[mysql] \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow==$AIRFLOW_VERSION \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[mysql]==$AIRFLOW_VERSION \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* \ diff --git a/README.md b/README.md index b5067fa661..eacbd4fdef 100644 --- a/README.md +++ b/README.md 
@@ -21,7 +21,7 @@ Alternatively, you can build an image from [Dockerfile](https://github.com/pucke ### Usage -Start the stack (mariadb, rabbitmq, airflow-webserver, airflow-flower & airflow-worker) : +Start the stack (mysql, rabbitmq, airflow-webserver, airflow-flower & airflow-worker) : ```bash docker-compose up diff --git a/config/airflow.cfg b/config/airflow.cfg index 9e441071b7..4cfba06d38 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -3,7 +3,7 @@ airflow_home = /usr/local/airflow dags_folder = /usr/local/airflow/dags base_log_folder = /usr/local/airflow/logs executor = CeleryExecutor -sql_alchemy_conn = mysql://airflow:airflow@mariadb/airflow +sql_alchemy_conn = mysql://airflow:airflow@mysqldb/airflow parallelism = 32 load_examples = True diff --git a/docker-compose.yml b/docker-compose.yml index 1f91ae0951..edcac213fd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,13 +1,12 @@ -mariadb: - image: mariadb:10 +mysqldb: + image: tutum/mysql restart: always ports: - "3306:3306" environment: - MYSQL_USER=airflow - - MYSQL_PASSWORD=airflow - - MYSQL_DATABASE=airflow - - MYSQL_ROOT_PASSWORD=wootwoot + - MYSQL_PASS=airflow + - ON_CREATE_DB=airflow rabbitmq: image: puckel/docker-rabbitmq @@ -26,7 +25,7 @@ webserver: ports: - "8080:8080" links: - - mariadb:mariadb + - mysqldb:mysqldb - rabbitmq:rabbitmq command: webserver @@ -43,6 +42,6 @@ worker: image: puckel/docker-airflow restart: always links: - - mariadb:mariadb + - mysqldb:mysqldb - rabbitmq:rabbitmq command: worker diff --git a/script/entrypoint.sh b/script/entrypoint.sh index c233071299..eececb8ae2 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -3,9 +3,9 @@ CMD="airflow" if [ "$@" = "webserver" ]; then - #wait for mariadb + #wait for mysql DB_LOOPS="20" - MYSQL_HOST="mariadb" + MYSQL_HOST="mysqldb" MYSQL_PORT="3306" i=0 while ! 
nc $MYSQL_HOST $MYSQL_PORT >/dev/null 2>&1 < /dev/null; do From af924782e665fcb1d647189e2455c6ef8ceef118 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Sun, 23 Aug 2015 13:21:59 +0200 Subject: [PATCH 004/163] Release 1.4.0 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 011ff55ead..80d117fee7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ ENV TERM linux # Work around initramfs-tools running on kernel 'upgrade': ENV INITRD No -ENV AIRFLOW_VERSION 1.3.0 +ENV AIRFLOW_VERSION 1.4.0 ENV AIRFLOW_HOME /usr/local/airflow ENV C_FORCE_ROOT true ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages From 003cc257d7f48a53d367fea828ffc51e61e5048d Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 24 Aug 2015 14:35:36 +0200 Subject: [PATCH 005/163] Update README --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index eacbd4fdef..387d276312 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/airflow) for [Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). -### Base Docker Image +### Based on Debian Wheezy official Image * [debian:wheezy](https://registry.hub.docker.com/_/debian/) @@ -17,7 +17,7 @@ This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/a 3. 
Download [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) from public [Docker Hub Registry](https://registry.hub.docker.com/): `docker pull puckel/docker-airflow` -Alternatively, you can build an image from [Dockerfile](https://github.com/puckel/docker-airflow) +Alternatively, you can build an image from the [Dockerfile](https://github.com/puckel/docker-airflow) ### Usage @@ -29,9 +29,11 @@ Start the stack (mysql, rabbitmq, airflow-webserver, airflow-flower & airflow-wo UI Interface : -Airflow: http://container-ip:8080/ -Flower (Celery): http://container-ip:5555/ -RabbitMQ: http://container-ip:15672/ +* Airflow: http://localhost:8080/ +* Flower (Celery): http://localhost:5555/ +* RabbitMQ: http://localhost:15672/ + +(with boot2docker, use: open http://$(boot2docker ip):8080) To scale the number of workers : From c2a60b875daf0c92d649a04501c9348e742dba33 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 27 Aug 2015 18:01:07 +0200 Subject: [PATCH 006/163] Release 1.4.1 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 80d117fee7..63384d97d3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ ENV TERM linux # Work around initramfs-tools running on kernel 'upgrade': ENV INITRD No -ENV AIRFLOW_VERSION 1.4.0 +ENV AIRFLOW_VERSION 1.4.1 ENV AIRFLOW_HOME /usr/local/airflow ENV C_FORCE_ROOT true ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages From 3e0f364890c0ac99eceb45637a44f423b32cadec Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 28 Aug 2015 13:26:01 +0200 Subject: [PATCH 007/163] Update README.md --- README.md | 51 +++++++++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 387d276312..bfae95d6f8 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,43 @@ -## Airflow Dockerfile - +# Airflow Dockerfile This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/airflow) for 
[Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). +## Informations -### Based on Debian Wheezy official Image - -* [debian:wheezy](https://registry.hub.docker.com/_/debian/) - +* Based on Debian Wheezy official Image [debian:wheezy](https://registry.hub.docker.com/_/debian/) +* Install [Docker](https://www.docker.com/) +* Install [Docker-compose](https://docs.docker.com/compose/install/) -### Installation +## Installation -1. Install [Docker](https://www.docker.com/). + docker pull puckel/docker-airflow -2. Install [Docker-compose](https://docs.docker.com/compose/install/). +## Build -3. Download [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) from public [Docker Hub Registry](https://registry.hub.docker.com/): `docker pull puckel/docker-airflow` + docker build --rm -t puckel/docker-airflow . -Alternatively, you can build an image from the [Dockerfile](https://github.com/puckel/docker-airflow) - -### Usage +# Usage Start the stack (mysql, rabbitmq, airflow-webserver, airflow-flower & airflow-worker) : -```bash - docker-compose up -``` + docker-compose up -d -UI Interface : +## UI Links -* Airflow: http://localhost:8080/ -* Flower (Celery): http://localhost:5555/ -* RabbitMQ: http://localhost:15672/ +- Airflow: [localhost:8080](http://localhost:8080/) +- Flower: [localhost:5555](http://localhost:5555/) +- RabbitMQ: [localhost:15672](http://localhost:15672/) (with boot2docker, use: open http://$(boot2docker ip):8080) -To scale the number of workers : +## To scale the number of workers + + docker-compose scale worker=5 + +## Run the test "tutorial" -```bash - docker-compose scale worker=5 -``` + docker exec dockerairflow_webserver_1 airflow backfill tutorial -s 2015-05-01 -e 2015-06-01 -Then you can run the "tutorial" : +# Wanna help? 
-```bash - docker exec dockerairflow_webserver_1 airflow backfill tutorial -s 2015-05-01 -e 2015-06-01 -``` +Fork, improve and PR. ;-) From 8301193df6afa71e0c33a52811c49731232ec9a6 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 28 Aug 2015 13:26:57 +0200 Subject: [PATCH 008/163] Fix tabulation --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bfae95d6f8..779f668e69 100644 --- a/README.md +++ b/README.md @@ -32,11 +32,11 @@ Start the stack (mysql, rabbitmq, airflow-webserver, airflow-flower & airflow-wo ## To scale the number of workers - docker-compose scale worker=5 + docker-compose scale worker=5 ## Run the test "tutorial" - docker exec dockerairflow_webserver_1 airflow backfill tutorial -s 2015-05-01 -e 2015-06-01 + docker exec dockerairflow_webserver_1 airflow backfill tutorial -s 2015-05-01 -e 2015-06-01 # Wanna help? From 9b13b9a37a35380cf3e349871cb91459766f19ec Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 7 Sep 2015 11:27:50 +0200 Subject: [PATCH 009/163] Add fernet_key generation for Encrypt passwords --- Dockerfile | 3 +++ script/entrypoint.sh | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/Dockerfile b/Dockerfile index 63384d97d3..eeb53ab881 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,9 +26,12 @@ RUN apt-get update -yqq \ libmysqlclient-dev \ libkrb5-dev \ libsasl2-dev \ + libssl-dev \ + libffi-dev \ build-essential \ && mkdir -p $AIRFLOW_HOME/logs \ && mkdir $AIRFLOW_HOME/dags \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" cryptography \ && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow==$AIRFLOW_VERSION \ && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[mysql]==$AIRFLOW_VERSION \ && apt-get clean \ diff --git a/script/entrypoint.sh b/script/entrypoint.sh index eececb8ae2..719a97f049 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -2,6 +2,11 @@ CMD="airflow" +# Generate Fernet key 
+FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY") + +sed -i "s/{FERNET_KEY}/${FERNET_KEY}/" $AIRFLOW_HOME/airflow.cfg + if [ "$@" = "webserver" ]; then #wait for mysql DB_LOOPS="20" From 3da72d20bd4c987cc6dc8fd82df82084c82168d7 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 7 Sep 2015 11:30:43 +0200 Subject: [PATCH 010/163] Bump to release 1.5.1 --- Dockerfile | 2 +- README.md | 2 ++ config/airflow.cfg | 69 +++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index eeb53ab881..6dfc0bd145 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ ENV TERM linux # Work around initramfs-tools running on kernel 'upgrade': ENV INITRD No -ENV AIRFLOW_VERSION 1.4.1 +ENV AIRFLOW_VERSION 1.5.1 ENV AIRFLOW_HOME /usr/local/airflow ENV C_FORCE_ROOT true ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages diff --git a/README.md b/README.md index 779f668e69..082d94d6b1 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,8 @@ This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/a ## Build +For example, if you need to install [Extra Packages](http://pythonhosted.org/airflow/installation.html#extra-package), edit the Dockerfile and than build-it. + docker build --rm -t puckel/docker-airflow . 
# Usage diff --git a/config/airflow.cfg b/config/airflow.cfg index 4cfba06d38..a367014952 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -1,35 +1,96 @@ [core] -airflow_home = /usr/local/airflow -dags_folder = /usr/local/airflow/dags -base_log_folder = /usr/local/airflow/logs +# The home folder for airflow, default is ~/airflow +airflow_home = {AIRFLOW_HOME} +# The folder where your airflow pipelines live, most likely a +# subfolder in a code repository +dags_folder = {AIRFLOW_HOME}/dags +# The folder where airflow should store its log files +base_log_folder = {AIRFLOW_HOME}/logs +# The executor class that airflow should use. Choices include +# SequentialExecutor, LocalExecutor, CeleryExecutor executor = CeleryExecutor +# The SqlAlchemy connection string to the metadata database. +# SqlAlchemy supports many different database engine, more information +# their website sql_alchemy_conn = mysql://airflow:airflow@mysqldb/airflow +# The amount of parallelism as a setting to the executor. This defines +# the max number of task instances that should run simultaneously +# on this airflow installation parallelism = 32 +# Whether to load the examples that ship with Airflow. It's good to +# get started, but you probably want to set this to False in a production +# environment load_examples = True +# Where your Airflow plugins are stored +plugins_folder = {AIRFLOW_HOME}/plugins +# Secret key to save connection passwords in the db +fernet_key = {FERNET_KEY} [webserver] +# The base url of your website as airflow cannot guess what domain or +# cname you are using. 
This is use in automated emails that +# airflow sends to point links to the right web server base_url = http://localhost:8080 +# The ip specified when starting the web server web_server_host = 0.0.0.0 +# The port on which to run the web server web_server_port = 8080 +# Secret key used to run your flask app +secret_key = temporary_key +# Expose the configuration file in the web server +expose_config = true +# Set to true to turn on authentication : http://pythonhosted.org/airflow/installation.html#web-authentication +authenticate = False +# Filter the list of dags by owner name (requires authentication to be enabled) +filter_by_owner = False [smtp] +# If you want airflow to send emails on retries, failure, and you want to +# the airflow.utils.send_email function, you have to configure an smtp +# server here smtp_host = localhost +smtp_starttls = True smtp_user = airflow smtp_port = 25 smtp_password = airflow smtp_mail_from = airflow@airflow.com [celery] +# This section only applies if you are using the CeleryExecutor in +# [core] section above +# The app name that will be used by celery celery_app_name = airflow.executors.celery_executor +# The concurrency that will be used when starting workers with the +# "airflow worker" command. This defines the number of task instances that +# a worker will take, so size up your workers based on the resources on +# your worker box and the nature of your tasks celeryd_concurrency = 16 +# When you start an airflow worker, airflow starts a tiny web server +# subprocess to serve the workers local log files to the airflow main +# web server, who then builds pages and sends them to users. This defines +# the port on which the logs are served. It needs to be unused, and open +# visible from the main web server to connect into the workers. worker_log_server_port = 8793 +# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally +# a sqlalchemy database. Refer to the Celery documentation for more +# information. 
broker_url = amqp://airflow:airflow@rabbitmq:5672/airflow +# Another key Celery setting celery_result_backend = amqp://airflow:airflow@rabbitmq:5672/airflow -flower_port = 8383 +# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start +# it `airflow flower`. This defines the port that Celery Flower runs on +flower_port = 5555 +# Default queue that tasks get assigned to and that worker listen on. default_queue = default [scheduler] +# Task instances listen for external kill signal (when you clear tasks +# from the CLI or the UI), this defines the frequency at which they should +# listen (in seconds). job_heartbeat_sec = 5 +# The scheduler constantly tries to trigger new tasks (look at the +# scheduler section in the docs for more information). This defines +# how often the scheduler should run (in seconds). scheduler_heartbeat_sec = 5 # Statsd (https://github.com/etsy/statsd) integration settings # statsd_on = False From b22217e643a5e2a6aec6c88b997b0357d99c2a4d Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 8 Sep 2015 16:29:46 +0200 Subject: [PATCH 011/163] Add circle.yml --- circle.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 circle.yml diff --git a/circle.yml b/circle.yml new file mode 100644 index 0000000000..413466747f --- /dev/null +++ b/circle.yml @@ -0,0 +1,13 @@ +machine: + services: + - docker + +dependencies: + override: + - docker build -t puckel/docker-airflow . 
+ +test: + pre: + - sleep 5 + override: + - docker run puckel/docker-airflow From 25dfe7025940c3ab6ad51d4b61bce2bdd07a0d02 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 8 Sep 2015 16:48:34 +0200 Subject: [PATCH 012/163] Update circle.yml --- circle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/circle.yml b/circle.yml index 413466747f..a83efdb3da 100644 --- a/circle.yml +++ b/circle.yml @@ -10,4 +10,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow + - docker run puckel/docker-airflow version From 295ef8ab7d4ec40839afb7689dae42d7c8b3f0bb Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 8 Sep 2015 17:02:39 +0200 Subject: [PATCH 013/163] Add CircleCI status badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 082d94d6b1..50cd57b95c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Airflow Dockerfile +[![Circle CI](https://circleci.com/gh/puckel/docker-airflow.svg?style=svg)](https://circleci.com/gh/puckel/docker-airflow) This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/airflow) for [Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). 
From 21eb19aafb47a028e289d33558744310256a6973 Mon Sep 17 00:00:00 2001 From: Roger Lam Date: Thu, 10 Sep 2015 11:38:27 -0700 Subject: [PATCH 014/163] Add scheduler to docker-compose.yml and have it wait until mysql is ready when starting containers --- docker-compose.yml | 8 ++++++++ script/entrypoint.sh | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index edcac213fd..e95ff9dbd8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -45,3 +45,11 @@ worker: - mysqldb:mysqldb - rabbitmq:rabbitmq command: worker + +scheduler: + image: puckel/docker-airflow + restart: always + links: + - mysqldb:mysqldb + - rabbitmq:rabbitmq + command: scheduler diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 719a97f049..0a65dd4a8d 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -7,7 +7,7 @@ FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fer sed -i "s/{FERNET_KEY}/${FERNET_KEY}/" $AIRFLOW_HOME/airflow.cfg -if [ "$@" = "webserver" ]; then +if [ "$@" = "webserver" ] || [ "$@" = "scheduler" ] ; then #wait for mysql DB_LOOPS="20" MYSQL_HOST="mysqldb" From 1ff4ba6e310901eae3af5006a7859da2ccdc5101 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Sat, 12 Sep 2015 10:42:05 +0300 Subject: [PATCH 015/163] Add a check for rabbitmq availability at startup --- Dockerfile | 1 + script/entrypoint.sh | 27 +++++++++++++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6dfc0bd145..571f89a4c1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,6 +21,7 @@ ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages RUN apt-get update -yqq \ && apt-get install -yqq --no-install-recommends \ netcat \ + curl \ python-pip \ python-dev \ libmysqlclient-dev \ diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 0a65dd4a8d..345f1db1d3 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -1,17 +1,28 @@ -#!/bin/sh +#!/usr/bin/env 
bash CMD="airflow" - -# Generate Fernet key +DB_LOOPS="10" +MYSQL_HOST="mysqldb" +MYSQL_PORT="3306" +RABBITMQ_HOST="rabbitmq" +RABBITMQ_CREDS="airflow:airflow" FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY") +# Generate Fernet key sed -i "s/{FERNET_KEY}/${FERNET_KEY}/" $AIRFLOW_HOME/airflow.cfg -if [ "$@" = "webserver" ] || [ "$@" = "scheduler" ] ; then +# wait for rabbitmq +while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:15672/api/whoami |grep '200 OK'; do + echo "$(date) - waiting for RabbitMQ..." + sleep 2 +done + +if [ "$@" = "flower" ]; then + sleep 10 +fi + +if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] ; then #wait for mysql - DB_LOOPS="20" - MYSQL_HOST="mysqldb" - MYSQL_PORT="3306" i=0 while ! nc $MYSQL_HOST $MYSQL_PORT >/dev/null 2>&1 < /dev/null; do i=`expr $i + 1` @@ -22,7 +33,7 @@ if [ "$@" = "webserver" ] || [ "$@" = "scheduler" ] ; then echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}..." sleep 1 done - sleep 15 + sleep 2 $CMD initdb fi From 10578f43d71148d0e94e0f45f4268c1cb3c125a4 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Sat, 12 Sep 2015 11:01:22 +0300 Subject: [PATCH 016/163] Fix previous commit --- script/entrypoint.sh | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 345f1db1d3..559e3e3373 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -12,17 +12,24 @@ FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fer sed -i "s/{FERNET_KEY}/${FERNET_KEY}/" $AIRFLOW_HOME/airflow.cfg # wait for rabbitmq -while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:15672/api/whoami |grep '200 OK'; do - echo "$(date) - waiting for RabbitMQ..." - sleep 2 -done - +if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] || [ "$@" = "flower" ] ; then + j=0 + while ! 
curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:15672/api/whoami |grep '200 OK'; do + j=`expr $j + 1` + if [ $j -ge $DB_LOOPS ]; then + echo "$(date) - $RABBITMQ_HOST still not reachable, giving up" + exit 1 + fi + echo "$(date) - waiting for RabbitMQ..." + sleep 2 + done +fi if [ "$@" = "flower" ]; then sleep 10 fi +# wait for DB if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] ; then - #wait for mysql i=0 while ! nc $MYSQL_HOST $MYSQL_PORT >/dev/null 2>&1 < /dev/null; do i=`expr $i + 1` From 851c2a67441e0f19802598d519d2dbe5fc73128a Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 5 Oct 2015 16:26:58 +0200 Subject: [PATCH 017/163] Add a user airflow --- Dockerfile | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 571f89a4c1..03185cbecb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.0 +# VERSION 1.1 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow @@ -15,9 +15,11 @@ ENV INITRD No ENV AIRFLOW_VERSION 1.5.1 ENV AIRFLOW_HOME /usr/local/airflow -ENV C_FORCE_ROOT true ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages +# Add airflow user +RUN useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow + RUN apt-get update -yqq \ && apt-get install -yqq --no-install-recommends \ netcat \ @@ -44,12 +46,15 @@ RUN apt-get update -yqq \ /usr/share/doc \ /usr/share/doc-base -ADD config/airflow.cfg $AIRFLOW_HOME/airflow.cfg -ADD script/entrypoint.sh /root/entrypoint.sh -RUN chmod +x /root/entrypoint.sh +ADD script/entrypoint.sh ${AIRFLOW_HOME}/entrypoint.sh +ADD config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg + +RUN \ + chown -R airflow: ${AIRFLOW_HOME} \ + && chmod +x ${AIRFLOW_HOME}/entrypoint.sh -EXPOSE 8080 -EXPOSE 5555 -EXPOSE 8793 +EXPOSE 8080 5555 8793 -ENTRYPOINT ["/root/entrypoint.sh"] +USER airflow +WORKDIR ${AIRFLOW_HOME} +ENTRYPOINT ["./entrypoint.sh"] From 
3d4a55878a1f7543e7f5723ce6620d5aed9a0320 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 5 Oct 2015 16:28:17 +0200 Subject: [PATCH 018/163] Use container_name esthetic commit --- docker-compose.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index e95ff9dbd8..13657c20cf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,5 @@ mysqldb: + container_name: mysqldb image: tutum/mysql restart: always ports: @@ -9,6 +10,7 @@ mysqldb: - ON_CREATE_DB=airflow rabbitmq: + container_name: rabbitmq image: puckel/docker-rabbitmq restart: always ports: @@ -20,6 +22,7 @@ rabbitmq: - RABBITMQ_VHOST=airflow webserver: + container_name: webserver image: puckel/docker-airflow restart: always ports: @@ -27,9 +30,11 @@ webserver: links: - mysqldb:mysqldb - rabbitmq:rabbitmq + - worker:worker command: webserver flower: + container_name: flower image: puckel/docker-airflow restart: always ports: @@ -39,14 +44,18 @@ flower: command: flower worker: + container_name: worker image: puckel/docker-airflow restart: always + ports: + - "8793:8793" links: - mysqldb:mysqldb - rabbitmq:rabbitmq command: worker scheduler: + container_name: scheduler image: puckel/docker-airflow restart: always links: From 8b88292c2d4c2e4a1f88b050b7350273b550ba22 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 5 Oct 2015 16:29:53 +0200 Subject: [PATCH 019/163] Use curly brackets in Dockerfile --- Dockerfile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 03185cbecb..fb46a63512 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,11 +32,9 @@ RUN apt-get update -yqq \ libssl-dev \ libffi-dev \ build-essential \ - && mkdir -p $AIRFLOW_HOME/logs \ - && mkdir $AIRFLOW_HOME/dags \ && pip install --install-option="--install-purelib=$PYTHONLIBPATH" cryptography \ - && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow==$AIRFLOW_VERSION \ - && pip install 
--install-option="--install-purelib=$PYTHONLIBPATH" airflow[mysql]==$AIRFLOW_VERSION \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow==${AIRFLOW_VERSION} \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[mysql]==${AIRFLOW_VERSION} \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* \ From dc8fca26587a81b3a170298e11bc05ffe70280e5 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 5 Oct 2015 16:30:20 +0200 Subject: [PATCH 020/163] Fix issue #6 --- config/airflow.cfg | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/airflow.cfg b/config/airflow.cfg index a367014952..53a959ab39 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -1,11 +1,11 @@ [core] # The home folder for airflow, default is ~/airflow -airflow_home = {AIRFLOW_HOME} +airflow_home = /usr/local/airflow # The folder where your airflow pipelines live, most likely a # subfolder in a code repository -dags_folder = {AIRFLOW_HOME}/dags +dags_folder = /usr/local/airflow/dags # The folder where airflow should store its log files -base_log_folder = {AIRFLOW_HOME}/logs +base_log_folder = /usr/local/airflow/logs # The executor class that airflow should use. 
Choices include # SequentialExecutor, LocalExecutor, CeleryExecutor executor = CeleryExecutor @@ -22,7 +22,7 @@ parallelism = 32 # environment load_examples = True # Where your Airflow plugins are stored -plugins_folder = {AIRFLOW_HOME}/plugins +plugins_folder = /usr/local/airflow/plugins # Secret key to save connection passwords in the db fernet_key = {FERNET_KEY} From 9e21d104cb542e5471e141998007d102582d22af Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 5 Oct 2015 18:40:20 +0200 Subject: [PATCH 021/163] Add AIRFLOW_HOME ENV_VARIABLE in docker-compose.yml --- docker-compose.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 13657c20cf..fadab96014 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,6 +25,8 @@ webserver: container_name: webserver image: puckel/docker-airflow restart: always + environment: + - AIRFLOW_HOME=/usr/local/airflow ports: - "8080:8080" links: @@ -37,6 +39,8 @@ flower: container_name: flower image: puckel/docker-airflow restart: always + environment: + - AIRFLOW_HOME=/usr/local/airflow ports: - "5555:5555" links: @@ -47,6 +51,8 @@ worker: container_name: worker image: puckel/docker-airflow restart: always + environment: + - AIRFLOW_HOME=/usr/local/airflow ports: - "8793:8793" links: @@ -58,6 +64,8 @@ scheduler: container_name: scheduler image: puckel/docker-airflow restart: always + environment: + - AIRFLOW_HOME=/usr/local/airflow links: - mysqldb:mysqldb - rabbitmq:rabbitmq From 63b87548b5ae84ce9d8c04983b5be4d7f12caa07 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 16 Oct 2015 16:13:42 +0200 Subject: [PATCH 022/163] Example to add custom dags with a volume definition --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index fadab96014..c4d2413f9a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,6 +25,8 @@ webserver: container_name: webserver image: puckel/docker-airflow restart: always + # 
volumes: + # - /localpath/to/dags:/usr/local/airflow/dags environment: - AIRFLOW_HOME=/usr/local/airflow ports: From ca7c38eb25cfa9742ad917f8fcf0ed62c3a27131 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 16 Oct 2015 16:14:13 +0200 Subject: [PATCH 023/163] Remove trailing whitespace --- docker-compose.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index c4d2413f9a..4e83da7dc3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -38,7 +38,7 @@ webserver: command: webserver flower: - container_name: flower + container_name: flower image: puckel/docker-airflow restart: always environment: @@ -50,7 +50,7 @@ flower: command: flower worker: - container_name: worker + container_name: worker image: puckel/docker-airflow restart: always environment: @@ -63,7 +63,7 @@ worker: command: worker scheduler: - container_name: scheduler + container_name: scheduler image: puckel/docker-airflow restart: always environment: From a35e560dc9dd79682802cdbe91261c4dfc75a3fd Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 26 Oct 2015 17:29:08 +0100 Subject: [PATCH 024/163] Bump to v1.5.2 --- Dockerfile | 3 +- config/airflow.cfg | 73 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index fb46a63512..19f88e28bb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ ENV TERM linux # Work around initramfs-tools running on kernel 'upgrade': ENV INITRD No -ENV AIRFLOW_VERSION 1.5.1 +ENV AIRFLOW_VERSION 1.5.2 ENV AIRFLOW_HOME /usr/local/airflow ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages @@ -34,6 +34,7 @@ RUN apt-get update -yqq \ build-essential \ && pip install --install-option="--install-purelib=$PYTHONLIBPATH" cryptography \ && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow==${AIRFLOW_VERSION} \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" 
airflow[celery]==${AIRFLOW_VERSION} \ && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[mysql]==${AIRFLOW_VERSION} \ && apt-get clean \ && rm -rf \ diff --git a/config/airflow.cfg b/config/airflow.cfg index 53a959ab39..3092e9bc68 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -1,46 +1,66 @@ [core] # The home folder for airflow, default is ~/airflow -airflow_home = /usr/local/airflow +airflow_home = /usr/local/airflow + # The folder where your airflow pipelines live, most likely a # subfolder in a code repository dags_folder = /usr/local/airflow/dags + # The folder where airflow should store its log files base_log_folder = /usr/local/airflow/logs + # The executor class that airflow should use. Choices include # SequentialExecutor, LocalExecutor, CeleryExecutor -executor = CeleryExecutor +executor = SequentialExecutor + # The SqlAlchemy connection string to the metadata database. # SqlAlchemy supports many different database engine, more information # their website -sql_alchemy_conn = mysql://airflow:airflow@mysqldb/airflow +sql_alchemy_conn = mysql://airflow:airflow@mysql/airflow + # The amount of parallelism as a setting to the executor. This defines # the max number of task instances that should run simultaneously # on this airflow installation parallelism = 32 + # Whether to load the examples that ship with Airflow. It's good to # get started, but you probably want to set this to False in a production # environment load_examples = True + # Where your Airflow plugins are stored plugins_folder = /usr/local/airflow/plugins + # Secret key to save connection passwords in the db -fernet_key = {FERNET_KEY} +fernet_key = $FERNET_KEY + +# Whether to disable pickling dags +donot_pickle = False [webserver] # The base url of your website as airflow cannot guess what domain or # cname you are using. 
This is use in automated emails that # airflow sends to point links to the right web server base_url = http://localhost:8080 + # The ip specified when starting the web server web_server_host = 0.0.0.0 + # The port on which to run the web server web_server_port = 8080 + # Secret key used to run your flask app secret_key = temporary_key + +# number of threads to run the Gunicorn web server +thread = 4 + # Expose the configuration file in the web server expose_config = true + # Set to true to turn on authentication : http://pythonhosted.org/airflow/installation.html#web-authentication authenticate = False + # Filter the list of dags by owner name (requires authentication to be enabled) filter_by_owner = False @@ -58,28 +78,35 @@ smtp_mail_from = airflow@airflow.com [celery] # This section only applies if you are using the CeleryExecutor in # [core] section above + # The app name that will be used by celery celery_app_name = airflow.executors.celery_executor + # The concurrency that will be used when starting workers with the # "airflow worker" command. This defines the number of task instances that # a worker will take, so size up your workers based on the resources on # your worker box and the nature of your tasks celeryd_concurrency = 16 + # When you start an airflow worker, airflow starts a tiny web server # subprocess to serve the workers local log files to the airflow main # web server, who then builds pages and sends them to users. This defines # the port on which the logs are served. It needs to be unused, and open # visible from the main web server to connect into the workers. worker_log_server_port = 8793 + # The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally # a sqlalchemy database. Refer to the Celery documentation for more # information. 
broker_url = amqp://airflow:airflow@rabbitmq:5672/airflow + # Another key Celery setting celery_result_backend = amqp://airflow:airflow@rabbitmq:5672/airflow + # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start # it `airflow flower`. This defines the port that Celery Flower runs on flower_port = 5555 + # Default queue that tasks get assigned to and that worker listen on. default_queue = default @@ -88,12 +115,50 @@ default_queue = default # from the CLI or the UI), this defines the frequency at which they should # listen (in seconds). job_heartbeat_sec = 5 + # The scheduler constantly tries to trigger new tasks (look at the # scheduler section in the docs for more information). This defines # how often the scheduler should run (in seconds). scheduler_heartbeat_sec = 5 + # Statsd (https://github.com/etsy/statsd) integration settings # statsd_on = False # statsd_host = localhost # statsd_port = 8125 # statsd_prefix = airflow + +[mesos] +# Mesos master address which MesosExecutor will connect to. +master = localhost:5050 + +# The framework name which Airflow scheduler will register itself as on mesos +framework_name = Airflow + +# Number of cpu cores required for running one task instance using +# 'airflow run --local -p ' +# command on a mesos slave +task_cpu = 1 + +# Memory in MB required for running one task instance using +# 'airflow run --local -p ' +# command on a mesos slave +task_memory = 256 + +# Enable framework checkpointing for mesos +# See http://mesos.apache.org/documentation/latest/slave-recovery/ +checkpoint = False + +# Failover timeout in milliseconds. +# When checkpointing is enabled and this option is set, Mesos waits until the configured timeout for +# the MesosExecutor framework to re-register after a failover. Mesos shuts down running tasks if the +# MesosExecutor framework fails to re-register within this timeframe. 
+# failover_timeout = 604800 + +# Enable framework authentication for mesos +# See http://mesos.apache.org/documentation/latest/configuration/ +authenticate = False + +# Mesos credentials, if authentication is enabled +# default_principal = admin +# default_secret = admin + From 013869f0b1c6a57e818d74f8f81afcac0b56f7e9 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 26 Oct 2015 17:30:11 +0100 Subject: [PATCH 025/163] Rename mysqldb to mysql --- docker-compose.yml | 4 ++-- script/entrypoint.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 4e83da7dc3..6f81f38de4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,5 @@ -mysqldb: - container_name: mysqldb +mysql: + container_name: mysql image: tutum/mysql restart: always ports: diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 559e3e3373..af21d0d9c4 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -2,7 +2,7 @@ CMD="airflow" DB_LOOPS="10" -MYSQL_HOST="mysqldb" +MYSQL_HOST="mysql" MYSQL_PORT="3306" RABBITMQ_HOST="rabbitmq" RABBITMQ_CREDS="airflow:airflow" From a33b0102a1dcc19cd8d9960d3ef7a584cd1705ba Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 26 Oct 2015 17:31:06 +0100 Subject: [PATCH 026/163] Move to the official rabbitmq container --- docker-compose.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 6f81f38de4..bfac5f8e44 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,15 +11,15 @@ mysql: rabbitmq: container_name: rabbitmq - image: puckel/docker-rabbitmq + image: rabbitmq:3-management restart: always ports: - "15672:15672" - "5672:5672" environment: - - RABBITMQ_USER=airflow - - RABBITMQ_PASSWORD=airflow - - RABBITMQ_VHOST=airflow + - RABBITMQ_DEFAULT_USER=airflow + - RABBITMQ_DEFAULT_PASS=airflow + - RABBITMQ_DEFAULT_VHOST=airflow webserver: container_name: webserver From 
a11b25f7eaf36730ebfa71cf220bc148c07d36a8 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 26 Oct 2015 17:32:00 +0100 Subject: [PATCH 027/163] Fix mysql links --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index bfac5f8e44..75a2fb68ba 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -32,7 +32,7 @@ webserver: ports: - "8080:8080" links: - - mysqldb:mysqldb + - mysql:mysql - rabbitmq:rabbitmq - worker:worker command: webserver From f39f1c3f88d207d8452bc2bbc7df1a80de5a0571 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 26 Oct 2015 17:35:28 +0100 Subject: [PATCH 028/163] Add loop counter in init script --- script/entrypoint.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index af21d0d9c4..995b36630a 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -20,10 +20,11 @@ if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] || [ echo "$(date) - $RABBITMQ_HOST still not reachable, giving up" exit 1 fi - echo "$(date) - waiting for RabbitMQ..." + echo "$(date) - waiting for RabbitMQ... $j/$DB_LOOPS" sleep 2 done fi + if [ "$@" = "flower" ]; then sleep 10 fi @@ -37,7 +38,7 @@ if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] ; the echo "$(date) - ${MYSQL_HOST}:${MYSQL_PORT} still not reachable, giving up" exit 1 fi - echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}..." + echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}... 
$i/$DB_LOOPS" sleep 1 done sleep 2 From 1434f70d014864f6a7e928f78974fe7662111bb5 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 26 Oct 2015 17:35:52 +0100 Subject: [PATCH 029/163] Limit the initdb command to webserver container --- script/entrypoint.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 995b36630a..815dabdf97 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -41,8 +41,11 @@ if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] ; the echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}... $i/$DB_LOOPS" sleep 1 done - sleep 2 - $CMD initdb + if [ "$@" = "webserver" ]; then + echo "Initialize database..." + $CMD initdb + fi + sleep 5 fi exec $CMD "$@" From f45be11fde8b32705b925931b6672af98693c3f8 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 26 Oct 2015 17:38:12 +0100 Subject: [PATCH 030/163] Fix links between container --- docker-compose.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 75a2fb68ba..f1225e4ae4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -35,6 +35,7 @@ webserver: - mysql:mysql - rabbitmq:rabbitmq - worker:worker + - scheduler:scheduler command: webserver flower: @@ -58,7 +59,6 @@ worker: ports: - "8793:8793" links: - - mysqldb:mysqldb - rabbitmq:rabbitmq command: worker @@ -68,7 +68,4 @@ scheduler: restart: always environment: - AIRFLOW_HOME=/usr/local/airflow - links: - - mysqldb:mysqldb - - rabbitmq:rabbitmq command: scheduler From 7461a59c039ef8d2a1a944d026f74a754ee915ce Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 28 Oct 2015 10:36:11 +0100 Subject: [PATCH 031/163] Bump circle to v1.5.2 --- circle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/circle.yml b/circle.yml index a83efdb3da..4d9eb3a4a4 100644 --- a/circle.yml +++ b/circle.yml @@ -10,4 +10,4 @@ test: pre: - sleep 5 override: - - docker run 
puckel/docker-airflow version + - docker run puckel/docker-airflow version |grep '1.5.2' From e4422b8c70912eee59346fbb13e0a16d4f580275 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 13 Nov 2015 16:05:07 +0100 Subject: [PATCH 032/163] Change to v1.6.0 --- Dockerfile | 3 +-- circle.yml | 2 +- config/airflow.cfg | 22 +++++++++++++++++----- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 19f88e28bb..ff78f7beb0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,8 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Work around initramfs-tools running on kernel 'upgrade': ENV INITRD No - -ENV AIRFLOW_VERSION 1.5.2 +ENV AIRFLOW_VERSION 1.6.0 ENV AIRFLOW_HOME /usr/local/airflow ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages diff --git a/circle.yml b/circle.yml index 4d9eb3a4a4..c8c93dac47 100644 --- a/circle.yml +++ b/circle.yml @@ -10,4 +10,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.5.2' + - docker run puckel/docker-airflow version |grep '1.6.0' diff --git a/config/airflow.cfg b/config/airflow.cfg index 3092e9bc68..b3ffe25ac9 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -6,12 +6,15 @@ airflow_home = /usr/local/airflow # subfolder in a code repository dags_folder = /usr/local/airflow/dags -# The folder where airflow should store its log files +# The folder where airflow should store its log files. This location base_log_folder = /usr/local/airflow/logs +# An S3 location can be provided for log backups +# For S3, use the full URL to the base folder (starting with "s3://...") +s3_log_folder = None # The executor class that airflow should use. Choices include # SequentialExecutor, LocalExecutor, CeleryExecutor -executor = SequentialExecutor +executor = CeleryExecutor # The SqlAlchemy connection string to the metadata database. 
# SqlAlchemy supports many different database engine, more information @@ -23,6 +26,12 @@ sql_alchemy_conn = mysql://airflow:airflow@mysql/airflow # on this airflow installation parallelism = 32 +# The number of task instances allowed to run concurrently by the scheduler +dag_concurrency = 16 + +# The maximum number of active DAG runs per DAG +max_active_runs_per_dag = 16 + # Whether to load the examples that ship with Airflow. It's good to # get started, but you probably want to set this to False in a production # environment @@ -52,8 +61,12 @@ web_server_port = 8080 # Secret key used to run your flask app secret_key = temporary_key -# number of threads to run the Gunicorn web server -thread = 4 +# Number of workers to run the Gunicorn web server +workers = 4 + +# The worker class gunicorn should use. Choices include +# sync (default), eventlet, gevent +worker_class = sync # Expose the configuration file in the web server expose_config = true @@ -161,4 +174,3 @@ authenticate = False # Mesos credentials, if authentication is enabled # default_principal = admin # default_secret = admin - From 1dac2d89562bd607b5b6354b9592556c82b4992b Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 13 Nov 2015 16:05:41 +0100 Subject: [PATCH 033/163] Update README --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 50cd57b95c..ee7697f7ef 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/a * Based on Debian Wheezy official Image [debian:wheezy](https://registry.hub.docker.com/_/debian/) * Install [Docker](https://www.docker.com/) -* Install [Docker-compose](https://docs.docker.com/compose/install/) +* Install [Docker Compose](https://docs.docker.com/compose/install/) ## Installation @@ -21,10 +21,12 @@ For example, if you need to install [Extra Packages](http://pythonhosted.org/air # Usage -Start the stack (mysql, rabbitmq, 
airflow-webserver, airflow-flower & airflow-worker) : +Start the stack (mysql, rabbitmq, airflow-webserver, airflow-scheduler airflow-flower & airflow-worker) : docker-compose up -d +Check [Airflow Documentation](http://pythonhosted.org/airflow/) + ## UI Links - Airflow: [localhost:8080](http://localhost:8080/) @@ -33,9 +35,6 @@ Start the stack (mysql, rabbitmq, airflow-webserver, airflow-flower & airflow-wo (with boot2docker, use: open http://$(boot2docker ip):8080) -## To scale the number of workers - - docker-compose scale worker=5 ## Run the test "tutorial" From 4952ca436f703f5954f1d306c1d06aed4f0e186a Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 13 Nov 2015 16:06:22 +0100 Subject: [PATCH 034/163] Remove container name in docker-compose.yml --- docker-compose.yml | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f1225e4ae4..737b55b85d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,4 @@ mysql: - container_name: mysql image: tutum/mysql restart: always ports: @@ -10,7 +9,6 @@ mysql: - ON_CREATE_DB=airflow rabbitmq: - container_name: rabbitmq image: rabbitmq:3-management restart: always ports: @@ -22,7 +20,6 @@ rabbitmq: - RABBITMQ_DEFAULT_VHOST=airflow webserver: - container_name: webserver image: puckel/docker-airflow restart: always # volumes: @@ -39,7 +36,6 @@ webserver: command: webserver flower: - container_name: flower image: puckel/docker-airflow restart: always environment: @@ -50,22 +46,24 @@ flower: - rabbitmq:rabbitmq command: flower -worker: - container_name: worker +scheduler: image: puckel/docker-airflow restart: always environment: - AIRFLOW_HOME=/usr/local/airflow - ports: - - "8793:8793" links: + - mysql:mysql - rabbitmq:rabbitmq - command: worker + command: scheduler -scheduler: - container_name: scheduler +worker: image: puckel/docker-airflow restart: always environment: - AIRFLOW_HOME=/usr/local/airflow - command: scheduler + ports: 
+ - "8793:8793" + links: + - mysql:mysql + - rabbitmq:rabbitmq + command: worker From d86cfc1826effc64cdb1cabdecc9ecf0f544f734 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 13 Nov 2015 16:06:57 +0100 Subject: [PATCH 035/163] Rename DB_LOOP to TRY_LOOP --- script/entrypoint.sh | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 815dabdf97..55acbfb5c5 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash CMD="airflow" -DB_LOOPS="10" +TRY_LOOP="10" MYSQL_HOST="mysql" MYSQL_PORT="3306" RABBITMQ_HOST="rabbitmq" @@ -16,30 +16,26 @@ if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] || [ j=0 while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:15672/api/whoami |grep '200 OK'; do j=`expr $j + 1` - if [ $j -ge $DB_LOOPS ]; then + if [ $j -ge $TRY_LOOP ]; then echo "$(date) - $RABBITMQ_HOST still not reachable, giving up" exit 1 fi - echo "$(date) - waiting for RabbitMQ... $j/$DB_LOOPS" - sleep 2 + echo "$(date) - waiting for RabbitMQ... $j/$TRY_LOOP" + sleep 5 done fi -if [ "$@" = "flower" ]; then - sleep 10 -fi - # wait for DB if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] ; then i=0 while ! nc $MYSQL_HOST $MYSQL_PORT >/dev/null 2>&1 < /dev/null; do i=`expr $i + 1` - if [ $i -ge $DB_LOOPS ]; then + if [ $i -ge $TRY_LOOP ]; then echo "$(date) - ${MYSQL_HOST}:${MYSQL_PORT} still not reachable, giving up" exit 1 fi - echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}... $i/$DB_LOOPS" - sleep 1 + echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}... $i/$TRY_LOOP" + sleep 5 done if [ "$@" = "webserver" ]; then echo "Initialize database..." 
From f6c70b43ed8da9a875cc60afcf8b8d9e9d90b591 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 13 Nov 2015 16:07:27 +0100 Subject: [PATCH 036/163] Set locale --- Dockerfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Dockerfile b/Dockerfile index ff78f7beb0..941ee6d8f9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,6 +16,12 @@ ENV AIRFLOW_VERSION 1.6.0 ENV AIRFLOW_HOME /usr/local/airflow ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages +# Set the locale +RUN locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + # Add airflow user RUN useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow From d60f65af408518132f10a6a37e7a5fcc81fe53a2 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 13 Nov 2015 16:15:23 +0100 Subject: [PATCH 037/163] Remove locale definition --- Dockerfile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 941ee6d8f9..ff78f7beb0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,12 +16,6 @@ ENV AIRFLOW_VERSION 1.6.0 ENV AIRFLOW_HOME /usr/local/airflow ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages -# Set the locale -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - # Add airflow user RUN useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow From ec3c850d189755ffc1ffbb82d9f4489339b4864c Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 16 Nov 2015 11:40:53 +0100 Subject: [PATCH 038/163] v1.6.1 --- Dockerfile | 2 +- circle.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index ff78f7beb0..5d8632c6b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Work around initramfs-tools running on kernel 'upgrade': ENV INITRD No -ENV AIRFLOW_VERSION 1.6.0 +ENV AIRFLOW_VERSION 1.6.1 ENV AIRFLOW_HOME /usr/local/airflow ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages diff --git a/circle.yml b/circle.yml index 
c8c93dac47..28d2e1c1ca 100644 --- a/circle.yml +++ b/circle.yml @@ -10,4 +10,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.6.0' + - docker run puckel/docker-airflow version |grep '1.6.1' From ac2515c43f38f3dbd1f6139391989c9ce0a91ce1 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 16 Nov 2015 11:45:57 +0100 Subject: [PATCH 039/163] Update readme for adhoc query informations --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index ee7697f7ef..507da9b32d 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,13 @@ Start the stack (mysql, rabbitmq, airflow-webserver, airflow-scheduler airflow-f docker-compose up -d +If you want to use Ad hoc query, make sure you've configured connections : +Go to Admin -> Connections and Edit "mysql_default" set this values (equivalent to values in airflow.cfg/docker-compose.yml) : +- Host : mysql +- Schema : airflow +- Login : airflow +- Password : airflow + Check [Airflow Documentation](http://pythonhosted.org/airflow/) ## UI Links From 6ea438ee9f01a51a6f9c4c276b6efd1e0aecc16b Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 7 Dec 2015 15:41:04 +0100 Subject: [PATCH 040/163] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 507da9b32d..9394d8cc23 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Airflow Dockerfile -[![Circle CI](https://circleci.com/gh/puckel/docker-airflow.svg?style=svg)](https://circleci.com/gh/puckel/docker-airflow) +Circle CI : [![](https://circleci.com/gh/puckel/docker-airflow.svg?style=svg)](https://circleci.com/gh/puckel/docker-airflow) + +ImageLayers : [![](https://badge.imagelayers.io/puckel/docker-airflow:latest.svg)](https://imagelayers.io/?images=puckel/docker-airflow:latest) This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/airflow) for [Docker](https://www.docker.com/)'s [automated 
build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). @@ -8,6 +10,7 @@ This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/a * Based on Debian Wheezy official Image [debian:wheezy](https://registry.hub.docker.com/_/debian/) * Install [Docker](https://www.docker.com/) * Install [Docker Compose](https://docs.docker.com/compose/install/) +* Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/airflow) ## Installation From cdd3b5213ca0c902966e4b5f1de56a645ee76f74 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 7 Jan 2016 11:07:29 +0100 Subject: [PATCH 041/163] v1.6.2 --- Dockerfile | 4 ++-- circle.yml | 2 +- config/airflow.cfg | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5d8632c6b3..9c418b1d14 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.1 +# VERSION 1.6.2 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Work around initramfs-tools running on kernel 'upgrade': ENV INITRD No -ENV AIRFLOW_VERSION 1.6.1 +ENV AIRFLOW_VERSION 1.6.2 ENV AIRFLOW_HOME /usr/local/airflow ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages diff --git a/circle.yml b/circle.yml index 28d2e1c1ca..c4f32748e5 100644 --- a/circle.yml +++ b/circle.yml @@ -10,4 +10,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.6.1' + - docker run puckel/docker-airflow version |grep '1.6.2' diff --git a/config/airflow.cfg b/config/airflow.cfg index b3ffe25ac9..7802b868c0 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -86,7 +86,7 @@ smtp_starttls = True smtp_user = airflow smtp_port = 25 smtp_password = airflow -smtp_mail_from = airflow@airflow.com +smtp_mail_from = airflow@airflow.local 
[celery] # This section only applies if you are using the CeleryExecutor in From 08dcfdf17677f9b6715dd00dc3b9c3d3ffa8780f Mon Sep 17 00:00:00 2001 From: "Jonathon W. Marshall" Date: Fri, 8 Jan 2016 11:52:48 -0500 Subject: [PATCH 042/163] fix some ssl/certificate errors --- Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Dockerfile b/Dockerfile index 9c418b1d14..b4b1995a48 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,6 +21,7 @@ RUN useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow RUN apt-get update -yqq \ && apt-get install -yqq --no-install-recommends \ + ca-certificates \ netcat \ curl \ python-pip \ @@ -32,6 +33,9 @@ RUN apt-get update -yqq \ libffi-dev \ build-essential \ && pip install --install-option="--install-purelib=$PYTHONLIBPATH" cryptography \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" pyOpenSSL \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" ndg-httpsclient \ + && pip install --install-option="--install-purelib=$PYTHONLIBPATH" pyasn1 \ && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow==${AIRFLOW_VERSION} \ && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[celery]==${AIRFLOW_VERSION} \ && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[mysql]==${AIRFLOW_VERSION} \ From d635f47c4fa50dab9d5437d21697b01e04c0392a Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 28 Jan 2016 10:40:50 +0100 Subject: [PATCH 043/163] Migrate to Debian Jessie --- Dockerfile | 41 +++++++++++++++++++++++++---------------- docker-compose.yml | 11 ++++++++--- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/Dockerfile b/Dockerfile index b4b1995a48..901d354797 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,27 +1,30 @@ -# VERSION 1.6.2 +# VERSION 1.6.2-1 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow # SOURCE: https://github.com/puckel/docker-airflow 
-FROM debian:wheezy +FROM debian:jessie MAINTAINER Puckel_ # Never prompts the user for choices on installation/configuration of packages ENV DEBIAN_FRONTEND noninteractive ENV TERM linux -# Work around initramfs-tools running on kernel 'upgrade': -ENV INITRD No ENV AIRFLOW_VERSION 1.6.2 ENV AIRFLOW_HOME /usr/local/airflow -ENV PYTHONLIBPATH /usr/lib/python2.7/dist-packages -# Add airflow user -RUN useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow +# Define en_US. +ENV LANGUAGE en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 +ENV LC_CTYPE en_US.UTF-8 +ENV LC_MESSAGES en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 -RUN apt-get update -yqq \ +RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sources.list.d/backports.list \ + && apt-get update -yqq \ && apt-get install -yqq --no-install-recommends \ - ca-certificates \ + apt-utils\ netcat \ curl \ python-pip \ @@ -32,13 +35,19 @@ RUN apt-get update -yqq \ libssl-dev \ libffi-dev \ build-essential \ - && pip install --install-option="--install-purelib=$PYTHONLIBPATH" cryptography \ - && pip install --install-option="--install-purelib=$PYTHONLIBPATH" pyOpenSSL \ - && pip install --install-option="--install-purelib=$PYTHONLIBPATH" ndg-httpsclient \ - && pip install --install-option="--install-purelib=$PYTHONLIBPATH" pyasn1 \ - && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow==${AIRFLOW_VERSION} \ - && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[celery]==${AIRFLOW_VERSION} \ - && pip install --install-option="--install-purelib=$PYTHONLIBPATH" airflow[mysql]==${AIRFLOW_VERSION} \ + locales \ + && apt-get install -yqq -t jessie-backports python-requests \ + && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \ + && locale-gen \ + && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ + && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \ + && pip install cryptography \ + && pip install pyOpenSSL \ + && pip install 
ndg-httpsclient \ + && pip install pyasn1 \ + && pip install airflow==${AIRFLOW_VERSION} \ + && pip install airflow[celery]==${AIRFLOW_VERSION} \ + && pip install airflow[mysql]==${AIRFLOW_VERSION} \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* \ diff --git a/docker-compose.yml b/docker-compose.yml index 737b55b85d..63af4a8721 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,12 +1,13 @@ mysql: - image: tutum/mysql + image: mysql restart: always ports: - "3306:3306" environment: + - MYSQL_RANDOM_ROOT_PASSWORD=true - MYSQL_USER=airflow - - MYSQL_PASS=airflow - - ON_CREATE_DB=airflow + - MYSQL_PASSWORD=airflow + - MYSQL_DATABASE=airflow rabbitmq: image: rabbitmq:3-management @@ -49,6 +50,8 @@ flower: scheduler: image: puckel/docker-airflow restart: always + # volumes: + # - /localpath/to/dags:/usr/local/airflow/dags environment: - AIRFLOW_HOME=/usr/local/airflow links: @@ -59,6 +62,8 @@ scheduler: worker: image: puckel/docker-airflow restart: always + # volumes: + # - /localpath/to/dags:/usr/local/airflow/dags environment: - AIRFLOW_HOME=/usr/local/airflow ports: From 3d382d6afa398f04f350ea3e2c55f9666054c586 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 28 Jan 2016 13:22:51 +0100 Subject: [PATCH 044/163] Update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9394d8cc23..5c64b9a0bb 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/a ## Informations -* Based on Debian Wheezy official Image [debian:wheezy](https://registry.hub.docker.com/_/debian/) +* Based on Debian Jessie official Image [debian:jessie](https://registry.hub.docker.com/_/debian/) * Install [Docker](https://www.docker.com/) * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/airflow) From 15645b7f3f989376d82a66323fd54edee687241c 
Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 24 Mar 2016 13:03:52 +0100 Subject: [PATCH 045/163] Fix issue #19 --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 901d354797..7684788b61 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.6.2-1 +# VERSION 1.6.2-2 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow @@ -41,6 +41,7 @@ RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sou && locale-gen \ && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \ + && pip install pytz==2015.7 \ && pip install cryptography \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ From 70850042586d0e40244609c829f641e7ddc919d8 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 29 Mar 2016 15:56:36 +0200 Subject: [PATCH 046/163] Bump to version 1.7.0 --- Dockerfile | 5 +++-- circle.yml | 2 +- config/airflow.cfg | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7684788b61..a8bea000bb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.6.2-2 +# VERSION 1.7.0 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow @@ -10,7 +10,7 @@ MAINTAINER Puckel_ # Never prompts the user for choices on installation/configuration of packages ENV DEBIAN_FRONTEND noninteractive ENV TERM linux -ENV AIRFLOW_VERSION 1.6.2 +ENV AIRFLOW_VERSION 1.7.0 ENV AIRFLOW_HOME /usr/local/airflow # Define en_US. 
@@ -49,6 +49,7 @@ RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sou && pip install airflow==${AIRFLOW_VERSION} \ && pip install airflow[celery]==${AIRFLOW_VERSION} \ && pip install airflow[mysql]==${AIRFLOW_VERSION} \ + && apt-get remove --purge -yqq build-essential python-pip python-dev libmysqlclient-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* \ diff --git a/circle.yml b/circle.yml index c4f32748e5..f741fcf0ae 100644 --- a/circle.yml +++ b/circle.yml @@ -10,4 +10,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.6.2' + - docker run puckel/docker-airflow version |grep '1.7.0' diff --git a/config/airflow.cfg b/config/airflow.cfg index 7802b868c0..a7c7ef62f8 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -21,6 +21,15 @@ executor = CeleryExecutor # their website sql_alchemy_conn = mysql://airflow:airflow@mysql/airflow +# The SqlAlchemy pool size is the maximum number of database connections +# in the pool. +sql_alchemy_pool_size = 5 + +# The SqlAlchemy pool recycle is the number of seconds a connection +# can be idle in the pool before it is invalidated. This config does +# not apply to sqlite. +sql_alchemy_pool_recycle = 3600 + # The amount of parallelism as a setting to the executor. 
This defines # the max number of task instances that should run simultaneously # on this airflow installation @@ -29,6 +38,9 @@ parallelism = 32 # The number of task instances allowed to run concurrently by the scheduler dag_concurrency = 16 +# Are DAGs paused by default at creation +dags_are_paused_at_creation = False + # The maximum number of active DAG runs per DAG max_active_runs_per_dag = 16 @@ -46,6 +58,9 @@ fernet_key = $FERNET_KEY # Whether to disable pickling dags donot_pickle = False +# How long before timing out a python file import while filling the DagBag +dagbag_import_timeout = 30 + [webserver] # The base url of your website as airflow cannot guess what domain or # cname you are using. This is use in automated emails that @@ -77,12 +92,16 @@ authenticate = False # Filter the list of dags by owner name (requires authentication to be enabled) filter_by_owner = False +[email] +email_backend = airflow.utils.send_email_smtp + [smtp] # If you want airflow to send emails on retries, failure, and you want to # the airflow.utils.send_email function, you have to configure an smtp # server here smtp_host = localhost smtp_starttls = True +smtp_ssl = False smtp_user = airflow smtp_port = 25 smtp_password = airflow From 0b2a8a2a0190b5f1d5cd4f0ee6bb4a09adbd8e99 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 31 Mar 2016 17:05:53 +0200 Subject: [PATCH 047/163] Update README.md --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 5c64b9a0bb..bf74c0efd1 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # Airflow Dockerfile Circle CI : [![](https://circleci.com/gh/puckel/docker-airflow.svg?style=svg)](https://circleci.com/gh/puckel/docker-airflow) -ImageLayers : [![](https://badge.imagelayers.io/puckel/docker-airflow:latest.svg)](https://imagelayers.io/?images=puckel/docker-airflow:latest) - This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/airflow) for [Docker](https://www.docker.com/)'s 
[automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). ## Informations From 050dfa185a082112391f7a565db55ce46f44aaeb Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 23 May 2016 11:09:26 +0200 Subject: [PATCH 048/163] Release v1.7.1.2 --- Dockerfile | 12 +++++++----- README.md | 2 +- circle.yml | 2 +- config/airflow.cfg | 28 +++++++++++++++++++++++----- docker-compose.yml | 2 +- 5 files changed, 33 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index a8bea000bb..0fb4a915bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.7.0 +# VERSION 1.7.1.2 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow @@ -10,7 +10,9 @@ MAINTAINER Puckel_ # Never prompts the user for choices on installation/configuration of packages ENV DEBIAN_FRONTEND noninteractive ENV TERM linux -ENV AIRFLOW_VERSION 1.7.0 + +# Airflow +ARG AIRFLOW_VERSION=1.7.1.2 ENV AIRFLOW_HOME /usr/local/airflow # Define en_US. 
@@ -46,9 +48,9 @@ RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sou && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ - && pip install airflow==${AIRFLOW_VERSION} \ - && pip install airflow[celery]==${AIRFLOW_VERSION} \ - && pip install airflow[mysql]==${AIRFLOW_VERSION} \ + && pip install airflow==$AIRFLOW_VERSION \ + && pip install airflow[celery]==$AIRFLOW_VERSION \ + && pip install airflow[mysql]==$AIRFLOW_VERSION \ && apt-get remove --purge -yqq build-essential python-pip python-dev libmysqlclient-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev \ && apt-get clean \ && rm -rf \ diff --git a/README.md b/README.md index bf74c0efd1..ceb1b970c6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Airflow Dockerfile Circle CI : [![](https://circleci.com/gh/puckel/docker-airflow.svg?style=svg)](https://circleci.com/gh/puckel/docker-airflow) -This repository contains **Dockerfile** of [airflow](https://github.com/airbnb/airflow) for [Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). +This repository contains **Dockerfile** of [airflow](https://github.com/apache/incubator-airflow) for [Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). 
## Informations diff --git a/circle.yml b/circle.yml index f741fcf0ae..29649531ab 100644 --- a/circle.yml +++ b/circle.yml @@ -10,4 +10,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.7.0' + - docker run puckel/docker-airflow version |grep '1.7.1.2' diff --git a/config/airflow.cfg b/config/airflow.cfg index a7c7ef62f8..37594c147e 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -8,9 +8,17 @@ dags_folder = /usr/local/airflow/dags # The folder where airflow should store its log files. This location base_log_folder = /usr/local/airflow/logs -# An S3 location can be provided for log backups -# For S3, use the full URL to the base folder (starting with "s3://...") -s3_log_folder = None + +# Airflow can store logs remotely in AWS S3 or Google Cloud Storage. Users +# must supply a remote location URL (starting with either 's3://...' or +# 'gs://...') and an Airflow connection id that provides access to the storage +# location. +remote_base_log_folder = +remote_log_conn_id = +# Use server-side encryption for logs stored in S3 +encrypt_s3_logs = False +# deprecated option for remote log storage, use remote_base_log_folder instead! +# s3_log_folder = # The executor class that airflow should use. Choices include # SequentialExecutor, LocalExecutor, CeleryExecutor @@ -73,6 +81,9 @@ web_server_host = 0.0.0.0 # The port on which to run the web server web_server_port = 8080 +# The time the gunicorn webserver waits before timing out on a worker +web_server_worker_timeout = 120 + # Secret key used to run your flask app secret_key = temporary_key @@ -159,6 +170,11 @@ scheduler_heartbeat_sec = 5 # statsd_port = 8125 # statsd_prefix = airflow +# The scheduler can run multiple threads in parallel to schedule dags. +# This defines how many threads will run. However airflow will never +# use more threads than the amount of cpu cores available. +max_threads = 2 + [mesos] # Mesos master address which MesosExecutor will connect to. 
master = localhost:5050 @@ -181,8 +197,10 @@ task_memory = 256 checkpoint = False # Failover timeout in milliseconds. -# When checkpointing is enabled and this option is set, Mesos waits until the configured timeout for -# the MesosExecutor framework to re-register after a failover. Mesos shuts down running tasks if the +# When checkpointing is enabled and this option is set, Mesos waits +# until the configured timeout for +# the MesosExecutor framework to re-register after a failover. Mesos +# shuts down running tasks if the # MesosExecutor framework fails to re-register within this timeframe. # failover_timeout = 604800 diff --git a/docker-compose.yml b/docker-compose.yml index 63af4a8721..00696bca56 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -57,7 +57,7 @@ scheduler: links: - mysql:mysql - rabbitmq:rabbitmq - command: scheduler + command: scheduler -n 5 worker: image: puckel/docker-airflow From 9708f5ddbafa1cff2a9dd1ad59361a6c48328ccd Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 23 May 2016 11:14:27 +0200 Subject: [PATCH 049/163] Use Docker 1.10 in CircleCI --- circle.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/circle.yml b/circle.yml index 29649531ab..be85db50b8 100644 --- a/circle.yml +++ b/circle.yml @@ -1,4 +1,6 @@ machine: + pre: + - curl -sSL https://s3.amazonaws.com/circle-downloads/install-circleci-docker.sh | bash -s -- 1.10.0 services: - docker From 8e3c321daa6badb4ce1dbfc05c0ed34985d9f966 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 30 May 2016 11:50:46 +0200 Subject: [PATCH 050/163] Update README.md badges --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ceb1b970c6..1e8dc57d41 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ -# Airflow Dockerfile -Circle CI : [![](https://circleci.com/gh/puckel/docker-airflow.svg?style=svg)](https://circleci.com/gh/puckel/docker-airflow) +# docker-airflow 
+[![CircleCI](https://img.shields.io/circleci/project/puckel/docker-airflow.svg?maxAge=2592000)](https://circleci.com/gh/puckel/docker-airflow) +[![Docker Hub](https://img.shields.io/badge/docker-ready-blue.svg)](https://hub.docker.com/r/puckel/docker-airflow/) +[![Docker Pulls](https://img.shields.io/docker/pulls/puckel/docker-airflow.svg?maxAge=2592000)]() +[![Docker Stars](https://img.shields.io/docker/stars/puckel/docker-airflow.svg?maxAge=2592000)]() This repository contains **Dockerfile** of [airflow](https://github.com/apache/incubator-airflow) for [Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). @@ -20,7 +23,7 @@ For example, if you need to install [Extra Packages](http://pythonhosted.org/air docker build --rm -t puckel/docker-airflow . -# Usage +## Usage Start the stack (mysql, rabbitmq, airflow-webserver, airflow-scheduler airflow-flower & airflow-worker) : From e08120578f5026cc2dd11cd51a66647cd2b0576a Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Thu, 2 Jun 2016 13:24:27 +0200 Subject: [PATCH 051/163] Added missing packages --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index 0fb4a915bc..46fb2ee7ce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,6 +38,8 @@ RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sou libffi-dev \ build-essential \ locales \ + libblas-dev \ + liblapack-dev \ && apt-get install -yqq -t jessie-backports python-requests \ && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \ && locale-gen \ @@ -51,6 +53,7 @@ RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sou && pip install airflow==$AIRFLOW_VERSION \ && pip install airflow[celery]==$AIRFLOW_VERSION \ && pip install airflow[mysql]==$AIRFLOW_VERSION \ + && pip install airflow[hive]==$AIRFLOW_VERSION \ && apt-get 
remove --purge -yqq build-essential python-pip python-dev libmysqlclient-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev \ && apt-get clean \ && rm -rf \ From 3ea44916d467a45cfc8665fe5acf95f95c06903d Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Thu, 2 Jun 2016 13:27:17 +0200 Subject: [PATCH 052/163] Only use the first argument --- script/entrypoint.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 55acbfb5c5..30aee2f263 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -12,7 +12,7 @@ FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fer sed -i "s/{FERNET_KEY}/${FERNET_KEY}/" $AIRFLOW_HOME/airflow.cfg # wait for rabbitmq -if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] || [ "$@" = "flower" ] ; then +if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then j=0 while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:15672/api/whoami |grep '200 OK'; do j=`expr $j + 1` @@ -26,7 +26,7 @@ if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] || [ fi # wait for DB -if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] ; then +if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; then i=0 while ! nc $MYSQL_HOST $MYSQL_PORT >/dev/null 2>&1 < /dev/null; do i=`expr $i + 1` @@ -37,7 +37,7 @@ if [ "$@" = "webserver" ] || [ "$@" = "worker" ] || [ "$@" = "scheduler" ] ; the echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}... $i/$TRY_LOOP" sleep 5 done - if [ "$@" = "webserver" ]; then + if [ "$1" = "webserver" ]; then echo "Initialize database..." 
$CMD initdb fi From 8c0085af3e1d783de284e3eec0ce7e6918188b4d Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 6 Jun 2016 22:34:37 +0200 Subject: [PATCH 053/163] Replaced MySQL with Postgresql --- Dockerfile | 14 +++++--------- README.md | 19 ++++++++++++++----- config/airflow.cfg | 2 +- docker-compose.yml | 30 ++++++++++++------------------ script/entrypoint.sh | 10 +++++----- 5 files changed, 37 insertions(+), 38 deletions(-) diff --git a/Dockerfile b/Dockerfile index 46fb2ee7ce..cdd41f7808 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,7 +31,6 @@ RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sou curl \ python-pip \ python-dev \ - libmysqlclient-dev \ libkrb5-dev \ libsasl2-dev \ libssl-dev \ @@ -40,7 +39,7 @@ RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sou locales \ libblas-dev \ liblapack-dev \ - && apt-get install -yqq -t jessie-backports python-requests \ + && apt-get install -yqq -t jessie-backports python-requests libpq-dev \ && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \ && locale-gen \ && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ @@ -50,11 +49,9 @@ RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sou && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ - && pip install airflow==$AIRFLOW_VERSION \ - && pip install airflow[celery]==$AIRFLOW_VERSION \ - && pip install airflow[mysql]==$AIRFLOW_VERSION \ - && pip install airflow[hive]==$AIRFLOW_VERSION \ - && apt-get remove --purge -yqq build-essential python-pip python-dev libmysqlclient-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev \ + && pip install psycopg2 \ + && pip install airflow[celery,postgresql,hive]==$AIRFLOW_VERSION \ + && apt-get remove --purge -yqq build-essential python-pip python-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* \ @@ -67,8 +64,7 @@ RUN echo "deb 
http://http.debian.net/debian jessie-backports main" >/etc/apt/sou ADD script/entrypoint.sh ${AIRFLOW_HOME}/entrypoint.sh ADD config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg -RUN \ - chown -R airflow: ${AIRFLOW_HOME} \ +RUN chown -R airflow: ${AIRFLOW_HOME} \ && chmod +x ${AIRFLOW_HOME}/entrypoint.sh EXPOSE 8080 5555 8793 diff --git a/README.md b/README.md index 1e8dc57d41..ca7b0d8c3c 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,15 @@ This repository contains **Dockerfile** of [airflow](https://github.com/apache/i ## Informations -* Based on Debian Jessie official Image [debian:jessie](https://registry.hub.docker.com/_/debian/) +* Based on Debian Jessie official Image [debian:jessie](https://registry.hub.docker.com/_/debian/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [RabbitMQ](https://hub.docker.com/_/rabbitmq/) as queue * Install [Docker](https://www.docker.com/) * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/airflow) ## Installation +Pull the image from the Docker repository. 
+ docker pull puckel/docker-airflow ## Build @@ -25,11 +27,11 @@ For example, if you need to install [Extra Packages](http://pythonhosted.org/air ## Usage -Start the stack (mysql, rabbitmq, airflow-webserver, airflow-scheduler airflow-flower & airflow-worker) : +Start the stack (postgres, rabbitmq, airflow-webserver, airflow-scheduler airflow-flower & airflow-worker): docker-compose up -d -If you want to use Ad hoc query, make sure you've configured connections : +If you want to use Ad hoc query, make sure you've configured connections: Go to Admin -> Connections and Edit "mysql_default" set this values (equivalent to values in airflow.cfg/docker-compose.yml) : - Host : mysql - Schema : airflow @@ -44,13 +46,20 @@ Check [Airflow Documentation](http://pythonhosted.org/airflow/) - Flower: [localhost:5555](http://localhost:5555/) - RabbitMQ: [localhost:15672](http://localhost:15672/) -(with boot2docker, use: open http://$(boot2docker ip):8080) - +When using OSX with boot2docker, use: open http://$(boot2docker ip):8080 ## Run the test "tutorial" docker exec dockerairflow_webserver_1 airflow backfill tutorial -s 2015-05-01 -e 2015-06-01 +## Scale the number of workers + +Easy scaling using docker-compose: + + docker-compose scale worker=5 + +This can be used to scale to a multi node setup using docker swarm. + # Wanna help? Fork, improve and PR. ;-) diff --git a/config/airflow.cfg b/config/airflow.cfg index 37594c147e..2fcbd5468a 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -27,7 +27,7 @@ executor = CeleryExecutor # The SqlAlchemy connection string to the metadata database. # SqlAlchemy supports many different database engine, more information # their website -sql_alchemy_conn = mysql://airflow:airflow@mysql/airflow +sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow # The SqlAlchemy pool size is the maximum number of database connections # in the pool. 
diff --git a/docker-compose.yml b/docker-compose.yml index 00696bca56..ab9140db6f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,13 +1,9 @@ -mysql: - image: mysql - restart: always - ports: - - "3306:3306" +postgres: + image: postgres environment: - - MYSQL_RANDOM_ROOT_PASSWORD=true - - MYSQL_USER=airflow - - MYSQL_PASSWORD=airflow - - MYSQL_DATABASE=airflow + - POSTGRES_USER=airflow + - POSTGRES_PASSWORD=airflow + - POSTGRES_DB=airflow rabbitmq: image: rabbitmq:3-management @@ -21,7 +17,7 @@ rabbitmq: - RABBITMQ_DEFAULT_VHOST=airflow webserver: - image: puckel/docker-airflow + build: . restart: always # volumes: # - /localpath/to/dags:/usr/local/airflow/dags @@ -30,14 +26,14 @@ webserver: ports: - "8080:8080" links: - - mysql:mysql + - postgres:postgres - rabbitmq:rabbitmq - worker:worker - scheduler:scheduler command: webserver flower: - image: puckel/docker-airflow + build: . restart: always environment: - AIRFLOW_HOME=/usr/local/airflow @@ -48,27 +44,25 @@ flower: command: flower scheduler: - image: puckel/docker-airflow + build: . restart: always # volumes: # - /localpath/to/dags:/usr/local/airflow/dags environment: - AIRFLOW_HOME=/usr/local/airflow links: - - mysql:mysql + - postgres:postgres - rabbitmq:rabbitmq command: scheduler -n 5 worker: - image: puckel/docker-airflow + build: . 
restart: always # volumes: # - /localpath/to/dags:/usr/local/airflow/dags environment: - AIRFLOW_HOME=/usr/local/airflow - ports: - - "8793:8793" links: - - mysql:mysql + - postgres:postgres - rabbitmq:rabbitmq command: worker diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 30aee2f263..19d75abb5d 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -2,8 +2,8 @@ CMD="airflow" TRY_LOOP="10" -MYSQL_HOST="mysql" -MYSQL_PORT="3306" +POSTGRES_HOST="postgres" +POSTGRES_PORT="5432" RABBITMQ_HOST="rabbitmq" RABBITMQ_CREDS="airflow:airflow" FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY") @@ -28,13 +28,13 @@ fi # wait for DB if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; then i=0 - while ! nc $MYSQL_HOST $MYSQL_PORT >/dev/null 2>&1 < /dev/null; do + while ! nc $POSTGRES_HOST $POSTGRES_PORT >/dev/null 2>&1 < /dev/null; do i=`expr $i + 1` if [ $i -ge $TRY_LOOP ]; then - echo "$(date) - ${MYSQL_HOST}:${MYSQL_PORT} still not reachable, giving up" + echo "$(date) - ${POSTGRES_HOST}:${POSTGRES_PORT} still not reachable, giving up" exit 1 fi - echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}... $i/$TRY_LOOP" + echo "$(date) - waiting for ${POSTGRES_HOST}:${POSTGRES_PORT}... 
$i/$TRY_LOOP" sleep 5 done if [ "$1" = "webserver" ]; then From 9470f4fa0d5b730b77751490f3e159e47ff9270c Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 14 Jun 2016 16:26:42 +0200 Subject: [PATCH 054/163] Bump Airflow version to 1.7.1.3 --- Dockerfile | 4 ++-- docker-compose.yml | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index cdd41f7808..39c65967ae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.7.1.2 +# VERSION 1.7.1.3 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.7.1.2 +ARG AIRFLOW_VERSION=1.7.1.3 ENV AIRFLOW_HOME /usr/local/airflow # Define en_US. diff --git a/docker-compose.yml b/docker-compose.yml index ab9140db6f..82ae5a9e60 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,7 +17,7 @@ rabbitmq: - RABBITMQ_DEFAULT_VHOST=airflow webserver: - build: . + image: puckel/docker-airflow:1.7.1.3 restart: always # volumes: # - /localpath/to/dags:/usr/local/airflow/dags @@ -33,7 +33,7 @@ webserver: command: webserver flower: - build: . + image: puckel/docker-airflow:1.7.1.3 restart: always environment: - AIRFLOW_HOME=/usr/local/airflow @@ -44,7 +44,7 @@ flower: command: flower scheduler: - build: . + image: puckel/docker-airflow:1.7.1.3 restart: always # volumes: # - /localpath/to/dags:/usr/local/airflow/dags @@ -56,7 +56,7 @@ scheduler: command: scheduler -n 5 worker: - build: . 
+ image: puckel/docker-airflow:1.7.1.3 restart: always # volumes: # - /localpath/to/dags:/usr/local/airflow/dags From 4c558ba4a32e33ef2ba81ed0c13032e1e13048d5 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 14 Jun 2016 16:38:38 +0200 Subject: [PATCH 055/163] Update circleci version --- circle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/circle.yml b/circle.yml index be85db50b8..227db63519 100644 --- a/circle.yml +++ b/circle.yml @@ -12,4 +12,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.7.1.2' + - docker run puckel/docker-airflow version |grep '1.7.1.3' From 3ddf7a8d828d507131da254f5311df73212239e7 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 21 Jun 2016 14:36:54 +0200 Subject: [PATCH 056/163] Clean Dockerfile --- Dockerfile | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/Dockerfile b/Dockerfile index 39c65967ae..ea9fc837cc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,22 +23,26 @@ ENV LC_CTYPE en_US.UTF-8 ENV LC_MESSAGES en_US.UTF-8 ENV LC_ALL en_US.UTF-8 -RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sources.list.d/backports.list \ +RUN set -ex \ + && buildDeps=' \ + python-pip \ + python-dev \ + libkrb5-dev \ + libsasl2-dev \ + libssl-dev \ + libffi-dev \ + build-essential \ + libblas-dev \ + liblapack-dev \ + ' \ + && echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sources.list.d/backports.list \ && apt-get update -yqq \ && apt-get install -yqq --no-install-recommends \ - apt-utils\ - netcat \ - curl \ - python-pip \ - python-dev \ - libkrb5-dev \ - libsasl2-dev \ - libssl-dev \ - libffi-dev \ - build-essential \ - locales \ - libblas-dev \ - liblapack-dev \ + $buildDeps \ + apt-utils \ + curl \ + netcat \ + locales \ && apt-get install -yqq -t jessie-backports python-requests libpq-dev \ && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen 
\ && locale-gen \ @@ -51,18 +55,18 @@ RUN echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sou && pip install pyasn1 \ && pip install psycopg2 \ && pip install airflow[celery,postgresql,hive]==$AIRFLOW_VERSION \ - && apt-get remove --purge -yqq build-essential python-pip python-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev \ + && apt-get remove --purge -yqq $buildDeps libpq-dev \ && apt-get clean \ && rm -rf \ - /var/lib/apt/lists/* \ - /tmp/* \ - /var/tmp/* \ - /usr/share/man \ - /usr/share/doc \ - /usr/share/doc-base + /var/lib/apt/lists/* \ + /tmp/* \ + /var/tmp/* \ + /usr/share/man \ + /usr/share/doc \ + /usr/share/doc-base -ADD script/entrypoint.sh ${AIRFLOW_HOME}/entrypoint.sh -ADD config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg +COPY script/entrypoint.sh ${AIRFLOW_HOME}/entrypoint.sh +COPY config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg RUN chown -R airflow: ${AIRFLOW_HOME} \ && chmod +x ${AIRFLOW_HOME}/entrypoint.sh From 9171280f2e83998fe28efed19c83467b32a73ce6 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DALIDO Date: Tue, 19 Jul 2016 12:07:13 +0200 Subject: [PATCH 057/163] compose: bump to v2 file --- docker-compose.yml | 113 ++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 63 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 82ae5a9e60..f39647891a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,68 +1,55 @@ -postgres: - image: postgres - environment: - - POSTGRES_USER=airflow - - POSTGRES_PASSWORD=airflow - - POSTGRES_DB=airflow +version: '2' +services: + postgres: + image: postgres + environment: + - POSTGRES_USER=airflow + - POSTGRES_PASSWORD=airflow + - POSTGRES_DB=airflow -rabbitmq: - image: rabbitmq:3-management - restart: always - ports: - - "15672:15672" - - "5672:5672" - environment: - - RABBITMQ_DEFAULT_USER=airflow - - RABBITMQ_DEFAULT_PASS=airflow - - RABBITMQ_DEFAULT_VHOST=airflow + rabbitmq: + image: rabbitmq:3-management + restart: always + ports: + 
- "15672:15672" + - "5672:5672" + environment: + - RABBITMQ_DEFAULT_USER=airflow + - RABBITMQ_DEFAULT_PASS=airflow + - RABBITMQ_DEFAULT_VHOST=airflow -webserver: - image: puckel/docker-airflow:1.7.1.3 - restart: always - # volumes: - # - /localpath/to/dags:/usr/local/airflow/dags - environment: - - AIRFLOW_HOME=/usr/local/airflow - ports: - - "8080:8080" - links: - - postgres:postgres - - rabbitmq:rabbitmq - - worker:worker - - scheduler:scheduler - command: webserver + webserver: + image: puckel/docker-airflow:1.7.1.3 + restart: always + environment: + - AIRFLOW_HOME=/usr/local/airflow + ports: + - "8080:8080" + command: webserver -flower: - image: puckel/docker-airflow:1.7.1.3 - restart: always - environment: - - AIRFLOW_HOME=/usr/local/airflow - ports: - - "5555:5555" - links: - - rabbitmq:rabbitmq - command: flower + flower: + image: puckel/docker-airflow:1.7.1.3 + restart: always + environment: + - AIRFLOW_HOME=/usr/local/airflow + ports: + - "5555:5555" + command: flower -scheduler: - image: puckel/docker-airflow:1.7.1.3 - restart: always - # volumes: - # - /localpath/to/dags:/usr/local/airflow/dags - environment: - - AIRFLOW_HOME=/usr/local/airflow - links: - - postgres:postgres - - rabbitmq:rabbitmq - command: scheduler -n 5 + scheduler: + image: puckel/docker-airflow:1.7.1.3 + restart: always + # volumes: + # - /localpath/to/dags:/usr/local/airflow/dags + environment: + - AIRFLOW_HOME=/usr/local/airflow + command: scheduler -n 5 -worker: - image: puckel/docker-airflow:1.7.1.3 - restart: always - # volumes: - # - /localpath/to/dags:/usr/local/airflow/dags - environment: - - AIRFLOW_HOME=/usr/local/airflow - links: - - postgres:postgres - - rabbitmq:rabbitmq - command: worker + worker: + image: puckel/docker-airflow:1.7.1.3 + restart: always + # volumes: + # - /localpath/to/dags:/usr/local/airflow/dags + environment: + - AIRFLOW_HOME=/usr/local/airflow + command: worker From bdd1b70aceb5b87ce308c02b42c3a80aab44058d Mon Sep 17 00:00:00 2001 From: Puckel_ 
Date: Tue, 26 Jul 2016 10:24:58 +0200 Subject: [PATCH 058/163] Add Links section --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index ca7b0d8c3c..a1c0de9c8e 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,10 @@ Easy scaling using docker-compose: This can be used to scale to a multi node setup using docker swarm. +## Links + + - Airflow on Kubernetes [kube-airflow](https://github.com/mumoshu/kube-airflow) + # Wanna help? Fork, improve and PR. ;-) From 6f47c91b184d44cbf1816134b6f058486b13e221 Mon Sep 17 00:00:00 2001 From: hadsed-genapsys Date: Wed, 27 Jul 2016 10:05:23 -0700 Subject: [PATCH 059/163] Update docker-compose.yml for webserver The webserver service also needs the DAGs folder mapped into its container, otherwise it will show that there is a DAG but with none of the other features available in the UI for that DAG. The new additions here leave the volumes commented out, but do indicate to a user that it is required. --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index f39647891a..61c68a2d4f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,6 +23,8 @@ services: restart: always environment: - AIRFLOW_HOME=/usr/local/airflow + # volumes: + # - /localpath/to/dags:/usr/local/airflow/dags ports: - "8080:8080" command: webserver From f8010c8b79aaa62248ca85da2bf53c1d3fd4bae1 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 12 Aug 2016 15:27:27 +0200 Subject: [PATCH 060/163] Modify entrypoint.sh to select the type of executor. 
--- Dockerfile | 2 +- README.md | 24 ++++++--- ...e.yml => docker-compose-CeleryExecutor.yml | 41 ++++++++------ docker-compose-LocalExecutor.yml | 34 ++++++++++++ script/entrypoint.sh | 53 +++++++++++++------ 5 files changed, 115 insertions(+), 39 deletions(-) rename docker-compose.yml => docker-compose-CeleryExecutor.yml (67%) create mode 100644 docker-compose-LocalExecutor.yml diff --git a/Dockerfile b/Dockerfile index ea9fc837cc..d81a3bfb45 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.7.1.3 +# VERSION 1.7.1.3-2 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow diff --git a/README.md b/README.md index a1c0de9c8e..35f464b49f 100644 --- a/README.md +++ b/README.md @@ -27,9 +27,25 @@ For example, if you need to install [Extra Packages](http://pythonhosted.org/air ## Usage -Start the stack (postgres, rabbitmq, airflow-webserver, airflow-scheduler airflow-flower & airflow-worker): +By default, docker-airflow run Airflow with **SequentialExecutor** : - docker-compose up -d + docker run -d -p 8080:8080 puckel/docker-airflow + +If you want to run other executor, you've to use the docker-compose.yml files provided in this repository. 
+ +For **LocalExecutor** : + + docker-compose -f docker-compose-LocalExecutor.yml up -d + +For **CeleryExecutor** : + + docker-compose -f docker-compose-CeleryExecutor.yml up -d + +NB : If you don't want to have DAGs example loaded (default=True), you've to set the following environment variable : + +`LOAD_EX=n` + + docker run -d -p 8080:8080 -e LOAD_EX=n puckel/docker-airflow If you want to use Ad hoc query, make sure you've configured connections: Go to Admin -> Connections and Edit "mysql_default" set this values (equivalent to values in airflow.cfg/docker-compose.yml) : @@ -48,10 +64,6 @@ Check [Airflow Documentation](http://pythonhosted.org/airflow/) When using OSX with boot2docker, use: open http://$(boot2docker ip):8080 -## Run the test "tutorial" - - docker exec dockerairflow_webserver_1 airflow backfill tutorial -s 2015-05-01 -e 2015-06-01 - ## Scale the number of workers Easy scaling using docker-compose: diff --git a/docker-compose.yml b/docker-compose-CeleryExecutor.yml similarity index 67% rename from docker-compose.yml rename to docker-compose-CeleryExecutor.yml index 61c68a2d4f..a8ce0f3d7d 100644 --- a/docker-compose.yml +++ b/docker-compose-CeleryExecutor.yml @@ -1,12 +1,5 @@ version: '2' services: - postgres: - image: postgres - environment: - - POSTGRES_USER=airflow - - POSTGRES_PASSWORD=airflow - - POSTGRES_DB=airflow - rabbitmq: image: rabbitmq:3-management restart: always @@ -18,11 +11,22 @@ services: - RABBITMQ_DEFAULT_PASS=airflow - RABBITMQ_DEFAULT_VHOST=airflow + postgres: + image: postgres + environment: + - POSTGRES_USER=airflow + - POSTGRES_PASSWORD=airflow + - POSTGRES_DB=airflow + webserver: - image: puckel/docker-airflow:1.7.1.3 + image: puckel/docker-airflow:1.7.1.3-2 restart: always + depends_on: + - postgres + - rabbitmq environment: - - AIRFLOW_HOME=/usr/local/airflow + # - LOAD_EX=n + - EXECUTOR=Celery # volumes: # - /localpath/to/dags:/usr/local/airflow/dags ports: @@ -30,28 +34,35 @@ services: command: webserver flower: - 
image: puckel/docker-airflow:1.7.1.3 + image: puckel/docker-airflow:1.7.1.3-2 restart: always + depends_on: + - rabbitmq environment: - - AIRFLOW_HOME=/usr/local/airflow + - EXECUTOR=Celery ports: - "5555:5555" command: flower scheduler: - image: puckel/docker-airflow:1.7.1.3 + image: puckel/docker-airflow:1.7.1.3-2 restart: always + depends_on: + - webserver # volumes: # - /localpath/to/dags:/usr/local/airflow/dags environment: - - AIRFLOW_HOME=/usr/local/airflow + # - LOAD_EX=n + - EXECUTOR=Celery command: scheduler -n 5 worker: - image: puckel/docker-airflow:1.7.1.3 + image: puckel/docker-airflow:1.7.1.3-2 restart: always + depends_on: + - scheduler # volumes: # - /localpath/to/dags:/usr/local/airflow/dags environment: - - AIRFLOW_HOME=/usr/local/airflow + - EXECUTOR=Celery command: worker diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml new file mode 100644 index 0000000000..d8b409bf92 --- /dev/null +++ b/docker-compose-LocalExecutor.yml @@ -0,0 +1,34 @@ +version: '2' +services: + postgres: + image: postgres + environment: + - POSTGRES_USER=airflow + - POSTGRES_PASSWORD=airflow + - POSTGRES_DB=airflow + + webserver: + image: puckel/docker-airflow:1.7.1.3-2 + restart: always + depends_on: + - postgres + environment: + # - LOAD_EX=n + - EXECUTOR=Local + # volumes: + # - /localpath/to/dags:/usr/local/airflow/dags + ports: + - "8080:8080" + command: webserver + + scheduler: + image: puckel/docker-airflow:1.7.1.3-2 + restart: always + depends_on: + - webserver + # volumes: + # - /localpath/to/dags:/usr/local/airflow/dags + environment: + # - LOAD_EX=n + - EXECUTOR=Local + command: scheduler -n 5 diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 19d75abb5d..53a2419d14 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash +AIRFLOW_HOME="/usr/local/airflow" CMD="airflow" TRY_LOOP="10" POSTGRES_HOST="postgres" @@ -8,28 +9,19 @@ RABBITMQ_HOST="rabbitmq" 
RABBITMQ_CREDS="airflow:airflow" FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY") -# Generate Fernet key -sed -i "s/{FERNET_KEY}/${FERNET_KEY}/" $AIRFLOW_HOME/airflow.cfg - -# wait for rabbitmq -if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then - j=0 - while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:15672/api/whoami |grep '200 OK'; do - j=`expr $j + 1` - if [ $j -ge $TRY_LOOP ]; then - echo "$(date) - $RABBITMQ_HOST still not reachable, giving up" - exit 1 - fi - echo "$(date) - waiting for RabbitMQ... $j/$TRY_LOOP" - sleep 5 - done +# Load DAGs exemples (default: Yes) +if [ "x$LOAD_EX" = "xn" ]; then + sed -i "s/load_examples = True/load_examples = False/" "$AIRFLOW_HOME"/airflow.cfg fi +# Generate Fernet key +sed -i "s|\$FERNET_KEY|$FERNET_KEY|" "$AIRFLOW_HOME"/airflow.cfg + # wait for DB if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; then i=0 while ! nc $POSTGRES_HOST $POSTGRES_PORT >/dev/null 2>&1 < /dev/null; do - i=`expr $i + 1` + i=$((i+1)) if [ $i -ge $TRY_LOOP ]; then echo "$(date) - ${POSTGRES_HOST}:${POSTGRES_PORT} still not reachable, giving up" exit 1 @@ -44,4 +36,31 @@ if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; the sleep 5 fi -exec $CMD "$@" +# If we use docker-compose, we use Celery (rabbitmq container). +if [ "x$EXECUTOR" = "xCelery" ] +then +# wait for rabbitmq + if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then + j=0 + while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:15672/api/whoami |grep '200 OK'; do + j=$((j+1)) + if [ $j -ge $TRY_LOOP ]; then + echo "$(date) - $RABBITMQ_HOST still not reachable, giving up" + exit 1 + fi + echo "$(date) - waiting for RabbitMQ... 
$j/$TRY_LOOP" + sleep 5 + done + fi + exec $CMD "$@" +elif [ "x$EXECUTOR" = "xLocal" ] +then + sed -i "s/executor = CeleryExecutor/executor = LocalExecutor/" "$AIRFLOW_HOME"/airflow.cfg + exec $CMD "$@" +else + sed -i "s/executor = CeleryExecutor/executor = SequentialExecutor/" "$AIRFLOW_HOME"/airflow.cfg + sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = sqlite:////usr/local/airflow/airflow.db#" "$AIRFLOW_HOME"/airflow.cfg + echo "Initialize database..." + $CMD initdb + exec $CMD webserver +fi From 9839fca2c8c9bdb8f9231d2052adf6da44118b93 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 12 Aug 2016 16:57:49 +0200 Subject: [PATCH 061/163] =?UTF-8?q?Add=20test=20case=20for=20cmd=20version?= =?UTF-8?q?=20(circleCi=20test=C3=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/entrypoint.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 53a2419d14..41319cea57 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -58,6 +58,9 @@ then sed -i "s/executor = CeleryExecutor/executor = LocalExecutor/" "$AIRFLOW_HOME"/airflow.cfg exec $CMD "$@" else + if [ "$1" = "version" ]; then + exec $CMD version + fi sed -i "s/executor = CeleryExecutor/executor = SequentialExecutor/" "$AIRFLOW_HOME"/airflow.cfg sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = sqlite:////usr/local/airflow/airflow.db#" "$AIRFLOW_HOME"/airflow.cfg echo "Initialize database..." 
From c179c4f2dcdcea26e5468353b58d58eef0b6122e Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 22 Aug 2016 11:47:04 +0200 Subject: [PATCH 062/163] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 35f464b49f..a35eac9ee8 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # docker-airflow -[![CircleCI](https://img.shields.io/circleci/project/puckel/docker-airflow.svg?maxAge=2592000)](https://circleci.com/gh/puckel/docker-airflow) +[![CircleCI branch](https://img.shields.io/circleci/project/puckel/docker-airflow/master.svg?maxAge=2592000)](https://circleci.com/gh/puckel/docker-airflow/tree/master) [![Docker Hub](https://img.shields.io/badge/docker-ready-blue.svg)](https://hub.docker.com/r/puckel/docker-airflow/) [![Docker Pulls](https://img.shields.io/docker/pulls/puckel/docker-airflow.svg?maxAge=2592000)]() [![Docker Stars](https://img.shields.io/docker/stars/puckel/docker-airflow.svg?maxAge=2592000)]() From 129ffd6dbbf23b6a588bdf9df3db6244b54d9d53 Mon Sep 17 00:00:00 2001 From: "antonin.ribeaud" Date: Wed, 5 Oct 2016 16:54:37 +0200 Subject: [PATCH 063/163] Adding Cython to the Dockerfile install list --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index d81a3bfb45..3686f99397 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,6 +48,7 @@ RUN set -ex \ && locale-gen \ && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \ + && pip install Cython \ && pip install pytz==2015.7 \ && pip install cryptography \ && pip install pyOpenSSL \ From e4fd5a69ab59e5489604519ce4532d3c8dbe9984 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 14 Oct 2016 16:13:22 +0200 Subject: [PATCH 064/163] Add pip install at run for custom python package without rebuild --- Dockerfile | 4 ++-- README.md | 6 ++++++ config/airflow.cfg | 3 +-- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 7 
++++--- script/entrypoint.sh | 5 +++++ 6 files changed, 22 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3686f99397..56ae07520e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.7.1.3-2 +# VERSION 1.7.1.3-3 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow @@ -25,7 +25,6 @@ ENV LC_ALL en_US.UTF-8 RUN set -ex \ && buildDeps=' \ - python-pip \ python-dev \ libkrb5-dev \ libsasl2-dev \ @@ -39,6 +38,7 @@ RUN set -ex \ && apt-get update -yqq \ && apt-get install -yqq --no-install-recommends \ $buildDeps \ + python-pip \ apt-utils \ curl \ netcat \ diff --git a/README.md b/README.md index a35eac9ee8..d7af6edb39 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,12 @@ Go to Admin -> Connections and Edit "mysql_default" set this values (equivalent Check [Airflow Documentation](http://pythonhosted.org/airflow/) +## Install custom python package + +- Create a file "requirements.txt" with the dedired python modules +- Mount this file as a volume `-v $(pwd)/requirements.txt:/requirements.txt` +- The entrypoint.sh script execute the pip install command (with --user option) + ## UI Links - Airflow: [localhost:8080](http://localhost:8080/) diff --git a/config/airflow.cfg b/config/airflow.cfg index 2fcbd5468a..9705a0dfb3 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -17,8 +17,7 @@ remote_base_log_folder = remote_log_conn_id = # Use server-side encryption for logs stored in S3 encrypt_s3_logs = False -# deprecated option for remote log storage, use remote_base_log_folder instead! -# s3_log_folder = +# remote_base_log_folder = # The executor class that airflow should use. 
Choices include # SequentialExecutor, LocalExecutor, CeleryExecutor diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index a8ce0f3d7d..de2ee0ab99 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -19,7 +19,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.7.1.3-2 + image: puckel/docker-airflow:1.7.1.3-3 restart: always depends_on: - postgres @@ -34,7 +34,7 @@ services: command: webserver flower: - image: puckel/docker-airflow:1.7.1.3-2 + image: puckel/docker-airflow:1.7.1.3-3 restart: always depends_on: - rabbitmq @@ -45,7 +45,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.7.1.3-2 + image: puckel/docker-airflow:1.7.1.3-3 restart: always depends_on: - webserver @@ -57,7 +57,7 @@ services: command: scheduler -n 5 worker: - image: puckel/docker-airflow:1.7.1.3-2 + image: puckel/docker-airflow:1.7.1.3-3 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index d8b409bf92..24efb688f2 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.7.1.3-2 + image: puckel/docker-airflow:1.7.1.3-3 restart: always depends_on: - postgres @@ -22,12 +22,13 @@ services: command: webserver scheduler: - image: puckel/docker-airflow:1.7.1.3-2 + image: puckel/docker-airflow:1.7.1.3-3 restart: always depends_on: - webserver # volumes: - # - /localpath/to/dags:/usr/local/airflow/dags + # - ./requirements.txt:/requirements.txt:ro + # - /localpath/to/dags:/usr/local/airflow/dags environment: # - LOAD_EX=n - EXECUTOR=Local diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 41319cea57..39b177306a 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -14,6 +14,11 @@ if [ "x$LOAD_EX" = "xn" ]; then sed -i "s/load_examples = True/load_examples = False/" 
"$AIRFLOW_HOME"/airflow.cfg fi +# Install custome python package if requirements.txt is present +if [ -e "/requirements.txt" ]; then + $(which pip) install --user -r /requirements.txt +fi + # Generate Fernet key sed -i "s|\$FERNET_KEY|$FERNET_KEY|" "$AIRFLOW_HOME"/airflow.cfg From 5477ea72f56bb511338d1445754c9715e11adfcc Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 17 Oct 2016 16:06:56 +0200 Subject: [PATCH 065/163] Fix issue #39 --- README.md | 5 +++++ docker-compose-LocalExecutor.yml | 2 ++ script/entrypoint.sh | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d7af6edb39..16b9cef475 100644 --- a/README.md +++ b/README.md @@ -54,8 +54,13 @@ Go to Admin -> Connections and Edit "mysql_default" set this values (equivalent - Login : airflow - Password : airflow +For encrypted connection passwords (in Local or Celery Executor), you must have the same fernet_key. By default docker-airflow generates the fernet_key at startup, you have to set an environment variable in the docker-compose (ie: docker-compose-LocalExecutor.yml) file to set the same key accross containers. 
To generate a fernet_key : + + python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY" + Check [Airflow Documentation](http://pythonhosted.org/airflow/) + ## Install custom python package - Create a file "requirements.txt" with the dedired python modules diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 24efb688f2..ba3ed255b3 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -15,6 +15,7 @@ services: environment: # - LOAD_EX=n - EXECUTOR=Local + # - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA # volumes: # - /localpath/to/dags:/usr/local/airflow/dags ports: @@ -31,5 +32,6 @@ services: # - /localpath/to/dags:/usr/local/airflow/dags environment: # - LOAD_EX=n + # - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Local command: scheduler -n 5 diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 39b177306a..b54f479f06 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -7,7 +7,8 @@ POSTGRES_HOST="postgres" POSTGRES_PORT="5432" RABBITMQ_HOST="rabbitmq" RABBITMQ_CREDS="airflow:airflow" -FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY") +: ${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY")} +# FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY") # Load DAGs exemples (default: Yes) if [ "x$LOAD_EX" = "xn" ]; then From 52f60e3c69aac2bcac1831df3087a45bef994550 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 14 Nov 2016 11:53:45 +0100 Subject: [PATCH 066/163] Fix issue #48 --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 56ae07520e..1da2ac0b69 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,7 +55,8 @@ RUN 
set -ex \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install psycopg2 \ - && pip install airflow[celery,postgresql,hive]==$AIRFLOW_VERSION \ + && pip install airflow[postgresql,hive]==$AIRFLOW_VERSION \ + && pip install celery==3.1.23 \ && apt-get remove --purge -yqq $buildDeps libpq-dev \ && apt-get clean \ && rm -rf \ From de3803dbec84837fe5bb69b29fedd9f61d4893d0 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 14 Nov 2016 11:54:48 +0100 Subject: [PATCH 067/163] Update airflow.cfg --- config/airflow.cfg | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/config/airflow.cfg b/config/airflow.cfg index 9705a0dfb3..1dcf3477eb 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -17,7 +17,8 @@ remote_base_log_folder = remote_log_conn_id = # Use server-side encryption for logs stored in S3 encrypt_s3_logs = False -# remote_base_log_folder = +# deprecated option for remote log storage, use remote_base_log_folder instead! +# s3_log_folder = # The executor class that airflow should use. Choices include # SequentialExecutor, LocalExecutor, CeleryExecutor @@ -46,7 +47,11 @@ parallelism = 32 dag_concurrency = 16 # Are DAGs paused by default at creation -dags_are_paused_at_creation = False +dags_are_paused_at_creation = True + +# When not using pools, tasks are run in the "default pool", +# whose size is guided by this config element +non_pooled_task_slot_count = 128 # The maximum number of active DAG runs per DAG max_active_runs_per_dag = 16 @@ -68,9 +73,14 @@ donot_pickle = False # How long before timing out a python file import while filling the DagBag dagbag_import_timeout = 30 +[operators] +# The default owner assigned to each new operator, unless +# provided explicitly or passed via `default_args` +default_owner = Airflow + [webserver] # The base url of your website as airflow cannot guess what domain or -# cname you are using. This is use in automated emails that +# cname you are using. 
This is used in automated emails that # airflow sends to point links to the right web server base_url = http://localhost:8080 @@ -96,18 +106,19 @@ worker_class = sync # Expose the configuration file in the web server expose_config = true -# Set to true to turn on authentication : http://pythonhosted.org/airflow/installation.html#web-authentication +# Set to true to turn on authentication: +# http://pythonhosted.org/airflow/installation.html#web-authentication authenticate = False # Filter the list of dags by owner name (requires authentication to be enabled) filter_by_owner = False [email] -email_backend = airflow.utils.send_email_smtp +email_backend = airflow.utils.email.send_email_smtp [smtp] -# If you want airflow to send emails on retries, failure, and you want to -# the airflow.utils.send_email function, you have to configure an smtp +# If you want airflow to send emails on retries, failure, and you want to use +# the airflow.utils.email.send_email_smtp function, you have to configure an smtp # server here smtp_host = localhost smtp_starttls = True From ce463e112acbdbaa15930424840fc892481841bf Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 16 Nov 2016 13:17:11 +0100 Subject: [PATCH 068/163] Bump to v1.7.1.3-5 --- Dockerfile | 2 +- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1da2ac0b69..211b62a29f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.7.1.3-3 +# VERSION 1.7.1.3-5 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index de2ee0ab99..9ac845bbd7 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -19,7 +19,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.7.1.3-3 + image: 
puckel/docker-airflow:1.7.1.3-5 restart: always depends_on: - postgres @@ -34,7 +34,7 @@ services: command: webserver flower: - image: puckel/docker-airflow:1.7.1.3-3 + image: puckel/docker-airflow:1.7.1.3-5 restart: always depends_on: - rabbitmq @@ -45,7 +45,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.7.1.3-3 + image: puckel/docker-airflow:1.7.1.3-5 restart: always depends_on: - webserver @@ -57,7 +57,7 @@ services: command: scheduler -n 5 worker: - image: puckel/docker-airflow:1.7.1.3-3 + image: puckel/docker-airflow:1.7.1.3-5 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index ba3ed255b3..bc09491591 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.7.1.3-3 + image: puckel/docker-airflow:1.7.1.3-5 restart: always depends_on: - postgres @@ -23,7 +23,7 @@ services: command: webserver scheduler: - image: puckel/docker-airflow:1.7.1.3-3 + image: puckel/docker-airflow:1.7.1.3-5 restart: always depends_on: - webserver From 26c2dcaa812d36e92ce2690460ddfca0e585e789 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 16 Nov 2016 13:17:57 +0100 Subject: [PATCH 069/163] Fix issue #45 and #49 --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 211b62a29f..97fc335669 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,8 +55,9 @@ RUN set -ex \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install psycopg2 \ - && pip install airflow[postgresql,hive]==$AIRFLOW_VERSION \ + && pip install pandas==0.18.1 \ && pip install celery==3.1.23 \ + && pip install airflow[celery,postgres,hive,hdfs,jdbc]==$AIRFLOW_VERSION \ && apt-get remove --purge -yqq $buildDeps libpq-dev \ && apt-get clean \ && rm -rf \ From 7a26f7d801027e15c4dcf0234a8efecbff5025b0 Mon Sep 17 00:00:00 2001 From: Puckel_ 
Date: Wed, 16 Nov 2016 13:18:43 +0100 Subject: [PATCH 070/163] Set external images version --- docker-compose-CeleryExecutor.yml | 4 ++-- docker-compose-LocalExecutor.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 9ac845bbd7..59810d2eb5 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -1,7 +1,7 @@ version: '2' services: rabbitmq: - image: rabbitmq:3-management + image: rabbitmq:3.6-management restart: always ports: - "15672:15672" @@ -12,7 +12,7 @@ services: - RABBITMQ_DEFAULT_VHOST=airflow postgres: - image: postgres + image: postgres:9.6 environment: - POSTGRES_USER=airflow - POSTGRES_PASSWORD=airflow diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index bc09491591..321fb55af7 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -1,7 +1,7 @@ version: '2' services: postgres: - image: postgres + image: postgres:9.6 environment: - POSTGRES_USER=airflow - POSTGRES_PASSWORD=airflow From fd55a69a386da90a3a09e4adb282f183cdb915ad Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 16 Nov 2016 15:22:32 +0100 Subject: [PATCH 071/163] Add FERNET_KEY in docker-compose files --- docker-compose-CeleryExecutor.yml | 3 +++ docker-compose-LocalExecutor.yml | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 59810d2eb5..ac9eaab8f3 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -26,6 +26,7 @@ services: - rabbitmq environment: # - LOAD_EX=n + - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Celery # volumes: # - /localpath/to/dags:/usr/local/airflow/dags @@ -53,6 +54,7 @@ services: # - /localpath/to/dags:/usr/local/airflow/dags environment: # - LOAD_EX=n + - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - 
EXECUTOR=Celery command: scheduler -n 5 @@ -64,5 +66,6 @@ services: # volumes: # - /localpath/to/dags:/usr/local/airflow/dags environment: + - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Celery command: worker diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 321fb55af7..2540aab5fd 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -15,7 +15,7 @@ services: environment: # - LOAD_EX=n - EXECUTOR=Local - # - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA + - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA # volumes: # - /localpath/to/dags:/usr/local/airflow/dags ports: @@ -32,6 +32,6 @@ services: # - /localpath/to/dags:/usr/local/airflow/dags environment: # - LOAD_EX=n - # - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA + - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Local command: scheduler -n 5 From fdf60ee8aecab13d333e503f53989433aea80099 Mon Sep 17 00:00:00 2001 From: Leonid Evdokimov Date: Tue, 6 Dec 2016 17:05:01 +0300 Subject: [PATCH 072/163] Speedup restart using `nc -z` (scan mode) Successful(!) connection to postgresql is hung for 1 minute without `-z`. --- script/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index b54f479f06..6dd25d184b 100644 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -26,7 +26,7 @@ sed -i "s|\$FERNET_KEY|$FERNET_KEY|" "$AIRFLOW_HOME"/airflow.cfg # wait for DB if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; then i=0 - while ! nc $POSTGRES_HOST $POSTGRES_PORT >/dev/null 2>&1 < /dev/null; do + while ! 
nc -z $POSTGRES_HOST $POSTGRES_PORT >/dev/null 2>&1 < /dev/null; do i=$((i+1)) if [ $i -ge $TRY_LOOP ]; then echo "$(date) - ${POSTGRES_HOST}:${POSTGRES_PORT} still not reachable, giving up" From 7a9ac51b4307e90ed43d1755cddbc02381e7b751 Mon Sep 17 00:00:00 2001 From: Leonid Evdokimov Date: Tue, 6 Dec 2016 17:06:38 +0300 Subject: [PATCH 073/163] Fix stale link & point pythonhosted.org links to https --- README.md | 4 ++-- config/airflow.cfg | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 16b9cef475..f42eab17dc 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Pull the image from the Docker repository. ## Build -For example, if you need to install [Extra Packages](http://pythonhosted.org/airflow/installation.html#extra-package), edit the Dockerfile and than build-it. +For example, if you need to install [Extra Packages](https://pythonhosted.org/airflow/installation.html#extra-package), edit the Dockerfile and than build-it. docker build --rm -t puckel/docker-airflow . 
@@ -58,7 +58,7 @@ For encrypted connection passwords (in Local or Celery Executor), you must have python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY" -Check [Airflow Documentation](http://pythonhosted.org/airflow/) +Check [Airflow Documentation](https://pythonhosted.org/airflow/) ## Install custom python package diff --git a/config/airflow.cfg b/config/airflow.cfg index 1dcf3477eb..f1c63d1414 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -107,7 +107,7 @@ worker_class = sync expose_config = true # Set to true to turn on authentication: -# http://pythonhosted.org/airflow/installation.html#web-authentication +# https://pythonhosted.org/airflow/security.html#web-authentication authenticate = False # Filter the list of dags by owner name (requires authentication to be enabled) From 3527b8e53a439ed0b21ba6d688a77a213c2d2f2a Mon Sep 17 00:00:00 2001 From: Leonid Evdokimov Date: Thu, 8 Dec 2016 14:57:35 +0300 Subject: [PATCH 074/163] Fix typo --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 97fc335669..2f7057b57b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ # VERSION 1.7.1.3-5 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container -# BUILD: docker build --rm -t puckel/docker-airflow +# BUILD: docker build --rm -t puckel/docker-airflow . # SOURCE: https://github.com/puckel/docker-airflow FROM debian:jessie From e196c85995365d76ed4ca2e758c779da07ffa940 Mon Sep 17 00:00:00 2001 From: Leonid Evdokimov Date: Thu, 8 Dec 2016 16:38:52 +0300 Subject: [PATCH 075/163] Make entrypoint.sh root-owned There is no obvious reason to make entrypoint.sh owned by airflow. 
--- Dockerfile | 7 +++---- script/entrypoint.sh | 0 2 files changed, 3 insertions(+), 4 deletions(-) mode change 100644 => 100755 script/entrypoint.sh diff --git a/Dockerfile b/Dockerfile index 2f7057b57b..c76761596a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -68,14 +68,13 @@ RUN set -ex \ /usr/share/doc \ /usr/share/doc-base -COPY script/entrypoint.sh ${AIRFLOW_HOME}/entrypoint.sh +COPY script/entrypoint.sh /entrypoint.sh COPY config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg -RUN chown -R airflow: ${AIRFLOW_HOME} \ - && chmod +x ${AIRFLOW_HOME}/entrypoint.sh +RUN chown -R airflow: ${AIRFLOW_HOME} EXPOSE 8080 5555 8793 USER airflow WORKDIR ${AIRFLOW_HOME} -ENTRYPOINT ["./entrypoint.sh"] +ENTRYPOINT ["/entrypoint.sh"] diff --git a/script/entrypoint.sh b/script/entrypoint.sh old mode 100644 new mode 100755 From e8de28dfabd085aa99922f3c4ee29bb09873d19a Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 2 Feb 2017 17:17:30 +0100 Subject: [PATCH 076/163] Move from rabbitmq to Redis --- Dockerfile | 15 +++++++-------- README.md | 7 +++---- config/airflow.cfg | 4 ++-- docker-compose-CeleryExecutor.yml | 24 ++++++++---------------- docker-compose-LocalExecutor.yml | 4 ++-- script/entrypoint.sh | 16 +++++++--------- 6 files changed, 29 insertions(+), 41 deletions(-) diff --git a/Dockerfile b/Dockerfile index c76761596a..019455b608 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.7.1.3-5 +# VERSION 1.7.1.3-6 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . 
@@ -33,6 +33,7 @@ RUN set -ex \ build-essential \ libblas-dev \ liblapack-dev \ + libpq-dev \ ' \ && echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sources.list.d/backports.list \ && apt-get update -yqq \ @@ -43,22 +44,20 @@ RUN set -ex \ curl \ netcat \ locales \ - && apt-get install -yqq -t jessie-backports python-requests libpq-dev \ + && apt-get install -yqq -t jessie-backports python-requests \ && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \ && locale-gen \ && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \ + && python -m pip install -U pip \ && pip install Cython \ && pip install pytz==2015.7 \ - && pip install cryptography \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ - && pip install psycopg2 \ - && pip install pandas==0.18.1 \ - && pip install celery==3.1.23 \ - && pip install airflow[celery,postgres,hive,hdfs,jdbc]==$AIRFLOW_VERSION \ - && apt-get remove --purge -yqq $buildDeps libpq-dev \ + && pip install airflow[crypto,celery,postgres,hive,hdfs,jdbc]==$AIRFLOW_VERSION \ + && pip install celery[redis]==3.1.17 \ + && apt-get remove --purge -yqq $buildDeps \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* \ diff --git a/README.md b/README.md index f42eab17dc..0a33efe611 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ This repository contains **Dockerfile** of [airflow](https://github.com/apache/i ## Informations -* Based on Debian Jessie official Image [debian:jessie](https://registry.hub.docker.com/_/debian/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [RabbitMQ](https://hub.docker.com/_/rabbitmq/) as queue +* Based on Debian Jessie official Image [debian:jessie](https://registry.hub.docker.com/_/debian/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue * Install 
[Docker](https://www.docker.com/) * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/airflow) @@ -48,8 +48,8 @@ NB : If you don't want to have DAGs example loaded (default=True), you've to set docker run -d -p 8080:8080 -e LOAD_EX=n puckel/docker-airflow If you want to use Ad hoc query, make sure you've configured connections: -Go to Admin -> Connections and Edit "mysql_default" set this values (equivalent to values in airflow.cfg/docker-compose.yml) : -- Host : mysql +Go to Admin -> Connections and Edit "postgres_default" set this values (equivalent to values in airflow.cfg/docker-compose*.yml) : +- Host : postgres - Schema : airflow - Login : airflow - Password : airflow @@ -71,7 +71,6 @@ Check [Airflow Documentation](https://pythonhosted.org/airflow/) - Airflow: [localhost:8080](http://localhost:8080/) - Flower: [localhost:5555](http://localhost:5555/) -- RabbitMQ: [localhost:15672](http://localhost:15672/) When using OSX with boot2docker, use: open http://$(boot2docker ip):8080 diff --git a/config/airflow.cfg b/config/airflow.cfg index f1c63d1414..1b7dbd028c 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -151,10 +151,10 @@ worker_log_server_port = 8793 # The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally # a sqlalchemy database. Refer to the Celery documentation for more # information. -broker_url = amqp://airflow:airflow@rabbitmq:5672/airflow +broker_url = redis://redis:6379/1 # Another key Celery setting -celery_result_backend = amqp://airflow:airflow@rabbitmq:5672/airflow +celery_result_backend = redis://redis:6379/1 # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start # it `airflow flower`. 
This defines the port that Celery Flower runs on diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index ac9eaab8f3..095f275781 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -1,15 +1,7 @@ version: '2' services: - rabbitmq: - image: rabbitmq:3.6-management - restart: always - ports: - - "15672:15672" - - "5672:5672" - environment: - - RABBITMQ_DEFAULT_USER=airflow - - RABBITMQ_DEFAULT_PASS=airflow - - RABBITMQ_DEFAULT_VHOST=airflow + redis: + image: 'redis:3.2.7' postgres: image: postgres:9.6 @@ -19,11 +11,11 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.7.1.3-5 + image: puckel/docker-airflow:1.7.1.3-6 restart: always depends_on: - postgres - - rabbitmq + - redis environment: # - LOAD_EX=n - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA @@ -35,10 +27,10 @@ services: command: webserver flower: - image: puckel/docker-airflow:1.7.1.3-5 + image: puckel/docker-airflow:1.7.1.3-6 restart: always depends_on: - - rabbitmq + - redis environment: - EXECUTOR=Celery ports: @@ -46,7 +38,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.7.1.3-5 + image: puckel/docker-airflow:1.7.1.3-6 restart: always depends_on: - webserver @@ -59,7 +51,7 @@ services: command: scheduler -n 5 worker: - image: puckel/docker-airflow:1.7.1.3-5 + image: puckel/docker-airflow:1.7.1.3-6 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 2540aab5fd..7fdfda9eee 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.7.1.3-5 + image: puckel/docker-airflow:1.7.1.3-6 restart: always depends_on: - postgres @@ -23,7 +23,7 @@ services: command: webserver scheduler: - image: puckel/docker-airflow:1.7.1.3-5 + image: puckel/docker-airflow:1.7.1.3-6 restart: always depends_on: - 
webserver diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 6dd25d184b..c32e96a53b 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -5,10 +5,9 @@ CMD="airflow" TRY_LOOP="10" POSTGRES_HOST="postgres" POSTGRES_PORT="5432" -RABBITMQ_HOST="rabbitmq" -RABBITMQ_CREDS="airflow:airflow" +REDIS_HOST="redis" +REDIS_PORT="6379" : ${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY")} -# FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY") # Load DAGs exemples (default: Yes) if [ "x$LOAD_EX" = "xn" ]; then @@ -33,7 +32,7 @@ if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; the exit 1 fi echo "$(date) - waiting for ${POSTGRES_HOST}:${POSTGRES_PORT}... $i/$TRY_LOOP" - sleep 5 + sleep 10 done if [ "$1" = "webserver" ]; then echo "Initialize database..." @@ -42,19 +41,18 @@ if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; the sleep 5 fi -# If we use docker-compose, we use Celery (rabbitmq container). +# If we use docker-compose, we use Celery. if [ "x$EXECUTOR" = "xCelery" ] then -# wait for rabbitmq if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then j=0 - while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:15672/api/whoami |grep '200 OK'; do + while ! nc -z $REDIS_HOST $REDIS_PORT >/dev/null 2>&1 < /dev/null; do j=$((j+1)) if [ $j -ge $TRY_LOOP ]; then - echo "$(date) - $RABBITMQ_HOST still not reachable, giving up" + echo "$(date) - $REDIS_HOST still not reachable, giving up" exit 1 fi - echo "$(date) - waiting for RabbitMQ... $j/$TRY_LOOP" + echo "$(date) - waiting for Redis... 
$j/$TRY_LOOP" sleep 5 done fi From 98f43348c3143a4f408e73a179b51f4989747ff1 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 2 Feb 2017 17:29:20 +0100 Subject: [PATCH 077/163] Bump to 1.7.1.3-7 --- Dockerfile | 2 +- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 019455b608..f30c61c0c7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.7.1.3-6 +# VERSION 1.7.1.3-7 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 095f275781..16079a34c2 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -11,7 +11,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.7.1.3-6 + image: puckel/docker-airflow:1.7.1.3-7 restart: always depends_on: - postgres @@ -27,7 +27,7 @@ services: command: webserver flower: - image: puckel/docker-airflow:1.7.1.3-6 + image: puckel/docker-airflow:1.7.1.3-7 restart: always depends_on: - redis @@ -38,7 +38,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.7.1.3-6 + image: puckel/docker-airflow:1.7.1.3-7 restart: always depends_on: - webserver @@ -51,7 +51,7 @@ services: command: scheduler -n 5 worker: - image: puckel/docker-airflow:1.7.1.3-6 + image: puckel/docker-airflow:1.7.1.3-7 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 7fdfda9eee..fc3ea8f58c 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.7.1.3-6 + image: puckel/docker-airflow:1.7.1.3-7 restart: always depends_on: - postgres @@ -23,7 +23,7 @@ services: command: webserver scheduler: - 
image: puckel/docker-airflow:1.7.1.3-6 + image: puckel/docker-airflow:1.7.1.3-7 restart: always depends_on: - webserver From 7318083881205bad25a5c262bdf6490c2e70209d Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 16 Feb 2017 17:47:55 +0100 Subject: [PATCH 078/163] Try Airflow branch 1.8-stable --- Dockerfile | 23 +++-- config/airflow.cfg | 153 ++++++++++++++++++++++++++---- docker-compose-CeleryExecutor.yml | 10 +- script/entrypoint.sh | 2 +- 4 files changed, 153 insertions(+), 35 deletions(-) diff --git a/Dockerfile b/Dockerfile index f30c61c0c7..836ff103b6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ -# VERSION 1.7.1.3-7 +# VERSION 1.8.0 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . # SOURCE: https://github.com/puckel/docker-airflow -FROM debian:jessie +FROM python:3.6 MAINTAINER Puckel_ # Never prompts the user for choices on installation/configuration of packages @@ -12,8 +12,8 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.7.1.3 -ENV AIRFLOW_HOME /usr/local/airflow +ARG AIRFLOW_VERSION=1.8.0 +ARG AIRFLOW_HOME=/usr/local/airflow # Define en_US. 
ENV LANGUAGE en_US.UTF-8 @@ -25,7 +25,7 @@ ENV LC_ALL en_US.UTF-8 RUN set -ex \ && buildDeps=' \ - python-dev \ + python3-dev \ libkrb5-dev \ libsasl2-dev \ libssl-dev \ @@ -34,29 +34,28 @@ RUN set -ex \ libblas-dev \ liblapack-dev \ libpq-dev \ + git \ ' \ - && echo "deb http://http.debian.net/debian jessie-backports main" >/etc/apt/sources.list.d/backports.list \ && apt-get update -yqq \ && apt-get install -yqq --no-install-recommends \ $buildDeps \ - python-pip \ + python3-pip \ + python3-requests \ apt-utils \ curl \ netcat \ locales \ - && apt-get install -yqq -t jessie-backports python-requests \ && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \ && locale-gen \ && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \ - && python -m pip install -U pip \ && pip install Cython \ - && pip install pytz==2015.7 \ + && pip install pytz \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ - && pip install airflow[crypto,celery,postgres,hive,hdfs,jdbc]==$AIRFLOW_VERSION \ - && pip install celery[redis]==3.1.17 \ + && pip install "git+https://github.com/apache/incubator-airflow.git@v1-8-stable#egg=airflow[crypto,celery,postgres,hive,hdfs,jdbc]" \ + && pip install celery[redis] \ && apt-get remove --purge -yqq $buildDeps \ && apt-get clean \ && rm -rf \ diff --git a/config/airflow.cfg b/config/airflow.cfg index 1b7dbd028c..6d313f039f 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -4,9 +4,11 @@ airflow_home = /usr/local/airflow # The folder where your airflow pipelines live, most likely a # subfolder in a code repository +# This path must be absolute dags_folder = /usr/local/airflow/dags -# The folder where airflow should store its log files. This location +# The folder where airflow should store its log files +# This path must be absolute base_log_folder = /usr/local/airflow/logs # Airflow can store logs remotely in AWS S3 or Google Cloud Storage. 
Users @@ -17,8 +19,8 @@ remote_base_log_folder = remote_log_conn_id = # Use server-side encryption for logs stored in S3 encrypt_s3_logs = False -# deprecated option for remote log storage, use remote_base_log_folder instead! -# s3_log_folder = +# DEPRECATED option for remote log storage, use remote_base_log_folder instead! +s3_log_folder = # The executor class that airflow should use. Choices include # SequentialExecutor, LocalExecutor, CeleryExecutor @@ -73,10 +75,39 @@ donot_pickle = False # How long before timing out a python file import while filling the DagBag dagbag_import_timeout = 30 +# The class to use for running task instances in a subprocess +task_runner = BashTaskRunner + +# If set, tasks without a `run_as_user` argument will be run with this user +# Can be used to de-elevate a sudo user running Airflow when executing tasks +default_impersonation = + +# What security module to use (for example kerberos): +security = + +# Turn unit test mode on (overwrites many configuration options with test +# values at runtime) +unit_test_mode = False + +[cli] +# In what way should the cli access the API. The LocalClient will use the +# database directly, while the json_client will use the api running on the +# webserver +api_client = airflow.api.client.local_client +endpoint_url = http://localhost:8080 + +[api] +# How to authenticate users of the API +auth_backend = airflow.api.auth.backend.default + [operators] # The default owner assigned to each new operator, unless # provided explicitly or passed via `default_args` default_owner = Airflow +default_cpus = 1 +default_ram = 512 +default_disk = 512 +default_gpus = 0 [webserver] # The base url of your website as airflow cannot guess what domain or @@ -90,9 +121,22 @@ web_server_host = 0.0.0.0 # The port on which to run the web server web_server_port = 8080 -# The time the gunicorn webserver waits before timing out on a worker +# Paths to the SSL certificate and key for the web server. 
When both are +# provided SSL will be enabled. This does not change the web server port. +web_server_ssl_cert = +web_server_ssl_key = + +# Number of seconds the gunicorn webserver waits before timing out on a worker web_server_worker_timeout = 120 +# Number of workers to refresh at a time. When set to 0, worker refresh is +# disabled. When nonzero, airflow periodically refreshes webserver workers by +# bringing up new ones and killing old ones. +worker_refresh_batch_size = 1 + +# Number of seconds to wait before refreshing a batch of workers. +worker_refresh_interval = 30 + # Secret key used to run your flask app secret_key = temporary_key @@ -103,30 +147,58 @@ workers = 4 # sync (default), eventlet, gevent worker_class = sync +# Log files for the gunicorn webserver. '-' means log to stderr. +access_logfile = - +error_logfile = - + # Expose the configuration file in the web server -expose_config = true +expose_config = True # Set to true to turn on authentication: -# https://pythonhosted.org/airflow/security.html#web-authentication +# http://pythonhosted.org/airflow/security.html#web-authentication authenticate = False # Filter the list of dags by owner name (requires authentication to be enabled) filter_by_owner = False +# Filtering mode. Choices include user (default) and ldapgroup. +# Ldap group filtering requires using the ldap backend +# +# Note that the ldap server needs the "memberOf" overlay to be set up +# in order to user the ldapgroup mode. +owner_mode = user + +# Default DAG orientation. Valid values are: +# LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top) +dag_orientation = LR + +# Puts the webserver in demonstration mode; blurs the names of Operators for +# privacy. +demo_mode = False + +# The amount of time (in secs) webserver will wait for initial handshake +# while fetching logs from other worker machine +log_fetch_timeout_sec = 5 + +# By default, the webserver shows paused DAGs. 
Flip this to hide paused +# DAGs by default +hide_paused_dags_by_default = False + [email] email_backend = airflow.utils.email.send_email_smtp [smtp] # If you want airflow to send emails on retries, failure, and you want to use -# the airflow.utils.email.send_email_smtp function, you have to configure an smtp -# server here +# the airflow.utils.email.send_email_smtp function, you have to configure an +# smtp server here smtp_host = localhost smtp_starttls = True smtp_ssl = False -smtp_user = airflow +# Uncomment and set the user/pass settings if you want to use SMTP AUTH +# smtp_user = airflow +# smtp_password = airflow smtp_port = 25 -smtp_password = airflow -smtp_mail_from = airflow@airflow.local +smtp_mail_from = airflow@airflow.com [celery] # This section only applies if you are using the CeleryExecutor in @@ -154,10 +226,13 @@ worker_log_server_port = 8793 broker_url = redis://redis:6379/1 # Another key Celery setting -celery_result_backend = redis://redis:6379/1 +celery_result_backend = db+postgresql://airflow:airflow@postgres/airflow # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start -# it `airflow flower`. This defines the port that Celery Flower runs on +# it `airflow flower`. This defines the IP that Celery Flower runs on +flower_host = 0.0.0.0 + +# This defines the port that Celery Flower runs on flower_port = 5555 # Default queue that tasks get assigned to and that worker listen on. @@ -174,17 +249,46 @@ job_heartbeat_sec = 5 # how often the scheduler should run (in seconds). 
scheduler_heartbeat_sec = 5 +# after how much time should the scheduler terminate in seconds +# -1 indicates to run continuously (see also num_runs) +run_duration = -1 + +# after how much time a new DAGs should be picked up from the filesystem +min_file_process_interval = 0 + +dag_dir_list_interval = 300 + +# How often should stats be printed to the logs +print_stats_interval = 30 + +child_process_log_directory = /usr/local/airflow/logs/scheduler + +# Local task jobs periodically heartbeat to the DB. If the job has +# not heartbeat in this many seconds, the scheduler will mark the +# associated task instance as failed and will re-schedule the task. +scheduler_zombie_task_threshold = 300 + +# Turn off scheduler catchup by setting this to False. +# Default behavior is unchanged and +# Command Line Backfills still work, but the scheduler +# will not do scheduler catchup if this is False, +# however it can be set on a per DAG basis in the +# DAG definition (catchup) +catchup_by_default = True + # Statsd (https://github.com/etsy/statsd) integration settings -# statsd_on = False -# statsd_host = localhost -# statsd_port = 8125 -# statsd_prefix = airflow +statsd_on = False +statsd_host = localhost +statsd_port = 8125 +statsd_prefix = airflow # The scheduler can run multiple threads in parallel to schedule dags. # This defines how many threads will run. However airflow will never # use more threads than the amount of cpu cores available. max_threads = 2 +authenticate = False + [mesos] # Mesos master address which MesosExecutor will connect to. 
master = localhost:5050 @@ -221,3 +325,18 @@ authenticate = False # Mesos credentials, if authentication is enabled # default_principal = admin # default_secret = admin + +[kerberos] +ccache = /tmp/airflow_krb5_ccache +# gets augmented with fqdn +principal = airflow +reinit_frequency = 3600 +kinit_path = kinit +keytab = airflow.keytab + +[github_enterprise] +api_rev = v3 + +[admin] +# UI to hide sensitive variable fields when set to True +hide_sensitive_variable_fields = True diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 16079a34c2..5fba51dbf9 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -11,7 +11,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.7.1.3-7 + image: puckel/docker-airflow:1.8.0 restart: always depends_on: - postgres @@ -27,7 +27,7 @@ services: command: webserver flower: - image: puckel/docker-airflow:1.7.1.3-7 + image: puckel/docker-airflow:1.8.0 restart: always depends_on: - redis @@ -38,7 +38,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.7.1.3-7 + image: puckel/docker-airflow:1.8.0 restart: always depends_on: - webserver @@ -48,10 +48,10 @@ services: # - LOAD_EX=n - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Celery - command: scheduler -n 5 + command: scheduler # -n 5 worker: - image: puckel/docker-airflow:1.7.1.3-7 + image: puckel/docker-airflow:1.8.0 restart: always depends_on: - scheduler diff --git a/script/entrypoint.sh b/script/entrypoint.sh index c32e96a53b..83a6a19ee9 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -7,7 +7,7 @@ POSTGRES_HOST="postgres" POSTGRES_PORT="5432" REDIS_HOST="redis" REDIS_PORT="6379" -: ${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY")} +: ${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); 
print(FERNET_KEY)")} # Load DAGs exemples (default: Yes) if [ "x$LOAD_EX" = "xn" ]; then From 4727c4405fe16025114c198f77be9c837b4691c6 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 16 Feb 2017 18:07:00 +0100 Subject: [PATCH 079/163] Fix version --- Dockerfile | 4 ++-- circle.yml | 2 +- docker-compose-LocalExecutor.yml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 836ff103b6..040978bde6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,7 +21,7 @@ ENV LANG en_US.UTF-8 ENV LC_ALL en_US.UTF-8 ENV LC_CTYPE en_US.UTF-8 ENV LC_MESSAGES en_US.UTF-8 -ENV LC_ALL en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 RUN set -ex \ && buildDeps=' \ @@ -55,7 +55,7 @@ RUN set -ex \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install "git+https://github.com/apache/incubator-airflow.git@v1-8-stable#egg=airflow[crypto,celery,postgres,hive,hdfs,jdbc]" \ - && pip install celery[redis] \ + && pip install celery[redis]==3.1.17 \ && apt-get remove --purge -yqq $buildDeps \ && apt-get clean \ && rm -rf \ diff --git a/circle.yml b/circle.yml index 227db63519..282f47e8b9 100644 --- a/circle.yml +++ b/circle.yml @@ -12,4 +12,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.7.1.3' + - docker run puckel/docker-airflow version |grep '1.8.0' diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index fc3ea8f58c..415cb5f328 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.7.1.3-7 + image: puckel/docker-airflow:1.8.0 restart: always depends_on: - postgres @@ -23,7 +23,7 @@ services: command: webserver scheduler: - image: puckel/docker-airflow:1.7.1.3-7 + image: puckel/docker-airflow:1.8.0 restart: always depends_on: - webserver From ba9a0845836a654229511bf4b015e1820a4cfebd Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 17 Feb 2017 11:23:07 
+0100 Subject: [PATCH 080/163] Rollback to python2 (issue with hdfs client). Optimize entrypoint output. --- Dockerfile | 10 +++++---- script/entrypoint.sh | 49 ++++++++++++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/Dockerfile b/Dockerfile index 040978bde6..fafc428240 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ # BUILD: docker build --rm -t puckel/docker-airflow . # SOURCE: https://github.com/puckel/docker-airflow -FROM python:3.6 +FROM debian:jessie MAINTAINER Puckel_ # Never prompts the user for choices on installation/configuration of packages @@ -25,7 +25,7 @@ ENV LC_ALL en_US.UTF-8 RUN set -ex \ && buildDeps=' \ - python3-dev \ + python-dev \ libkrb5-dev \ libsasl2-dev \ libssl-dev \ @@ -39,8 +39,8 @@ RUN set -ex \ && apt-get update -yqq \ && apt-get install -yqq --no-install-recommends \ $buildDeps \ - python3-pip \ - python3-requests \ + python-pip \ + python-requests \ apt-utils \ curl \ netcat \ @@ -49,12 +49,14 @@ RUN set -ex \ && locale-gen \ && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \ + && python -m pip install -U pip \ && pip install Cython \ && pip install pytz \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install "git+https://github.com/apache/incubator-airflow.git@v1-8-stable#egg=airflow[crypto,celery,postgres,hive,hdfs,jdbc]" \ + && pip install https://dist.apache.org/repos/dist/dev/incubator/airflow/airflow-1.8.0rc4+apache.incubating.tar.gz \ && pip install celery[redis]==3.1.17 \ && apt-get remove --purge -yqq $buildDeps \ && apt-get clean \ diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 83a6a19ee9..5e9d9fe1b4 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -2,7 +2,7 @@ AIRFLOW_HOME="/usr/local/airflow" CMD="airflow" -TRY_LOOP="10" +TRY_LOOP="20" POSTGRES_HOST="postgres" POSTGRES_PORT="5432" REDIS_HOST="redis" @@ -10,7 +10,7 @@ 
REDIS_PORT="6379" : ${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")} # Load DAGs exemples (default: Yes) -if [ "x$LOAD_EX" = "xn" ]; then +if [ "$LOAD_EX" = "n" ]; then sed -i "s/load_examples = True/load_examples = False/" "$AIRFLOW_HOME"/airflow.cfg fi @@ -19,31 +19,29 @@ if [ -e "/requirements.txt" ]; then $(which pip) install --user -r /requirements.txt fi -# Generate Fernet key +# Update airflow config - Fernet key sed -i "s|\$FERNET_KEY|$FERNET_KEY|" "$AIRFLOW_HOME"/airflow.cfg -# wait for DB +# Wait for Postresql if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; then i=0 while ! nc -z $POSTGRES_HOST $POSTGRES_PORT >/dev/null 2>&1 < /dev/null; do i=$((i+1)) - if [ $i -ge $TRY_LOOP ]; then - echo "$(date) - ${POSTGRES_HOST}:${POSTGRES_PORT} still not reachable, giving up" - exit 1 + if [ "$1" = "webserver" ]; then + echo "$(date) - waiting for ${POSTGRES_HOST}:${POSTGRES_PORT}... $i/$TRY_LOOP" + if [ $i -ge $TRY_LOOP ]; then + echo "$(date) - ${POSTGRES_HOST}:${POSTGRES_PORT} still not reachable, giving up" + exit 1 + fi fi - echo "$(date) - waiting for ${POSTGRES_HOST}:${POSTGRES_PORT}... $i/$TRY_LOOP" sleep 10 done - if [ "$1" = "webserver" ]; then - echo "Initialize database..." - $CMD initdb - fi - sleep 5 fi -# If we use docker-compose, we use Celery. -if [ "x$EXECUTOR" = "xCelery" ] +# Update configuration depending the type of Executor +if [ "$EXECUTOR" = "Celery" ] then + # Wait for Redis if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then j=0 while ! nc -z $REDIS_HOST $REDIS_PORT >/dev/null 2>&1 < /dev/null; do @@ -56,11 +54,26 @@ then sleep 5 done fi - exec $CMD "$@" -elif [ "x$EXECUTOR" = "xLocal" ] + if [ "$1" = "webserver" ]; then + echo "Initialize database..." 
+ $CMD initdb + exec $CMD webserver + else + sleep 10 + exec $CMD "$@" + fi +elif [ "$EXECUTOR" = "Local" ] then sed -i "s/executor = CeleryExecutor/executor = LocalExecutor/" "$AIRFLOW_HOME"/airflow.cfg - exec $CMD "$@" + if [ "$1" = "webserver" ]; then + echo "Initialize database..." + $CMD initdb + exec $CMD webserver + else + sleep 10 + exec $CMD "$@" + fi +# By default we use SequentialExecutor else if [ "$1" = "version" ]; then exec $CMD version From 5aaf70ff7179817a00489499cdf7c992de3de7ef Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 17 Feb 2017 13:58:23 +0100 Subject: [PATCH 081/163] Airflow rc4 --- Dockerfile | 1 - docker-compose-LocalExecutor.yml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index fafc428240..bd262dff13 100644 --- a/Dockerfile +++ b/Dockerfile @@ -56,7 +56,6 @@ RUN set -ex \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install "git+https://github.com/apache/incubator-airflow.git@v1-8-stable#egg=airflow[crypto,celery,postgres,hive,hdfs,jdbc]" \ - && pip install https://dist.apache.org/repos/dist/dev/incubator/airflow/airflow-1.8.0rc4+apache.incubating.tar.gz \ && pip install celery[redis]==3.1.17 \ && apt-get remove --purge -yqq $buildDeps \ && apt-get clean \ diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 415cb5f328..3c27043e15 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -34,4 +34,4 @@ services: # - LOAD_EX=n - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Local - command: scheduler -n 5 + command: scheduler From b319ff952101c3985d85e0a1263f15a7eeafb30d Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 17 Feb 2017 17:44:25 +0100 Subject: [PATCH 082/163] Update readme --- README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index 0a33efe611..ee0dc3e175 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ Check 
[Airflow Documentation](https://pythonhosted.org/airflow/) ## Install custom python package -- Create a file "requirements.txt" with the dedired python modules +- Create a file "requirements.txt" with the desired python modules - Mount this file as a volume `-v $(pwd)/requirements.txt:/requirements.txt` - The entrypoint.sh script execute the pip install command (with --user option) @@ -82,10 +82,6 @@ Easy scaling using docker-compose: This can be used to scale to a multi node setup using docker swarm. -## Links - - - Airflow on Kubernetes [kube-airflow](https://github.com/mumoshu/kube-airflow) - # Wanna help? Fork, improve and PR. ;-) From 126224743eadcc2204f34ee76a53da19ac9027ca Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 20 Feb 2017 17:32:31 +0100 Subject: [PATCH 083/163] Set version to rc4 --- Dockerfile | 2 +- docker-compose-CeleryExecutor.yml | 24 ++++++++++++------------ docker-compose-LocalExecutor.yml | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Dockerfile b/Dockerfile index bd262dff13..49556a28a7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.8.0 +# VERSION 1.8.0rc4 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . 
diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 5fba51dbf9..48987333a6 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -11,23 +11,23 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.0 + image: puckel/docker-airflow:1.8.0rc4 restart: always depends_on: - postgres - redis environment: - # - LOAD_EX=n + - LOAD_EX=n - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Celery - # volumes: - # - /localpath/to/dags:/usr/local/airflow/dags + volumes: + - /home/puckel/github/docker-airflow/dags:/usr/local/airflow/dags ports: - "8080:8080" command: webserver flower: - image: puckel/docker-airflow:1.8.0 + image: puckel/docker-airflow:1.8.0rc4 restart: always depends_on: - redis @@ -38,25 +38,25 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.8.0 + image: puckel/docker-airflow:1.8.0rc4 restart: always depends_on: - webserver - # volumes: - # - /localpath/to/dags:/usr/local/airflow/dags + volumes: + - /home/puckel/github/docker-airflow/dags:/usr/local/airflow/dags environment: - # - LOAD_EX=n + - LOAD_EX=n - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Celery command: scheduler # -n 5 worker: - image: puckel/docker-airflow:1.8.0 + image: puckel/docker-airflow:1.8.0rc4 restart: always depends_on: - scheduler - # volumes: - # - /localpath/to/dags:/usr/local/airflow/dags + volumes: + - /home/puckel/github/docker-airflow/dags:/usr/local/airflow/dags environment: - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Celery diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 3c27043e15..570bc4400a 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.0 + image: puckel/docker-airflow:1.8.0rc4 restart: always depends_on: - postgres @@ -23,7 
+23,7 @@ services: command: webserver scheduler: - image: puckel/docker-airflow:1.8.0 + image: puckel/docker-airflow:1.8.0rc4 restart: always depends_on: - webserver From d4636f9fde903dbd706fef98592bfb17ad8f9a7f Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 22 Feb 2017 11:46:45 +0100 Subject: [PATCH 084/163] Fix issue #60 - Run the scheduler and webserver on the same container --- docker-compose-LocalExecutor.yml | 15 --------------- script/entrypoint.sh | 8 ++------ 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 570bc4400a..310249fd50 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -15,23 +15,8 @@ services: environment: # - LOAD_EX=n - EXECUTOR=Local - - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA # volumes: # - /localpath/to/dags:/usr/local/airflow/dags ports: - "8080:8080" command: webserver - - scheduler: - image: puckel/docker-airflow:1.8.0rc4 - restart: always - depends_on: - - webserver - # volumes: - # - ./requirements.txt:/requirements.txt:ro - # - /localpath/to/dags:/usr/local/airflow/dags - environment: - # - LOAD_EX=n - - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - - EXECUTOR=Local - command: scheduler diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 5e9d9fe1b4..601a8342a6 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -65,14 +65,10 @@ then elif [ "$EXECUTOR" = "Local" ] then sed -i "s/executor = CeleryExecutor/executor = LocalExecutor/" "$AIRFLOW_HOME"/airflow.cfg - if [ "$1" = "webserver" ]; then echo "Initialize database..." 
$CMD initdb - exec $CMD webserver - else - sleep 10 - exec $CMD "$@" - fi + exec $CMD webserver & + exec $CMD scheduler # By default we use SequentialExecutor else if [ "$1" = "version" ]; then From 45e861f9141f24a1e857e9c4e85bdb8c66b2659b Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 22 Feb 2017 11:47:20 +0100 Subject: [PATCH 085/163] Use home folder as volume path for dags --- docker-compose-CeleryExecutor.yml | 14 +++++++------- docker-compose-LocalExecutor.yml | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 48987333a6..3b8c268f57 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -20,8 +20,8 @@ services: - LOAD_EX=n - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Celery - volumes: - - /home/puckel/github/docker-airflow/dags:/usr/local/airflow/dags + # volumes: + # - ~/github/docker-airflow/dags:/usr/local/airflow/dags ports: - "8080:8080" command: webserver @@ -42,21 +42,21 @@ services: restart: always depends_on: - webserver - volumes: - - /home/puckel/github/docker-airflow/dags:/usr/local/airflow/dags + # volumes: + # - ~/github/docker-airflow/dags:/usr/local/airflow/dags environment: - LOAD_EX=n - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Celery - command: scheduler # -n 5 + command: scheduler worker: image: puckel/docker-airflow:1.8.0rc4 restart: always depends_on: - scheduler - volumes: - - /home/puckel/github/docker-airflow/dags:/usr/local/airflow/dags + # volumes: + # - ~/github/docker-airflow/dags:/usr/local/airflow/dags environment: - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA - EXECUTOR=Celery diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 310249fd50..fc5eedb708 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -16,7 +16,7 @@ services: # - LOAD_EX=n - EXECUTOR=Local # volumes: - # - 
/localpath/to/dags:/usr/local/airflow/dags + # - ~/github/docker-airflow/dags:/usr/local/airflow/dags ports: - "8080:8080" command: webserver From fbbf504c7b25e2e4b37ec86bd31cea96f58ab15d Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 27 Feb 2017 11:08:12 +0100 Subject: [PATCH 086/163] Update fernet_key in compose file --- docker-compose-CeleryExecutor.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 3b8c268f57..169ae62179 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -18,7 +18,7 @@ services: - redis environment: - LOAD_EX=n - - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA + - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery # volumes: # - ~/github/docker-airflow/dags:/usr/local/airflow/dags @@ -46,7 +46,7 @@ services: # - ~/github/docker-airflow/dags:/usr/local/airflow/dags environment: - LOAD_EX=n - - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA + - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery command: scheduler @@ -58,6 +58,6 @@ services: # volumes: # - ~/github/docker-airflow/dags:/usr/local/airflow/dags environment: - - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA + - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery command: worker From aa44b8e262a38bc12dbb661805ea85ceaf7e4f02 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 27 Feb 2017 11:10:30 +0100 Subject: [PATCH 087/163] Update fernet_key in compose file --- docker-compose-CeleryExecutor.yml | 6 +++--- docker-compose-LocalExecutor.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 16079a34c2..725f54d128 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -18,7 +18,7 @@ services: - redis environment: # - LOAD_EX=n - - 
FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA + - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery # volumes: # - /localpath/to/dags:/usr/local/airflow/dags @@ -46,7 +46,7 @@ services: # - /localpath/to/dags:/usr/local/airflow/dags environment: # - LOAD_EX=n - - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA + - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery command: scheduler -n 5 @@ -58,6 +58,6 @@ services: # volumes: # - /localpath/to/dags:/usr/local/airflow/dags environment: - - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA + - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery command: worker diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index fc3ea8f58c..fbc1dbd7dc 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -15,7 +15,7 @@ services: environment: # - LOAD_EX=n - EXECUTOR=Local - - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA + - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= # volumes: # - /localpath/to/dags:/usr/local/airflow/dags ports: @@ -32,6 +32,6 @@ services: # - /localpath/to/dags:/usr/local/airflow/dags environment: # - LOAD_EX=n - - FERNET_KEY=9IoTgQ_EJ0hCsamBU3Mctc7F9OkODnndOKCwkwXCAA + - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Local command: scheduler -n 5 From c4668a15771a90a00593e4f70655784f58c85100 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 1 Mar 2017 15:58:32 +0100 Subject: [PATCH 088/163] Fix #69 - Specify via ENV variable postgresql and redis variable --- docker-compose-CeleryExecutor.yml | 9 +++++++++ script/entrypoint.sh | 28 ++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 169ae62179..5d74266fec 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -20,6 +20,9 @@ 
services: - LOAD_EX=n - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery + # - POSTGRES_USER=airflow + # - POSTGRES_PASSWORD=airflow + # - POSTGRES_DB=airflow # volumes: # - ~/github/docker-airflow/dags:/usr/local/airflow/dags ports: @@ -48,6 +51,9 @@ services: - LOAD_EX=n - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery + # - POSTGRES_USER=airflow + # - POSTGRES_PASSWORD=airflow + # - POSTGRES_DB=airflow command: scheduler worker: @@ -60,4 +66,7 @@ services: environment: - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery + # - POSTGRES_USER=airflow + # - POSTGRES_PASSWORD=airflow + # - POSTGRES_DB=airflow command: worker diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 601a8342a6..01bd51bd52 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -3,10 +3,16 @@ AIRFLOW_HOME="/usr/local/airflow" CMD="airflow" TRY_LOOP="20" -POSTGRES_HOST="postgres" -POSTGRES_PORT="5432" -REDIS_HOST="redis" -REDIS_PORT="6379" + +: ${REDIS_HOST:="redis"} +: ${REDIS_PORT:="6379"} + +: ${POSTGRES_HOST:="postgres"} +: ${POSTGRES_PORT:="5432"} +: ${POSTGRES_USER:="airflow"} +: ${POSTGRES_PASSWORD:="airflow"} +: ${POSTGRES_DB:="airflow"} + : ${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")} # Load DAGs exemples (default: Yes) @@ -54,6 +60,9 @@ then sleep 5 done fi + sed -i "s#celery_result_backend = db+postgresql://airflow:airflow@postgres/airflow#celery_result_backend = db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg + sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg + sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_HOST:$REDIS_PORT/1#" 
"$AIRFLOW_HOME"/airflow.cfg if [ "$1" = "webserver" ]; then echo "Initialize database..." $CMD initdb @@ -65,14 +74,17 @@ then elif [ "$EXECUTOR" = "Local" ] then sed -i "s/executor = CeleryExecutor/executor = LocalExecutor/" "$AIRFLOW_HOME"/airflow.cfg - echo "Initialize database..." - $CMD initdb - exec $CMD webserver & - exec $CMD scheduler + sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg + sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_HOST:$REDIS_PORT/1#" "$AIRFLOW_HOME"/airflow.cfg + echo "Initialize database..." + $CMD initdb + exec $CMD webserver & + exec $CMD scheduler # By default we use SequentialExecutor else if [ "$1" = "version" ]; then exec $CMD version + exit fi sed -i "s/executor = CeleryExecutor/executor = SequentialExecutor/" "$AIRFLOW_HOME"/airflow.cfg sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = sqlite:////usr/local/airflow/airflow.db#" "$AIRFLOW_HOME"/airflow.cfg From e5ff7f2174ffd56acea4314c69d5fe9757faa2fd Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 14 Mar 2017 10:56:42 +0100 Subject: [PATCH 089/163] Bump to rc5 --- Dockerfile | 2 +- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 49556a28a7..c0254ee90c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.8.0rc4 +# VERSION 1.8.0rc5 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . 
diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 5d74266fec..0ff8a01bd2 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -11,7 +11,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.0rc4 + image: puckel/docker-airflow:1.8.0rc5 restart: always depends_on: - postgres @@ -30,7 +30,7 @@ services: command: webserver flower: - image: puckel/docker-airflow:1.8.0rc4 + image: puckel/docker-airflow:1.8.0rc5 restart: always depends_on: - redis @@ -41,7 +41,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.8.0rc4 + image: puckel/docker-airflow:1.8.0rc5 restart: always depends_on: - webserver @@ -57,7 +57,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.8.0rc4 + image: puckel/docker-airflow:1.8.0rc5 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index fc5eedb708..473254afef 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.0rc4 + image: puckel/docker-airflow:1.8.0rc5 restart: always depends_on: - postgres From 2e4fee593b3959e9dcd68650358bda821efc7708 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 14 Mar 2017 10:59:10 +0100 Subject: [PATCH 090/163] Bump to rc5 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c0254ee90c..c7ea15d5c0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.8.0 +ARG AIRFLOW_VERSION=1.8.0rc5 ARG AIRFLOW_HOME=/usr/local/airflow # Define en_US. 
From 8354ea69bfd1b45dd585de40566881a994810f5d Mon Sep 17 00:00:00 2001 From: mendhak Date: Tue, 21 Mar 2017 06:55:51 +0000 Subject: [PATCH 091/163] Use Postgres Port env var when building connection string --- script/entrypoint.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 01bd51bd52..e1c741d6d0 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -60,8 +60,8 @@ then sleep 5 done fi - sed -i "s#celery_result_backend = db+postgresql://airflow:airflow@postgres/airflow#celery_result_backend = db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg - sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg + sed -i "s#celery_result_backend = db+postgresql://airflow:airflow@postgres/airflow#celery_result_backend = db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg + sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_HOST:$REDIS_PORT/1#" "$AIRFLOW_HOME"/airflow.cfg if [ "$1" = "webserver" ]; then echo "Initialize database..." 
@@ -74,7 +74,7 @@ then elif [ "$EXECUTOR" = "Local" ] then sed -i "s/executor = CeleryExecutor/executor = LocalExecutor/" "$AIRFLOW_HOME"/airflow.cfg - sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg + sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_HOST:$REDIS_PORT/1#" "$AIRFLOW_HOME"/airflow.cfg echo "Initialize database..." $CMD initdb From 748c00915734027ddebd3b6e8e9f39e9ec776d46 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 21 Mar 2017 10:35:03 +0100 Subject: [PATCH 092/163] Bump to 1.8.0 --- Dockerfile | 6 +++--- docker-compose-CeleryExecutor.yml | 12 ++++++------ docker-compose-LocalExecutor.yml | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index c7ea15d5c0..ac7333d5ad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.8.0rc5 +# VERSION 1.8.0 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.8.0rc5 +ARG AIRFLOW_VERSION=1.8.0 ARG AIRFLOW_HOME=/usr/local/airflow # Define en_US. 
@@ -55,7 +55,7 @@ RUN set -ex \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ - && pip install "git+https://github.com/apache/incubator-airflow.git@v1-8-stable#egg=airflow[crypto,celery,postgres,hive,hdfs,jdbc]" \ + && pip install airflow[crypto,celery,postgres,hive,hdfs,jdbc]==$AIRFLOW_VERSION \ && pip install celery[redis]==3.1.17 \ && apt-get remove --purge -yqq $buildDeps \ && apt-get clean \ diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 0ff8a01bd2..3051292da0 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -11,13 +11,13 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.0rc5 + image: puckel/docker-airflow:1.8.0 restart: always depends_on: - postgres - redis environment: - - LOAD_EX=n + - LOAD_EX=y - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery # - POSTGRES_USER=airflow @@ -30,7 +30,7 @@ services: command: webserver flower: - image: puckel/docker-airflow:1.8.0rc5 + image: puckel/docker-airflow:1.8.0 restart: always depends_on: - redis @@ -41,14 +41,14 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.8.0rc5 + image: puckel/docker-airflow:1.8.0 restart: always depends_on: - webserver # volumes: # - ~/github/docker-airflow/dags:/usr/local/airflow/dags environment: - - LOAD_EX=n + - LOAD_EX=y - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery # - POSTGRES_USER=airflow @@ -57,7 +57,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.8.0rc5 + image: puckel/docker-airflow:1.8.0 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 473254afef..a085c0651a 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,12 +8,12 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.0rc5 + image: 
puckel/docker-airflow:1.8.0 restart: always depends_on: - postgres environment: - # - LOAD_EX=n + - LOAD_EX=y - EXECUTOR=Local # volumes: # - ~/github/docker-airflow/dags:/usr/local/airflow/dags From c24b09c9aef8822f3433b8705e9c1d9c5a09ad05 Mon Sep 17 00:00:00 2001 From: Kristi Date: Sat, 25 Mar 2017 16:31:02 -0700 Subject: [PATCH 093/163] minor grammar changes in Readme.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ee0dc3e175..618a39b900 100644 --- a/README.md +++ b/README.md @@ -21,17 +21,17 @@ Pull the image from the Docker repository. ## Build -For example, if you need to install [Extra Packages](https://pythonhosted.org/airflow/installation.html#extra-package), edit the Dockerfile and than build-it. +For example, if you need to install [Extra Packages](https://pythonhosted.org/airflow/installation.html#extra-package), edit the Dockerfile and then build it. docker build --rm -t puckel/docker-airflow . ## Usage -By default, docker-airflow run Airflow with **SequentialExecutor** : +By default, docker-airflow runs Airflow with **SequentialExecutor** : docker run -d -p 8080:8080 puckel/docker-airflow -If you want to run other executor, you've to use the docker-compose.yml files provided in this repository. +If you want to run another executor, use the other docker-compose.yml files provided in this repository. 
For **LocalExecutor** : From 238b9edf9fb34e55668941fe66717f4b4dbc13fa Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 10 May 2017 11:01:29 +0200 Subject: [PATCH 094/163] Bump to 1.8.1 --- Dockerfile | 6 +++--- circle.yml | 2 +- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index ac7333d5ad..5f5b903ca1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.8.0 +# VERSION 1.8.1 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.8.0 +ARG AIRFLOW_VERSION=1.8.1 ARG AIRFLOW_HOME=/usr/local/airflow # Define en_US. @@ -55,7 +55,7 @@ RUN set -ex \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ - && pip install airflow[crypto,celery,postgres,hive,hdfs,jdbc]==$AIRFLOW_VERSION \ + && pip install apache-airflow[crypto,celery,postgres,hive,hdfs,jdbc]==$AIRFLOW_VERSION \ && pip install celery[redis]==3.1.17 \ && apt-get remove --purge -yqq $buildDeps \ && apt-get clean \ diff --git a/circle.yml b/circle.yml index 282f47e8b9..e7805a523b 100644 --- a/circle.yml +++ b/circle.yml @@ -12,4 +12,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.8.0' + - docker run puckel/docker-airflow version |grep '1.8.1' diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index aff209701f..19ba080cf3 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -11,7 +11,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.0 + image: puckel/docker-airflow:1.8.1 restart: always depends_on: - postgres @@ -30,7 +30,7 @@ services: command: webserver flower: - image: puckel/docker-airflow:1.8.0 + image: 
puckel/docker-airflow:1.8.1 restart: always depends_on: - redis @@ -41,7 +41,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.8.0 + image: puckel/docker-airflow:1.8.1 restart: always depends_on: - webserver @@ -57,7 +57,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.8.0 + image: puckel/docker-airflow:1.8.1 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index a085c0651a..09b1774a3b 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.0 + image: puckel/docker-airflow:1.8.1 restart: always depends_on: - postgres From ccb73bddc6fda8af0f4760e21f6e19993eefeb13 Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Thu, 11 May 2017 14:19:04 +0200 Subject: [PATCH 095/163] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 618a39b900..fe77b96232 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![Docker Pulls](https://img.shields.io/docker/pulls/puckel/docker-airflow.svg?maxAge=2592000)]() [![Docker Stars](https://img.shields.io/docker/stars/puckel/docker-airflow.svg?maxAge=2592000)]() -This repository contains **Dockerfile** of [airflow](https://github.com/apache/incubator-airflow) for [Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). +This repository contains **Dockerfile** of [apache-airflow](https://github.com/apache/incubator-airflow) for [Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). 
## Informations @@ -72,7 +72,6 @@ Check [Airflow Documentation](https://pythonhosted.org/airflow/) - Airflow: [localhost:8080](http://localhost:8080/) - Flower: [localhost:5555](http://localhost:5555/) -When using OSX with boot2docker, use: open http://$(boot2docker ip):8080 ## Scale the number of workers From 73de181ef8217afdb19377f1bd0cc71b3193c82e Mon Sep 17 00:00:00 2001 From: Ian Burrell Date: Tue, 9 May 2017 12:40:25 -0700 Subject: [PATCH 096/163] Add support for setting redis password with REDIS_PASSWORD --- script/entrypoint.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index e1c741d6d0..91539d4dd0 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -6,6 +6,7 @@ TRY_LOOP="20" : ${REDIS_HOST:="redis"} : ${REDIS_PORT:="6379"} +: ${REDIS_PASSWORD:=""} : ${POSTGRES_HOST:="postgres"} : ${POSTGRES_PORT:="5432"} @@ -28,6 +29,10 @@ fi # Update airflow config - Fernet key sed -i "s|\$FERNET_KEY|$FERNET_KEY|" "$AIRFLOW_HOME"/airflow.cfg +if [ -n "$REDIS_PASSWORD" ]; then + REDIS_HOST=:${REDIS_PASSWORD}@${REDIS_HOST} +fi + # Wait for Postresql if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; then i=0 @@ -75,7 +80,7 @@ elif [ "$EXECUTOR" = "Local" ] then sed -i "s/executor = CeleryExecutor/executor = LocalExecutor/" "$AIRFLOW_HOME"/airflow.cfg sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg - sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_HOST:$REDIS_PORT/1#" "$AIRFLOW_HOME"/airflow.cfg + sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1#" "$AIRFLOW_HOME"/airflow.cfg echo "Initialize database..." 
$CMD initdb exec $CMD webserver & From 5b52b28768ba24b98dc36a109ac8914f861ffc6c Mon Sep 17 00:00:00 2001 From: Ian Burrell Date: Thu, 11 May 2017 14:39:19 -0700 Subject: [PATCH 097/163] Change REDIS_PASSWORD to set REDIS_PREFIX Redis host detection doesn't work if REDIS_HOST changes --- script/entrypoint.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 91539d4dd0..66409cbb53 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -30,7 +30,9 @@ fi sed -i "s|\$FERNET_KEY|$FERNET_KEY|" "$AIRFLOW_HOME"/airflow.cfg if [ -n "$REDIS_PASSWORD" ]; then - REDIS_HOST=:${REDIS_PASSWORD}@${REDIS_HOST} + REDIS_PREFIX=:${REDIS_PASSWORD}@ +else + REDIS_PREFIX= fi # Wait for Postresql @@ -67,7 +69,7 @@ then fi sed -i "s#celery_result_backend = db+postgresql://airflow:airflow@postgres/airflow#celery_result_backend = db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg - sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_HOST:$REDIS_PORT/1#" "$AIRFLOW_HOME"/airflow.cfg + sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1#" "$AIRFLOW_HOME"/airflow.cfg if [ "$1" = "webserver" ]; then echo "Initialize database..." 
$CMD initdb From 9ef5f5598335e10c4fcdec4acc08738ddad555dd Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Tue, 16 May 2017 09:56:38 +0200 Subject: [PATCH 098/163] Update README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fe77b96232..e2f9a531f8 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ # docker-airflow [![CircleCI branch](https://img.shields.io/circleci/project/puckel/docker-airflow/master.svg?maxAge=2592000)](https://circleci.com/gh/puckel/docker-airflow/tree/master) +[![Docker Build Status](https://img.shields.io/docker/build/puckel/docker-airflow.svg)]() + [![Docker Hub](https://img.shields.io/badge/docker-ready-blue.svg)](https://hub.docker.com/r/puckel/docker-airflow/) -[![Docker Pulls](https://img.shields.io/docker/pulls/puckel/docker-airflow.svg?maxAge=2592000)]() -[![Docker Stars](https://img.shields.io/docker/stars/puckel/docker-airflow.svg?maxAge=2592000)]() +[![Docker Pulls](https://img.shields.io/docker/pulls/puckel/docker-airflow.svg)]() +[![Docker Stars](https://img.shields.io/docker/stars/puckel/docker-airflow.svg)]() This repository contains **Dockerfile** of [apache-airflow](https://github.com/apache/incubator-airflow) for [Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/puckel/docker-airflow/) published to the public [Docker Hub Registry](https://registry.hub.docker.com/). 
From 9c647c881b5105a983c2222a66b803d1d18419e4 Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Thu, 22 Jun 2017 11:04:40 +0200 Subject: [PATCH 099/163] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e2f9a531f8..8e2852345c 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a * Based on Debian Jessie official Image [debian:jessie](https://registry.hub.docker.com/_/debian/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue * Install [Docker](https://www.docker.com/) * Install [Docker Compose](https://docs.docker.com/compose/install/) -* Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/airflow) +* Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow) ## Installation From 62009f2fdfc5f1f3ccafdaaa2b8dc0f4f8146f82 Mon Sep 17 00:00:00 2001 From: Ian Burrell Date: Thu, 29 Jun 2017 11:04:01 -0700 Subject: [PATCH 100/163] Change base to debian:stretch Stretch has the latest pip but not setuptools or wheel, so updating pip doesn't update the others. Not updating setuptools or wheel causes build failures. --- Dockerfile | 4 ++-- README.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5f5b903ca1..0f0a9aa9c5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ # BUILD: docker build --rm -t puckel/docker-airflow . 
# SOURCE: https://github.com/puckel/docker-airflow -FROM debian:jessie +FROM debian:stretch MAINTAINER Puckel_ # Never prompts the user for choices on installation/configuration of packages @@ -49,7 +49,7 @@ RUN set -ex \ && locale-gen \ && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \ - && python -m pip install -U pip \ + && python -m pip install -U pip setuptools wheel \ && pip install Cython \ && pip install pytz \ && pip install pyOpenSSL \ diff --git a/README.md b/README.md index 8e2852345c..6ccc8997dc 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a ## Informations -* Based on Debian Jessie official Image [debian:jessie](https://registry.hub.docker.com/_/debian/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue +* Based on Debian Stretch official Image [debian:jessie](https://registry.hub.docker.com/_/debian/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue * Install [Docker](https://www.docker.com/) * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow) From 5f9cb15c99c8f8ead26125910b63c144c6e53421 Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Thu, 27 Jul 2017 10:35:32 +0200 Subject: [PATCH 101/163] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6ccc8997dc..59900a4ad2 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a ## Informations -* Based on Debian Stretch official Image [debian:jessie](https://registry.hub.docker.com/_/debian/) and uses the 
official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue +* Based on Debian Stretch official Image [debian:stretch](https://registry.hub.docker.com/_/debian/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue * Install [Docker](https://www.docker.com/) * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow) From 87db6f5d788c78cf96c1792ebecc75f9e1bc9ea6 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 27 Jul 2017 16:06:58 +0200 Subject: [PATCH 102/163] Update to python 3 --- Dockerfile | 12 ++++++------ README.md | 2 +- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0f0a9aa9c5..6cf730317b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ -# VERSION 1.8.1 +# VERSION 1.8.1-1 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . 
# SOURCE: https://github.com/puckel/docker-airflow -FROM debian:stretch +FROM python:3.6-stretch MAINTAINER Puckel_ # Never prompts the user for choices on installation/configuration of packages @@ -25,7 +25,7 @@ ENV LC_ALL en_US.UTF-8 RUN set -ex \ && buildDeps=' \ - python-dev \ + python3-dev \ libkrb5-dev \ libsasl2-dev \ libssl-dev \ @@ -39,8 +39,8 @@ RUN set -ex \ && apt-get update -yqq \ && apt-get install -yqq --no-install-recommends \ $buildDeps \ - python-pip \ - python-requests \ + python3-pip \ + python3-requests \ apt-utils \ curl \ netcat \ @@ -55,7 +55,7 @@ RUN set -ex \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ - && pip install apache-airflow[crypto,celery,postgres,hive,hdfs,jdbc]==$AIRFLOW_VERSION \ + && pip install apache-airflow[crypto,celery,postgres,hive,jdbc]==$AIRFLOW_VERSION \ && pip install celery[redis]==3.1.17 \ && apt-get remove --purge -yqq $buildDeps \ && apt-get clean \ diff --git a/README.md b/README.md index 59900a4ad2..625b96e1b1 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a ## Informations -* Based on Debian Stretch official Image [debian:stretch](https://registry.hub.docker.com/_/debian/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue +* Based on Python (3.6-stretch) official Image [python:3.6-stretch](https://hub.docker.com/_/python/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue * Install [Docker](https://www.docker.com/) * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 19ba080cf3..ef100f767b 100644 --- 
a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -11,7 +11,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.1 + image: puckel/docker-airflow:1.8.1-1 restart: always depends_on: - postgres @@ -30,7 +30,7 @@ services: command: webserver flower: - image: puckel/docker-airflow:1.8.1 + image: puckel/docker-airflow:1.8.1-1 restart: always depends_on: - redis @@ -41,7 +41,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.8.1 + image: puckel/docker-airflow:1.8.1-1 restart: always depends_on: - webserver @@ -57,7 +57,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.8.1 + image: puckel/docker-airflow:1.8.1-1 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 09b1774a3b..21352869fc 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.1 + image: puckel/docker-airflow:1.8.1-1 restart: always depends_on: - postgres From 9085909fa927181c8343dd16f607e8deb468a45f Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Tue, 8 Aug 2017 09:50:16 +0200 Subject: [PATCH 103/163] Create LICENSE --- LICENSE | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..917c8efe0d --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2017 Matthieu "Puckel_" Roisil + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. From c104e2b85c7396fc232714ebd133bfe61290abe7 Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Thu, 17 Aug 2017 14:52:32 +0200 Subject: [PATCH 104/163] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 625b96e1b1..9d6bcaafc7 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow) +/!\ If you want to use Airflow using Python 2, use TAG [1.8.1](https://github.com/puckel/docker-airflow/releases/tag/1.8.1) + ## Installation Pull the image from the Docker repository. 
From 60c2a645ad1696ff8fc6b93f458cafb5d25feef9 Mon Sep 17 00:00:00 2001 From: eshizhan Date: Tue, 29 Aug 2017 14:25:35 +0800 Subject: [PATCH 105/163] purge the depends of packages like build-essential using `purge --auto-remove` instead of `remove` --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 6cf730317b..eb4fcb08cc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -57,7 +57,7 @@ RUN set -ex \ && pip install pyasn1 \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc]==$AIRFLOW_VERSION \ && pip install celery[redis]==3.1.17 \ - && apt-get remove --purge -yqq $buildDeps \ + && apt-get purge --auto-remove -yqq $buildDeps \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* \ From f423735a563eadbfc7a669e32c4bdc8642da9625 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 5 Sep 2017 11:22:03 +0200 Subject: [PATCH 106/163] Bump to 1.8.2 --- Dockerfile | 4 +-- circle.yml | 2 +- dags/tuto.py | 55 +++++++++++++++++++++++++++++++ docker-compose-CeleryExecutor.yml | 24 +++++++------- docker-compose-LocalExecutor.yml | 8 ++--- 5 files changed, 74 insertions(+), 19 deletions(-) create mode 100644 dags/tuto.py diff --git a/Dockerfile b/Dockerfile index eb4fcb08cc..067bcdd6f2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ # BUILD: docker build --rm -t puckel/docker-airflow . # SOURCE: https://github.com/puckel/docker-airflow -FROM python:3.6-stretch +FROM python:3.6-slim MAINTAINER Puckel_ # Never prompts the user for choices on installation/configuration of packages @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.8.1 +ARG AIRFLOW_VERSION=1.8.2 ARG AIRFLOW_HOME=/usr/local/airflow # Define en_US. 
diff --git a/circle.yml b/circle.yml index e7805a523b..5981fe23ae 100644 --- a/circle.yml +++ b/circle.yml @@ -12,4 +12,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.8.1' + - docker run puckel/docker-airflow version |grep '1.8.2' diff --git a/dags/tuto.py b/dags/tuto.py new file mode 100644 index 0000000000..40a0845385 --- /dev/null +++ b/dags/tuto.py @@ -0,0 +1,55 @@ +""" +Code that goes along with the Airflow located at: +http://airflow.readthedocs.org/en/latest/tutorial.html +""" +from airflow import DAG +from airflow.operators.bash_operator import BashOperator +from datetime import datetime, timedelta + + +default_args = { + 'owner': 'airflow', + 'depends_on_past': False, + 'start_date': datetime(2015, 6, 1), + 'email': ['airflow@airflow.com'], + 'email_on_failure': False, + 'email_on_retry': False, + 'retries': 1, + 'retry_delay': timedelta(minutes=5), + # 'queue': 'bash_queue', + # 'pool': 'backfill', + # 'priority_weight': 10, + # 'end_date': datetime(2016, 1, 1), +} + +dag = DAG( + 'tutorial', default_args=default_args, schedule_interval=timedelta(1)) + +# t1, t2 and t3 are examples of tasks created by instantiating operators +t1 = BashOperator( + task_id='print_date', + bash_command='date', + dag=dag) + +t2 = BashOperator( + task_id='sleep', + bash_command='sleep 5', + retries=3, + dag=dag) + +templated_command = """ + {% for i in range(5) %} + echo "{{ ds }}" + echo "{{ macros.ds_add(ds, 7)}}" + echo "{{ params.my_param }}" + {% endfor %} +""" + +t3 = BashOperator( + task_id='templated', + bash_command=templated_command, + params={'my_param': 'Parameter I passed in'}, + dag=dag) + +t2.set_upstream(t1) +t3.set_upstream(t1) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index ef100f767b..ce0a49561b 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -11,26 +11,26 @@ services: - POSTGRES_DB=airflow webserver: - image: 
puckel/docker-airflow:1.8.1-1 + image: puckel/docker-airflow:1.8.2 restart: always depends_on: - postgres - redis environment: - - LOAD_EX=y + - LOAD_EX=n - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery # - POSTGRES_USER=airflow # - POSTGRES_PASSWORD=airflow # - POSTGRES_DB=airflow - # volumes: - # - ~/docker-airflow/dags:/usr/local/airflow/dags + volumes: + - ./dags:/usr/local/airflow/dags ports: - "8080:8080" command: webserver flower: - image: puckel/docker-airflow:1.8.1-1 + image: puckel/docker-airflow:1.8.2 restart: always depends_on: - redis @@ -41,14 +41,14 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.8.1-1 + image: puckel/docker-airflow:1.8.2 restart: always depends_on: - webserver - # volumes: - # - ~/docker-airflow/dags:/usr/local/airflow/dags + volumes: + - ./dags:/usr/local/airflow/dags environment: - - LOAD_EX=y + - LOAD_EX=n - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery # - POSTGRES_USER=airflow @@ -57,12 +57,12 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.8.1-1 + image: puckel/docker-airflow:1.8.2 restart: always depends_on: - scheduler - # volumes: - # - ~/docker-airflow/dags:/usr/local/airflow/dags + volumes: + - ./dags:/usr/local/airflow/dags environment: - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 21352869fc..d4f620d0c0 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,15 +8,15 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.1-1 + image: puckel/docker-airflow:1.8.2 restart: always depends_on: - postgres environment: - - LOAD_EX=y + - LOAD_EX=n - EXECUTOR=Local - # volumes: - # - ~/github/docker-airflow/dags:/usr/local/airflow/dags + volumes: + - ./dags:/usr/local/airflow/dags ports: - "8080:8080" command: webserver From 
0131e6aa789da44f3583573e88c2edf07c8a0bde Mon Sep 17 00:00:00 2001 From: Danny Carrillo Date: Wed, 13 Sep 2017 10:28:27 -0700 Subject: [PATCH 107/163] Adds rsync as a default package --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 067bcdd6f2..24920d5eb6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,6 +43,7 @@ RUN set -ex \ python3-requests \ apt-utils \ curl \ + rsync \ netcat \ locales \ && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \ From 19c93b77dbe6a6afbc7099159486becae261705f Mon Sep 17 00:00:00 2001 From: nolan emirot Date: Thu, 28 Sep 2017 16:08:26 -0700 Subject: [PATCH 108/163] Update python base image --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9d6bcaafc7..351cda6988 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a ## Informations -* Based on Python (3.6-stretch) official Image [python:3.6-stretch](https://hub.docker.com/_/python/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue +* Based on Python (3.6-slim) official Image [python:3.6-slim](https://hub.docker.com/_/python/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue * Install [Docker](https://www.docker.com/) * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow) From 650f5f476ce84a558f426dbf3ac94d994249bbb0 Mon Sep 17 00:00:00 2001 From: Matthew Housley Date: Fri, 29 Sep 2017 15:44:15 -0600 Subject: [PATCH 109/163] Add options to compose file Add code to docker-compose-CeleryExecutor.yml to set a Redis password and persist pgsql data locally. 
--- docker-compose-CeleryExecutor.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index ce0a49561b..928f450b47 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -2,6 +2,7 @@ version: '2' services: redis: image: 'redis:3.2.7' + # command: redis-server --requirepass redispass postgres: image: postgres:9.6 @@ -9,6 +10,10 @@ services: - POSTGRES_USER=airflow - POSTGRES_PASSWORD=airflow - POSTGRES_DB=airflow + # Uncomment these lines to persist data on the local filesystem. + # - PGDATA=/var/lib/postgresql/data/pgdata + # volumes: + # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: image: puckel/docker-airflow:1.8.2 @@ -23,6 +28,7 @@ services: # - POSTGRES_USER=airflow # - POSTGRES_PASSWORD=airflow # - POSTGRES_DB=airflow + # - REDIS_PASSWORD=redispass volumes: - ./dags:/usr/local/airflow/dags ports: @@ -36,6 +42,7 @@ services: - redis environment: - EXECUTOR=Celery + # - REDIS_PASSWORD=redispass ports: - "5555:5555" command: flower @@ -54,6 +61,7 @@ services: # - POSTGRES_USER=airflow # - POSTGRES_PASSWORD=airflow # - POSTGRES_DB=airflow + # - REDIS_PASSWORD=redispass command: scheduler worker: @@ -69,4 +77,5 @@ services: # - POSTGRES_USER=airflow # - POSTGRES_PASSWORD=airflow # - POSTGRES_DB=airflow + # - REDIS_PASSWORD=redispass command: worker From 305a90f1baafebddd603a6205a656543ebb36869 Mon Sep 17 00:00:00 2001 From: Ash Berlin-Taylor Date: Thu, 7 Sep 2017 14:49:01 +0100 Subject: [PATCH 110/163] Quote variables to remove shellcheck warnings. If any of these had spaces in them they could have caused interesting behaviour. As a safety measure lets just quote them. 
--- script/entrypoint.sh | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 66409cbb53..dd1fefa2d7 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -4,17 +4,17 @@ AIRFLOW_HOME="/usr/local/airflow" CMD="airflow" TRY_LOOP="20" -: ${REDIS_HOST:="redis"} -: ${REDIS_PORT:="6379"} -: ${REDIS_PASSWORD:=""} +: "${REDIS_HOST:="redis"}" +: "${REDIS_PORT:="6379"}" +: "${REDIS_PASSWORD:=""}" -: ${POSTGRES_HOST:="postgres"} -: ${POSTGRES_PORT:="5432"} -: ${POSTGRES_USER:="airflow"} -: ${POSTGRES_PASSWORD:="airflow"} -: ${POSTGRES_DB:="airflow"} +: "${POSTGRES_HOST:="postgres"}" +: "${POSTGRES_PORT:="5432"}" +: "${POSTGRES_USER:="airflow"}" +: "${POSTGRES_PASSWORD:="airflow"}" +: "${POSTGRES_DB:="airflow"}" -: ${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")} +: "${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}" # Load DAGs exemples (default: Yes) if [ "$LOAD_EX" = "n" ]; then @@ -38,7 +38,7 @@ fi # Wait for Postresql if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; then i=0 - while ! nc -z $POSTGRES_HOST $POSTGRES_PORT >/dev/null 2>&1 < /dev/null; do + while ! nc -z "$POSTGRES_HOST" "$POSTGRES_PORT" >/dev/null 2>&1 < /dev/null; do i=$((i+1)) if [ "$1" = "webserver" ]; then echo "$(date) - waiting for ${POSTGRES_HOST}:${POSTGRES_PORT}... $i/$TRY_LOOP" @@ -57,7 +57,7 @@ then # Wait for Redis if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then j=0 - while ! nc -z $REDIS_HOST $REDIS_PORT >/dev/null 2>&1 < /dev/null; do + while ! 
nc -z "$REDIS_HOST" "$REDIS_PORT" >/dev/null 2>&1 < /dev/null; do j=$((j+1)) if [ $j -ge $TRY_LOOP ]; then echo "$(date) - $REDIS_HOST still not reachable, giving up" @@ -73,10 +73,10 @@ then if [ "$1" = "webserver" ]; then echo "Initialize database..." $CMD initdb - exec $CMD webserver + exec "$CMD" webserver else sleep 10 - exec $CMD "$@" + exec "$CMD" "$@" fi elif [ "$EXECUTOR" = "Local" ] then @@ -85,17 +85,17 @@ then sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1#" "$AIRFLOW_HOME"/airflow.cfg echo "Initialize database..." $CMD initdb - exec $CMD webserver & - exec $CMD scheduler + exec "$CMD" webserver & + exec "$CMD" scheduler # By default we use SequentialExecutor else if [ "$1" = "version" ]; then - exec $CMD version + exec "$CMD" version exit fi sed -i "s/executor = CeleryExecutor/executor = SequentialExecutor/" "$AIRFLOW_HOME"/airflow.cfg sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = sqlite:////usr/local/airflow/airflow.db#" "$AIRFLOW_HOME"/airflow.cfg echo "Initialize database..." 
$CMD initdb - exec $CMD webserver + exec "$CMD" webserver fi From 48e7b773c4c5ef58aebd8ebc8e6dbaa80c35cb81 Mon Sep 17 00:00:00 2001 From: Ash Berlin-Taylor Date: Thu, 7 Sep 2017 15:00:19 +0100 Subject: [PATCH 111/163] Set Airflow config via environment variables, rather than sed Airfow will look at "AIRFLOW__${section}__${setting}" in preference to anything in the config file, so using this we can 1) avoid having to run sed, and 2) means we don't have to change the config file at run time --- README.md | 8 +++++++- script/entrypoint.sh | 38 +++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 351cda6988..f07e8ea2a9 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,13 @@ For encrypted connection passwords (in Local or Celery Executor), you must have python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY" -Check [Airflow Documentation](https://pythonhosted.org/airflow/) +## Configurating Airflow + +It is possible to set any configuration value for Airflow from environment variables, which are used over values from the airflow.cfg. The general rule is the environment variable should be named `AIRFLOW__
<section>__<key>`, for example `AIRFLOW__CORE__SQL_ALCHEMY_CONN` sets the `sql_alchemy_conn` config option in the `[core]` section. + +Check out the [Airflow documentation](http://airflow.readthedocs.io/en/latest/configuration.html?highlight=__CORE__#setting-configuration-options) for more details + +You can also define connections via environment variables by prefixing them with `AIRFLOW_CONN_` - for example `AIRFLOW_CONN_POSTGRES_MASTER=postgres://user:password@localhost:5432/master` for a connection called "postgres_master". The value is parsed as a URI. This will work for hooks etc, but won't show up in the "Ad-hoc Query" section unless an (empty) connection is also created in the DB ## Install custom python package diff --git a/script/entrypoint.sh b/script/entrypoint.sh index dd1fefa2d7..3edc21bbdb 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -AIRFLOW_HOME="/usr/local/airflow" CMD="airflow" TRY_LOOP="20" @@ -14,11 +13,23 @@ TRY_LOOP="20" : "${POSTGRES_PASSWORD:="airflow"}" : "${POSTGRES_DB:="airflow"}" -: "${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}" +# Defaults and back-compat +: "${AIRFLOW__CORE__FERNET_KEY:=${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}}" +: "${AIRFLOW__CORE__EXECUTOR:=${EXECUTOR:-Sequential}Executor}" + +export \ + AIRFLOW__CELERY__BROKER_URL \ + AIRFLOW__CELERY__CELERY_RESULT_BACKEND \ + AIRFLOW__CORE__EXECUTOR \ + AIRFLOW__CORE__FERNET_KEY \ + AIRFLOW__CORE__LOAD_EXAMPLES \ + AIRFLOW__CORE__SQL_ALCHEMY_CONN \ + # Load DAGs exemples (default: Yes) -if [ "$LOAD_EX" = "n" ]; then - sed -i "s/load_examples = True/load_examples = False/" "$AIRFLOW_HOME"/airflow.cfg +if [[ -z "$AIRFLOW__CORE__LOAD_EXAMPLES" && "${LOAD_EX:=n}" == n ]] +then + AIRFLOW__CORE__LOAD_EXAMPLES=False fi # Install custome python package if requirements.txt is
present @@ -26,9 +37,6 @@ if [ -e "/requirements.txt" ]; then $(which pip) install --user -r /requirements.txt fi -# Update airflow config - Fernet key -sed -i "s|\$FERNET_KEY|$FERNET_KEY|" "$AIRFLOW_HOME"/airflow.cfg - if [ -n "$REDIS_PASSWORD" ]; then REDIS_PREFIX=:${REDIS_PASSWORD}@ else @@ -51,8 +59,12 @@ if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; the done fi +AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" +AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1" +AIRFLOW__CELERY__CELERY_RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" + # Update configuration depending the type of Executor -if [ "$EXECUTOR" = "Celery" ] +if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ] then # Wait for Redis if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then @@ -67,9 +79,6 @@ then sleep 5 done fi - sed -i "s#celery_result_backend = db+postgresql://airflow:airflow@postgres/airflow#celery_result_backend = db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg - sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg - sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1#" "$AIRFLOW_HOME"/airflow.cfg if [ "$1" = "webserver" ]; then echo "Initialize database..." 
$CMD initdb @@ -78,11 +87,8 @@ then sleep 10 exec "$CMD" "$@" fi -elif [ "$EXECUTOR" = "Local" ] +elif [ "$AIRFLOW__CORE__EXECUTOR" = "LocalExecutor" ] then - sed -i "s/executor = CeleryExecutor/executor = LocalExecutor/" "$AIRFLOW_HOME"/airflow.cfg - sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB#" "$AIRFLOW_HOME"/airflow.cfg - sed -i "s#broker_url = redis://redis:6379/1#broker_url = redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1#" "$AIRFLOW_HOME"/airflow.cfg echo "Initialize database..." $CMD initdb exec "$CMD" webserver & @@ -93,8 +99,6 @@ else exec "$CMD" version exit fi - sed -i "s/executor = CeleryExecutor/executor = SequentialExecutor/" "$AIRFLOW_HOME"/airflow.cfg - sed -i "s#sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow#sql_alchemy_conn = sqlite:////usr/local/airflow/airflow.db#" "$AIRFLOW_HOME"/airflow.cfg echo "Initialize database..." $CMD initdb exec "$CMD" webserver From fab44cd14f1ca616a619c77776a7978d0763d190 Mon Sep 17 00:00:00 2001 From: Adam Hodges Date: Wed, 20 Dec 2017 11:30:49 -0500 Subject: [PATCH 112/163] Add a healthcheck to the webserver This change is to reduce downtime when the issue described in https://issues.apache.org/jira/browse/AIRFLOW-1235 is encountered. (The master gunicorn process dies but the airflow CLI does not fail or restart, resulting in a dead webserver). Currently when this condition is met, the webserver has to be manually killed or restarted. 
This change will poll for the airflow-webserver pid file and the webserver container will restart if it is not present (which is the case when the gunicorn master dies). Related to #125 --- docker-compose-CeleryExecutor.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index ce0a49561b..9842ce09e6 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -1,4 +1,4 @@ -version: '2' +version: '2.1' services: redis: image: 'redis:3.2.7' @@ -28,6 +28,11 @@ services: ports: - "8080:8080" command: webserver + healthcheck: + test: ["CMD-SHELL", "[ -f /usr/local/airflow/airflow-webserver.pid ]"] + interval: 30s + timeout: 30s + retries: 3 flower: image: puckel/docker-airflow:1.8.2 From 9c01d95ed33387bb0a58abfd74e21ebf6f80b3bf Mon Sep 17 00:00:00 2001 From: Adam Hodges Date: Wed, 20 Dec 2017 11:33:34 -0500 Subject: [PATCH 113/163] Add a healthcheck to the webserver --- docker-compose-LocalExecutor.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index d4f620d0c0..d05c06dde7 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -1,4 +1,4 @@ -version: '2' +version: '2.1' services: postgres: image: postgres:9.6 @@ -20,3 +20,8 @@ services: ports: - "8080:8080" command: webserver + healthcheck: + test: ["CMD-SHELL", "[ -f /usr/local/airflow/airflow-webserver.pid ]"] + interval: 30s + timeout: 30s + retries: 3 From 4a8c02d2b36f6db146301e3396c98138e4baf517 Mon Sep 17 00:00:00 2001 From: Ash Berlin-Taylor Date: Fri, 8 Sep 2017 12:11:53 +0100 Subject: [PATCH 114/163] Generalize the wait for port We had two almost identical loops. Let's have one that takes parameters.
--- script/entrypoint.sh | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 3edc21bbdb..a7b32241dc 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -43,20 +43,22 @@ else REDIS_PREFIX= fi -# Wait for Postresql -if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; then - i=0 - while ! nc -z "$POSTGRES_HOST" "$POSTGRES_PORT" >/dev/null 2>&1 < /dev/null; do - i=$((i+1)) - if [ "$1" = "webserver" ]; then - echo "$(date) - waiting for ${POSTGRES_HOST}:${POSTGRES_PORT}... $i/$TRY_LOOP" - if [ $i -ge $TRY_LOOP ]; then - echo "$(date) - ${POSTGRES_HOST}:${POSTGRES_PORT} still not reachable, giving up" - exit 1 - fi +wait_for_port() { + local name="$1" host="$2" port="$3" + local j=0 + while ! nc -z "$host" "$port" >/dev/null 2>&1 < /dev/null; do + j=$((j+1)) + if [ $j -ge $TRY_LOOP ]; then + echo >&2 "$(date) - $host:$port still not reachable, giving up" + exit 1 fi - sleep 10 + echo "$(date) - waiting for $name... $j/$TRY_LOOP" + sleep 5 done +} + +if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; then + wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT" fi AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" @@ -68,16 +70,7 @@ if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ] then # Wait for Redis if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then - j=0 - while ! nc -z "$REDIS_HOST" "$REDIS_PORT" >/dev/null 2>&1 < /dev/null; do - j=$((j+1)) - if [ $j -ge $TRY_LOOP ]; then - echo "$(date) - $REDIS_HOST still not reachable, giving up" - exit 1 - fi - echo "$(date) - waiting for Redis... $j/$TRY_LOOP" - sleep 5 - done + wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT" fi if [ "$1" = "webserver" ]; then echo "Initialize database..." 
From e343d0dddc8f8dfa6ed700abbcd1b3d0db818c6f Mon Sep 17 00:00:00 2001 From: Ash Berlin-Taylor Date: Fri, 8 Sep 2017 12:40:17 +0100 Subject: [PATCH 115/163] Entrypoint now supports running `bash` or other airflow subcommands. Sometimes it's nice to be able to get a bash shell in the right context (without having to specify a custom entrypoint). This change means we can do: docker run --rm -ti puckel/docker-airflow bash or docker run --rm -ti puckel/docker-airflow airflow clear my_dag task_id -f --- README.md | 15 ++++++++++ script/entrypoint.sh | 71 +++++++++++++++++++++++--------------------- 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index f07e8ea2a9..a7c80891e1 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,21 @@ Easy scaling using docker-compose: This can be used to scale to a multi node setup using docker swarm. +## Running other airflow commands + +If you want to run other airflow sub-commands, such as `list_dags` or `clear` you can do so like this: + + docker run --rm -ti puckel/docker-airflow airflow list_dags + +or with your docker-compose set up like this: + + docker-compose -f docker-compose-CeleryExecutor.yml run --rm webserver airflow list_dags + +You can also use this to run a bash shell or any other command in the same environment that airflow would be run in: + + docker run --rm -ti puckel/docker-airflow bash + docker run --rm -ti puckel/docker-airflow ipython + # Wanna help? Fork, improve and PR.
;-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index a7b32241dc..254a37b3c1 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -CMD="airflow" TRY_LOOP="20" : "${REDIS_HOST:="redis"}" @@ -57,42 +56,46 @@ wait_for_port() { done } -if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] ; then - wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT" -fi +wait_for_redis() { + # Wait for Redis iff we are using it + if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ] + then + wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT" + fi +} AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1" AIRFLOW__CELERY__CELERY_RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" -# Update configuration depending the type of Executor -if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ] -then - # Wait for Redis - if [ "$1" = "webserver" ] || [ "$1" = "worker" ] || [ "$1" = "scheduler" ] || [ "$1" = "flower" ] ; then - wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT" - fi - if [ "$1" = "webserver" ]; then - echo "Initialize database..." - $CMD initdb - exec "$CMD" webserver - else +case "$1" in + webserver) + wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT" + wait_for_redis + airflow initdb + if [ "$AIRFLOW__CORE__EXECUTOR" = "LocalExecutor" ]; + then + # With the "Local" executor it should all run in one container. + airflow scheduler & + fi + exec airflow webserver + ;; + worker|scheduler) + wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT" + wait_for_redis + # To give the webserver time to run initdb. sleep 10 - exec "$CMD" "$@" - fi -elif [ "$AIRFLOW__CORE__EXECUTOR" = "LocalExecutor" ] -then - echo "Initialize database..." 
- $CMD initdb - exec "$CMD" webserver & - exec "$CMD" scheduler -# By default we use SequentialExecutor -else - if [ "$1" = "version" ]; then - exec "$CMD" version - exit - fi - echo "Initialize database..." - $CMD initdb - exec "$CMD" webserver -fi + exec airflow "$@" + ;; + flower) + wait_for_redis + exec airflow "$@" + ;; + version) + exec airflow "$@" + ;; + *) + # The command is something like bash, not an airflow subcommand. Just run it in the right environment. + exec "$@" + ;; +esac From c8f03615415afcb2669b8c004077cba5d48adc87 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 3 Jan 2018 08:40:42 +0100 Subject: [PATCH 116/163] Update to Apache Airflow 1.9 Airflow 1.9 has been released, therefore we would like to update the images. --- .gitignore | 3 ++ Dockerfile | 4 +- circle.yml | 2 +- config/airflow.cfg | 78 +++++++++++++++++++++++++++---- docker-compose-CeleryExecutor.yml | 8 ++-- docker-compose-LocalExecutor.yml | 2 +- 6 files changed, 79 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index 9c0f4ca48f..991a0fb04c 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,6 @@ Session.vim # sftp configuration file sftp-config.json + +# Python +__pycache__ diff --git a/Dockerfile b/Dockerfile index 24920d5eb6..55caedff4e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.8.2 +ARG AIRFLOW_VERSION=1.9.0 ARG AIRFLOW_HOME=/usr/local/airflow # Define en_US. 
@@ -57,7 +57,7 @@ RUN set -ex \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc]==$AIRFLOW_VERSION \ - && pip install celery[redis]==3.1.17 \ + && pip install celery[redis]==4.0.2 \ && apt-get purge --auto-remove -yqq $buildDeps \ && apt-get clean \ && rm -rf \ diff --git a/circle.yml b/circle.yml index 5981fe23ae..cd93ab2f3a 100644 --- a/circle.yml +++ b/circle.yml @@ -12,4 +12,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.8.2' + - docker run puckel/docker-airflow version |grep '1.9.0' diff --git a/config/airflow.cfg b/config/airflow.cfg index 6d313f039f..6da94249fa 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -12,18 +12,26 @@ dags_folder = /usr/local/airflow/dags base_log_folder = /usr/local/airflow/logs # Airflow can store logs remotely in AWS S3 or Google Cloud Storage. Users -# must supply a remote location URL (starting with either 's3://...' or -# 'gs://...') and an Airflow connection id that provides access to the storage +# must supply an Airflow connection id that provides access to the storage # location. -remote_base_log_folder = remote_log_conn_id = -# Use server-side encryption for logs stored in S3 encrypt_s3_logs = False -# DEPRECATED option for remote log storage, use remote_base_log_folder instead! -s3_log_folder = + +# Logging level +logging_level = INFO + +# Logging class +# Specify the class that will specify the logging configuration +# This class has to be on the python classpath +# logging_config_class = my.path.default_local_settings.LOGGING_CONFIG +logging_config_class = + +# Log format +log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s +simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s # The executor class that airflow should use. 
Choices include -# SequentialExecutor, LocalExecutor, CeleryExecutor +# SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor executor = CeleryExecutor # The SqlAlchemy connection string to the metadata database. @@ -89,6 +97,18 @@ security = # values at runtime) unit_test_mode = False +# Name of handler to read task instance logs. +# Default to use file task handler. +task_log_reader = file.task + +# Whether to enable pickling for xcom (note that this is insecure and allows for +# RCE exploits). This will be deprecated in Airflow 2.0 (be forced to False). +enable_xcom_pickling = True + +# When a task is killed forcefully, this is the amount of time in seconds that +# it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED +killed_task_cleanup_time = 60 + [cli] # In what way should the cli access the API. The LocalClient will use the # database directly, while the json_client will use the api running on the @@ -168,6 +188,10 @@ filter_by_owner = False # in order to user the ldapgroup mode. owner_mode = user +# Default DAG view. Valid values are: +# tree, graph, duration, gantt, landing_times +dag_default_view = tree + # Default DAG orientation. Valid values are: # LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top) dag_orientation = LR @@ -184,6 +208,9 @@ log_fetch_timeout_sec = 5 # DAGs by default hide_paused_dags_by_default = False +# Consistent page size across all listing views in the UI +page_size = 100 + [email] email_backend = airflow.utils.email.send_email_smtp @@ -198,7 +225,7 @@ smtp_ssl = False # smtp_user = airflow # smtp_password = airflow smtp_port = 25 -smtp_mail_from = airflow@airflow.com +smtp_mail_from = airflow@example.com [celery] # This section only applies if you are using the CeleryExecutor in @@ -238,6 +265,19 @@ flower_port = 5555 # Default queue that tasks get assigned to and that worker listen on. 
default_queue = default +# Import path for celery configuration options +celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG + +# No SSL +celery_ssl_active = False + +[dask] +# This section only applies if you are using the DaskExecutor in +# [core] section above + +# The IP address and port of the Dask cluster's scheduler. +cluster_address = 127.0.0.1:8786 + [scheduler] # Task instances listen for external kill signal (when you clear tasks # from the CLI or the UI), this defines the frequency at which they should @@ -276,6 +316,11 @@ scheduler_zombie_task_threshold = 300 # DAG definition (catchup) catchup_by_default = True +# This changes the batch size of queries in the scheduling main loop. +# This depends on query length limits and how long you are willing to hold locks. +# 0 for no limit +max_tis_per_query = 0 + # Statsd (https://github.com/etsy/statsd) integration settings statsd_on = False statsd_host = localhost @@ -283,12 +328,25 @@ statsd_port = 8125 statsd_prefix = airflow # The scheduler can run multiple threads in parallel to schedule dags. -# This defines how many threads will run. However airflow will never -# use more threads than the amount of cpu cores available. +# This defines how many threads will run. max_threads = 2 authenticate = False +[ldap] +# set this to ldaps://: +uri = +user_filter = objectClass=* +user_name_attr = uid +group_member_attr = memberOf +superuser_filter = +data_profiler_filter = +bind_user = cn=Manager,dc=example,dc=com +bind_password = insecure +basedn = dc=example,dc=com +cacert = /etc/ca/ldap_ca.crt +search_scope = LEVEL + [mesos] # Mesos master address which MesosExecutor will connect to. 
master = localhost:5050 diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 950256010f..303fae25e5 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.8.2 + image: puckel/docker-airflow:1.9.0 restart: always depends_on: - postgres @@ -41,7 +41,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.8.2 + image: puckel/docker-airflow:1.9.0 restart: always depends_on: - redis @@ -53,7 +53,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.8.2 + image: puckel/docker-airflow:1.9.0 restart: always depends_on: - webserver @@ -70,7 +70,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.8.2 + image: puckel/docker-airflow:1.9.0 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index d05c06dde7..fbad63193c 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.2 + image: puckel/docker-airflow:1.9.0 restart: always depends_on: - postgres From f4cf78c684fe04e3818b7914166fc8bd91df7949 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 3 Jan 2018 17:32:27 +0100 Subject: [PATCH 117/163] v1.9.0 --- .gitignore | 1 + Dockerfile | 6 +++--- circle.yml | 2 +- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 9c0f4ca48f..fa1e64cc9f 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ Session.vim # sftp configuration file sftp-config.json +*.pyc diff --git a/Dockerfile b/Dockerfile index 24920d5eb6..6c96c8b30a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.8.1-1 +# VERSION 1.9.0-1 # AUTHOR: 
Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.8.2 +ARG AIRFLOW_VERSION=1.9.0 ARG AIRFLOW_HOME=/usr/local/airflow # Define en_US. @@ -57,7 +57,7 @@ RUN set -ex \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc]==$AIRFLOW_VERSION \ - && pip install celery[redis]==3.1.17 \ + && pip install celery[redis] \ && apt-get purge --auto-remove -yqq $buildDeps \ && apt-get clean \ && rm -rf \ diff --git a/circle.yml b/circle.yml index 5981fe23ae..cd93ab2f3a 100644 --- a/circle.yml +++ b/circle.yml @@ -12,4 +12,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.8.2' + - docker run puckel/docker-airflow version |grep '1.9.0' diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 950256010f..303fae25e5 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.8.2 + image: puckel/docker-airflow:1.9.0 restart: always depends_on: - postgres @@ -41,7 +41,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.8.2 + image: puckel/docker-airflow:1.9.0 restart: always depends_on: - redis @@ -53,7 +53,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.8.2 + image: puckel/docker-airflow:1.9.0 restart: always depends_on: - webserver @@ -70,7 +70,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.8.2 + image: puckel/docker-airflow:1.9.0 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index d05c06dde7..fbad63193c 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 
+8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.8.2 + image: puckel/docker-airflow:1.9.0 restart: always depends_on: - postgres From 962105538a1b0d0e1f608601f608c9a4fadd0d29 Mon Sep 17 00:00:00 2001 From: Theodore Siu Date: Fri, 12 Jan 2018 12:52:53 -0500 Subject: [PATCH 118/163] Adding support and documentation for airflow plugins --- README.md | 9 +++++++++ docker-compose-CeleryExecutor.yml | 6 ++++++ docker-compose-LocalExecutor.yml | 2 ++ 3 files changed, 17 insertions(+) diff --git a/README.md b/README.md index a7c80891e1..4085f2ed3f 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,15 @@ Check out the [Airflow documentation](http://airflow.readthedocs.io/en/latest/co You can also define connections via environment variables by prefixing them with `AIRFLOW_CONN_` - for example `AIRFLOW_CONN_POSTGRES_MASTER=postgres://user:password@localhost:5432/master` for a connection called "postgres_master". The value is parsed as a URI. This will work for hooks etc, but won't show up in the "Ad-hoc Query" section unless an (empty) connection is also created in the DB +## Custom Airflow plugins + +Airflow allows for custom user-created plugins which are typically found in `${AIRFLOW_HOME}/plugins` folder. Documentation on plugins can be found [here](https://airflow.apache.org/plugins.html) + +In order to incorporate plugins into your docker container +- Create the plugins folders `plugins/` with your custom plugins. 
+- Mount the folder as a volume by doing either of the following: + - Include the folder as a volume in command-line `-v $(pwd)/plugins/:/usr/local/airflow/plugins` + - Use docker-compose-LocalExecutor.yml or docker-compose-CeleryExecutor.yml which contain support for adding the plugins folder as a volume ## Install custom python package diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 303fae25e5..8ef3b8d702 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -31,6 +31,8 @@ services: # - REDIS_PASSWORD=redispass volumes: - ./dags:/usr/local/airflow/dags + # Uncomment to include custom plugins + # - ./plugins:/usr/local/airflow/plugins ports: - "8080:8080" command: webserver @@ -59,6 +61,8 @@ services: - webserver volumes: - ./dags:/usr/local/airflow/dags + # Uncomment to include custom plugins + # - ./plugins:/usr/local/airflow/plugins environment: - LOAD_EX=n - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= @@ -76,6 +80,8 @@ services: - scheduler volumes: - ./dags:/usr/local/airflow/dags + # Uncomment to include custom plugins + # - ./plugins:/usr/local/airflow/plugins environment: - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= - EXECUTOR=Celery diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index fbad63193c..2f59e2a1d0 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -17,6 +17,8 @@ services: - EXECUTOR=Local volumes: - ./dags:/usr/local/airflow/dags + # Uncomment to include custom plugins + # - ./plugins:/usr/local/airflow/dags ports: - "8080:8080" command: webserver From 6a60c0bb67bb7bb2a32bc4c9529548de66b5785d Mon Sep 17 00:00:00 2001 From: Theodore Siu Date: Fri, 12 Jan 2018 13:59:48 -0500 Subject: [PATCH 119/163] Small fixes after testing --- README.md | 2 +- docker-compose-LocalExecutor.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 
4085f2ed3f..c779c0b3d3 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ In order to incorporate plugins into your docker container - Create the plugins folders `plugins/` with your custom plugins. - Mount the folder as a volume by doing either of the following: - Include the folder as a volume in command-line `-v $(pwd)/plugins/:/usr/local/airflow/plugins` - - Use docker-compose-LocalExecutor.yml or docker-compose-CeleryExecutor.yml which contain support for adding the plugins folder as a volume + - Use docker-compose-LocalExecutor.yml or docker-compose-CeleryExecutor.yml which contains support for adding the plugins folder as a volume ## Install custom python package diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 2f59e2a1d0..f3bff4ee78 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -18,7 +18,7 @@ services: volumes: - ./dags:/usr/local/airflow/dags # Uncomment to include custom plugins - # - ./plugins:/usr/local/airflow/dags + # - ./plugins:/usr/local/airflow/plugins ports: - "8080:8080" command: webserver From caec3fa2544ae4e3a361348bb9a26f806251d806 Mon Sep 17 00:00:00 2001 From: Max Countryman Date: Wed, 17 Jan 2018 09:40:43 -0800 Subject: [PATCH 120/163] custome->custom --- script/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 254a37b3c1..aa74c9fc14 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -31,7 +31,7 @@ then AIRFLOW__CORE__LOAD_EXAMPLES=False fi -# Install custome python package if requirements.txt is present +# Install custom python package if requirements.txt is present if [ -e "/requirements.txt" ]; then $(which pip) install --user -r /requirements.txt fi From a7ab312553d126915a93321ac813fdfde62862f1 Mon Sep 17 00:00:00 2001 From: Diego Rabatone Oliveira Date: Mon, 29 Jan 2018 13:59:53 -0200 Subject: [PATCH 121/163] Fix celery result_backend config variable --- 
config/airflow.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/airflow.cfg b/config/airflow.cfg index 6da94249fa..abe0b6ff69 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -253,7 +253,7 @@ worker_log_server_port = 8793 broker_url = redis://redis:6379/1 # Another key Celery setting -celery_result_backend = db+postgresql://airflow:airflow@postgres/airflow +result_backend = db+postgresql://airflow:airflow@postgres/airflow # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start # it `airflow flower`. This defines the IP that Celery Flower runs on From 7a99885545cc6343d06af86e5bd192b3ae6cda5c Mon Sep 17 00:00:00 2001 From: Diego Rabatone Oliveira Date: Mon, 29 Jan 2018 15:35:26 -0200 Subject: [PATCH 122/163] Fix typo --- script/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index aa74c9fc14..d888da3dbf 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -57,7 +57,7 @@ wait_for_port() { } wait_for_redis() { - # Wait for Redis iff we are using it + # Wait for Redis if we are using it if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ] then wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT" From 3dc5a0f930d1f34dc92bbedd00807887fddff7f6 Mon Sep 17 00:00:00 2001 From: Diego Rabatone Oliveira Date: Mon, 29 Jan 2018 15:53:07 -0200 Subject: [PATCH 123/163] Remove duplicated ENV and simplify pip usage on Dockerfile --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index fc4f5fc807..0d08036e19 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,7 +21,6 @@ ENV LANG en_US.UTF-8 ENV LC_ALL en_US.UTF-8 ENV LC_CTYPE en_US.UTF-8 ENV LC_MESSAGES en_US.UTF-8 -ENV LC_ALL en_US.UTF-8 RUN set -ex \ && buildDeps=' \ @@ -50,7 +49,7 @@ RUN set -ex \ && locale-gen \ && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \ - && python -m pip 
install -U pip setuptools wheel \ + && pip install -U pip setuptools wheel \ && pip install Cython \ && pip install pytz \ && pip install pyOpenSSL \ From 0835f2190892ae2c6f1031a7fbe8b788e7a45baf Mon Sep 17 00:00:00 2001 From: Diego Rabatone Oliveira Date: Mon, 29 Jan 2018 15:55:01 -0200 Subject: [PATCH 124/163] Also upgrade docker image on docker build --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 0d08036e19..097b90ae31 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,6 +36,7 @@ RUN set -ex \ git \ ' \ && apt-get update -yqq \ + && apt-get upgrade -yqq \ && apt-get install -yqq --no-install-recommends \ $buildDeps \ python3-pip \ From 7872cf19b95742a0c15808e0031446a221bb17a6 Mon Sep 17 00:00:00 2001 From: Diego Rabatone Oliveira Date: Wed, 31 Jan 2018 17:49:27 -0200 Subject: [PATCH 125/163] Fix celery variable on entrypoint --- script/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index d888da3dbf..5944bb9307 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -66,7 +66,7 @@ wait_for_redis() { AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1" -AIRFLOW__CELERY__CELERY_RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" +AIRFLOW__CELERY__RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" case "$1" in webserver) From bcd9c6c3d0c343243711fc67f6b552a7c3d793a1 Mon Sep 17 00:00:00 2001 From: rootcss Date: Mon, 19 Feb 2018 12:48:19 +0530 Subject: [PATCH 126/163] fix CELERY_RESULT_BACKEND env var name --- script/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 5944bb9307..d888da3dbf 
100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -66,7 +66,7 @@ wait_for_redis() { AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1" -AIRFLOW__CELERY__RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" +AIRFLOW__CELERY__CELERY_RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" case "$1" in webserver) From 45fc751c48662f714e471dccba3f093422e4acdc Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 1 Mar 2018 08:58:49 +0100 Subject: [PATCH 127/163] Rollback PR#152 --- config/airflow.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/airflow.cfg b/config/airflow.cfg index abe0b6ff69..6da94249fa 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -253,7 +253,7 @@ worker_log_server_port = 8793 broker_url = redis://redis:6379/1 # Another key Celery setting -result_backend = db+postgresql://airflow:airflow@postgres/airflow +celery_result_backend = db+postgresql://airflow:airflow@postgres/airflow # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start # it `airflow flower`. This defines the IP that Celery Flower runs on From ef712bccc2d68994c16f6851a065bb835a5d4f78 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 1 Mar 2018 09:03:27 +0100 Subject: [PATCH 128/163] Bump to 1.9.0-2 --- Dockerfile | 2 +- docker-compose-CeleryExecutor.yml | 14 +++++++------- docker-compose-LocalExecutor.yml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 097b90ae31..413ad72885 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.9.0-1 +# VERSION 1.9.0-2 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . 
diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 8ef3b8d702..8733690592 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.9.0 + image: puckel/docker-airflow:1.9.0-2 restart: always depends_on: - postgres @@ -31,7 +31,7 @@ services: # - REDIS_PASSWORD=redispass volumes: - ./dags:/usr/local/airflow/dags - # Uncomment to include custom plugins + # Uncomment to include custom plugins # - ./plugins:/usr/local/airflow/plugins ports: - "8080:8080" @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.9.0 + image: puckel/docker-airflow:1.9.0-2 restart: always depends_on: - redis @@ -55,13 +55,13 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.9.0 + image: puckel/docker-airflow:1.9.0-2 restart: always depends_on: - webserver volumes: - ./dags:/usr/local/airflow/dags - # Uncomment to include custom plugins + # Uncomment to include custom plugins # - ./plugins:/usr/local/airflow/plugins environment: - LOAD_EX=n @@ -74,13 +74,13 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.9.0 + image: puckel/docker-airflow:1.9.0-2 restart: always depends_on: - scheduler volumes: - ./dags:/usr/local/airflow/dags - # Uncomment to include custom plugins + # Uncomment to include custom plugins # - ./plugins:/usr/local/airflow/plugins environment: - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho= diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index f3bff4ee78..e034f47bd2 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.9.0 + image: puckel/docker-airflow:1.9.0-2 restart: always depends_on: - postgres @@ -17,7 +17,7 @@ services: - EXECUTOR=Local volumes: - 
./dags:/usr/local/airflow/dags - # Uncomment to include custom plugins + # Uncomment to include custom plugins # - ./plugins:/usr/local/airflow/plugins ports: - "8080:8080" From b70f484a004473949bcdcd1ce3eece07d86461b7 Mon Sep 17 00:00:00 2001 From: Arihant Surana Date: Thu, 29 Mar 2018 18:33:07 +1100 Subject: [PATCH 129/163] Add packages which allow Airflow mysql hook (#158) * Updated dockerfile with dependencies needed for airflow mysql hook to function * added mysql as a dependency * use newer mysql packages as mysql is no longer available * added libmysqlclient-dev to allow setup of mysql_config --- Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 413ad72885..624edaebdc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,6 +41,9 @@ RUN set -ex \ $buildDeps \ python3-pip \ python3-requests \ + mysql-client \ + mysql-server \ + libmysqlclient-dev \ apt-utils \ curl \ rsync \ @@ -56,7 +59,7 @@ RUN set -ex \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ - && pip install apache-airflow[crypto,celery,postgres,hive,jdbc]==$AIRFLOW_VERSION \ + && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql]==$AIRFLOW_VERSION \ && pip install celery[redis]==4.0.2 \ && apt-get purge --auto-remove -yqq $buildDeps \ && apt-get clean \ From 41faa7849db3e8b7b69006865635f906126f18c1 Mon Sep 17 00:00:00 2001 From: Kaxil Naik Date: Wed, 11 Apr 2018 14:41:48 +0100 Subject: [PATCH 130/163] Update broken Airflow docs link for Extra packages (#171) - Updated the broken link for Airflow Docs from https://pythonhosted.org/airflow to https://airflow.incubator.apache.org --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c779c0b3d3..691dab0883 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Pull the image from the Docker repository. 
## Build -For example, if you need to install [Extra Packages](https://pythonhosted.org/airflow/installation.html#extra-package), edit the Dockerfile and then build it. +For example, if you need to install [Extra Packages](https://airflow.incubator.apache.org/airflow/installation.html#extra-package), edit the Dockerfile and then build it. docker build --rm -t puckel/docker-airflow . From 678d9321114be19b4a4c0de98ea9a44fbd052d76 Mon Sep 17 00:00:00 2001 From: Kaxil Naik Date: Thu, 12 Apr 2018 11:07:56 +0100 Subject: [PATCH 131/163] Fix the Airflow doc Extra package link (#172) - In the last pull, I forgot to remove "/airflow/" from the path which is now fixed. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 691dab0883..c9e32c7331 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Pull the image from the Docker repository. ## Build -For example, if you need to install [Extra Packages](https://airflow.incubator.apache.org/airflow/installation.html#extra-package), edit the Dockerfile and then build it. +For example, if you need to install [Extra Packages](https://airflow.incubator.apache.org/installation.html#extra-package), edit the Dockerfile and then build it. docker build --rm -t puckel/docker-airflow . 
From 49401a78ab6a3154d0ecd835c2a2beaa3a942a77 Mon Sep 17 00:00:00 2001 From: Eder Ruiz Date: Tue, 17 Apr 2018 04:16:12 -0300 Subject: [PATCH 132/163] Remove unnecessary packages (#174) * Before: ``` $ docker run -it --rm --user root puckel/docker-airflow:1.9.0-2 bash root@22e9e32775d4:/usr/local/airflow# root@22e9e32775d4:/usr/local/airflow# apt-get update Hit http://security.debian.org jessie/updates InRelease Ign http://deb.debian.org jessie InRelease Get:1 http://security.debian.org jessie/updates/main amd64 Packages [644 kB] Get:2 http://deb.debian.org jessie-updates InRelease [145 kB] Get:3 http://deb.debian.org jessie Release.gpg [2,434 B] Get:4 http://deb.debian.org jessie-updates/main amd64 Packages [23.1 kB] Get:5 http://deb.debian.org jessie Release [148 kB] Get:6 http://deb.debian.org jessie/main amd64 Packages [9,064 kB] Fetched 9,882 kB in 19s (499 kB/s) Reading package lists... Done root@22e9e32775d4:/usr/local/airflow# apt-get upgrade Reading package lists... Done Building dependency tree Reading state information... Done Calculating upgrade... The following packages were automatically installed and are no longer required: binutils bzip2 cpp cpp-4.9 dpkg-dev g++ g++-4.9 gcc gcc-4.9 libasan1 libatomic1 libc-dev-bin libc6-dev libcilkrts5 libcloog-isl4 libdpkg-perl libexpat1-dev libgcc-4.9-dev libgomp1 libisl10 libitm1 liblsan0 libmpc3 libmpfr4 libpython3-dev libpython3.4 libpython3.4-dev libquadmath0 libstdc++-4.9-dev libtimedate-perl libtsan0 libubsan0 linux-libc-dev make patch perl perl-modules python3.4-dev xz-utils Use 'apt-get autoremove' to remove them. Done 0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded. 
root@22e9e32775d4:/usr/local/airflow# ``` --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 624edaebdc..873d40a1c7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -62,6 +62,7 @@ RUN set -ex \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql]==$AIRFLOW_VERSION \ && pip install celery[redis]==4.0.2 \ && apt-get purge --auto-remove -yqq $buildDeps \ + && apt-get autoremove -yqq --purge \ && apt-get clean \ && rm -rf \ /var/lib/apt/lists/* \ From 9f00550be9f3365215e2412419544a8301752fdd Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 15 May 2018 11:04:36 +0200 Subject: [PATCH 133/163] Fix issue running docker-airflow without compose (SequentialExecutor) --- Dockerfile | 2 +- README.md | 38 +++++++++++++++++-------------- config/airflow.cfg | 9 +++----- docker-compose-CeleryExecutor.yml | 8 +++---- docker-compose-LocalExecutor.yml | 2 +- script/entrypoint.sh | 27 ++++++++-------------- 6 files changed, 40 insertions(+), 46 deletions(-) diff --git a/Dockerfile b/Dockerfile index 873d40a1c7..2e7eacf3a7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.9.0-2 +# VERSION 1.9.0-3 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . diff --git a/README.md b/README.md index c9e32c7331..093dccb3d0 100644 --- a/README.md +++ b/README.md @@ -15,41 +15,43 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow) -/!\ If you want to use Airflow using Python 2, use TAG [1.8.1](https://github.com/puckel/docker-airflow/releases/tag/1.8.1) +/!\ If you want to use Airflow using Python 2, use TAG [1.8.1](https://github.com/puckel/docker-airflow/releases/tag/1.8.1). ## Installation Pull the image from the Docker repository. 
- docker pull puckel/docker-airflow + docker pull puckel/docker-airflow ## Build For example, if you need to install [Extra Packages](https://airflow.incubator.apache.org/installation.html#extra-package), edit the Dockerfile and then build it. - docker build --rm -t puckel/docker-airflow . + docker build --rm -t puckel/docker-airflow . + +Don't forget to update the airflow images in the docker-compose files to puckel/docker-airflow:latest. ## Usage By default, docker-airflow runs Airflow with **SequentialExecutor** : - docker run -d -p 8080:8080 puckel/docker-airflow + docker run -d -p 8080:8080 puckel/docker-airflow If you want to run another executor, use the other docker-compose.yml files provided in this repository. For **LocalExecutor** : - docker-compose -f docker-compose-LocalExecutor.yml up -d + docker-compose -f docker-compose-LocalExecutor.yml up -d For **CeleryExecutor** : - docker-compose -f docker-compose-CeleryExecutor.yml up -d + docker-compose -f docker-compose-CeleryExecutor.yml up -d -NB : If you don't want to have DAGs example loaded (default=True), you've to set the following environment variable : +NB : If you want to have DAGs example loaded (default=False), you've to set the following environment variable : `LOAD_EX=n` - docker run -d -p 8080:8080 -e LOAD_EX=n puckel/docker-airflow + docker run -d -p 8080:8080 -e LOAD_EX=y puckel/docker-airflow If you want to use Ad hoc query, make sure you've configured connections: Go to Admin -> Connections and Edit "postgres_default" set this values (equivalent to values in airflow.cfg/docker-compose*.yml) : @@ -60,11 +62,13 @@ Go to Admin -> Connections and Edit "postgres_default" set this values (equivale For encrypted connection passwords (in Local or Celery Executor), you must have the same fernet_key. 
By default docker-airflow generates the fernet_key at startup, you have to set an environment variable in the docker-compose (ie: docker-compose-LocalExecutor.yml) file to set the same key accross containers. To generate a fernet_key : - python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY" + docker run puckel/docker-airflow python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)" ## Configurating Airflow -It is possible to set any configuration value for Airflow from environment variables, which are used over values from the airflow.cfg. The general rule is the environment variable should be named `AIRFLOW__<SECTION>__<KEY>`, for example `AIRFLOW__CORE__SQL_ALCHEMY_CONN` sets the `sql_alchemy_conn` config option in the `[core]` section. +It's possible to set any configuration value for Airflow from environment variables, which are used over values from the airflow.cfg. + +The general rule is the environment variable should be named `AIRFLOW__<SECTION>__<KEY>`, for example `AIRFLOW__CORE__SQL_ALCHEMY_CONN` sets the `sql_alchemy_conn` config option in the `[core]` section. Check out the [Airflow documentation](http://airflow.readthedocs.io/en/latest/configuration.html?highlight=__CORE__#setting-configuration-options) for more details @@ -76,14 +80,14 @@ Airflow allows for custom user-created plugins which are typically found in `${A In order to incorporate plugins into your docker container - Create the plugins folders `plugins/` with your custom plugins. -- Mount the folder as a volume by doing either of the following: +- Mount the folder as a volume by doing either of the following: - Include the folder as a volume in command-line `-v $(pwd)/plugins/:/usr/local/airflow/plugins` - Use docker-compose-LocalExecutor.yml or docker-compose-CeleryExecutor.yml which contains support for adding the plugins folder as a volume ## Install custom python package - Create a file "requirements.txt" with the desired python modules -- Mount this file as a volume `-v $(pwd)/requirements.txt:/requirements.txt` +- Mount this file as a volume `-v $(pwd)/requirements.txt:/requirements.txt` (or add it as a volume in docker-compose file) - The entrypoint.sh script execute the pip install command (with --user option) ## UI Links @@ -96,7 +100,7 @@ In order to incorporate plugins into your docker container Easy scaling using docker-compose: - docker-compose scale worker=5 + docker-compose -f docker-compose-CeleryExecutor.yml scale worker=5 This can be used to scale to a multi node setup using docker swarm. @@ -104,16 +108,16 @@ This can be used to scale to a multi node setup using docker swarm.
If you want to run other airflow sub-commands, such as `list_dags` or `clear` you can do so like this: - docker run --rm -ti puckel/docker-airflow airflow list_dags + docker run --rm -ti puckel/docker-airflow airflow list_dags or with your docker-compose set up like this: - docker-compose -f docker-compose-CeleryExecutor.yml run --rm webserver airflow list_dags + docker-compose -f docker-compose-CeleryExecutor.yml run --rm webserver airflow list_dags You can also use this to run a bash shell or any other command in the same environment that airflow would be run in: - docker run --rm -ti puckel/docker-airflow bash - docker run --rm -ti puckel/docker-airflow ipython + docker run --rm -ti puckel/docker-airflow bash + docker run --rm -ti puckel/docker-airflow ipython # Wanna help? diff --git a/config/airflow.cfg b/config/airflow.cfg index 6da94249fa..a4aeb36da8 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -27,17 +27,17 @@ logging_level = INFO logging_config_class = # Log format -log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s +log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s # The executor class that airflow should use. Choices include # SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor -executor = CeleryExecutor +executor = SequentialExecutor # The SqlAlchemy connection string to the metadata database. # SqlAlchemy supports many different database engine, more information # their website -sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@postgres/airflow +# sql_alchemy_conn = sqlite:////tmp/airflow.db # The SqlAlchemy pool size is the maximum number of database connections # in the pool. 
@@ -268,9 +268,6 @@ default_queue = default # Import path for celery configuration options celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG -# No SSL -celery_ssl_active = False - [dask] # This section only applies if you are using the DaskExecutor in # [core] section above diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 8733690592..9fe1836759 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.9.0-2 + image: puckel/docker-airflow:1.9.0-3 restart: always depends_on: - postgres @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.9.0-2 + image: puckel/docker-airflow:1.9.0-3 restart: always depends_on: - redis @@ -55,7 +55,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.9.0-2 + image: puckel/docker-airflow:1.9.0-3 restart: always depends_on: - webserver @@ -74,7 +74,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.9.0-2 + image: puckel/docker-airflow:1.9.0-3 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index e034f47bd2..1355849303 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.9.0-2 + image: puckel/docker-airflow:1.9.0-3 restart: always depends_on: - postgres diff --git a/script/entrypoint.sh b/script/entrypoint.sh index d888da3dbf..7a5177b541 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -56,39 +56,32 @@ wait_for_port() { done } -wait_for_redis() { - # Wait for Redis if we are using it - if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ] - then - wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT" - fi -} +if [ "$AIRFLOW__CORE__EXECUTOR" != 
"SequentialExecutor" ]; then + AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" + AIRFLOW__CELERY__CELERY_RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" + wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT" +fi -AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" -AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1" -AIRFLOW__CELERY__CELERY_RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" +if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ]; then + AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1" + wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT" +fi case "$1" in webserver) - wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT" - wait_for_redis airflow initdb - if [ "$AIRFLOW__CORE__EXECUTOR" = "LocalExecutor" ]; - then + if [ "$AIRFLOW__CORE__EXECUTOR" = "LocalExecutor" ]; then # With the "Local" executor it should all run in one container. airflow scheduler & fi exec airflow webserver ;; worker|scheduler) - wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT" - wait_for_redis # To give the webserver time to run initdb. 
sleep 10 exec airflow "$@" ;; flower) - wait_for_redis exec airflow "$@" ;; version) From d709f5403aeb7c748cf7740d76329b25cbd75aef Mon Sep 17 00:00:00 2001 From: Ash Berlin-Taylor Date: Thu, 7 Jun 2018 09:56:01 +0100 Subject: [PATCH 134/163] Fix instructions for running with SequentialExecutor (#191) Without the `webserver` argument the container starts up and immediately exits --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 093dccb3d0..9336681c90 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Don't forget to update the airflow images in the docker-compose files to puckel/ By default, docker-airflow runs Airflow with **SequentialExecutor** : - docker run -d -p 8080:8080 puckel/docker-airflow + docker run -d -p 8080:8080 puckel/docker-airflow webserver If you want to run another executor, use the other docker-compose.yml files provided in this repository. From 4aa90382503f1e54405e5676da781e7ba230bb3d Mon Sep 17 00:00:00 2001 From: Andy Chung Date: Thu, 7 Jun 2018 17:01:52 +0800 Subject: [PATCH 135/163] Update Dockerfile. Fixed Package 'libmysqlclient-dev'. (#188) --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 2e7eacf3a7..3564873fa3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,7 +43,7 @@ RUN set -ex \ python3-requests \ mysql-client \ mysql-server \ - libmysqlclient-dev \ + default-libmysqlclient-dev \ apt-utils \ curl \ rsync \ From 132307ff7b33da7bb4257251deb12e9ac63b983f Mon Sep 17 00:00:00 2001 From: r Date: Thu, 7 Jun 2018 05:20:42 -0400 Subject: [PATCH 136/163] Update celery to 4.1.1 hotfix for kombu 4.2 (#186) Worker gives KeyError async on fresh docker build due to Kombu 4.2. Result is a continually failing/rebooting worker. Tracked it down to this issue. 
https://github.com/celery/celery/issues/4753 Should upgrade to celery 4.2 when stable --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 3564873fa3..a92eff4d9a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -60,7 +60,7 @@ RUN set -ex \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql]==$AIRFLOW_VERSION \ - && pip install celery[redis]==4.0.2 \ + && pip install celery[redis]==4.1.1 \ && apt-get purge --auto-remove -yqq $buildDeps \ && apt-get autoremove -yqq --purge \ && apt-get clean \ From 15699d75411a5223ec0a3e6221493b4d4124643f Mon Sep 17 00:00:00 2001 From: msn1444 Date: Thu, 7 Jun 2018 04:20:57 -0500 Subject: [PATCH 137/163] add default arg for docker run to be webserver (#190) --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index a92eff4d9a..880691b873 100644 --- a/Dockerfile +++ b/Dockerfile @@ -82,3 +82,4 @@ EXPOSE 8080 5555 8793 USER airflow WORKDIR ${AIRFLOW_HOME} ENTRYPOINT ["/entrypoint.sh"] +CMD ["webserver"] # set default arg for entrypoint \ No newline at end of file From cc6b0839cf465e042d10393a2130b76baa6cc4a0 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 7 Jun 2018 11:27:16 +0200 Subject: [PATCH 138/163] Bump to 1.9.0-4 --- Dockerfile | 4 ++-- dags/tuto.py | 37 +++++++++++++------------------ docker-compose-CeleryExecutor.yml | 8 +++---- docker-compose-LocalExecutor.yml | 2 +- 4 files changed, 22 insertions(+), 29 deletions(-) diff --git a/Dockerfile b/Dockerfile index 880691b873..8e58c7b769 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,11 @@ -# VERSION 1.9.0-3 +# VERSION 1.9.0-4 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . 
# SOURCE: https://github.com/puckel/docker-airflow FROM python:3.6-slim -MAINTAINER Puckel_ +LABEL maintainer="Puckel_" # Never prompts the user for choices on installation/configuration of packages ENV DEBIAN_FRONTEND noninteractive diff --git a/dags/tuto.py b/dags/tuto.py index 40a0845385..cead2b6189 100644 --- a/dags/tuto.py +++ b/dags/tuto.py @@ -8,34 +8,26 @@ default_args = { - 'owner': 'airflow', - 'depends_on_past': False, - 'start_date': datetime(2015, 6, 1), - 'email': ['airflow@airflow.com'], - 'email_on_failure': False, - 'email_on_retry': False, - 'retries': 1, - 'retry_delay': timedelta(minutes=5), + "owner": "airflow", + "depends_on_past": False, + "start_date": datetime(2015, 6, 1), + "email": ["airflow@airflow.com"], + "email_on_failure": False, + "email_on_retry": False, + "retries": 1, + "retry_delay": timedelta(minutes=5), # 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 1, 1), } -dag = DAG( - 'tutorial', default_args=default_args, schedule_interval=timedelta(1)) +dag = DAG("tutorial", default_args=default_args, schedule_interval=timedelta(1)) # t1, t2 and t3 are examples of tasks created by instantiating operators -t1 = BashOperator( - task_id='print_date', - bash_command='date', - dag=dag) +t1 = BashOperator(task_id="print_date", bash_command="date", dag=dag) -t2 = BashOperator( - task_id='sleep', - bash_command='sleep 5', - retries=3, - dag=dag) +t2 = BashOperator(task_id="sleep", bash_command="sleep 5", retries=3, dag=dag) templated_command = """ {% for i in range(5) %} @@ -46,10 +38,11 @@ """ t3 = BashOperator( - task_id='templated', + task_id="templated", bash_command=templated_command, - params={'my_param': 'Parameter I passed in'}, - dag=dag) + params={"my_param": "Parameter I passed in"}, + dag=dag, +) t2.set_upstream(t1) t3.set_upstream(t1) diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 9fe1836759..5ba15d99e9 100644 --- 
a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.9.0-3 + image: puckel/docker-airflow:1.9.0-4 restart: always depends_on: - postgres @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.9.0-3 + image: puckel/docker-airflow:1.9.0-4 restart: always depends_on: - redis @@ -55,7 +55,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.9.0-3 + image: puckel/docker-airflow:1.9.0-4 restart: always depends_on: - webserver @@ -74,7 +74,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.9.0-3 + image: puckel/docker-airflow:1.9.0-4 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 1355849303..9d492125b8 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.9.0-3 + image: puckel/docker-airflow:1.9.0-4 restart: always depends_on: - postgres From 05e31ddd975d6381b7ab60b16b5f3dfe53feb298 Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Fri, 8 Jun 2018 09:14:50 +0200 Subject: [PATCH 139/163] Update README.md --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 9336681c90..de58730525 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,6 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow) -/!\ If you want to use Airflow using Python 2, use TAG [1.8.1](https://github.com/puckel/docker-airflow/releases/tag/1.8.1). - ## Installation Pull the image from the Docker repository. 
From 6dc31a2cd849d5fce1b1942a12c29f4c0db69e93 Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Thu, 14 Jun 2018 11:12:51 +0200 Subject: [PATCH 140/163] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index de58730525..9e31c12f46 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ It's possible to set any configuration value for Airflow from environment variab The general rule is the environment variable should be named `AIRFLOW__<SECTION>__<KEY>`, for example `AIRFLOW__CORE__SQL_ALCHEMY_CONN` sets the `sql_alchemy_conn` config option in the `[core]` section. -Check out the [Airflow documentation](http://airflow.readthedocs.io/en/latest/configuration.html?highlight=__CORE__#setting-configuration-options) for more details +Check out the [Airflow documentation](http://airflow.readthedocs.io/en/latest/howto/set-config.html#setting-configuration-options) for more details You can also define connections via environment variables by prefixing them with `AIRFLOW_CONN_` - for example `AIRFLOW_CONN_POSTGRES_MASTER=postgres://user:password@localhost:5432/master` for a connection called "postgres_master". The value is parsed as a URI. This will work for hooks etc, but won't show up in the "Ad-hoc Query" section unless an (empty) connection is also created in the DB From fc4e3cdc8b8f0eb77a87571f38b9888df90287ea Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 29 Aug 2018 15:32:18 +0200 Subject: [PATCH 141/163] Bump to Airflow 1.10.0 --- Dockerfile | 9 +- circle.yml | 2 +- config/airflow.cfg | 207 ++++++++++++++++++++++++++++-- docker-compose-CeleryExecutor.yml | 8 +- docker-compose-LocalExecutor.yml | 2 +- 5 files changed, 205 insertions(+), 23 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8e58c7b769..663b2d274b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.9.0-4 +# VERSION 1.10.0 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . @@ -12,8 +12,9 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.9.0 +ARG AIRFLOW_VERSION=1.10.0 ARG AIRFLOW_HOME=/usr/local/airflow +ENV AIRFLOW_GPL_UNIDECODE yes # Define en_US.
ENV LANGUAGE en_US.UTF-8 @@ -29,7 +30,6 @@ RUN set -ex \ libsasl2-dev \ libssl-dev \ libffi-dev \ - build-essential \ libblas-dev \ liblapack-dev \ libpq-dev \ @@ -39,6 +39,7 @@ RUN set -ex \ && apt-get upgrade -yqq \ && apt-get install -yqq --no-install-recommends \ $buildDeps \ + build-essential \ python3-pip \ python3-requests \ mysql-client \ @@ -60,7 +61,7 @@ RUN set -ex \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql]==$AIRFLOW_VERSION \ - && pip install celery[redis]==4.1.1 \ + && pip install 'celery[redis]>=4.1.1,<4.2.0' \ && apt-get purge --auto-remove -yqq $buildDeps \ && apt-get autoremove -yqq --purge \ && apt-get clean \ diff --git a/circle.yml b/circle.yml index cd93ab2f3a..a0a39deed3 100644 --- a/circle.yml +++ b/circle.yml @@ -12,4 +12,4 @@ test: pre: - sleep 5 override: - - docker run puckel/docker-airflow version |grep '1.9.0' + - docker run puckel/docker-airflow version |grep '1.10.0' diff --git a/config/airflow.cfg b/config/airflow.cfg index a4aeb36da8..fde7537bfb 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -11,14 +11,18 @@ dags_folder = /usr/local/airflow/dags # This path must be absolute base_log_folder = /usr/local/airflow/logs -# Airflow can store logs remotely in AWS S3 or Google Cloud Storage. Users -# must supply an Airflow connection id that provides access to the storage -# location. +# Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search. +# Users must supply an Airflow connection id that provides access to the storage +# location. If remote_logging is set to true, see UPDATING.md for additional +# configuration requirements. 
+remote_logging = False remote_log_conn_id = +remote_base_log_folder = encrypt_s3_logs = False # Logging level logging_level = INFO +fab_logging_level = WARN # Logging class # Specify the class that will specify the logging configuration @@ -27,9 +31,22 @@ logging_level = INFO logging_config_class = # Log format -log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s +# we need to escape the curly braces by adding an additional curly brace +log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s +# Log filename format +# we need to escape the curly braces by adding an additional curly brace +log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log +log_processor_filename_template = {{ filename }}.log + +# Hostname by providing a path to a callable, which will resolve the hostname +hostname_callable = socket:getfqdn + +# Default timezone in case supplied date times are naive +# can be utc (default), system, or any IANA timezone string (e.g. Europe/Amsterdam) +default_timezone = utc + # The executor class that airflow should use. Choices include # SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor executor = SequentialExecutor @@ -39,14 +56,22 @@ executor = SequentialExecutor # their website # sql_alchemy_conn = sqlite:////tmp/airflow.db +# If SqlAlchemy should pool database connections. +sql_alchemy_pool_enabled = True + # The SqlAlchemy pool size is the maximum number of database connections -# in the pool. +# in the pool. 0 indicates no limit. sql_alchemy_pool_size = 5 # The SqlAlchemy pool recycle is the number of seconds a connection # can be idle in the pool before it is invalidated. This config does -# not apply to sqlite. -sql_alchemy_pool_recycle = 3600 +# not apply to sqlite. If the number of DB connections is ever exceeded, +# a lower config value will allow the system to recover faster. 
+sql_alchemy_pool_recycle = 1800 + +# How many seconds to retry re-establishing a DB connection after +# disconnects. Setting this to 0 disables retries. +sql_alchemy_reconnect_timeout = 300 # The amount of parallelism as a setting to the executor. This defines # the max number of task instances that should run simultaneously @@ -93,13 +118,17 @@ default_impersonation = # What security module to use (for example kerberos): security = +# If set to False enables some unsecure features like Charts and Ad Hoc Queries. +# In 2.0 will default to True. +secure_mode = False + # Turn unit test mode on (overwrites many configuration options with test # values at runtime) unit_test_mode = False # Name of handler to read task instance logs. -# Default to use file task handler. -task_log_reader = file.task +# Default to use task handler. +task_log_reader = task # Whether to enable pickling for xcom (note that this is insecure and allows for # RCE exploits). This will be deprecated in Airflow 2.0 (be forced to False). @@ -109,17 +138,36 @@ enable_xcom_pickling = True # it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED killed_task_cleanup_time = 60 +# Whether to override params with dag_run.conf. If you pass some key-value pairs through `airflow backfill -c` or +# `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params. +dag_run_conf_overrides_params = False + [cli] # In what way should the cli access the API. The LocalClient will use the # database directly, while the json_client will use the api running on the # webserver api_client = airflow.api.client.local_client + +# If you set web_server_url_prefix, do NOT forget to append it here, ex: +# endpoint_url = http://localhost:8080/myroot +# So api will look like: http://localhost:8080/myroot/api/experimental/... 
endpoint_url = http://localhost:8080 [api] # How to authenticate users of the API auth_backend = airflow.api.auth.backend.default +[lineage] +# what lineage backend to use +backend = + +[atlas] +sasl_enabled = False +host = +port = 21000 +username = +password = + [operators] # The default owner assigned to each new operator, unless # provided explicitly or passed via `default_args` @@ -129,6 +177,10 @@ default_ram = 512 default_disk = 512 default_gpus = 0 +[hive] +# Default mapreduce queue for HiveOperator tasks +default_hive_mapred_queue = + [webserver] # The base url of your website as airflow cannot guess what domain or # cname you are using. This is used in automated emails that @@ -146,6 +198,9 @@ web_server_port = 8080 web_server_ssl_cert = web_server_ssl_key = +# Number of seconds the webserver waits before killing gunicorn master that doesn't respond +web_server_master_timeout = 120 + # Number of seconds the gunicorn webserver waits before timing out on a worker web_server_worker_timeout = 120 @@ -172,10 +227,10 @@ access_logfile = - error_logfile = - # Expose the configuration file in the web server -expose_config = True +expose_config = False # Set to true to turn on authentication: -# http://pythonhosted.org/airflow/security.html#web-authentication +# https://airflow.incubator.apache.org/security.html#web-authentication authenticate = False # Filter the list of dags by owner name (requires authentication to be enabled) @@ -211,6 +266,15 @@ hide_paused_dags_by_default = False # Consistent page size across all listing views in the UI page_size = 100 +# Use FAB-based webserver with RBAC feature +rbac = False + +# Define the color of navigation bar +navbar_color = #007A87 + +# Default dagrun to show in UI +default_dag_run_display_number = 25 + [email] email_backend = airflow.utils.email.send_email_smtp @@ -238,7 +302,7 @@ celery_app_name = airflow.executors.celery_executor # "airflow worker" command. 
This defines the number of task instances that # a worker will take, so size up your workers based on the resources on # your worker box and the nature of your tasks -celeryd_concurrency = 16 +worker_concurrency = 16 # When you start an airflow worker, airflow starts a tiny web server # subprocess to serve the workers local log files to the airflow main @@ -259,6 +323,10 @@ celery_result_backend = db+postgresql://airflow:airflow@postgres/airflow # it `airflow flower`. This defines the IP that Celery Flower runs on flower_host = 0.0.0.0 +# The root URL for Flower +# Ex: flower_url_prefix = /flower +flower_url_prefix = + # This defines the port that Celery Flower runs on flower_port = 5555 @@ -268,12 +336,38 @@ default_queue = default # Import path for celery configuration options celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG +# In case of using SSL +ssl_active = False +ssl_key = +ssl_cert = +ssl_cacert = + +[celery_broker_transport_options] +# This section is for specifying options which can be passed to the +# underlying celery broker transport. See: +# http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-broker_transport_options + +# The visibility timeout defines the number of seconds to wait for the worker +# to acknowledge the task before the message is redelivered to another worker. +# Make sure to increase the visibility timeout to match the time of the longest +# ETA you're planning to use. +# +# visibility_timeout is only supported for Redis and SQS celery brokers. +# See: +# http://docs.celeryproject.org/en/master/userguide/configuration.html#std:setting-broker_transport_options +# +#visibility_timeout = 21600 + [dask] # This section only applies if you are using the DaskExecutor in # [core] section above # The IP address and port of the Dask cluster's scheduler. cluster_address = 127.0.0.1:8786 +# TLS/ SSL settings to access a secured Dask scheduler. 
+tls_ca = +tls_cert = +tls_key = [scheduler] # Task instances listen for external kill signal (when you clear tasks @@ -293,6 +387,9 @@ run_duration = -1 # after how much time a new DAGs should be picked up from the filesystem min_file_process_interval = 0 +# How many seconds to wait between file-parsing loops to prevent the logs from being spammed. +min_file_parsing_loop_time = 1 + dag_dir_list_interval = 300 # How often should stats be printed to the logs @@ -316,7 +413,7 @@ catchup_by_default = True # This changes the batch size of queries in the scheduling main loop. # This depends on query length limits and how long you are willing to hold locks. # 0 for no limit -max_tis_per_query = 0 +max_tis_per_query = 512 # Statsd (https://github.com/etsy/statsd) integration settings statsd_on = False @@ -381,6 +478,11 @@ authenticate = False # default_principal = admin # default_secret = admin +# Optional Docker Image to run on slave before running the command +# This image should be accessible from mesos slave i.e mesos slave +# should be able to pull this docker image before executing the command. 
+# docker_image_slave = puckel/docker-airflow + [kerberos] ccache = /tmp/airflow_krb5_ccache # gets augmented with fqdn @@ -389,9 +491,88 @@ reinit_frequency = 3600 kinit_path = kinit keytab = airflow.keytab + [github_enterprise] api_rev = v3 [admin] # UI to hide sensitive variable fields when set to True hide_sensitive_variable_fields = True + +[elasticsearch] +elasticsearch_host = +# we need to escape the curly braces by adding an additional curly brace +elasticsearch_log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number} +elasticsearch_end_of_log_mark = end_of_log + +[kubernetes] +# The repository and tag of the Kubernetes Image for the Worker to Run +worker_container_repository = +worker_container_tag = + +# If True (default), worker pods will be deleted upon termination +delete_worker_pods = True + +# The Kubernetes namespace where airflow workers should be created. Defaults to `default` +namespace = default + +# The name of the Kubernetes ConfigMap Containing the Airflow Configuration (this file) +airflow_configmap = + +# For either git sync or volume mounted DAGs, the worker will look in this subpath for DAGs +dags_volume_subpath = + +# For DAGs mounted via a volume claim (mutually exclusive with volume claim) +dags_volume_claim = + +# For volume mounted logs, the worker will look in this subpath for logs +logs_volume_subpath = + +# A shared volume claim for the logs +logs_volume_claim = + +# Git credentials and repository for DAGs mounted via Git (mutually exclusive with volume claim) +git_repo = +git_branch = +git_user = +git_password = +git_subpath = + +# For cloning DAGs from git repositories into volumes: https://github.com/kubernetes/git-sync +git_sync_container_repository = gcr.io/google-containers/git-sync-amd64 +git_sync_container_tag = v2.0.5 +git_sync_init_container_name = git-sync-clone + +# The name of the Kubernetes service account to be associated with airflow workers, if any. 
+# Service accounts are required for workers that require access to secrets or cluster resources. +# See the Kubernetes RBAC documentation for more: +# https://kubernetes.io/docs/admin/authorization/rbac/ +worker_service_account_name = + +# Any image pull secrets to be given to worker pods, If more than one secret is +# required, provide a comma separated list: secret_a,secret_b +image_pull_secrets = + +# GCP Service Account Keys to be provided to tasks run on Kubernetes Executors +# Should be supplied in the format: key-name-1:key-path-1,key-name-2:key-path-2 +gcp_service_account_keys = + +# Use the service account kubernetes gives to pods to connect to kubernetes cluster. +# It's intended for clients that expect to be running inside a pod running on kubernetes. +# It will raise an exception if called from a process not running in a kubernetes environment. +in_cluster = True + +[kubernetes_secrets] +# The scheduler mounts the following secrets into your workers as they are launched by the +# scheduler. You may define as many secrets as needed and the kubernetes launcher will parse the +# defined secrets and mount them as secret environment variables in the launched workers. +# Secrets in this section are defined as follows +# = : +# +# For example if you wanted to mount a kubernetes secret key named `postgres_password` from the +# kubernetes secret object `airflow-secret` as the environment variable `POSTGRES_PASSWORD` into +# your workers you would follow the following format: +# POSTGRES_PASSWORD = airflow-secret:postgres_credentials +# +# Additionally you may override worker airflow settings with the AIRFLOW__
__ +# formatting as supported by airflow normally. \ No newline at end of file diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 5ba15d99e9..3602a0cac4 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.9.0-4 + image: puckel/docker-airflow:1.10.0-1 restart: always depends_on: - postgres @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.9.0-4 + image: puckel/docker-airflow:1.10.0-1 restart: always depends_on: - redis @@ -55,7 +55,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.9.0-4 + image: puckel/docker-airflow:1.10.0-1 restart: always depends_on: - webserver @@ -74,7 +74,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.9.0-4 + image: puckel/docker-airflow:1.10.0-1 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 9d492125b8..6002430804 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.9.0-4 + image: puckel/docker-airflow:1.10.0-1 restart: always depends_on: - postgres From 45ecb235f495fddea83d67a9df83254f7ffcbd94 Mon Sep 17 00:00:00 2001 From: Adam Unger Date: Fri, 31 Aug 2018 04:06:48 -0400 Subject: [PATCH 142/163] renamed celery_result_backend to result_backend (#227) --- config/airflow.cfg | 2 +- script/entrypoint.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/airflow.cfg b/config/airflow.cfg index fde7537bfb..61018bb03f 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -317,7 +317,7 @@ worker_log_server_port = 8793 broker_url = redis://redis:6379/1 # Another key Celery setting -celery_result_backend = 
db+postgresql://airflow:airflow@postgres/airflow +result_backend = db+postgresql://airflow:airflow@postgres/airflow # Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start # it `airflow flower`. This defines the IP that Celery Flower runs on diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 7a5177b541..9085927059 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -18,7 +18,7 @@ TRY_LOOP="20" export \ AIRFLOW__CELERY__BROKER_URL \ - AIRFLOW__CELERY__CELERY_RESULT_BACKEND \ + AIRFLOW__CELERY__RESULT_BACKEND \ AIRFLOW__CORE__EXECUTOR \ AIRFLOW__CORE__FERNET_KEY \ AIRFLOW__CORE__LOAD_EXAMPLES \ @@ -58,7 +58,7 @@ wait_for_port() { if [ "$AIRFLOW__CORE__EXECUTOR" != "SequentialExecutor" ]; then AIRFLOW__CORE__SQL_ALCHEMY_CONN="postgresql+psycopg2://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" - AIRFLOW__CELERY__CELERY_RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" + AIRFLOW__CELERY__RESULT_BACKEND="db+postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB" wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT" fi From fd2da36d9eff4ef2f69a3adf30340d7e67500382 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Fri, 31 Aug 2018 10:08:20 +0200 Subject: [PATCH 143/163] Bump to 1.10.0-2 --- Dockerfile | 2 +- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 663b2d274b..a4cb3cdec6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.10.0 +# VERSION 1.10.0-2 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . 
diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 3602a0cac4..a8ae82ddd7 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.10.0-1 + image: puckel/docker-airflow:1.10.0-2 restart: always depends_on: - postgres @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.10.0-1 + image: puckel/docker-airflow:1.10.0-2 restart: always depends_on: - redis @@ -55,7 +55,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.10.0-1 + image: puckel/docker-airflow:1.10.0-2 restart: always depends_on: - webserver @@ -74,7 +74,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.10.0-1 + image: puckel/docker-airflow:1.10.0-2 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 6002430804..a97c1d1a91 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.10.0-1 + image: puckel/docker-airflow:1.10.0-2 restart: always depends_on: - postgres From 4076e23c78a838e9ff124dc3ef978898f7369b98 Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 9 Oct 2018 17:22:15 +0900 Subject: [PATCH 144/163] migrate to circleci 2.0 (#243) --- .circleci/config.yml | 18 ++++++++++++++++++ circle.yml | 15 --------------- 2 files changed, 18 insertions(+), 15 deletions(-) create mode 100644 .circleci/config.yml delete mode 100644 circle.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000..bd9cbb46a2 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,18 @@ +version: 2 + +jobs: + build: + docker: + - image: docker:18.06.1-ce-git + steps: + - run: + name: Dependencies + command: docker build -t puckel/docker-airflow . 
+ test: + docker: + - image: docker:18.06.1-ce-git + working_directory: ~/ + steps: + - run: + name: Test + command: docker run puckel/docker-airflow version |grep '1.10.0' diff --git a/circle.yml b/circle.yml deleted file mode 100644 index a0a39deed3..0000000000 --- a/circle.yml +++ /dev/null @@ -1,15 +0,0 @@ -machine: - pre: - - curl -sSL https://s3.amazonaws.com/circle-downloads/install-circleci-docker.sh | bash -s -- 1.10.0 - services: - - docker - -dependencies: - override: - - docker build -t puckel/docker-airflow . - -test: - pre: - - sleep 5 - override: - - docker run puckel/docker-airflow version |grep '1.10.0' From e16a6b4f9fa6db2e5cf351be169b46f69f9e2ff3 Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Tue, 9 Oct 2018 10:26:38 +0200 Subject: [PATCH 145/163] Update config.yml --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index bd9cbb46a2..65b1cd9400 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -4,6 +4,7 @@ jobs: build: docker: - image: docker:18.06.1-ce-git + working_directory: ~/ steps: - run: name: Dependencies From 27bed36b4da24a04a909128da6c07a3e3b343b21 Mon Sep 17 00:00:00 2001 From: Joshua Carp Date: Mon, 22 Oct 2018 09:45:37 -0400 Subject: [PATCH 146/163] Optionally install extra airflow and python dependencies. (#232) --- Dockerfile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a4cb3cdec6..9fc4b443e6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,8 @@ ENV TERM linux # Airflow ARG AIRFLOW_VERSION=1.10.0 ARG AIRFLOW_HOME=/usr/local/airflow +ARG AIRFLOW_DEPS="" +ARG PYTHON_DEPS="" ENV AIRFLOW_GPL_UNIDECODE yes # Define en_US. 
@@ -60,8 +62,9 @@ RUN set -ex \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ - && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql]==$AIRFLOW_VERSION \ + && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \ && pip install 'celery[redis]>=4.1.1,<4.2.0' \ + && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi \ && apt-get purge --auto-remove -yqq $buildDeps \ && apt-get autoremove -yqq --purge \ && apt-get clean \ @@ -83,4 +86,4 @@ EXPOSE 8080 5555 8793 USER airflow WORKDIR ${AIRFLOW_HOME} ENTRYPOINT ["/entrypoint.sh"] -CMD ["webserver"] # set default arg for entrypoint \ No newline at end of file +CMD ["webserver"] # set default arg for entrypoint From c5f9f9537578e22f48239175c146ee11dca5f035 Mon Sep 17 00:00:00 2001 From: Joshua Carp Date: Mon, 22 Oct 2018 09:46:00 -0400 Subject: [PATCH 147/163] Drop cython (#239) Pandas has shipped wheels from the pypi for a while, so we don't need to build it from source anymore, which means we don't need to install cython to build it. 
--- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 9fc4b443e6..091a2aece6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -57,7 +57,6 @@ RUN set -ex \ && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \ && useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \ && pip install -U pip setuptools wheel \ - && pip install Cython \ && pip install pytz \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ From f6c73e8e281ab1558c4775ff4dd604c2494cee50 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Mon, 22 Oct 2018 16:15:27 +0200 Subject: [PATCH 148/163] Bump to 1.10.0-3 --- Dockerfile | 2 +- README.md | 9 +++++++-- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 091a2aece6..e377fb8a39 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.10.0-2 +# VERSION 1.10.0-3 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . diff --git a/README.md b/README.md index 9e31c12f46..3d3b325489 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,14 @@ Pull the image from the Docker repository. ## Build -For example, if you need to install [Extra Packages](https://airflow.incubator.apache.org/installation.html#extra-package), edit the Dockerfile and then build it. +Optionally install [Extra Airflow Packages](https://airflow.incubator.apache.org/installation.html#extra-package) and/or python dependencies at build time : - docker build --rm -t puckel/docker-airflow . + docker build --rm --build-arg AIRFLOW_DEPS="datadog,dask" -t puckel/docker-airflow . + docker build --rm --build-arg PYTHON_DEPS="flask_oauthlib>=0.9" -t puckel/docker-airflow . + +or combined + + docker build --rm --build-arg AIRFLOW_DEPS="datadog,dask" --build-arg PYTHON_DEPS="flask_oauthlib>=0.9" -t puckel/docker-airflow . 
Don't forget to update the airflow images in the docker-compose files to puckel/docker-airflow:latest. diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index a8ae82ddd7..6f1f8d7625 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.10.0-2 + image: puckel/docker-airflow:1.10.0-3 restart: always depends_on: - postgres @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.10.0-2 + image: puckel/docker-airflow:1.10.0-3 restart: always depends_on: - redis @@ -55,7 +55,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.10.0-2 + image: puckel/docker-airflow:1.10.0-3 restart: always depends_on: - webserver @@ -74,7 +74,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.10.0-2 + image: puckel/docker-airflow:1.10.0-3 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index a97c1d1a91..496e5fe5ef 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.10.0-2 + image: puckel/docker-airflow:1.10.0-3 restart: always depends_on: - postgres From 74968fc600510c7bc9218c49c75d86d89fec5060 Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Mon, 22 Oct 2018 16:15:56 +0200 Subject: [PATCH 149/163] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 3d3b325489..c4e5afcc63 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ # docker-airflow -[![CircleCI branch](https://img.shields.io/circleci/project/puckel/docker-airflow/master.svg?maxAge=2592000)](https://circleci.com/gh/puckel/docker-airflow/tree/master) [![Docker Build 
Status](https://img.shields.io/docker/build/puckel/docker-airflow.svg)]() [![Docker Hub](https://img.shields.io/badge/docker-ready-blue.svg)](https://hub.docker.com/r/puckel/docker-airflow/) From 66bf591eb0c3d795af1d63b51d7233091a073b5b Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Tue, 23 Oct 2018 10:23:08 +0200 Subject: [PATCH 150/163] Add airflow extras ssh group --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index e377fb8a39..4dada76a48 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,7 +61,7 @@ RUN set -ex \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ - && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \ + && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \ && pip install 'celery[redis]>=4.1.1,<4.2.0' \ && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi \ && apt-get purge --auto-remove -yqq $buildDeps \ From 03eb3b90eee1660c40b6f739004ef00722ed9cc2 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Tue, 23 Oct 2018 10:30:39 +0200 Subject: [PATCH 151/163] Bump to 1.10.0-4 --- .circleci/config.yml | 27 +++++++++++++++++++-------- Dockerfile | 2 +- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 2 +- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 65b1cd9400..384defe40c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -4,16 +4,27 @@ jobs: build: docker: - image: docker:18.06.1-ce-git - working_directory: ~/ + working_directory: ~/CircleCI/docker-airflow steps: - - run: - name: Dependencies - command: docker build -t puckel/docker-airflow . + - checkout + - setup_remote_docker: + docker_layer_caching: true + - run: | + docker build -t puckel/docker-airflow . 
+ test: docker: - image: docker:18.06.1-ce-git - working_directory: ~/ steps: - - run: - name: Test - command: docker run puckel/docker-airflow version |grep '1.10.0' + - setup_remote_docker + - run: | + docker run puckel/docker-airflow version |grep '1.10.0' + +workflows: + version: 2 + build_and_test: + jobs: + - build + - test: + requires: + - build diff --git a/Dockerfile b/Dockerfile index 4dada76a48..c1deb1026f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.10.0-3 +# VERSION 1.10.0-4 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 6f1f8d7625..4f43d63cd0 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.10.0-3 + image: puckel/docker-airflow:1.10.0-4 restart: always depends_on: - postgres @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.10.0-3 + image: puckel/docker-airflow:1.10.0-4 restart: always depends_on: - redis @@ -55,7 +55,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.10.0-3 + image: puckel/docker-airflow:1.10.0-4 restart: always depends_on: - webserver @@ -74,7 +74,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.10.0-3 + image: puckel/docker-airflow:1.10.0-4 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 496e5fe5ef..d2da147ab6 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.10.0-3 + image: puckel/docker-airflow:1.10.0-4 restart: always depends_on: - postgres From 79c6ea7f4e5d309d0c923ee9fde14e5ce4f7eb9e Mon Sep 17 
00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Tue, 23 Oct 2018 10:38:28 +0200 Subject: [PATCH 152/163] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c4e5afcc63..d667e90908 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # docker-airflow +[![CircleCI](https://circleci.com/gh/puckel/docker-airflow/tree/master.svg?style=svg)](https://circleci.com/gh/puckel/docker-airflow/tree/master) [![Docker Build Status](https://img.shields.io/docker/build/puckel/docker-airflow.svg)]() [![Docker Hub](https://img.shields.io/badge/docker-ready-blue.svg)](https://hub.docker.com/r/puckel/docker-airflow/) From e154033fc21e134fe68fa796a14b9159d75f34e1 Mon Sep 17 00:00:00 2001 From: JaviOverflow Date: Thu, 25 Oct 2018 17:58:01 +0200 Subject: [PATCH 153/163] (#118) Added missing packages for mssql integration (#205) --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index c1deb1026f..396b4326d3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,7 @@ ENV LC_MESSAGES en_US.UTF-8 RUN set -ex \ && buildDeps=' \ + freetds-dev \ python3-dev \ libkrb5-dev \ libsasl2-dev \ @@ -41,6 +42,7 @@ RUN set -ex \ && apt-get upgrade -yqq \ && apt-get install -yqq --no-install-recommends \ $buildDeps \ + freetds-bin \ build-essential \ python3-pip \ python3-requests \ From e152be2daf0e0e791b06c9f66d4b493d64f60ba8 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 7 Nov 2018 10:03:15 +0100 Subject: [PATCH 154/163] Remove unnecessary packages --- Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 396b4326d3..893c491284 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,8 +46,6 @@ RUN set -ex \ build-essential \ python3-pip \ python3-requests \ - mysql-client \ - mysql-server \ default-libmysqlclient-dev \ apt-utils \ curl \ From 42bc4ba7e49a20a24d7a49f3ae355a2253546821 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 7 Nov 2018 10:04:22 +0100 Subject: 
[PATCH 155/163] Bump to 1.10.0-5 --- Dockerfile | 2 +- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 893c491284..de3a7e318d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.10.0-4 +# VERSION 1.10.0-5 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 4f43d63cd0..95b32c1dad 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.10.0-4 + image: puckel/docker-airflow:1.10.0-5 restart: always depends_on: - postgres @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.10.0-4 + image: puckel/docker-airflow:1.10.0-5 restart: always depends_on: - redis @@ -55,7 +55,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.10.0-4 + image: puckel/docker-airflow:1.10.0-5 restart: always depends_on: - webserver @@ -74,7 +74,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.10.0-4 + image: puckel/docker-airflow:1.10.0-5 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index d2da147ab6..dea68094b7 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.10.0-4 + image: puckel/docker-airflow:1.10.0-5 restart: always depends_on: - postgres From 52165dc533c29318e041d7e58bee54028d78df4a Mon Sep 17 00:00:00 2001 From: Joshua Carp Date: Wed, 7 Nov 2018 04:12:15 -0500 Subject: [PATCH 156/163] Drop build packages (#262) Now that most scientific packages ship with 
wheels, we shouldn't need to install blas or lapack. --- Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index de3a7e318d..cfe7fb6a76 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,8 +33,6 @@ RUN set -ex \ libsasl2-dev \ libssl-dev \ libffi-dev \ - libblas-dev \ - liblapack-dev \ libpq-dev \ git \ ' \ From 42902d01c61297b0c7559770a8d06f93e5fc7dc0 Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Thu, 22 Nov 2018 10:52:22 +0100 Subject: [PATCH 157/163] Bump to 1.10.1 --- Dockerfile | 9 +++----- config/airflow.cfg | 37 ++++++++++++++++++++++++++----- docker-compose-CeleryExecutor.yml | 8 +++---- docker-compose-LocalExecutor.yml | 2 +- script/entrypoint.sh | 1 + 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/Dockerfile b/Dockerfile index cfe7fb6a76..b46c3a9d80 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.10.0-5 +# VERSION 1.10.1 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . 
@@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.10.0 +ARG AIRFLOW_VERSION=1.10.1 ARG AIRFLOW_HOME=/usr/local/airflow ARG AIRFLOW_DEPS="" ARG PYTHON_DEPS="" @@ -28,7 +28,6 @@ ENV LC_MESSAGES en_US.UTF-8 RUN set -ex \ && buildDeps=' \ freetds-dev \ - python3-dev \ libkrb5-dev \ libsasl2-dev \ libssl-dev \ @@ -42,8 +41,6 @@ RUN set -ex \ $buildDeps \ freetds-bin \ build-essential \ - python3-pip \ - python3-requests \ default-libmysqlclient-dev \ apt-utils \ curl \ @@ -60,7 +57,7 @@ RUN set -ex \ && pip install ndg-httpsclient \ && pip install pyasn1 \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \ - && pip install 'celery[redis]>=4.1.1,<4.2.0' \ + && pip install 'redis>=2.10.5,<3' \ && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi \ && apt-get purge --auto-remove -yqq $buildDeps \ && apt-get autoremove -yqq --purge \ diff --git a/config/airflow.cfg b/config/airflow.cfg index 61018bb03f..1ea3b371c9 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -39,6 +39,7 @@ simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s # we need to escape the curly braces by adding an additional curly brace log_filename_template = {{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log log_processor_filename_template = {{ filename }}.log +dag_processor_manager_log_location = /usr/local/airflow/logs/dag_processor_manager/dag_processor_manager.log # Hostname by providing a path to a callable, which will resolve the hostname hostname_callable = socket:getfqdn @@ -59,6 +60,9 @@ executor = SequentialExecutor # If SqlAlchemy should pool database connections. sql_alchemy_pool_enabled = True +# The encoding for the databases +sql_engine_encoding = utf-8 + # The SqlAlchemy pool size is the maximum number of database connections # in the pool. 0 indicates no limit. 
sql_alchemy_pool_size = 5 @@ -73,6 +77,10 @@ sql_alchemy_pool_recycle = 1800 # disconnects. Setting this to 0 disables retries. sql_alchemy_reconnect_timeout = 300 +# The schema to use for the metadata database +# SqlAlchemy supports databases with the concept of multiple schemas. +sql_alchemy_schema = + # The amount of parallelism as a setting to the executor. This defines # the max number of task instances that should run simultaneously # on this airflow installation @@ -142,6 +150,9 @@ killed_task_cleanup_time = 60 # `airflow trigger_dag -c`, the key-value pairs will override the existing ones in params. dag_run_conf_overrides_params = False +# Worker initialisation check to validate Metadata Database connection +worker_precheck = False + [cli] # In what way should the cli access the API. The LocalClient will use the # database directly, while the json_client will use the api running on the @@ -180,6 +191,9 @@ default_gpus = 0 [hive] # Default mapreduce queue for HiveOperator tasks default_hive_mapred_queue = +# Template for mapred_job_name in HiveOperator, supports the following named parameters: +# hostname, dag_id, task_id, execution_date +mapred_job_name_template = Airflow HiveOperator task for {hostname}.{dag_id}.{task_id}.{execution_date} [webserver] # The base url of your website as airflow cannot guess what domain or @@ -227,7 +241,10 @@ access_logfile = - error_logfile = - # Expose the configuration file in the web server -expose_config = False +# This is only applicable for the flask-admin based web UI (non FAB-based). +# In the FAB-based web UI with RBAC feature, +# access to configuration is controlled by role permissions. 
+expose_config = True # Set to true to turn on authentication: # https://airflow.incubator.apache.org/security.html#web-authentication @@ -387,9 +404,7 @@ run_duration = -1 # after how much time a new DAGs should be picked up from the filesystem min_file_process_interval = 0 -# How many seconds to wait between file-parsing loops to prevent the logs from being spammed. -min_file_parsing_loop_time = 1 - +# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes. dag_dir_list_interval = 300 # How often should stats be printed to the logs @@ -427,6 +442,10 @@ max_threads = 2 authenticate = False +# Turn off scheduler use of cron intervals by setting this to False. +# DAGs submitted manually in the web UI or with trigger_dag will still run. +use_job_schedule = True + [ldap] # set this to ldaps://: uri = @@ -491,7 +510,6 @@ reinit_frequency = 3600 kinit_path = kinit keytab = airflow.keytab - [github_enterprise] api_rev = v3 @@ -506,9 +524,11 @@ elasticsearch_log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number} elasticsearch_end_of_log_mark = end_of_log [kubernetes] -# The repository and tag of the Kubernetes Image for the Worker to Run +# The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run worker_container_repository = worker_container_tag = +worker_container_image_pull_policy = IfNotPresent +worker_dags_folder = # If True (default), worker pods will be deleted upon termination delete_worker_pods = True @@ -562,6 +582,11 @@ gcp_service_account_keys = # It will raise an exception if called from a process not running in a kubernetes environment. in_cluster = True +[kubernetes_node_selectors] +# The Key-value pairs to be given to worker pods. +# The worker pods will be scheduled to the nodes of the specified key-value pairs. +# Should be supplied in the format: key = value + [kubernetes_secrets] # The scheduler mounts the following secrets into your workers as they are launched by the # scheduler. 
You may define as many secrets as needed and the kubernetes launcher will parse the diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 95b32c1dad..3986eaa778 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.10.0-5 + image: puckel/docker-airflow:1.10.1 restart: always depends_on: - postgres @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.10.0-5 + image: puckel/docker-airflow:1.10.1 restart: always depends_on: - redis @@ -55,7 +55,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.10.0-5 + image: puckel/docker-airflow:1.10.1 restart: always depends_on: - webserver @@ -74,7 +74,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.10.0-5 + image: puckel/docker-airflow:1.10.1 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index dea68094b7..2a5c393ab1 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.10.0-5 + image: puckel/docker-airflow:1.10.1 restart: always depends_on: - postgres diff --git a/script/entrypoint.sh b/script/entrypoint.sh index 9085927059..fb3f9ad1ed 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -82,6 +82,7 @@ case "$1" in exec airflow "$@" ;; flower) + sleep 10 exec airflow "$@" ;; version) From 40cfa9451deea16635f532761929d246e6dcc26b Mon Sep 17 00:00:00 2001 From: "Matthieu \"Puckel_\" Roisil" Date: Thu, 22 Nov 2018 10:56:07 +0100 Subject: [PATCH 158/163] Update config.yml --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 384defe40c..48871893ae 100644 --- a/.circleci/config.yml +++ 
b/.circleci/config.yml @@ -18,7 +18,7 @@ jobs: steps: - setup_remote_docker - run: | - docker run puckel/docker-airflow version |grep '1.10.0' + docker run puckel/docker-airflow version |grep '1.10.1' workflows: version: 2 From 024cf2d08d2de1edf63f780c4d60c5d8fc70b265 Mon Sep 17 00:00:00 2001 From: Med Date: Mon, 28 Jan 2019 17:30:55 +0100 Subject: [PATCH 159/163] Bump airflow version (#2) (#304) * Bump Airflow Version to 1.10.2 * Fix CI --- .circleci/config.yml | 26 ++++++++++---------------- Dockerfile | 4 ++-- docker-compose-CeleryExecutor.yml | 8 ++++---- docker-compose-LocalExecutor.yml | 2 +- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 48871893ae..41dc2f61f7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,7 +1,7 @@ version: 2 jobs: - build: + build_and_test: docker: - image: docker:18.06.1-ce-git working_directory: ~/CircleCI/docker-airflow @@ -9,22 +9,16 @@ jobs: - checkout - setup_remote_docker: docker_layer_caching: true - - run: | - docker build -t puckel/docker-airflow . - - test: - docker: - - image: docker:18.06.1-ce-git - steps: - - setup_remote_docker - - run: | - docker run puckel/docker-airflow version |grep '1.10.1' - + - run: + name: Build docker image + command: | + docker build -t puckel/docker-airflow . + - run: + name: Test docker image + command: | + docker run puckel/docker-airflow version |grep '1.10.2' workflows: version: 2 build_and_test: jobs: - - build - - test: - requires: - - build + - build_and_test diff --git a/Dockerfile b/Dockerfile index b46c3a9d80..6a7fa015fa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.10.1 +# VERSION 1.10.2 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . 
@@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.10.1 +ARG AIRFLOW_VERSION=1.10.2 ARG AIRFLOW_HOME=/usr/local/airflow ARG AIRFLOW_DEPS="" ARG PYTHON_DEPS="" diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index 3986eaa778..15643ade14 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.10.1 + image: puckel/docker-airflow:1.10.2 restart: always depends_on: - postgres @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.10.1 + image: puckel/docker-airflow:1.10.2 restart: always depends_on: - redis @@ -55,7 +55,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.10.1 + image: puckel/docker-airflow:1.10.2 restart: always depends_on: - webserver @@ -74,7 +74,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.10.1 + image: puckel/docker-airflow:1.10.2 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 2a5c393ab1..15a36c8de2 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.10.1 + image: puckel/docker-airflow:1.10.2 restart: always depends_on: - postgres From dc54b814a07e0ac6190aee1312938c966935221b Mon Sep 17 00:00:00 2001 From: Gustavo Honorato Nicolau <46831966+Gushono@users.noreply.github.com> Date: Mon, 27 May 2019 13:25:45 -0300 Subject: [PATCH 160/163] Update Dockerfile --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 6a7fa015fa..0a807626f6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -56,6 +56,7 @@ RUN set -ex \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install pyasn1 \ + && pip install 
jsondiff \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \ && pip install 'redis>=2.10.5,<3' \ && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi \ From 43dcfaf446ca84d05224e07952210896985fbd47 Mon Sep 17 00:00:00 2001 From: Gustavo Honorato Nicolau <46831966+Gushono@users.noreply.github.com> Date: Mon, 27 May 2019 13:57:10 -0300 Subject: [PATCH 161/163] Update Dockerfile --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0a807626f6..5fd54d7310 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# VERSION 1.10.2 +# VERSION 1.10.1 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.10.2 +ARG AIRFLOW_VERSION=1.10.1 ARG AIRFLOW_HOME=/usr/local/airflow ARG AIRFLOW_DEPS="" ARG PYTHON_DEPS="" @@ -55,8 +55,8 @@ RUN set -ex \ && pip install pytz \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ - && pip install pyasn1 \ && pip install jsondiff \ + && pip install pyasn1 \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \ && pip install 'redis>=2.10.5,<3' \ && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi \ From 51855b92a747b2d279fe1caecbff9d33b1b00e7d Mon Sep 17 00:00:00 2001 From: Gustavo Honorato Nicolau <46831966+Gushono@users.noreply.github.com> Date: Mon, 27 May 2019 13:59:39 -0300 Subject: [PATCH 162/163] Update entrypoint.sh From 62472af7d37191f34fba23ec22d2f0297bb2a70f Mon Sep 17 00:00:00 2001 From: Gustavo Honorato Nicolau <46831966+Gushono@users.noreply.github.com> Date: Mon, 27 May 2019 16:46:48 -0300 Subject: [PATCH 163/163] Update Dockerfile --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git 
a/Dockerfile b/Dockerfile index 5fd54d7310..977c8e90cc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -56,6 +56,7 @@ RUN set -ex \ && pip install pyOpenSSL \ && pip install ndg-httpsclient \ && pip install jsondiff \ + && pip install deepdiff \ && pip install pyasn1 \ && pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \ && pip install 'redis>=2.10.5,<3' \