and
[and
] 














Using your (ahem) bare-metal, FOSS data-science environment: Airflow's LocalExecutor with access to the Docker daemon socket (/var/run/docker.sock).
Docker Checklist:
Scripts to run tasks inside images:
#!/usr/bin/env bash
# Entry-point script baked into the task image: run the task module with the
# pipeline source tree on PYTHONPATH so intra-pipeline imports resolve.
# NOTE(review): assumes PIPELINE_DIR is set in the image environment — confirm.
PYTHONPATH="$PIPELINE_DIR" python3 "$PIPELINE_DIR"/my_module/my_task.py
def pipeline_docker_task(task_name):
    """Run one pipeline task in a throwaway Docker container, return its logs.

    Used as the python_callable of an Airflow PythonOperator. The container
    name carries a uuid4 suffix so concurrent or retried runs of the same
    task never collide on the container name.
    """
    # NOTE(review): relies on module-level docker_client, docker_run_args and
    # PIPELINE_PREFIX defined elsewhere in this file.
    container_name = "-".join([PIPELINE_PREFIX, task_name, str(uuid.uuid4())])
    run_output = docker_client.containers.run(
        name=container_name,
        command=f"bash -c '/run_{task_name}.sh'",
        **docker_run_args,
    )
    # containers.run returns the container's log output as bytes here
    # (assumes detach is not set in docker_run_args — TODO confirm).
    return run_output.decode("utf-8")
# Node in dag
# Register the task as a DAG node; at execution time Airflow calls
# pipeline_docker_task(TASK_NAME).
pipeline_operator = PythonOperator(
    task_id=TASK_ID,
    python_callable=pipeline_docker_task,
    op_args=[TASK_NAME],
    dag=dag,
)

Kubernetes Checklist:
Scripts to run tasks inside images:
#!/usr/bin/env bash
# Entry-point script baked into the task image: run the task module with the
# pipeline source tree on PYTHONPATH so intra-pipeline imports resolve.
# NOTE(review): assumes PIPELINE_DIR is set in the image environment — confirm.
PYTHONPATH="$PIPELINE_DIR" python3 "$PIPELINE_DIR"/my_module/my_task.py
def pipeline_k8s_operator(task_name, kwargs):
    """Factory for a KubernetesPodOperator that runs one pipeline task.

    Args:
        task_name: Short task identifier; selects the /run_<task_name>.sh
            entry script baked into the image and names both the pod and
            the Airflow task.
        kwargs: Per-task overrides merged over the shared k8s_run_args
            defaults (per-task keys win on collision).

    Returns:
        The configured KubernetesPodOperator, attached to the module-level dag.
    """
    name = f"{PIPELINE_PREFIX}-{task_name}"
    # Fix: dict unpacking already copies, so the original's extra
    # k8s_run_args.copy() built a redundant throwaway dict.
    run_args = {**k8s_run_args, **kwargs}
    run_cmd = f"bash -c '/run_{task_name}.sh'"
    operator = KubernetesPodOperator(
        cmds=["bash", "-cx"],
        arguments=[run_cmd],
        name=name,
        task_id=name,  # pod name and Airflow task_id kept identical
        dag=dag,
        **run_args,
    )
    return operator
# Instantiate the DAG node for this task via the factory, applying the
# task-specific K8S_KWARGS overrides.
pipeline_operator = pipeline_k8s_operator(
    TASK_NAME,
    K8S_KWARGS
)



Bar to beat: 6 months to a year, no dedicated administration
Warning: Speculative