Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/build-push-dev-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
push:
branches:
- develop
- package-refactor
paths-ignore:
- README.md
- .old_cicd/*
Expand Down Expand Up @@ -83,4 +84,4 @@ jobs:
containers.renci.org/${{ github.repository }}:develop
containers.renci.org/${{ github.repository }}:${{ steps.vars.outputs.short_sha }}
cache-from: type=registry,ref=${{ github.repository }}:buildcache-dev
cache-to: type=registry,ref=${{ github.repository }}:buildcache-dev,mode=max
cache-to: type=registry,ref=${{ github.repository }}:buildcache-dev,mode=max
4 changes: 2 additions & 2 deletions .github/workflows/trivy-pr-scan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:

# We will not be concerned with Medium and Low vulnerabilities
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.33.1
uses: aquasecurity/trivy-action@0.36.0
with:
image-ref: '${{ github.repository }}:vuln-test'
format: 'sarif'
Expand All @@ -64,4 +64,4 @@ jobs:
uses: github/codeql-action/upload-sarif@v3
if: '!cancelled()'
with:
sarif_file: 'trivy-results.sarif'
sarif_file: 'trivy-results.sarif'
17 changes: 14 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Use a Debian-based image for better compatibility
FROM python:3.12-slim-trixie
# FROM dhi.io/python:3.12-debian13-dev

# Set Airflow version and home directory

Expand All @@ -15,7 +16,8 @@ ENV AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@po
ENV PYTHONUNBUFFERED=1

# Create airflow user and directories
RUN useradd --uid 50000 --home-dir ${AIRFLOW_HOME} --create-home airflow && \
RUN groupadd -g 50000 airflow
RUN useradd --uid 50000 --home-dir ${AIRFLOW_HOME} -g 50000 --create-home airflow && \
mkdir -p ${AIRFLOW_HOME}/dags ${AIRFLOW_HOME}/logs ${AIRFLOW_HOME}/plugins ${AIRFLOW_HOME}/config

# Install system dependencies
Expand All @@ -37,7 +39,11 @@ RUN pip install --no-cache-dir --upgrade pip setuptools wheel
RUN pip install --no-cache-dir \
"apache-airflow[postgres,celery,redis,fab]==${AIRFLOW_VERSION}" \
"apache-airflow-providers-cncf-kubernetes" \
--constraint "https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.11.txt"
--constraint "https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.12.txt"

# Fix auth rollback bug.
RUN pip install --no-cache-dir \
"apache-airflow-providers-fab==3.3.0rc1"

# Optional: install extra packages
RUN pip install --no-cache-dir psycopg2-binary redis
Expand All @@ -48,7 +54,8 @@ RUN pip install -r /tmp/requirements.txt

RUN rm /tmp/requirements.txt


# COPY . /opt/roger
# RUN pip install /opt/roger

RUN apt-get purge -y --auto-remove \
build-essential \
Expand All @@ -59,13 +66,17 @@ RUN apt-get purge -y --auto-remove \
git && \
apt-get clean

RUN if [ -n "$ROGER_SOURCE" ]; then pip install -e $ROGER_SOURCE; fi

# Set ownership
RUN chown -R airflow:airflow ${AIRFLOW_HOME}

# Switch to airflow user
USER airflow
WORKDIR ${AIRFLOW_HOME}

ENV PYTHONPATH=/opt/airflow/dags/repo/src/

# Expose Airflow webserver port
EXPOSE 8080

Expand Down
6 changes: 3 additions & 3 deletions bin/dug_annotate/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ clean:
$(RM) -rf ${KGX_DIR}

get_input_files:
$(TIME) python ${CLI_WRAPPER} -gd
$(TIME) roger -gd

annotate_and_normalize:
$(TIME) python ${CLI_WRAPPER} -l
$(TIME) roger -l

create_kgx_files:
$(TIME) python ${CLI_WRAPPER} -t
$(TIME) roger -t

all: get_input_files annotate_and_normalize create_kgx_files
11 changes: 5 additions & 6 deletions bin/dug_indexing/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ THIS_MAKEFILE_PATH:=$(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST))
THIS_DIR:=$(shell cd $(dir $(THIS_MAKEFILE_PATH));pwd)

ROGER_HOME=${THIS_DIR}/../..
CLI_WRAPPER=${ROGER_HOME}/cli.py

# Override Roger data dir ENV
INDEXING_DIR=${ROGERENV_DATA__ROOT}/dug/expanded_concepts
Expand All @@ -34,19 +33,19 @@ clean:
$(RM) -rf ${CRAWL_DIR}

crawl_concepts:
$(TIME) python ${CLI_WRAPPER} -C
$(TIME) roger -C

index_concepts: crawl_concepts
$(TIME) python ${CLI_WRAPPER} -ic
$(TIME) roger -ic

index_variables:
$(TIME) python ${CLI_WRAPPER} -iv
$(TIME) roger -iv

validate_indexed_concepts: index_concepts
$(TIME) python ${CLI_WRAPPER} -vc
$(TIME) roger -vc

validate_indexed_variables: index_variables
$(TIME) python ${CLI_WRAPPER} -vv
$(TIME) roger -vv

all: validate_indexed_concepts validate_indexed_variables

Expand Down
112 changes: 0 additions & 112 deletions cli.py

This file was deleted.

47 changes: 11 additions & 36 deletions dags/annotate_and_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,32 @@
import os

from airflow.models import DAG
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import PythonOperator
from roger.tasks import default_args, create_pipeline_taskgroup, logger, create_python_task
from airflow.providers.standard.operators.empty import EmptyOperator
from roger.tasks import default_args, create_pipeline_taskgroup

env_enabled_datasets = os.getenv(
"ROGER_DUG__INPUTS_DATA__SETS", "topmed,anvil").split(",")

with DAG(
dag_id='annotate_and_index',
default_args=default_args,
# incremental state Variables have no compare-and-swap; serialize runs
max_active_runs=1,
params=
{
"repository_id": None,
"branch_name": None,
"commitid_from": None,
"commitid_to": None
"commitid_to": None,
# diff source refs against the last ingested commit and only
# process new/changed files; set false to force a full run
"incremental": True
},
# schedule_interval=None
) as dag:
init = EmptyOperator(task_id="init", dag=dag)
finish = EmptyOperator(task_id="finish", dag=dag)
finish = EmptyOperator(task_id="finish", dag=dag,
trigger_rule="none_failed")


from roger import pipelines
Expand All @@ -52,35 +57,5 @@

init >> create_pipeline_taskgroup(dag, pipeline_class, config) >> finish




with DAG(
dag_id='dag_test',
default_args=default_args,
params=
{
"repository_id": None,
"branch_name": None,
"commitid_from": None,
"commitid_to": None
},
# schedule_interval=None
) as dag:

init = EmptyOperator(task_id="init", dag=dag)
finish = EmptyOperator(task_id="finish", dag=dag)

def print_context(ds=None, **kwargs):
print(">>>All kwargs")
print(kwargs)
print(">>>All ds")
print(ds)


init >> create_python_task(dag, "get_from_lakefs", print_context) >> finish

#run_this = PythonOperator(task_id="print_the_context", python_callable=print_context)

if __name__ == "__main__":
dag.test()
dag.test()
3 changes: 0 additions & 3 deletions dags/dug_helpers/__init__.py

This file was deleted.

Loading
Loading