Merge pull request #260 from JaerongA/datajoint_pipeline

Merge main into datajoint_pipeline & apply pre-commit
SainsburyWellcomeCentre · Oct 2, 2023 · 3930229 · 3930229
2 parents 03f81ea + 7e744d3
commit 3930229
Show file tree

Hide file tree

Showing 79 changed files with 1,736 additions and 1,800 deletions.
diff --git a/.flake8 b/.flake8
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
@@ -1,2 +1,3 @@
 # Normalize line endings
-89ea7df05f77475bcfd874f3bccab878d653af6a
+89ea7df05f77475bcfd874f3bccab878d653af6a
+947eb6c1f701050a03d319feee168260f2a485a0
diff --git a/.github/workflows/build_env_run_tests.yml b/.github/workflows/build_env_run_tests.yml
@@ -0,0 +1,89 @@
+# Builds the aeon environment; lints formatting and smells via ruff; checks type annotations via pyright;
+# tests via pytest; reports test coverage via pytest-cov and codecov.
+
+name: build_env_run_tests
+on:
+  pull_request:
+    branches: [ main ]  # only pull requests whose base branch is 'main'
+    types: [opened, reopened, synchronize]  # PR opened, reopened, or its head branch updated
+  workflow_dispatch:  # allows running manually from GitHub's 'Actions' tab
+
+jobs:
+  build_env_pip_pyproject:  # checks only for building env using pip and pyproject.toml
+    name: Build env using pip and pyproject.toml
+    runs-on: ${{ matrix.os }}  # follow the matrix; a fixed 'ubuntu-latest' ran every matrix entry on Linux
+    if: github.event.pull_request.draft == false  # skip draft pull requests
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ["3.11"]  # quoted so YAML never reads a version as a float (3.10 -> 3.1)
+      fail-fast: false  # let the remaining OS jobs finish even if one fails
+    defaults:
+      run:
+        shell: bash -l {0}  # reset shell for each step
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Create venv and install dependencies
+        run: |
+          python -m venv .venv
+          if [ -f .venv/Scripts/activate ]; then source .venv/Scripts/activate; else source .venv/bin/activate; fi  # Windows venvs use Scripts/, others bin/
+          pip install -e ".[dev]"  # quoted so bash cannot glob-expand the brackets
+          pip list
+          python -c "import aeon"  # the activated venv's interpreter; last command decides the step result
+  
+  build_env_run_tests:  # checks for building env using mamba and runs codebase checks and tests
+    name: Build env and run tests on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    if: github.event.pull_request.draft == false  # skip draft pull requests
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ["3.11"]  # quoted so YAML never reads a version as a float (3.10 -> 3.1)
+      fail-fast: false  # let the remaining OS jobs finish even if one fails
+    defaults:
+      run:
+        shell: bash -l {0}  # reset shell for each step
+    steps:
+      - name: checkout repo
+        uses: actions/checkout@v2
+      - name: set up conda env
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          use-mamba: true
+          miniforge-variant: Mambaforge
+          python-version: ${{ matrix.python-version }}
+          environment-file: ./env_config/env.yml
+          activate-environment: aeon
+      - name: Update conda env with dev reqs
+        run: mamba env update -f ./env_config/env_dev.yml
+
+      # Only run codebase checks and tests for ubuntu.
+      - name: ruff
+        if: matrix.os == 'ubuntu-latest'
+        run: python -m ruff check --config ./pyproject.toml .
+      - name: pyright
+        if: matrix.os == 'ubuntu-latest'
+        run: python -m pyright --level error --project ./pyproject.toml .
+      - name: pytest
+        if: matrix.os == 'ubuntu-latest'
+        run: python -m pytest tests/
+
+      - name: generate test coverage report  # runs the test suite a second time, now with coverage enabled
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          python -m pytest --cov=aeon ./tests/ --cov-report=xml:./tests/test_coverage/test_coverage_report.xml
+          #python -m pytest --cov=aeon ./tests/ --cov-report=html:./tests/test_coverage/test_coverage_report_html
+      - name: upload test coverage report to codecov
+        if: matrix.os == 'ubuntu-latest'
+        uses: codecov/codecov-action@v2
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          directory: ./tests/test_coverage/
+          files: test_coverage_report.xml
+          fail_ci_if_error: true  # a failed upload fails the job instead of passing silently
+          verbose: true
diff --git a/.github/workflows/gpt_pr_review.yml b/.github/workflows/gpt_pr_review.yml
@@ -0,0 +1,59 @@
+# Reviews pull requests with OpenAI's GPT API.
+
+name: gpt_pr_review
+permissions:  # scopes granted to this workflow's GITHUB_TOKEN
+  contents: read
+  pull-requests: write  # presumably needed so the reviewer action can comment on the PR
+on:
+  pull_request:
+    branches: [ main ]
+    types: [opened, reopened, synchronize]
+  pull_request_review_comment:
+    branches: [ main ]
+    types: [created]
+concurrency:  # group key is built from repository, PR number (or head ref / SHA), workflow name, and event kind
+  group: >
+    ${{ github.repository }}-${{ github.event.number || github.head_ref || github.sha }}-${{ 
+    github.workflow }}-${{ github.event_name == 'pull_request_review_comment' && 'pr_comment' || 'pr' }}
+  cancel-in-progress: ${{ github.event_name != 'pull_request_review_comment' }}  # cancel superseded PR runs; never cancel review-comment runs
+jobs:
+  gpt_pr_review:  # single job: runs the AI reviewer action with the prompts configured below
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.draft == false  # skip draft pull requests
+    steps:
+      - uses: coderabbitai/ai-pr-reviewer@latest  # NOTE(review): '@latest' is a moving ref; consider pinning to a release tag or commit SHA
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}  # read from a secret named OPENAI_API_KEY
+        with:
+          system_message: >
+            Your purpose is to act as a highly experienced 
+            software engineer and provide a thorough review of the code hunks
+            and suggest code snippets to improve key areas such as:
+            - Logic
+            - Security
+            - Performance
+            - Data races
+            - Consistency
+            - Error handling
+            - Maintainability
+            - Modularity
+            - Complexity
+            - Optimization
+            - Best practices: DRY, SOLID, KISS
+            Identify and resolve significant concerns to improve overall code 
+            quality while deliberately disregarding minor issues.
+          summarize: >
+            Provide your final response in markdown with the following content:
+            - **Walkthrough**: A high-level summary of the overall change instead of 
+            specific files within 100 words.
+            - **Changes**: A markdown table of files and their summaries. Group files 
+            with similar changes together into a single row to save space.
+            Avoid additional commentary as this summary will be added as a comment on the 
+            GitHub pull request. Use the titles "Walkthrough" and "Changes" and they must be H2.
+          summarize_release_notes: >
+            Craft concise release notes for the pull request. 
+            Focus on the purpose and user impact, categorizing changes as New Feature, Bug Fix, 
+            Documentation, Refactor, Style, Test, Chore, or Revert. Provide a bullet-point list, 
+            e.g. "- New Feature: Added search functionality to the UI." Limit your response to 50-100 words 
+            and emphasize features visible to the end-user while omitting code-level details.
diff --git a/.github/workflows/squash_merge_to_prod.yml b/.github/workflows/squash_merge_to_prod.yml
@@ -0,0 +1,28 @@
+# Squash merges commits from PRs to 'main' into 'prod'. 'prod' thus serves as a more
+# readable history of the project, while 'main' retains the full commit history.
+
+name: squash_merge_to_prod
+on:
+  pull_request:
+    branches: [ main ]  # only pull requests whose base branch is 'main'
+    types: [ closed ]  # fires for merged and unmerged closes; the job-level 'if' filters to merged
+jobs:
+  squash_merge_to_prod:
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.merged == true  # only run if PR merged (subset of 'closed')
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v2
+      with:
+        fetch-depth: 0  # full history rather than the action's default shallow clone
+    - name: Configure Git  # identity recorded on the squash commit created below
+      run: |
+        git config user.name "Jai Bhagat"
+        git config user.email "[email protected]"
+    - name: Merge and squash to prod  # NOTE(review): assumes a local 'main' ref exists after checkout; confirm, or use 'origin/main'
+      run: |
+        git checkout prod
+        COMMIT_MSG=$(git log main -1 --pretty=format:%s)  # latest commit message from 'main'
+        git merge --squash -X theirs main
+        git commit -m "$COMMIT_MSG"
+        git push origin prod
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,35 +1,57 @@
+# For info on running pre-commit manually, see `pre-commit run --help`
+
 default_language_version:
-  python: python3.9
+  python: python3.11
 
-default_stages: [commit, push]
-files: "^(docker|aeon\/dj_pipeline)\/.*$"
+files: "^(tests|aeon)\/.*$"  # run hooks only on files under tests/ and aeon/ ('test' never matched the tests/ directory)
 repos:
   - repo: meta
     hooks:
       - id: identity
 
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.1.0
+    rev: v4.4.0
     hooks:
+      - id: check-json  # syntax-check JSON files
       - id: check-yaml
+      - id: check-toml  # syntax-check TOML files
+      - id: check-xml  # syntax-check XML files
+      - id: check-merge-conflict  # reject files that still contain merge-conflict markers
       - id: detect-private-key
       - id: end-of-file-fixer
         exclude: LICENSE
       - id: no-commit-to-branch
+      - id: mixed-line-ending  # flag files that mix line-ending styles
+      - id: trailing-whitespace  # strip trailing whitespace
+        args: [--markdown-linebreak-ext=md]  # but keep trailing-space hard line breaks in .md files
 
   - repo: https://github.com/psf/black
-    rev: 22.1.0
+    rev: 23.7.0
     hooks:
       - id: black
-        args:
-          - "--config"
-          - "./pyproject.toml"
+        args: [--check, --config, ./pyproject.toml]  # --check only reports violations; it does not rewrite files
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.0.286
+    hooks:
+      - id: ruff  # linter; this commit removes the separate isort hook below
+        args: [--config, ./pyproject.toml]  # read ruff settings from the project's pyproject.toml
 
-  - repo: https://github.com/pycqa/isort
-    rev: 5.10.1
+  - repo: https://github.com/RobertCraigie/pyright-python
+    rev: v1.1.324
     hooks:
-      - id: isort
-        name: isort (python)
-        args:
-          - "--settings-file"
-          - "./pyproject.toml"
+      - id: pyright  # static type checker
+        args: [--level, error, --project, ./pyproject.toml]  # same --level/--project flags as the CI pyright step
+
+  # Pytest is expensive, so we show its set-up but leave it commented out.
+  # - repo: local
+  #   hooks:
+  #     - id: pytest-py
+  #       name: pytest-py
+  #       entry: pytest
+  #       language: system
+  #       pass_filenames: false
+  #       args:
+  #         - "-v"
+  #         - "-n"
+  #         - "auto"
diff --git a/.python-version b/.python-version
diff --git a/README.md b/README.md
@@ -1,8 +1,8 @@
 # aeon_mecha
-![aeon_mecha_env_build_and_tests](https://github.com/SainsburyWellcomeCentre/aeon_mecha/actions/workflows/build_env_run_tests.yml/badge.svg?branch=reorg)
-[![aeon_mecha_tests_code_coverage](https://codecov.io/gh/SainsburyWellcomeCentre/aeon_mecha/branch/reorg/graph/badge.svg?token=973EC1CG03)](https://codecov.io/gh/SainsburyWellcomeCentre/aeon_mecha)
+![aeon_mecha_env_build_and_tests](https://github.com/SainsburyWellcomeCentre/aeon_mecha/actions/workflows/build_env_run_tests.yml/badge.svg?branch=main)
+[![aeon_mecha_tests_code_coverage](https://codecov.io/gh/SainsburyWellcomeCentre/aeon_mecha/branch/main/graph/badge.svg?token=973EC1CG03)](https://codecov.io/gh/SainsburyWellcomeCentre/aeon_mecha)
 
-Project Aeon's main repository for manipulating acquired data. Includes preprocessing, querying, plotting, and analysis modules.
+Project Aeon's main repository for manipulating acquired data. Includes modules for loading raw data, performing quality control on raw data, processing raw data, and ingesting processed data into a DataJoint MySQL database.
 
 ## Set-up Instructions
 
@@ -12,52 +12,65 @@ The various set-up tools mentioned below do some combination of python version,
 
 #### Prereqs
 
-1. Ssh into the HPC GW1 node and clone this repo to your home directory.
-
+1. SSH into the HPC and clone this repository to your home directory.
 ```
 ssh <your_SWC_username>@ssh.swc.ucl.ac.uk
-ssh hpc-gw1
 mkdir ~/ProjectAeon
 cd ~/ProjectAeon
 git clone https://github.com/SainsburyWellcomeCentre/aeon_mecha
+cd aeon_mecha
 ```
 
 #### Set-up
 
 Ensure you stay in the `~/ProjectAeon/aeon_mecha` directory for the rest of the set-up instructions, regardless of which set-up procedure you follow below.
 
-[Option 1](./docs/env_setup/remote/miniconda_conda_remote_setup.md): miniconda (python distribution) and conda (python version manager, environment manager, package manager, and package dependency manager)
+[Option 1](./docs/env_setup/remote/miniconda_conda_remote_setup.md): **miniconda** (python distribution) and **conda** (python version manager, environment manager, package manager, and package dependency manager)
 
-[Option 2](./docs/env_setup/remote/pyenv_poetry_remote_setup.md): pyenv (python version manager) and poetry (python environment manager, package manager, and package dependency manager)
+- *Note*: [mamba](https://mamba.readthedocs.io/en/latest/), a faster alternative to conda, is now installed as a module on the HPC, so the above instructions can be followed using 'mamba' instead of 'conda' if you prefer.
 
-[Option 3](./docs/env_setup/remote/pip_venv_remote_setup.md): pip (python package manager) and venv (python environment manager)
+[Option 2](./docs/env_setup/remote/pip_venv_remote_setup.md): **pip** (python package manager) and **venv** (python environment manager)
 
 ### Local set-up
 
 #### Prereqs
 
-1. Install [git](https://git-scm.com/downloads). If you are not familiar with git, just confirm the default settings during installation.
+All commands below should be run in a bash shell (Windows users can use the 'mingw64' terminal that is included when installing git).
 
-2. Clone this repository: create a 'ProjectAeon' directory in your home directory, clone this repository there, and `cd` into the cloned directory:
+1. Clone this repository: create a 'ProjectAeon' directory in your home directory, clone this repository there, and `cd` into the cloned directory:
 ```
 mkdir ~/ProjectAeon
 cd ~/ProjectAeon
-https://github.com/SainsburyWellcomeCentre/aeon_mecha
+git clone https://github.com/SainsburyWellcomeCentre/aeon_mecha
 cd aeon_mecha
 ```
 
 #### Set-up
 
-Ensure you stay in the `~/ProjectAeon/aeon_mecha` directory for the rest of the set-up instructions, regardless of which set-up procedure you follow below. All commands below should be run in a bash terminal (Windows users can use the 'mingw64' terminal that comes installed with git).
+Ensure you stay in the `~/ProjectAeon/aeon_mecha` directory for the rest of the set-up instructions, regardless of which set-up procedure you follow below.
 
-[Option 1](./docs/env_setup/local/miniconda_conda_local_setup.md): miniconda (python distribution) and conda (python version manager, environment manager, package manager, and package dependency manager)
+[Option 1](./docs/env_setup/local/miniconda_conda_local_setup.md): **miniconda** (python distribution) and **conda** (python version manager, environment manager, package manager, and package dependency manager)
 
-[Option 2](./docs/env_setup/local/pyenv_poetry_local_setup.md): pyenv (python version manager) and poetry (python environment manager, package manager, and package dependency manager)
+- *Note*: **mambaforge** and **mamba** can be used as faster, drop-in replacements for 'miniconda' and 'conda', respectively. You can set up the Aeon environment using them, following roughly the same instructions as above. See [here](https://biapol.github.io/blog/mara_lampert/getting_started_with_mambaforge_and_python/readme.html) for more info.
 
-[Option 3](./docs/env_setup/local/pip_venv_local_setup.md): pip (python package manager) and venv (python environment manager)
+[Option 2](./docs/env_setup/local/pip_venv_local_setup.md): **pip** (python package manager) and **venv** (python environment manager)
 
 ## Repository Contents
 
-## Todos
-
-- add to [repository contents](#repository-contents)
+- `.github/workflows/` : GitHub actions workflows for building the environment and running tests 
+- `aeon/` : Source code for the Aeon Python package 
+    - `aeon/dj_pipeline`: Source code for the Aeon DataJoint MySQL database pipeline
+    - `aeon/io`: Source code for loading raw data
+    - `aeon/processing`: Source code for processing raw data
+    - `aeon/qc`: Source code for quality control of raw data
+    - `aeon/schema`: Examples of 'experiment schemas': variables that can be used to load raw data from particular experiments
+- `docker/` : Dockerfiles for building Docker images for the Aeon DataJoint MySQL database pipeline.
+- `docs/` : Documentation for the Aeon project
+    - `docs/devs/` : Documentation for developers
+    - `docs/env_setup/` : Documentation for setting up the Aeon Python environment
+    - `docs/examples/` : Aeon use-case examples
+    - `docs/using_hpc_jupyterhub.md` : Instructions for using Jupyter notebooks to access Aeon data via SWC's HPC
+    - `docs/using_online_dashboard.md` : Instructions for connecting to Aeon's online dashboard
+- `env_config/` : Configuration files for the Aeon Python environment
+- `tests/` : Unit and integration tests
+    - `tests/data` : Data used by tests
diff --git a/aeon/__init__.py b/aeon/__init__.py
@@ -8,3 +8,6 @@
     __version__ = "unknown"
 finally:
     del version, PackageNotFoundError
+
+# Set functions available directly under the 'aeon' top-level namespace
+from aeon.io.api import load