diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 0d2d2979..1fd11aa4 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -122,6 +122,11 @@ for a list of environment variables that you'll need to populate for your deploy - `AWS_*` and `DJANGO_STORAGE_BUCKET_NAME` are used to make sure the application can connect to your S3 bucket - `DJANGO_BATAI_NABAT_API_URL` (optional): the location of the NABat GraphQL endpoint used to retrieve information about files in NABat. +- `DJANGO_BATAI_SAVE_SPECTROGRAM_CONTOURS` (optional, default `false`): controls whether Celery + spectrogram tasks (recording upload and NABat import pipelines) extract contours from compressed + spectrogram masks and save them to `PulseMetadata.contours`. When `false` or unset, contour + extraction is skipped and stored contours are empty, which lowers DB storage size. Set to + `true` if you need pulse contour data (e.g. the spectrogram contour overlay in the client). - `VITE_APP_API_ROUTE`: this tells the Vue application where the backend (Django) API can be found. - `DJANGO_BATAI_URL_PATH`: this allows the Django application to be mounted at a subpath in a URL. It is used by the Django application itself and the nginx configuration at nginx.subpath.template diff --git a/README.md b/README.md index bb258b7a..e4a60f2b 100644 --- a/README.md +++ b/README.md @@ -189,3 +189,11 @@ Lastly, the GitLab CI/CD infrastructure runs the same `pre-commit` configuration on all pipelines for new MRs. The automated checks in GitLab are optional, but it is highly recommended to perform these checks locally prior to pushing new commits. + + + +### Spectrogram contours + +Spectrogram processing tasks honor the `DJANGO_BATAI_SAVE_SPECTROGRAM_CONTOURS` environment variable. 
+The variable defaults to `False`, so workers skip contour extraction (less DB storage); set it to `True` if you need +contours in the UI (the contours UI is currently disabled for performance reasons). diff --git a/bats_ai/core/tasks/nabat/tasks.py b/bats_ai/core/tasks/nabat/tasks.py index bf66fbda..9382dcfa 100644 --- a/bats_ai/core/tasks/nabat/tasks.py +++ b/bats_ai/core/tasks/nabat/tasks.py @@ -5,6 +5,7 @@ import tempfile from typing import TYPE_CHECKING +from django.conf import settings from django.contrib.gis.geos import LineString, Point, Polygon import requests @@ -63,7 +64,8 @@ def generate_spectrograms( nabat_recording, spectrogram, compressed ) segment_index_map = {} - for segment in compressed["contours"]["segments"]: + contour_segments = compressed.get("contours", {}).get("segments", []) + for segment in contour_segments: pulse_metadata_obj, _ = PulseMetadata.objects.get_or_create( recording=compressed_obj.recording, index=segment["segment_index"], @@ -83,16 +85,19 @@ def generate_spectrograms( segment_index_map[segment["segment_index"]] = pulse_metadata_obj for segment in compressed["segments"]: if segment["segment_index"] not in segment_index_map: + defaults = { + "curve": LineString([Point(x[1], x[0]) for x in segment["curve_hz_ms"]]), + "char_freq": Point(segment["char_freq_ms"], segment["char_freq_hz"]), + "knee": Point(segment["knee_ms"], segment["knee_hz"]), + "heel": Point(segment["heel_ms"], segment["heel_hz"]), + "slopes": segment.get("slopes"), + } + if not settings.BATAI_SAVE_SPECTROGRAM_CONTOURS: + defaults["contours"] = [] PulseMetadata.objects.update_or_create( recording=compressed_obj.recording, index=segment["segment_index"], - defaults={ - "curve": LineString([Point(x[1], x[0]) for x in segment["curve_hz_ms"]]), - "char_freq": Point(segment["char_freq_ms"], segment["char_freq_hz"]), - "knee": Point(segment["knee_ms"], segment["knee_hz"]), - "heel": Point(segment["heel_ms"], segment["heel_hz"]), - "slopes": segment.get("slopes"), - }, + 
defaults=defaults, ) else: pulse_metadata_obj = segment_index_map[segment["segment_index"]] @@ -107,6 +112,8 @@ def generate_spectrograms( slopes = segment.get("slopes") if slopes: pulse_metadata_obj.slopes = slopes + if not settings.BATAI_SAVE_SPECTROGRAM_CONTOURS: + pulse_metadata_obj.contours = [] pulse_metadata_obj.save() processing_task.status = ProcessingTask.Status.COMPLETE diff --git a/bats_ai/core/tasks/tasks.py b/bats_ai/core/tasks/tasks.py index f5e4b9ed..58df55d3 100644 --- a/bats_ai/core/tasks/tasks.py +++ b/bats_ai/core/tasks/tasks.py @@ -7,6 +7,7 @@ from django.contrib.contenttypes.models import ContentType from django.contrib.gis.geos import LineString, Point, Polygon +from django.conf import settings from django.core.files import File from bats_ai.celery import app @@ -149,7 +150,8 @@ def recording_compute_spectrogram(self, recording_id: int): ) # Create SpectrogramContour objects for each segment segment_index_map = {} - for segment in compressed["contours"]["segments"]: + contour_segments = compressed.get("contours", {}).get("segments", []) + for segment in contour_segments: pulse_metadata_obj, _ = PulseMetadata.objects.update_or_create( recording=compressed_obj.recording, index=segment["segment_index"], @@ -169,18 +171,21 @@ def recording_compute_spectrogram(self, recording_id: int): segment_index_map[segment["segment_index"]] = pulse_metadata_obj for segment in compressed["segments"]: if segment["segment_index"] not in segment_index_map: + defaults = { + "curve": LineString( + [Point(x[1], x[0]) for x in segment["curve_hz_ms"]] + ), + "char_freq": Point(segment["char_freq_ms"], segment["char_freq_hz"]), + "knee": Point(segment["knee_ms"], segment["knee_hz"]), + "heel": Point(segment["heel_ms"], segment["heel_hz"]), + "slopes": segment.get("slopes"), + } + if not settings.BATAI_SAVE_SPECTROGRAM_CONTOURS: + defaults["contours"] = [] PulseMetadata.objects.update_or_create( recording=compressed_obj.recording, index=segment["segment_index"], - 
defaults={ - "curve": LineString( - [Point(x[1], x[0]) for x in segment["curve_hz_ms"]] - ), - "char_freq": Point(segment["char_freq_ms"], segment["char_freq_hz"]), - "knee": Point(segment["knee_ms"], segment["knee_hz"]), - "heel": Point(segment["heel_ms"], segment["heel_hz"]), - "slopes": segment.get("slopes"), - }, + defaults=defaults, ) else: pulse_metadata_obj = segment_index_map[segment["segment_index"]] @@ -195,6 +200,8 @@ def recording_compute_spectrogram(self, recording_id: int): slopes = segment.get("slopes") if slopes: pulse_metadata_obj.slopes = slopes + if not settings.BATAI_SAVE_SPECTROGRAM_CONTOURS: + pulse_metadata_obj.contours = [] pulse_metadata_obj.save() if processing_task: diff --git a/bats_ai/core/utils/batbot_metadata.py b/bats_ai/core/utils/batbot_metadata.py index 7d71121a..c6ea2536 100644 --- a/bats_ai/core/utils/batbot_metadata.py +++ b/bats_ai/core/utils/batbot_metadata.py @@ -14,6 +14,7 @@ "Spectrogram generation requires additional dependencies specified by the [tasks] extra." 
) from exc +from django.conf import settings from pydantic import BaseModel, ConfigDict, Field, field_validator from .contour_utils import process_spectrogram_assets_for_contours @@ -398,7 +399,9 @@ def generate_spectrogram_assets(recording_path: str, output_folder: str): }, } - contour_segments_data = process_spectrogram_assets_for_contours(result) - result["compressed"]["contours"] = contour_segments_data + if settings.BATAI_SAVE_SPECTROGRAM_CONTOURS: + result["compressed"]["contours"] = process_spectrogram_assets_for_contours(result) + else: + result["compressed"]["contours"] = {"segments": [], "total_segments": 0} return result diff --git a/bats_ai/settings/base.py b/bats_ai/settings/base.py index 2250d7a4..f6f767ce 100644 --- a/bats_ai/settings/base.py +++ b/bats_ai/settings/base.py @@ -101,6 +101,10 @@ "DJANGO_BATAI_NABAT_API_URL", default="https://api.sciencebase.gov/nabat-graphql/graphql" ) +# DJANGO_BATAI_SAVE_SPECTROGRAM_CONTOURS: when false (default), spectrogram tasks skip contour +# extraction and store empty PulseMetadata.contours. +BATAI_SAVE_SPECTROGRAM_CONTOURS: bool = env.bool("DJANGO_BATAI_SAVE_SPECTROGRAM_CONTOURS", default=False) + # Django's docs suggest that STATIC_URL should be a relative path, # for convenience serving a site on a subpath. STATIC_URL = "static/"