NOTE(review): line breaks and indentation in this patch were reconstructed from a
whitespace-collapsed copy. Blank lines were placed to satisfy each hunk's declared
line counts; verify context whitespace against the original commit before applying.

diff --git a/server/api/views/uploadFile/test_title.py b/server/api/views/uploadFile/test_title.py
index 69979620..0ec9e1bc 100644
--- a/server/api/views/uploadFile/test_title.py
+++ b/server/api/views/uploadFile/test_title.py
@@ -53,11 +53,11 @@ def test_falls_back_to_first_page_text_if_metadata_title_does_not_match_regex(se
         expected_title = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia"
         self.assertEqual(expected_title, title.generate_title(doc))
 
-    @patch("api.services.openai_services.openAIServices.openAI")
+    @patch("api.views.uploadFile.title.openAIServices.openAI")
     def test_falls_back_to_chatgpt_if_no_title_found(self, mock_openAI):
         doc = MagicMock()
         doc.metadata = {"title": None}
-        doc.get_text.return_value = []
+        doc[0].get_text.return_value = []
 
         mock_response = MagicMock()
         mock_response.choices = [MagicMock()]
@@ -67,3 +67,34 @@ def test_falls_back_to_chatgpt_if_no_title_found(self, mock_openAI):
         title.generate_title(doc)
 
         self.assertTrue(mock_openAI.called)
+
+    @patch("api.views.uploadFile.title.openAIServices.openAI")
+    def test_strips_quotes_from_openai_title(self, mock_openAI):
+        doc = MagicMock()
+        doc.metadata = {"title": None}
+        doc[0].get_text.return_value = []
+
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = '"Updated CANMAT/ISBD Guidelines for Treating Mixed Features in Bipolar Disorder"'
+        mock_openAI.return_value = mock_response
+
+        result = title.generate_title(doc)
+
+        self.assertEqual(result, "Updated CANMAT/ISBD Guidelines for Treating Mixed Features in Bipolar Disorder")
+
+    @patch("api.views.uploadFile.title.openAIServices.openAI")
+    def test_truncates_long_openai_title(self, mock_openAI):
+        doc = MagicMock()
+        doc.metadata = {"title": None}
+        doc[0].get_text.return_value = []
+
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "A" * 300
+        mock_openAI.return_value = mock_response
+
+        result = title.generate_title(doc)
+
+        # Ensure the title is truncated to fit the UploadFile model's title field (max_length=255), since OpenAI responses may exceed this limit
+        self.assertLessEqual(len(result), 255)
diff --git a/server/api/views/uploadFile/title.py b/server/api/views/uploadFile/title.py
index 06e0ce0c..17f52a74 100644
--- a/server/api/views/uploadFile/title.py
+++ b/server/api/views/uploadFile/title.py
@@ -58,4 +58,6 @@ def summarize_pdf(pdf: fitz.Document) -> str:
     prompt = "Please provide a title for this document. The title should be less than 256 characters and will be displayed on a webpage."
     response = openAIServices.openAI(
         first_page_content, prompt, model='gpt-4o', temp=0.0)
-    return response.choices[0].message.content
+    title = response.choices[0].message.content.strip().strip('"').strip("'")
+    # Truncate to fit UploadFile model's max_length=255 title field as a final safeguard
+    return title[:255]
diff --git a/server/api/views/uploadFile/views.py b/server/api/views/uploadFile/views.py
index 69dfb996..58bd8752 100644
--- a/server/api/views/uploadFile/views.py
+++ b/server/api/views/uploadFile/views.py
@@ -12,6 +12,9 @@ import fitz
 from django.db import transaction
 from .title import generate_title
 
+import logging
+
+logger = logging.getLogger(__name__)
 
 
 class UploadFileView(APIView):
@@ -124,6 +127,7 @@ def post(self, request, format=None):
                 )
         except Exception as e:
             # Handle potential errors
+            logger.exception("File upload failed for '%s': %s", uploaded_file.name, e)
             return Response({"message": f"Error processing file and embeddings: {str(e)}"},
                             status=status.HTTP_400_BAD_REQUEST)
 