diff --git a/infrastructure/terraform/modules/sqs/README.md b/infrastructure/terraform/modules/sqs/README.md
index 12a2f40..61bb29c 100644
--- a/infrastructure/terraform/modules/sqs/README.md
+++ b/infrastructure/terraform/modules/sqs/README.md
@@ -18,7 +18,10 @@
| [create\_dlq](#input\_create\_dlq) | Create a DLQ | `bool` | `false` | no |
| [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | `{}` | no |
| [delay\_seconds](#input\_delay\_seconds) | Time in seconds that the delivery of all messages in the queue will be delayed. An integer from 0 to 900 (15 minutes). | `number` | `0` | no |
+| [dlq\_alarm\_config](#input\_dlq\_alarm\_config) | Object of optional CloudWatch alarm settings for the DLQ messages alarm | <pre>object({<br/>    comparison_operator = optional(string, "GreaterThanThreshold")<br/>    evaluation_periods  = optional(number, 1)<br/>    period              = optional(number, 300)<br/>    statistic           = optional(string, "Sum")<br/>    threshold           = optional(number, 0)<br/>    actions_enabled     = optional(bool, true)<br/>    treat_missing_data  = optional(string, "notBreaching")<br/>  })</pre> | `{}` | no |
| [dlq\_message\_retention\_seconds](#input\_dlq\_message\_retention\_seconds) | The number of seconds Amazon SQS retains a message on the DLQ. Integer representing seconds, from 60 (1 minute) to 1209600 (14 days) | `number` | `1209600` | no |
+| [enable\_dlq\_alarm](#input\_enable\_dlq\_alarm) | Create a CloudWatch alarm when messages are visible in the DLQ | `bool` | `true` | no |
+| [enable\_queue\_oldest\_message\_alarm](#input\_enable\_queue\_oldest\_message\_alarm) | Create a CloudWatch alarm when the oldest visible message age breaches the configured threshold on the main queue | `bool` | `true` | no |
| [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes |
| [fifo\_queue](#input\_fifo\_queue) | Boolean designating a FIFO queue | `bool` | `false` | no |
| [kms\_data\_key\_reuse\_period\_seconds](#input\_kms\_data\_key\_reuse\_period\_seconds) | The length of time, in seconds, for which Amazon SQS can reuse a data key to encrypt or decrypt messages before calling AWS KMS again. An integer representing seconds, between 60 seconds (1 minute) and 86,400 seconds (24 hours) | `number` | `300` | no |
@@ -27,6 +30,7 @@
| [message\_retention\_seconds](#input\_message\_retention\_seconds) | The number of seconds Amazon SQS retains a message. Integer representing seconds, from 60 (1 minute) to 1209600 (14 days) | `number` | `null` | no |
| [name](#input\_name) | Name of the SQS Queue | `string` | n/a | yes |
| [project](#input\_project) | The name of the tfscaffold project | `string` | n/a | yes |
+| [queue\_oldest\_message\_alarm\_config](#input\_queue\_oldest\_message\_alarm\_config) | Object of optional CloudWatch alarm settings for the main queue oldest message age alarm | <pre>object({<br/>    comparison_operator = optional(string, "GreaterThanThreshold")<br/>    evaluation_periods  = optional(number, 1)<br/>    period              = optional(number, 300)<br/>    statistic           = optional(string, "Maximum")<br/>    threshold           = optional(number, 300)<br/>    actions_enabled     = optional(bool, true)<br/>    treat_missing_data  = optional(string, "notBreaching")<br/>  })</pre> | `{}` | no |
| [region](#input\_region) | The AWS Region | `string` | n/a | yes |
| [sqs\_kms\_key\_arn](#input\_sqs\_kms\_key\_arn) | ARN of the KMS key to encrypt SQS queue messages | `string` | n/a | yes |
| [sqs\_policy\_overload](#input\_sqs\_policy\_overload) | Optional additional policy to extend the SQS Resource Policy | `string` | `""` | no |
diff --git a/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_dlq_messages.tf b/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_dlq_messages.tf
new file mode 100644
index 0000000..facee8a
--- /dev/null
+++ b/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_dlq_messages.tf
@@ -0,0 +1,21 @@
+resource "aws_cloudwatch_metric_alarm" "dlq_messages" {
+  count = (var.create_dlq && var.enable_dlq_alarm) ? 1 : 0 # created only when the DLQ is requested AND its alarm is enabled
+
+  alarm_name        = "${local.csi}-dlq-messages-alarm"
+  alarm_description = "RELIABILITY: Alarm for messages in the DLQ"
+  actions_enabled   = var.dlq_alarm_config.actions_enabled # NOTE(review): no alarm_actions are set in this block - confirm how state changes are routed
+  tags              = local.default_tags
+
+  # Metric under watch: visible-message count of this module's dead-letter queue ([0] presumes it is count-gated on create_dlq - confirm).
+  namespace   = "AWS/SQS"
+  metric_name = "ApproximateNumberOfMessagesVisible"
+  dimensions  = { QueueName = aws_sqs_queue.deadletter_queue[0].name }
+
+  # Evaluation settings: every value below is read from var.dlq_alarm_config.
+  statistic           = var.dlq_alarm_config.statistic
+  period              = var.dlq_alarm_config.period
+  evaluation_periods  = var.dlq_alarm_config.evaluation_periods
+  comparison_operator = var.dlq_alarm_config.comparison_operator
+  threshold           = var.dlq_alarm_config.threshold
+  treat_missing_data  = var.dlq_alarm_config.treat_missing_data
+}
diff --git a/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_queue_oldest_message.tf b/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_queue_oldest_message.tf
new file mode 100644
index 0000000..c00b420
--- /dev/null
+++ b/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_queue_oldest_message.tf
@@ -0,0 +1,21 @@
+resource "aws_cloudwatch_metric_alarm" "queue_oldest_message" {
+  count = var.enable_queue_oldest_message_alarm ? 1 : 0 # single on/off switch; the main queue is referenced without an index
+
+  alarm_name        = "${local.csi}-queue-oldest-message-alarm"
+  alarm_description = "RELIABILITY: Alarm for old messages in the queue"
+  actions_enabled   = var.queue_oldest_message_alarm_config.actions_enabled # NOTE(review): no alarm_actions are set in this block - confirm how state changes are routed
+  tags              = local.default_tags
+
+  # Metric under watch: age of the oldest message on the main queue.
+  namespace   = "AWS/SQS"
+  metric_name = "ApproximateAgeOfOldestMessage"
+  dimensions  = { QueueName = aws_sqs_queue.sqs_queue.name }
+
+  # Evaluation settings: every value below is read from var.queue_oldest_message_alarm_config.
+  statistic           = var.queue_oldest_message_alarm_config.statistic
+  period              = var.queue_oldest_message_alarm_config.period
+  evaluation_periods  = var.queue_oldest_message_alarm_config.evaluation_periods
+  comparison_operator = var.queue_oldest_message_alarm_config.comparison_operator
+  threshold           = var.queue_oldest_message_alarm_config.threshold
+  treat_missing_data  = var.queue_oldest_message_alarm_config.treat_missing_data
+}
diff --git a/infrastructure/terraform/modules/sqs/variables.tf b/infrastructure/terraform/modules/sqs/variables.tf
index f7ca0f6..8cfa8bc 100644
--- a/infrastructure/terraform/modules/sqs/variables.tf
+++ b/infrastructure/terraform/modules/sqs/variables.tf
@@ -117,6 +117,46 @@ variable "create_dlq" {
default = false
}
+variable "enable_dlq_alarm" {
+  description = "Create a CloudWatch alarm when messages are visible in the DLQ"
+  type        = bool
+  default     = true # on by default; the dlq_messages alarm additionally requires var.create_dlq to be true
+}
+
+variable "dlq_alarm_config" {
+  description = "Object of optional CloudWatch alarm settings for the DLQ messages alarm"
+  type = object({
+    comparison_operator = optional(string, "GreaterThanThreshold") # with threshold 0 below: alarm as soon as the statistic is above zero
+    evaluation_periods  = optional(number, 1)
+    period              = optional(number, 300)
+    statistic           = optional(string, "Sum") # NOTE(review): Sum adds up every datapoint in the period; fine for threshold 0, consider "Maximum" if the threshold is raised - confirm
+    threshold           = optional(number, 0)
+    actions_enabled     = optional(bool, true)
+    treat_missing_data  = optional(string, "notBreaching")
+  })
+  default = {} # every attribute is optional with its own default, so an empty object is a complete configuration
+}
+
+variable "enable_queue_oldest_message_alarm" {
+  description = "Create a CloudWatch alarm when the oldest visible message age breaches the configured threshold on the main queue"
+  type        = bool
+  default     = true # on by default; set to false to skip creating the queue_oldest_message alarm
+}
+
+variable "queue_oldest_message_alarm_config" {
+  description = "Object of optional CloudWatch alarm settings for the main queue oldest message age alarm"
+  type = object({
+    comparison_operator = optional(string, "GreaterThanThreshold")
+    evaluation_periods  = optional(number, 1)
+    period              = optional(number, 300)
+    statistic           = optional(string, "Maximum") # the alarm compares the largest value reported in each period
+    threshold           = optional(number, 300)       # presumably seconds, the unit of ApproximateAgeOfOldestMessage - confirm
+    actions_enabled     = optional(bool, true)
+    treat_missing_data  = optional(string, "notBreaching")
+  })
+  default = {} # every attribute is optional with its own default, so an empty object is a complete configuration
+}
+
variable "max_receive_count" {
description = "The maximum number of times a message can be received before being sent to the DLQ"
type = number