diff --git a/infrastructure/terraform/modules/sqs/README.md b/infrastructure/terraform/modules/sqs/README.md index 12a2f40..61bb29c 100644 --- a/infrastructure/terraform/modules/sqs/README.md +++ b/infrastructure/terraform/modules/sqs/README.md @@ -18,7 +18,10 @@ | [create\_dlq](#input\_create\_dlq) | Create a DLQ | `bool` | `false` | no | | [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | `{}` | no | | [delay\_seconds](#input\_delay\_seconds) | Time in seconds that the delivery of all messages in the queue will be delayed. An integer from 0 to 900 (15 minutes). | `number` | `0` | no | +| [dlq\_alarm\_config](#input\_dlq\_alarm\_config) | Object of optional CloudWatch alarm settings for the DLQ messages alarm |
object({
comparison_operator = optional(string, "GreaterThanThreshold")
evaluation_periods = optional(number, 1)
period = optional(number, 300)
statistic = optional(string, "Sum")
threshold = optional(number, 0)
actions_enabled = optional(bool, true)
treat_missing_data = optional(string, "notBreaching")
})
| `{}` | no | | [dlq\_message\_retention\_seconds](#input\_dlq\_message\_retention\_seconds) | The number of seconds Amazon SQS retains a message on the DLQ. Integer representing seconds, from 60 (1 minute) to 1209600 (14 days) | `number` | `1209600` | no | +| [enable\_dlq\_alarm](#input\_enable\_dlq\_alarm) | Create a CloudWatch alarm when messages are visible in the DLQ | `bool` | `true` | no | +| [enable\_queue\_oldest\_message\_alarm](#input\_enable\_queue\_oldest\_message\_alarm) | Create a CloudWatch alarm when the oldest visible message age breaches the configured threshold on the main queue | `bool` | `true` | no | | [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes | | [fifo\_queue](#input\_fifo\_queue) | Boolean designating a FIFO queue | `bool` | `false` | no | | [kms\_data\_key\_reuse\_period\_seconds](#input\_kms\_data\_key\_reuse\_period\_seconds) | The length of time, in seconds, for which Amazon SQS can reuse a data key to encrypt or decrypt messages before calling AWS KMS again. An integer representing seconds, between 60 seconds (1 minute) and 86,400 seconds (24 hours) | `number` | `300` | no | @@ -27,6 +30,7 @@ | [message\_retention\_seconds](#input\_message\_retention\_seconds) | The number of seconds Amazon SQS retains a message. Integer representing seconds, from 60 (1 minute) to 1209600 (14 days) | `number` | `null` | no | | [name](#input\_name) | Name of the SQS Queue | `string` | n/a | yes | | [project](#input\_project) | The name of the tfscaffold project | `string` | n/a | yes | +| [queue\_oldest\_message\_alarm\_config](#input\_queue\_oldest\_message\_alarm\_config) | Object of optional CloudWatch alarm settings for the main queue oldest message age alarm |
object({
comparison_operator = optional(string, "GreaterThanThreshold")
evaluation_periods = optional(number, 1)
period = optional(number, 300)
statistic = optional(string, "Maximum")
threshold = optional(number, 300)
actions_enabled = optional(bool, true)
treat_missing_data = optional(string, "notBreaching")
})
| `{}` | no | | [region](#input\_region) | The AWS Region | `string` | n/a | yes | | [sqs\_kms\_key\_arn](#input\_sqs\_kms\_key\_arn) | ARN of the KMS key to encrypt SQS queue messages | `string` | n/a | yes | | [sqs\_policy\_overload](#input\_sqs\_policy\_overload) | Optional additional policy to extend the SQS Resource Policy | `string` | `""` | no | diff --git a/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_dlq_messages.tf b/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_dlq_messages.tf new file mode 100644 index 0000000..facee8a --- /dev/null +++ b/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_dlq_messages.tf @@ -0,0 +1,21 @@ +resource "aws_cloudwatch_metric_alarm" "dlq_messages" { + count = var.create_dlq && var.enable_dlq_alarm ? 1 : 0 + + alarm_name = "${local.csi}-dlq-messages-alarm" + alarm_description = "RELIABILITY: Alarm for messages in the DLQ" + comparison_operator = var.dlq_alarm_config.comparison_operator + evaluation_periods = var.dlq_alarm_config.evaluation_periods + metric_name = "ApproximateNumberOfMessagesVisible" + namespace = "AWS/SQS" + period = var.dlq_alarm_config.period + statistic = var.dlq_alarm_config.statistic + threshold = var.dlq_alarm_config.threshold + actions_enabled = var.dlq_alarm_config.actions_enabled + treat_missing_data = var.dlq_alarm_config.treat_missing_data + + dimensions = { + QueueName = aws_sqs_queue.deadletter_queue[0].name + } + + tags = local.default_tags +} diff --git a/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_queue_oldest_message.tf b/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_queue_oldest_message.tf new file mode 100644 index 0000000..c00b420 --- /dev/null +++ b/infrastructure/terraform/modules/sqs/cloudwatch_metric_alarm_queue_oldest_message.tf @@ -0,0 +1,21 @@ +resource "aws_cloudwatch_metric_alarm" "queue_oldest_message" { + count = var.enable_queue_oldest_message_alarm ? 1 : 0 + + alarm_name = "${local.csi}-queue-oldest-message-alarm" + alarm_description = "RELIABILITY: Alarm for old messages in the queue" + comparison_operator = var.queue_oldest_message_alarm_config.comparison_operator + evaluation_periods = var.queue_oldest_message_alarm_config.evaluation_periods + metric_name = "ApproximateAgeOfOldestMessage" + namespace = "AWS/SQS" + period = var.queue_oldest_message_alarm_config.period + statistic = var.queue_oldest_message_alarm_config.statistic + threshold = var.queue_oldest_message_alarm_config.threshold + actions_enabled = var.queue_oldest_message_alarm_config.actions_enabled + treat_missing_data = var.queue_oldest_message_alarm_config.treat_missing_data + + dimensions = { + QueueName = aws_sqs_queue.sqs_queue.name + } + + tags = local.default_tags +} diff --git a/infrastructure/terraform/modules/sqs/variables.tf b/infrastructure/terraform/modules/sqs/variables.tf index f7ca0f6..8cfa8bc 100644 --- a/infrastructure/terraform/modules/sqs/variables.tf +++ b/infrastructure/terraform/modules/sqs/variables.tf @@ -117,6 +117,46 @@ variable "create_dlq" { default = false } +variable "enable_dlq_alarm" { + description = "Create a CloudWatch alarm when messages are visible in the DLQ" + type = bool + default = true +} + +variable "dlq_alarm_config" { + description = "Object of optional CloudWatch alarm settings for the DLQ messages alarm" + type = object({ + comparison_operator = optional(string, "GreaterThanThreshold") + evaluation_periods = optional(number, 1) + period = optional(number, 300) + statistic = optional(string, "Sum") + threshold = optional(number, 0) + actions_enabled = optional(bool, true) + treat_missing_data = optional(string, "notBreaching") + }) + default = {} +} + +variable "enable_queue_oldest_message_alarm" { + description = "Create a CloudWatch alarm when the oldest visible message age breaches the configured threshold on the main queue" + type = bool + default = true +} + +variable "queue_oldest_message_alarm_config" { + description = "Object of optional CloudWatch alarm settings for the main queue oldest message age alarm" + type = object({ + comparison_operator = optional(string, "GreaterThanThreshold") + evaluation_periods = optional(number, 1) + period = optional(number, 300) + statistic = optional(string, "Maximum") + threshold = optional(number, 300) + actions_enabled = optional(bool, true) + treat_missing_data = optional(string, "notBreaching") + }) + default = {} +} + variable "max_receive_count" { description = "The maximum number of times a message can be received before being sent to the DLQ" type = number