{
"metrics": [
{
"name": "RequestCount",
"unit": "Count",
"value": 1,
"dimensions": ["type", "statusCategory"],
"filter": { "type": ["http_request", "service_call", "page_view"] }
},
{
"name": "Latency",
"unit": "Milliseconds",
"value": "ms",
"dimensions": ["type"],
"filter": { "type": ["http_request", "service_call"] }
},
{
"name": "WorkerDemand",
"unit": "Count",
"value": "demand",
"dimensions": [],
"filter": { "type": ["worker_scaling"] }
},
{
"name": "WorkerCapacity",
"unit": "Count",
"value": "totalCapacity",
"dimensions": [],
"filter": { "type": ["worker_scaling"] }
},
{
"name": "WorkflowJobCount",
"unit": "Count",
"value": 1,
"dimensions": ["workflowEvent", "statusCategory"],
"filter": { "type": ["workflow_job"] }
},
{
"name": "WorkflowJobDuration",
"unit": "Milliseconds",
"value": "ms",
"dimensions": ["workflowEvent"],
"filter": { "type": ["workflow_job"] }
},
{
"name": "WorkflowStepCount",
"unit": "Count",
"value": 1,
"dimensions": ["workflowEvent", "statusCategory"],
"filter": { "type": ["workflow_step"] }
}
],
"dimensions": {
"type": {
"field": "type",
"buckets": {
"HTTP Request": "http_request",
"Service Call": "service_call",
"Page View": "page_view",
"Workflow Job": "workflow_job",
"Workflow Step": "workflow_step"
}
},
"statusCategory": {
"field": "status",
"buckets": {
"Success": [200, 399],
"Warning": [400, 499],
"Error": [500, 599]
}
},
"workflowEvent": {
"field": "event",
"buckets": {
"Started": "started",
"Completed": "completed",
"Failed": "failed",
"Retry": "retry"
}
}
},
"charts": [
{
"title": "Request Count by Status",
"metric": "RequestCount",
"stat": "Sum",
"chartType": "bar",
"splitBy": "statusCategory"
},
{
"title": "Request Count by Type",
"metric": "RequestCount",
"stat": "Sum",
"chartType": "line",
"splitBy": "type"
},
{
"title": "Error Rate",
"metric": "RequestCount",
"stat": "Sum",
"chartType": "line",
"splitBy": "statusCategory",
"color": "error",
"transform": {
"type": "percentage",
"numerator": ["Error"],
"denominator": ["Success", "Warning", "Error"]
}
},
{
"title": "Average Latency by Type",
"metric": "Latency",
"stat": "Average",
"chartType": "line",
"splitBy": "type"
},
{
"title": "P95 Latency by Type",
"metric": "Latency",
"stat": "p95",
"chartType": "line",
"splitBy": "type"
},
{
"title": "Max Latency by Type",
"metric": "Latency",
"stat": "Maximum",
"chartType": "line",
"splitBy": "type"
},
{
"title": "App CPU Utilization",
"metric": "CPUUtilization",
"stat": "Average",
"chartType": "line",
"unit": "Percent",
"tab": "infrastructure",
"namespace": "AWS/ECS",
"fixedDimensions": { "ClusterName": "${CLUSTER_NAME}", "ServiceName": "${APP_SERVICE_NAME}" }
},
{
"title": "App Memory Utilization",
"metric": "MemoryUtilization",
"stat": "Average",
"chartType": "line",
"unit": "Percent",
"tab": "infrastructure",
"namespace": "AWS/ECS",
"fixedDimensions": { "ClusterName": "${CLUSTER_NAME}", "ServiceName": "${APP_SERVICE_NAME}" }
},
{
"title": "App Task Count",
"metric": "RunningTaskCount",
"stat": "Average",
"chartType": "line",
"unit": "Count",
"tab": "infrastructure",
"namespace": "ECS/ContainerInsights",
"fixedDimensions": { "ClusterName": "${CLUSTER_NAME}", "ServiceName": "${APP_SERVICE_NAME}" },
"overlayMetrics": [{ "metric": "DesiredTaskCount", "label": "Desired" }],
"seriesStyles": { "Desired": { "borderDash": [6, 3] } }
},
{
"title": "Worker CPU Utilization",
"metric": "CPUUtilization",
"stat": "Average",
"chartType": "line",
"unit": "Percent",
"tab": "infrastructure",
"namespace": "AWS/ECS",
"fixedDimensions": { "ClusterName": "${CLUSTER_NAME}", "ServiceName": "${WORKER_SERVICE_NAME}" }
},
{
"title": "Worker Memory Utilization",
"metric": "MemoryUtilization",
"stat": "Average",
"chartType": "line",
"unit": "Percent",
"tab": "infrastructure",
"namespace": "AWS/ECS",
"fixedDimensions": { "ClusterName": "${CLUSTER_NAME}", "ServiceName": "${WORKER_SERVICE_NAME}" }
},
{
"title": "Worker Task Count",
"metric": "RunningTaskCount",
"stat": "Average",
"chartType": "line",
"unit": "Count",
"tab": "infrastructure",
"namespace": "ECS/ContainerInsights",
"fixedDimensions": { "ClusterName": "${CLUSTER_NAME}", "ServiceName": "${WORKER_SERVICE_NAME}" },
"overlayMetrics": [{ "metric": "DesiredTaskCount", "label": "Desired" }],
"seriesStyles": { "Desired": { "borderDash": [6, 3] } }
},
{
"title": "Worker Demand vs Capacity",
"metric": "WorkerDemand",
"stat": "Maximum",
"chartType": "line",
"unit": "Count",
"tab": "infrastructure",
"overlayMetrics": [{ "metric": "WorkerCapacity", "label": "Capacity" }],
"seriesStyles": { "Capacity": { "borderDash": [6, 3] } }
},
{
"title": "Workflow Jobs by Status",
"metric": "WorkflowJobCount",
"stat": "Sum",
"chartType": "bar",
"splitBy": "statusCategory",
"tab": "workflows"
},
{
"title": "Workflow Jobs by Event",
"metric": "WorkflowJobCount",
"stat": "Sum",
"chartType": "line",
"splitBy": "workflowEvent",
"tab": "workflows"
},
{
"title": "Workflow Job Failure Rate",
"metric": "WorkflowJobCount",
"stat": "Sum",
"chartType": "line",
"splitBy": "statusCategory",
"color": "error",
"tab": "workflows",
"transform": {
"type": "percentage",
"numerator": ["Error"],
"denominator": ["Success", "Error"]
}
},
{
"title": "Workflow Job Duration (P95)",
"metric": "WorkflowJobDuration",
"stat": "p95",
"chartType": "line",
"splitBy": "workflowEvent",
"tab": "workflows"
},
{
"title": "Workflow Steps by Status",
"metric": "WorkflowStepCount",
"stat": "Sum",
"chartType": "bar",
"splitBy": "statusCategory",
"tab": "workflows"
},
{
"title": "Workflow Steps by Event",
"metric": "WorkflowStepCount",
"stat": "Sum",
"chartType": "line",
"splitBy": "workflowEvent",
"tab": "workflows"
}
]
}