From 31d4b4b81c258cd4d3aa3aeebc9bc4f9810e1285 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 5 May 2025 19:20:01 -0400 Subject: [PATCH] Add some useful promql queries. --- google_cloud_logging_queries/kubernetes/oom | 1 + .../get_requests_for_engines | 1 + .../over_provisioned/cloud_run_job/notes.org | 21 +++++++++++++++++++ metrics_explorer/pod_memory_limit/promql | 1 + 4 files changed, 24 insertions(+) create mode 100644 google_cloud_logging_queries/kubernetes/oom create mode 100644 google_cloud_logging_queries/vertex_ai_agent_builder/get_requests_for_engines create mode 100644 metrics_explorer/over_provisioned/cloud_run_job/notes.org create mode 100644 metrics_explorer/pod_memory_limit/promql diff --git a/google_cloud_logging_queries/kubernetes/oom b/google_cloud_logging_queries/kubernetes/oom new file mode 100644 index 0000000..47f9465 --- /dev/null +++ b/google_cloud_logging_queries/kubernetes/oom @@ -0,0 +1 @@ +"OOMKilling" diff --git a/google_cloud_logging_queries/vertex_ai_agent_builder/get_requests_for_engines b/google_cloud_logging_queries/vertex_ai_agent_builder/get_requests_for_engines new file mode 100644 index 0000000..9f94998 --- /dev/null +++ b/google_cloud_logging_queries/vertex_ai_agent_builder/get_requests_for_engines @@ -0,0 +1 @@ +textPayload=~"HTTP Request: POST https://us-discoveryengine.googleapis.com/[^\s/]+/projects/[^\s/]+/locations/us/collections/[^\s/]+/engines/[^\s/]+/" diff --git a/metrics_explorer/over_provisioned/cloud_run_job/notes.org b/metrics_explorer/over_provisioned/cloud_run_job/notes.org new file mode 100644 index 0000000..882bd7c --- /dev/null +++ b/metrics_explorer/over_provisioned/cloud_run_job/notes.org @@ -0,0 +1,21 @@ +* Metrics Explorer PromQL Queries +** Cloud Run Job Memory Utilization +Each line is the 99th-percentile memory utilization of one job, shown as a percent.
+#+begin_src text + histogram_quantile(0.99,sum by (job_name,le)(increase(run_googleapis_com:container_memory_utilizations_bucket{monitored_resource="cloud_run_job"}[${__interval}]))) +#+end_src +** Memory usage bytes +#+begin_src text + histogram_quantile(0.50,sum_over_time(run_googleapis_com:container_memory_usage_bucket{monitored_resource="cloud_run_job"}[${__interval}])) +#+end_src +** Memory allocation +#+begin_src text + sum by (job_name)(rate(run_googleapis_com:container_memory_allocation_time{monitored_resource="cloud_run_job"}[${__interval}])) +#+end_src +** Difference +#+begin_src text + sum by (job_name)( + rate(run_googleapis_com:container_memory_allocation_time{monitored_resource="cloud_run_job"}[${__interval}]) + - rate(run_googleapis_com:container_memory_usage_bucket{monitored_resource="cloud_run_job"}[${__interval}]) + ) +#+end_src diff --git a/metrics_explorer/pod_memory_limit/promql b/metrics_explorer/pod_memory_limit/promql new file mode 100644 index 0000000..0c8f9d0 --- /dev/null +++ b/metrics_explorer/pod_memory_limit/promql @@ -0,0 +1 @@ +sum by (pod_name)(avg_over_time(kubernetes_io:container_memory_limit_bytes{monitored_resource="k8s_container",container_name=~".*airflow-worker.*"}[${__interval}]))