infra_snippets/docker/scripts/graph_docker_memory.py

#!/usr/bin/env python
from __future__ import annotations

import json
import logging
import re
import subprocess
from dataclasses import dataclass
from datetime import datetime, timedelta
from time import sleep
from typing import Collection, Final, NewType, Tuple

# Distinct string types so a type checker can tell container ids and
# container names apart; at runtime both are plain ``str``.
ContainerId = NewType("ContainerId", str)
ContainerName = NewType("ContainerName", str)

# Number of seconds slept between two consecutive samples while recording.
SAMPLE_INTERVAL_SECONDS: Final[int] = 2


@dataclass
class Sample:
    """One reading of the stats of every reported container."""

    # Naive local time at which the reading was recorded.
    instant: datetime
    # Stats of each container present in this reading, keyed by container id.
    stats: dict[ContainerId, Stats]


@dataclass
class Stats:
    """Resource usage of a single container at one instant."""

    # Memory used by the container, in bytes.
    memory_usage_bytes: int


def main():
    """Record docker container memory usage and print a gnuplot script for it.

    Blocks until ``take_sample`` reports at least one container, then takes a
    sample every ``SAMPLE_INTERVAL_SECONDS`` seconds until no container is
    reported any more, and finally prints the plot through ``write_plot``.

    The sample that first detects a container is kept as the first recorded
    sample, so the very beginning of the container's life is not lost and a
    container that exits right after being detected still produces a plot.
    """
    logging.basicConfig(level=logging.INFO)
    samples: list[Sample] = []
    labels: dict[ContainerId, ContainerName] = {}

    # First wait for any docker container to exist.
    sample, labels_in_sample = take_sample()
    if not labels_in_sample:
        logging.info("Waiting for a docker container to exist to start recording.")
    while not labels_in_sample:
        sleep(1)
        sample, labels_in_sample = take_sample()

    # And then record memory until no containers exist. The loop body runs at
    # least once because the wait above only ends on a non-empty sample.
    while labels_in_sample:
        samples.append(sample)
        # Names seen in later samples win, matching the previous merge order.
        labels.update(labels_in_sample)
        sleep(SAMPLE_INTERVAL_SECONDS)
        sample, labels_in_sample = take_sample()

    # Draws a red horizontal line at 32 GiB since that is the memory limit for cloud run.
    write_plot(
        samples,
        labels,
        horizontal_lines=[(32 * 1024**3, "red", "Cloud Run Max Memory")],
    )


def write_plot(
    samples: Collection[Sample],
    labels: dict[ContainerId, ContainerName],
    *,
    horizontal_lines: Collection[Tuple[int, str, str | None]] = (),
):
    """Print a gnuplot script (with inline data) graphing memory per container.

    Each container gets its own curve whose x axis starts at 0 at the first
    sample in which that container appears, so curves are aligned on container
    age rather than on wall time.

    Args:
        samples: Recorded samples, in any order.
        labels: Display name for every container id that should be plotted.
            Ids that never occur in ``samples`` are skipped instead of
            aborting the whole plot.
        horizontal_lines: ``(y_value, color, label)`` triples. Each one is
            drawn as a horizontal line at ``y_value`` bytes; ``label`` may be
            ``None`` to draw the line without text.
    """
    # Sort once up front; every later pass relies on chronological order.
    ordered_samples = sorted(samples, key=lambda sample: sample.instant)

    # Because the samples are ordered, the first time an id is seen is its
    # earliest instant.
    starting_time_per_container: dict[ContainerId, datetime] = {}
    for sample in ordered_samples:
        for container_id in sample.stats:
            if container_id in labels:
                starting_time_per_container.setdefault(container_id, sample.instant)

    # Only ids with at least one data point get a curve; the same ordered list
    # drives both the curve definitions and the inline data blocks so they
    # cannot get out of step.
    plotted_ids = [
        container_id
        for container_id in sorted(labels)
        if container_id in starting_time_per_container
    ]

    print(
        """set terminal svg background '#FFFFFF'
set title 'Docker Memory Usage'
set xdata time
set timefmt '%s'
set format x '%tH:%tM:%tS'
# Please note this is in SI units (base 10), not IEC (base 2). So, for example, this would show a Gigabyte, not a Gibibyte.
set format y '%.0s%cB'
set datafile separator "|"
"""
    )
    for y_value, color, label in horizontal_lines:
        print(
            f'''set arrow from graph 0, first {y_value} to graph 1, first {y_value} nohead linewidth 2 linecolor rgb "{color}"'''
        )
        if label is not None:
            print(f"""set label "{label}" at graph 0, first {y_value} offset 1,-0.5""")

    # Include the horizontal lines in the range
    if horizontal_lines:
        print(f"""set yrange [*:{max(x[0] for x in horizontal_lines)}<*]""")

    line_definitions = ", ".join(
        f""""-" using 1:2 title '{gnuplot_escape(labels[container_id])}' with lines"""
        for container_id in plotted_ids
    )
    print("plot", line_definitions)

    # One inline data block per curve, each terminated by a line holding "e".
    for container_id in plotted_ids:
        start_time = int(starting_time_per_container[container_id].timestamp())
        for sample in ordered_samples:
            container_stats = sample.stats.get(container_id)
            if container_stats is None:
                continue
            elapsed_seconds = int(sample.instant.timestamp()) - start_time
            print(f"{elapsed_seconds}|{container_stats.memory_usage_bytes}")
        print("e")


def gnuplot_escape(inp: str) -> str:
    """Return ``inp`` with a backslash inserted before every underscore.

    All other characters are passed through untouched. The result is used as
    a curve title in the generated gnuplot script; presumably the escaping
    stops gnuplot from rendering underscores as subscript markers.
    """
    return inp.replace("_", "\\_")


def take_sample() -> Tuple[Sample, dict[ContainerId, ContainerName]]:
    """Run ``docker stats`` once and turn its output into a ``Sample``.

    Returns:
        A pair ``(sample, labels)``. ``sample`` holds the memory usage of
        every reported container, stamped with the time at which parsing
        finished. ``labels`` maps each of those container ids to its name.
        Both are empty when docker printed no usable rows.

    Raises:
        Exception: propagated from ``parse_mem_usage`` when a ``MemUsage``
            value cannot be parsed.
    """
    completed = subprocess.run(
        ["docker", "stats", "--no-stream", "--no-trunc", "--format", "json"],
        stdout=subprocess.PIPE,
    )

    labels: dict[ContainerId, ContainerName] = {}
    stats: dict[ContainerId, Stats] = {}
    # The output is parsed as one JSON object per line.
    for raw_line in completed.stdout.decode("utf8").splitlines():
        entry = json.loads(raw_line)
        raw_id = entry["ID"]
        if not raw_id:
            # When containers are starting up, they sometimes have no ID and "--" as the name.
            continue
        key = ContainerId(raw_id)
        labels[key] = ContainerName(entry["Name"])
        stats[key] = Stats(memory_usage_bytes=parse_mem_usage(entry["MemUsage"]))

    for container_id, container_stat in stats.items():
        logging.info(
            f"Recorded stat {labels[container_id]}: {container_stat.memory_usage_bytes} bytes"
        )

    return Sample(instant=datetime.now(), stats=stats), labels


def parse_mem_usage(mem_usage: str) -> int:
    """Convert the leading size of a docker ``MemUsage`` string to bytes.

    Only the first ``<number><unit>`` token is read, so a value such as
    ``"1.5GiB / 31.2GiB"`` yields the usage (``1.5GiB``) and ignores the rest.

    Args:
        mem_usage: Text starting with a decimal number immediately followed
            by a binary unit (``B``, ``KiB``, ``MiB``, ``GiB``, ``TiB``, ...).

    Returns:
        The size in bytes, truncated towards zero.

    Raises:
        ValueError: if the text does not start with ``<number><unit>`` or the
            unit is not a known binary unit.
    """
    parsed_mem_usage = re.match(
        r"(?P<number>[0-9]+\.?[0-9]*)(?P<unit>[^\s]+)", mem_usage
    )
    if parsed_mem_usage is None:
        raise ValueError(f"Invalid Mem Usage: {mem_usage}")
    number = float(parsed_mem_usage.group("number"))
    unit = parsed_mem_usage.group("unit")

    # Binary unit series in ascending order; the position in the tuple is the
    # power of 1024 to multiply by.
    units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")
    if unit not in units:
        raise ValueError(f"Unrecognized unit: {unit}")
    return int(number * 1024 ** units.index(unit))


# Start recording only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00			`#!/usr/bin/env python`
			`from __future__ import annotations`
Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00			`import json`
			`import logging`
Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00			`import re`
			`import subprocess`
			`from dataclasses import dataclass`
			`from datetime import datetime, timedelta`
			`from time import sleep`
			`from typing import Collection, Final, NewType, Tuple`
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00
			`ContainerId = NewType("ContainerId", str)`
			`ContainerName = NewType("ContainerName", str)`

			`SAMPLE_INTERVAL_SECONDS: Final[int] = 2`


			`@dataclass`
			`class Sample:`
			`instant: datetime`
			`stats: dict[ContainerId, Stats]`


			`@dataclass`
			`class Stats:`
			`memory_usage_bytes: int`


			`def main():`
			`logging.basicConfig(level=logging.INFO)`
			`samples: list[Sample] = []`
			`labels: dict[ContainerId, ContainerName] = {}`
Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00			`first_pass = True`
Wait for docker containers to exist before starting recording. This enables us to capture the full life of the container since the script can now be kicked off before the container is launched. 2024-10-18 12:32:02 +00:00			`# First wait for any docker container to exist.`
			`while True:`
			`sample, labels_in_sample = take_sample()`
			`if labels_in_sample:`
			`break`
Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00			`if first_pass:`
			`first_pass = False`
			`logging.info("Waiting for a docker container to exist to start recording.")`
			`sleep(1)`
Wait for docker containers to exist before starting recording. This enables us to capture the full life of the container since the script can now be kicked off before the container is launched. 2024-10-18 12:32:02 +00:00			`# And then record memory until no containers exist.`
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00			`while True:`
			`sample, labels_in_sample = take_sample()`
			`if not labels_in_sample:`
			`break`
			`samples.append(sample)`
			`labels = {**labels, **labels_in_sample}`
			`sleep(SAMPLE_INTERVAL_SECONDS)`
			`if labels:`
Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00			`# Draws a red horizontal line at 32 GiB since that is the memory limit for cloud run.`
			`write_plot(`
			`samples,`
			`labels,`
			`horizontal_lines=[(32 * 1024**3, "red", "Cloud Run Max Memory")],`
			`)`
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00

Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00			`def write_plot(`
			`samples: Collection[Sample],`
			`labels: dict[ContainerId, ContainerName],`
			`*,`
			`horizontal_lines: Collection[Tuple[int, str, str \| None]] = [],`
			`):`
			`starting_time_per_container = {`
			`container_id: min(`
			`(sample.instant for sample in samples if container_id in sample.stats)`
			`)`
			`for container_id in labels.keys()`
			`}`
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00			`print(`
			`"""set terminal svg background '#FFFFFF'`
Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00			`set title 'Docker Memory Usage'`
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00			`set xdata time`
			`set timefmt '%s'`
Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00			`set format x '%tH:%tM:%tS'`
			`# Please note this is in SI units (base 10), not IEC (base 2). So, for example, this would show a Gigabyte, not a Gibibyte.`
			`set format y '%.0s%cB'`
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00			`set datafile separator "\|"`
			`"""`
			`)`
Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00			`for y_value, color, label in horizontal_lines:`
			`print(`
			`f'''set arrow from graph 0, first {y_value} to graph 1, first {y_value} nohead linewidth 2 linecolor rgb "{color}"'''`
			`)`
			`if label is not None:`
			`print(f"""set label "{label}" at graph 0, first {y_value} offset 1,-0.5""")`

			`# Include the horizontal lines in the range`
			`if len(horizontal_lines) > 0:`
			`print(f"""set yrange [*:{max(x[0] for x in horizontal_lines)}<*]""")`
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00			`line_definitions = ", ".join(`
			`[`
			`f""""-" using 1:2 title '{gnuplot_escape(name)}' with lines"""`
			`for container_id, name in sorted(labels.items())`
			`]`
			`)`
			`print("plot", line_definitions)`
			`for container_id in sorted(labels.keys()):`
Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00			`start_time = int(starting_time_per_container[container_id].timestamp())`
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00			`for sample in sorted(samples, key=lambda x: x.instant):`
			`if container_id in sample.stats:`
			`print(`
			`"\|".join(`
			`[`
Wait for docker container to start, draw horizontal lines, use 0-based start time for containers instead of real wall time, and add a graph title. 2024-10-18 21:22:54 +00:00			`str(int((sample.instant).timestamp()) - start_time),`
Add a script that graphs memory usage of docker containers over time. 2024-10-17 23:41:33 +00:00			`str(sample.stats[container_id].memory_usage_bytes),`
			`]`
			`)`
			`)`
			`print("e")`


			`def gnuplot_escape(inp: str) -> str:`
			`out = ""`
			`for c in inp:`
			`if c == "_":`
			`out += "\\"`
			`out += c`
			`return out`


			`def take_sample() -> Tuple[Sample, dict[ContainerId, ContainerName]]:`
			`labels: dict[ContainerId, ContainerName] = {}`
			`stats: dict[ContainerId, Stats] = {}`
			`docker_inspect = subprocess.run(`
			`["docker", "stats", "--no-stream", "--no-trunc", "--format", "json"],`
			`stdout=subprocess.PIPE,`
			`)`
			`for container_stat in (`
			`json.loads(l) for l in docker_inspect.stdout.decode("utf8").splitlines()`
			`):`
			`if not container_stat["ID"]:`
			`# When containers are starting up, they sometimes have no ID and "--" as the name.`
			`continue`
			`labels[ContainerId(container_stat["ID"])] = ContainerName(`
			`container_stat["Name"]`
			`)`
			`memory_usage = parse_mem_usage(container_stat["MemUsage"])`
			`stats[ContainerId(container_stat["ID"])] = Stats(`
			`memory_usage_bytes=memory_usage`
			`)`
			`for container_id, container_stat in stats.items():`
			`logging.info(`
			`f"Recorded stat {labels[container_id]}: {container_stat.memory_usage_bytes} bytes"`
			`)`
			`return Sample(instant=datetime.now(), stats=stats), labels`


			`def parse_mem_usage(mem_usage: str) -> int:`
			`parsed_mem_usage = re.match(`
			`r"(?P<number>[0-9]+\.?[0-9]*)(?P<unit>[^\s]+)", mem_usage`
			`)`
			`if parsed_mem_usage is None:`
			`raise Exception(f"Invalid Mem Usage: {mem_usage}")`
			`number = float(parsed_mem_usage.group("number"))`
			`unit = parsed_mem_usage.group("unit")`
			`for multiplier, identifier in enumerate(["B", "KiB", "MiB", "GiB", "TiB"]):`
			`if unit == identifier:`
			`return int(number * (1024**multiplier))`
			`raise Exception(f"Unrecognized unit: {unit}")`


			`if __name__ == "__main__":`
			`main()`