9.7 KiB
Graph Docker Container Memory Usage with Gnuplot
Sometimes it can be useful to build a graph of docker memory usage over time. For example, I was recently working on reducing the maximum memory of a long-running script. There certainly are heavy and complex options out there like setting up Prometheus and configuring docker to export metrics to it but I threw together a small python script, using only the python standard library, that outputs gnuplot code to render a graph.
Usage
Invoke the python script before starting any docker containers. Then, once a docker container is started, the script will start recording memory usage. Any additional docker containers that are started while the script is running will also get recorded. When no docker containers are left, the script will export gnuplot code over stdout that can then be rendered into a graph.
Each container will get its own line on the graph. All containers will have their start time aligned with the left-hand side of the graph as if they had started at the same time (so the X-axis is the number of seconds the docker container has been running, as opposed to the wall time).
If you'd like, you can insert a horizontal line at whatever memory quantity you'd like by uncommenting the `horizontal_lines` argument in the call to `write_plot` inside `main`.
Example Invocation
$ ./graph_docker_memory.py | gnuplot > graph.svg
INFO:root:Waiting for a docker container to exist to start recording.
INFO:root:Recorded stat jovial_chandrasekhar: 528384 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 528384 bytes
INFO:root:Recorded stat exciting_bohr: 512000 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 516096 bytes
INFO:root:Recorded stat exciting_bohr: 512000 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 561152 bytes
INFO:root:Recorded stat exciting_bohr: 512000 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 561152 bytes
INFO:root:Recorded stat exciting_bohr: 4866441 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 561152 bytes
INFO:root:Recorded stat exciting_bohr: 3166699 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 561152 bytes
INFO:root:Recorded stat exciting_bohr: 3128950 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 8568963 bytes
INFO:root:Recorded stat exciting_bohr: 3128950 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 8528068 bytes
INFO:root:Recorded stat exciting_bohr: 3128950 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 8528068 bytes
INFO:root:Recorded stat exciting_bohr: 32547799 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 8528068 bytes
INFO:root:Recorded stat exciting_bohr: 4329570 bytes
INFO:root:Recorded stat jovial_chandrasekhar: 8528068 bytes
You can also throw `tee` into the pipeline to keep a copy of the generated gnuplot source:
./graph_docker_memory.py | tee graph.gnuplot | gnuplot > graph.svg
Output
The output from the above run would be:
And the gnuplot source:
set terminal svg background '#FFFFFF'
set title 'Docker Memory Usage'
set xdata time
set timefmt '%s'
set format x '%tH:%tM:%tS'
# Please note this is in SI units (base 10), not IEC (base 2). So, for example, this would show a Gigabyte, not a Gibibyte.
set format y '%.0s%cB'
set datafile separator "|"
plot "-" using 1:2 title 'exciting\_bohr' with lines, "-" using 1:2 title 'jovial\_chandrasekhar' with lines
0|512000
4|512000
9|512000
13|4866441
18|3166699
23|3128950
27|3128950
32|3128950
35|32547799
40|4329570
e
0|528384
5|516096
9|561152
14|561152
18|561152
23|561152
28|8568963
32|8528068
37|8528068
40|8528068
45|8528068
e
The script
#!/usr/bin/env python
from __future__ import annotations
import json
import logging
import re
import subprocess
from dataclasses import dataclass
from datetime import datetime
from time import sleep
from typing import Collection, Final, NewType, Tuple
# Distinct string types so container IDs and container names cannot be mixed
# up by a type checker. Values come from the "ID" and "Name" fields of the
# `docker stats` output parsed in take_sample().
ContainerId = NewType("ContainerId", str)
ContainerName = NewType("ContainerName", str)
# Seconds main() sleeps between two consecutive samples while recording.
SAMPLE_INTERVAL_SECONDS: Final[int] = 2
@dataclass
class Sample:
    """One reading of every container that was visible at a single moment."""

    # When the reading was taken.
    instant: datetime
    # Per-container measurements, keyed by container ID.
    stats: dict[ContainerId, Stats]
@dataclass
class Stats:
    """Measurements recorded for a single container in one sample."""

    # Memory in use by the container, in bytes.
    memory_usage_bytes: int
def main():
    """Record docker container memory usage and print a gnuplot script.

    Polls ``take_sample`` once per second until at least one container is
    reported, then records a sample every ``SAMPLE_INTERVAL_SECONDS`` seconds
    until no container is reported any more. Finally the collected samples
    are handed to ``write_plot``, which prints the gnuplot source to stdout.
    Progress messages go through ``logging`` so they do not mix with the
    gnuplot output.
    """
    logging.basicConfig(level=logging.INFO)
    samples: list[Sample] = []
    labels: dict[ContainerId, ContainerName] = {}

    # First wait for any docker container to exist.
    first_pass = True
    while True:
        sample, labels_in_sample = take_sample()
        if labels_in_sample:
            break
        if first_pass:
            # Only announce the wait once instead of on every poll.
            first_pass = False
            logging.info("Waiting for a docker container to exist to start recording.")
        sleep(1)

    # And then record memory until no containers exist. The sample that
    # detected the first container is recorded too, so a short-lived
    # container is not lost and every logged reading ends up on the graph.
    while labels_in_sample:
        samples.append(sample)
        labels.update(labels_in_sample)
        sleep(SAMPLE_INTERVAL_SECONDS)
        sample, labels_in_sample = take_sample()

    if labels:
        # Uncomment horizontal_lines to draw a red horizontal line at 32 GiB,
        # since that is the memory limit for cloud run.
        write_plot(
            samples,
            labels,
            # horizontal_lines=[(32 * 1024**3, "red", "Cloud Run Max Memory")],
        )
def write_plot(
    samples: Collection[Sample],
    labels: dict[ContainerId, ContainerName],
    *,
    horizontal_lines: Collection[Tuple[int, str, str | None]] = (),
):
    """Print a gnuplot script that graphs memory usage per container.

    The script is written to stdout. Each container in ``labels`` that has at
    least one sample gets its own line; its X values are the whole seconds
    elapsed since that container's first sample, so all lines start at the
    left edge of the graph.

    Args:
        samples: Recorded samples, in any order.
        labels: Container names keyed by container ID; used for line titles.
        horizontal_lines: Optional ``(y_value, color, label)`` tuples. Each
            draws a horizontal line at ``y_value`` in the given gnuplot rgb
            color, with a text label unless ``label`` is ``None``.
    """
    # Sort once; every container's series is read in chronological order.
    ordered_samples = sorted(samples, key=lambda sample: sample.instant)

    # Collect (instant, bytes) points per container, in container ID order.
    # Containers without any sample are skipped so that the plot command and
    # the inline data blocks below always line up one-to-one.
    series: dict[ContainerId, list] = {}
    for container_id in sorted(labels):
        points = [
            (sample.instant, sample.stats[container_id].memory_usage_bytes)
            for sample in ordered_samples
            if container_id in sample.stats
        ]
        if points:
            series[container_id] = points

    print(
        """set terminal svg background '#FFFFFF'
set title 'Docker Memory Usage'
set xdata time
set timefmt '%s'
set format x '%tH:%tM:%tS'
# Please note this is in SI units (base 10), not IEC (base 2). So, for example, this would show a Gigabyte, not a Gibibyte.
set format y '%.0s%cB'
set datafile separator "|"
"""
    )

    for y_value, color, label in horizontal_lines:
        print(
            f'''set arrow from graph 0, first {y_value} to graph 1, first {y_value} nohead linewidth 2 linecolor rgb "{color}"'''
        )
        if label is not None:
            print(f"""set label "{label}" at graph 0, first {y_value} offset 1,-0.5""")

    # Include the horizontal lines in the range
    if horizontal_lines:
        print(f"""set yrange [*:{max(x[0] for x in horizontal_lines)}<*]""")

    line_definitions = ", ".join(
        f""""-" using 1:2 title '{gnuplot_escape(labels[container_id])}' with lines"""
        for container_id in series
    )
    print("plot", line_definitions)

    # One inline data block per plotted line, each terminated by "e".
    for points in series.values():
        # Points are chronological, so the first one is the container's start.
        start_time = int(points[0][0].timestamp())
        for instant, memory_usage_bytes in points:
            print(f"{int(instant.timestamp()) - start_time}|{memory_usage_bytes}")
        print("e")
def gnuplot_escape(inp: str) -> str:
    """Return ``inp`` with a backslash inserted before every underscore.

    Used for plot titles, where the example output shows container names
    such as ``exciting_bohr`` rendered as ``exciting\\_bohr``.
    """
    return inp.replace("_", "\\_")
def take_sample() -> Tuple[Sample, dict[ContainerId, ContainerName]]:
    """Take one memory reading of every container reported by docker.

    Runs ``docker stats --no-stream --no-trunc --format json`` and parses
    each line of its output as a JSON object, reading the ``ID``, ``Name``
    and ``MemUsage`` fields.

    Returns:
        A ``Sample`` timestamped after the command finished, and a mapping
        of container ID to container name for the containers in the sample.
    """
    completed = subprocess.run(
        ["docker", "stats", "--no-stream", "--no-trunc", "--format", "json"],
        stdout=subprocess.PIPE,
    )

    names_by_id: dict[ContainerId, ContainerName] = {}
    stats_by_id: dict[ContainerId, Stats] = {}
    for line in completed.stdout.decode("utf8").splitlines():
        record = json.loads(line)
        raw_id = record["ID"]
        if not raw_id:
            # When containers are starting up, they sometimes have no ID and "--" as the name.
            continue
        container_id = ContainerId(raw_id)
        names_by_id[container_id] = ContainerName(record["Name"])
        stats_by_id[container_id] = Stats(
            memory_usage_bytes=parse_mem_usage(record["MemUsage"])
        )

    # Log only after every line parsed successfully.
    for container_id, container_stats in stats_by_id.items():
        logging.info(
            f"Recorded stat {names_by_id[container_id]}: {container_stats.memory_usage_bytes} bytes"
        )

    return Sample(instant=datetime.now(), stats=stats_by_id), names_by_id
def parse_mem_usage(mem_usage: str) -> int:
    """Convert the leading quantity of a ``MemUsage`` string to bytes.

    Only the first ``<number><unit>`` token at the start of the string is
    read; anything after the first whitespace is ignored.

    Args:
        mem_usage: Text such as ``"1.5MiB / 2GiB"``.

    Returns:
        The quantity in bytes, truncated to an integer.

    Raises:
        Exception: If the string does not start with a number followed by a
            unit, or the unit is not one of B, KiB, MiB, GiB, TiB.
    """
    match = re.match(r"(?P<number>[0-9]+\.?[0-9]*)(?P<unit>[^\s]+)", mem_usage)
    if match is None:
        raise Exception(f"Invalid Mem Usage: {mem_usage}")

    # Power of 1024 that each supported unit stands for.
    exponent_by_unit = {"B": 0, "KiB": 1, "MiB": 2, "GiB": 3, "TiB": 4}
    unit = match.group("unit")
    if unit not in exponent_by_unit:
        raise Exception(f"Unrecognized unit: {unit}")

    quantity = float(match.group("number"))
    return int(quantity * (1024 ** exponent_by_unit[unit]))
# Run the recorder only when executed as a script, not when imported.
if __name__ == "__main__":
    main()