from __future__ import annotations from collections import deque from datetime import datetime, timezone from logging import getLogger import json from typing import Callable, Final, Iterator from pydantic import ValidationError import requests from log_stream.log_entry import create_log_entry, LogEntry BATCH_SIZE: Final[int] = 1000 REQ_TIMEOUT: Final[int] = 60 logger = getLogger("LogStream") class LogStream: _query: str _project: str _start_date: datetime _end_date: datetime _buffer: deque[dict] _finished: bool _next_page_token: str | None _session: requests.Session _access_token: Callable[[], str] def __init__( self, query: str, project: str, start_date: datetime, end_date: datetime | None, access_token: Callable[[], str], ) -> None: self._query = query self._project = project self._start_date = start_date self._end_date = ( end_date if end_date is not None else datetime.now(timezone.utc) ) self._buffer = deque(maxlen=BATCH_SIZE) self._finished = False self._next_page_token = None self._session = requests.Session() self._access_token = access_token def parsed(self) -> ParsedLogStream: return ParsedLogStream(self) def __iter__(self) -> Iterator[dict]: return self def __next__(self) -> dict: if len(self._buffer) == 0 and not self._finished: self._populate_buffer() if len(self._buffer) == 0: self._finished = True raise StopIteration() return self._buffer.popleft() def _populate_buffer(self) -> None: # https://cloud.google.com/logging/docs/reference/v2/rest/v2/entries/list data = { "filter": "\n".join((self._get_timestamp_filter(), self._query)), "orderBy": "timestamp desc", "pageSize": BATCH_SIZE, "resourceNames": [f"projects/{self._project}"], } if self._next_page_token is not None: data["pageToken"] = self._next_page_token headers = { "Authorization": f"Bearer {self._access_token()}", "x-goog-user-project": self._project, } req = requests.Request( "POST", "https://logging.googleapis.com/v2/entries:list", data=data, headers=headers, ) prepared = self._session.prepare_request(req) logger.info(f"Sending a request to {prepared.url}") response = self._session.send( prepared, timeout=REQ_TIMEOUT, ) response.raise_for_status() response_body = response.json() if (token := response_body.get("nextPageToken")) is not None: self._next_page_token = token else: self._finished = True self._next_page_token = None self._buffer.extend(response_body.get("entries", [])) def _get_timestamp_filter(self) -> str: time_format = "%Y-%m-%dT%H:%M:%S.%fZ" self._start_date.isoformat() # TODO: convert start/end date to UTC start = self._start_date.strftime(time_format) end = self._end_date.strftime(time_format) return f""" timestamp>="{start}" timestamp<="{end}" """ class ParsedLogStream: _inner: LogStream def __init__(self, inner: LogStream) -> None: self._inner = inner def __iter__(self) -> Iterator[LogEntry]: return self def __next__(self) -> LogEntry: raw = next(self._inner) try: return create_log_entry(raw) except ValidationError as e: logger.error("Failed to parse log message: %s", json.dumps(raw)) raise