# Example message:
# {"time": "2021-07-20T05:05:47", "service": "foo", "log": "bar"}

variable "project" {
  description = "Project ID."
  type        = string
}

variable "region" {
  description = "Region."
  type        = string
}

variable "service_cloudkms" {
  description = "cloudkms service."
}

data "google_project" "project" {
  project_id = var.project
}

#################### IAM ##################################

# Allow the Pub/Sub service agent to encrypt/decrypt with the CMEK keys below.
resource "google_project_iam_binding" "pubsub_kms" {
  project = var.project
  role    = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
  members = [
    "serviceAccount:service-${data.google_project.project.number}@gcp-sa-pubsub.iam.gserviceaccount.com"
  ]
}

#################### KMS ##################################

# Random suffix for the key ring name: KMS key rings cannot be deleted,
# so recreating the stack needs a fresh name each time.
resource "random_id" "bigquery_etl_keyring" {
  byte_length = 4
}

resource "google_kms_crypto_key" "topic_key" {
  name     = "bigquery-etl-topic-key"
  key_ring = google_kms_key_ring.bigquery_etl_keyring.id
}

resource "google_kms_crypto_key" "dataflow_key" {
  name     = "bigquery-etl-dataflow-key"
  key_ring = google_kms_key_ring.bigquery_etl_keyring.id
}

resource "google_kms_key_ring" "bigquery_etl_keyring" {
  project  = var.project
  name     = "bigquery-etl-keyring-${random_id.bigquery_etl_keyring.hex}"
  location = var.region

  lifecycle {
    #prevent_destroy = true
  }

  depends_on = [var.service_cloudkms]
}

#################### PubSub ###############################

resource "google_pubsub_topic" "bigquery_etl" {
  project      = var.project
  name         = "bigquery-etl"
  kms_key_name = google_kms_crypto_key.topic_key.id

  depends_on = [google_project_iam_binding.pubsub_kms]
}

resource "google_pubsub_subscription" "bigquery_etl" {
  project = var.project
  name    = "bigquery-etl-sub"
  topic   = google_pubsub_topic.bigquery_etl.name

  # 20 minutes
  message_retention_duration = "1200s"
  retain_acked_messages      = false
  ack_deadline_seconds       = 20

  # An empty ttl means the subscription never expires.
  expiration_policy {
    ttl = ""
  }

  retry_policy {
    minimum_backoff = "10s"
    maximum_backoff = "600s"
  }
}

#################### Dataflow #############################

resource "google_project_service" "dataflow" {
  project                    = var.project
  service                    = "dataflow.googleapis.com"
  disable_dependent_services = true
}

resource "random_id" "temp_storage" {
  byte_length = 4
}

resource "google_storage_bucket" "temp_storage" {
  project       = var.project
  name          = "${var.project}-etl-temp-${random_id.temp_storage.hex}"
  # Assumption: newer google provider versions require an explicit location.
  location      = var.region
  force_destroy = true
}

resource "google_dataflow_job" "etl_job" {
  project           = var.project
  name              = "bigquery-etl-job"
  region            = var.region
  template_gcs_path = "gs://dataflow-templates-us-central1/latest/PubSub_Subscription_to_BigQuery"
  temp_gcs_location = "${google_storage_bucket.temp_storage.url}/temp"

  enable_streaming_engine = true
  max_workers             = 3

  # Can't use kms key with streaming mode :-(
  # kms_key_name = google_kms_crypto_key.dataflow_key.name

  parameters = {
    inputSubscription = google_pubsub_subscription.bigquery_etl.id
    outputTableSpec   = "${google_bigquery_table.pubsub_etl.project}:${google_bigquery_table.pubsub_etl.dataset_id}.${google_bigquery_table.pubsub_etl.table_id}"
  }

  additional_experiments = [
    "enable_streaming_engine",
    "enable_windmill_service"
  ]
}

#################### BigQuery #############################

resource "google_bigquery_dataset" "pubsub_etl" {
  project       = var.project
  dataset_id    = "pubsub_etl"
  friendly_name = "PubSub ETL"
  description   = "Dataset where PubSub ETL data goes."
  location      = "US"
}

resource "google_bigquery_table" "pubsub_etl" {
  project             = var.project
  dataset_id          = google_bigquery_dataset.pubsub_etl.dataset_id
  table_id            = "pubsub_etl"
  deletion_protection = false

  time_partitioning {
    type  = "DAY"
    field = "time"
  }

  # Schema inferred from the example message at the top of this file;
  # the original heredoc was truncated.
  schema = <<EOF
[
  {"name": "time", "type": "TIMESTAMP", "mode": "NULLABLE"},
  {"name": "service", "type": "STRING", "mode": "NULLABLE"},
  {"name": "log", "type": "STRING", "mode": "NULLABLE"}
]
EOF
}