diff --git a/terraform/basic_gke/README.md b/terraform/basic_gke/README.md
index ac3a929..7469a3b 100644
--- a/terraform/basic_gke/README.md
+++ b/terraform/basic_gke/README.md
@@ -6,3 +6,5 @@ A small terraform script to launch GKE with:
 - GCR for custom docker images
 - Auto-scaling node pool
 - K8s database encryption
+
+Contains some settings that are inadvisable in production (for example, disabling deletion protection). I include them because this project's goal is to provide a template for quickly spinning up a test/dev environment which may involve frequent deletions.
diff --git a/terraform/basic_gke/main.tf b/terraform/basic_gke/main.tf
index 3606993..477a9b9 100644
--- a/terraform/basic_gke/main.tf
+++ b/terraform/basic_gke/main.tf
@@ -184,3 +184,13 @@ output "redis_port" {
   description = "Port for redis database."
   value = module.redis.redis_port
 }
+
+#################### PubSub to BigQuery ###################
+
+module "bigquery" {
+  source = "../modules/bigquery"
+  project = var.project
+  region = var.region
+  service_cloudkms = google_project_service.cloudkms
+}
+
diff --git a/terraform/modules/bigquery/bigquery.tf b/terraform/modules/bigquery/bigquery.tf
new file mode 100644
index 0000000..caf0ab9
--- /dev/null
+++ b/terraform/modules/bigquery/bigquery.tf
@@ -0,0 +1,181 @@
+# Example message:
+# {"time": "2021-07-20T05:05:47", "service": "foo", "log": "bar"}
+
+variable "project" {
+  description = "Project ID."
+  type = string
+}
+
+variable "region" {
+  description = "Region."
+  type = string
+}
+
+variable "service_cloudkms" {
+  description = "cloudkms service."
+}
+
+data "google_project" "project" {
+  project_id = var.project
+}
+
+#################### IAM ##################################
+
+# Grant the Pub/Sub service agent use of KMS keys so the topic can be CMEK-encrypted.
+# google_project_iam_member is non-authoritative: unlike google_project_iam_binding,
+# it leaves other principals that already hold this role in the project untouched.
+resource "google_project_iam_member" "pubsub_kms" {
+  project = var.project
+  role = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
+
+  member = "serviceAccount:service-${data.google_project.project.number}@gcp-sa-pubsub.iam.gserviceaccount.com"
+}
+
+#################### KMS ##################################
+
+resource "random_id" "bigquery_etl_keyring" {
+  byte_length = 4
+}
+
+resource "google_kms_crypto_key" "topic_key" {
+  name = "bigquery-etl-topic-key"
+  key_ring = google_kms_key_ring.bigquery_etl_keyring.id
+}
+
+resource "google_kms_crypto_key" "dataflow_key" {
+  name = "bigquery-etl-dataflow-key"
+  key_ring = google_kms_key_ring.bigquery_etl_keyring.id
+}
+
+resource "google_kms_key_ring" "bigquery_etl_keyring" {
+  project = var.project
+  name = "bigquery-etl-keyring-${random_id.bigquery_etl_keyring.hex}"
+  location = var.region
+
+  lifecycle {
+    #prevent_destroy = true
+  }
+
+  depends_on = [
+    var.service_cloudkms
+  ]
+}
+
+#################### PubSub ###############################
+
+resource "google_pubsub_topic" "bigquery_etl" {
+  project = var.project
+  name = "bigquery-etl"
+  kms_key_name = google_kms_crypto_key.topic_key.id
+  depends_on = [
+    google_project_iam_member.pubsub_kms
+  ]
+}
+
+resource "google_pubsub_subscription" "bigquery_etl" {
+  project = var.project
+  name = "bigquery-etl-sub"
+  topic = google_pubsub_topic.bigquery_etl.name
+
+  # 20 minutes
+  message_retention_duration = "1200s"
+  retain_acked_messages = false
+  ack_deadline_seconds = 20
+
+  expiration_policy {
+    # An empty ttl means the subscription never expires.
+    ttl = ""
+  }
+
+  retry_policy {
+    maximum_backoff = "600s"
+    minimum_backoff = "10s"
+  }
+}
+
+#################### Dataflow #############################
+
+resource "google_project_service" "dataflow" {
+  project = var.project
+  service = "dataflow.googleapis.com"
+  disable_dependent_services = true
+}
+
+resource "google_storage_bucket" "temp_storage" {
+  project = var.project
+  name = "${var.project}-bigquery-etl"
+  force_destroy = true
+}
+
+resource "google_dataflow_job" "etl_job" {
+  project = var.project
+  name = "bigquery-etl-job"
+  region = var.region
+  template_gcs_path = "gs://dataflow-templates-us-central1/latest/PubSub_Subscription_to_BigQuery"
+  temp_gcs_location = "${google_storage_bucket.temp_storage.url}/temp"
+  enable_streaming_engine = true
+  max_workers = 3
+  # Can't use kms key with streaming mode :-(
+  # kms_key_name = google_kms_crypto_key.dataflow_key.name
+
+
+  parameters = {
+    inputSubscription = google_pubsub_subscription.bigquery_etl.id
+    outputTableSpec = "${google_bigquery_table.pubsub_etl.project}:${google_bigquery_table.pubsub_etl.dataset_id}.${google_bigquery_table.pubsub_etl.table_id}"
+  }
+
+  additional_experiments = [
+    "enable_streaming_engine",
+    "enable_windmill_service"
+  ]
+
+  # The Dataflow API must be enabled before the job can be launched.
+  depends_on = [
+    google_project_service.dataflow
+  ]
+}
+
+#################### BigQuery #############################
+
+resource "google_bigquery_dataset" "pubsub_etl" {
+  project = var.project
+  dataset_id = "pubsub_etl"
+  friendly_name = "PubSub ETL"
+  description = "Dataset where PubSub ETL data goes."
+  location = "US"
+}
+
+resource "google_bigquery_table" "pubsub_etl" {
+  project = var.project
+  dataset_id = google_bigquery_dataset.pubsub_etl.dataset_id
+  table_id = "pubsub_etl"
+
+  deletion_protection = false
+
+  time_partitioning {
+    type = "DAY"
+  }
+
+  schema = <