# Example message:
# {"time": "2021-07-20T05:05:47", "service": "foo", "log": "bar"}
variable "project" {
description = "Project ID."
type = string
}
variable "region" {
description = "Region."
type = string
}
variable "service_cloudkms" {
description = "cloudkms service."
}
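
# Sketch of a hypothetical caller showing how service_cloudkms is expected to
# be wired in (resource, module, and path names are illustrative):
#
#   resource "google_project_service" "cloudkms" {
#     project = "my-project"
#     service = "cloudkms.googleapis.com"
#   }
#
#   module "bigquery_etl" {
#     source           = "./bigquery-etl"
#     project          = "my-project"
#     region           = "us-central1"
#     service_cloudkms = google_project_service.cloudkms
#   }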
data "google_project" "project" {
project_id = var.project
}

#################### IAM ##################################

# Grant the Pub/Sub service agent permission to encrypt/decrypt with the CMEK
# keys. Note: google_project_iam_binding is authoritative for this role on the
# project; prefer google_project_iam_member if other members already hold it.
resource "google_project_iam_binding" "pubsub_kms" {
  project = var.project
  role    = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
  members = [
    "serviceAccount:service-${data.google_project.project.number}@gcp-sa-pubsub.iam.gserviceaccount.com"
  ]
}

#################### KMS ##################################

# Key rings cannot be deleted in Cloud KMS, so a random suffix lets this
# configuration be destroyed and re-applied without a name collision.
resource "random_id" "bigquery_etl_keyring" {
  byte_length = 4
}

resource "google_kms_key_ring" "bigquery_etl_keyring" {
  project  = var.project
  name     = "bigquery-etl-keyring-${random_id.bigquery_etl_keyring.hex}"
  location = var.region
  lifecycle {
    # Uncomment to guard the key ring against accidental removal in production:
    #prevent_destroy = true
  }
  depends_on = [
    var.service_cloudkms
  ]
}

resource "google_kms_crypto_key" "topic_key" {
  name     = "bigquery-etl-topic-key"
  key_ring = google_kms_key_ring.bigquery_etl_keyring.id
}

# Currently unused: see the note on kms_key_name in the Dataflow job below.
resource "google_kms_crypto_key" "dataflow_key" {
  name     = "bigquery-etl-dataflow-key"
  key_ring = google_kms_key_ring.bigquery_etl_keyring.id
}

#################### PubSub ###############################

# The IAM binding must be in place before the topic is created, or Pub/Sub
# cannot validate access to the CMEK key.
resource "google_pubsub_topic" "bigquery_etl" {
  project      = var.project
  name         = "bigquery-etl"
  kms_key_name = google_kms_crypto_key.topic_key.id
  depends_on = [
    google_project_iam_binding.pubsub_kms
  ]
}
resource "google_pubsub_subscription" "bigquery_etl" {
project = var.project
name = "bigquery-etl-sub"
topic = google_pubsub_topic.bigquery_etl.name
# 20 minutes
message_retention_duration = "1200s"
retain_acked_messages = false
ack_deadline_seconds = 20
expiration_policy {
ttl = ""
}
retry_policy {
maximum_backoff = "600s"
minimum_backoff = "10s"
}
}

#################### Dataflow #############################

resource "google_project_service" "dataflow" {
  project                    = var.project
  service                    = "dataflow.googleapis.com"
  disable_dependent_services = true
}

resource "random_id" "temp_storage" {
  byte_length = 4
}

# Scratch bucket for the Dataflow job's temp files.
resource "google_storage_bucket" "temp_storage" {
  project       = var.project
  name          = "${var.project}-etl-temp-${random_id.temp_storage.hex}"
  location      = "US" # required by google provider v4+; co-located with the BigQuery dataset
  force_destroy = true
}
resource "google_dataflow_job" "etl_job" {
project = var.project
name = "bigquery-etl-job"
region = var.region
template_gcs_path = "gs://dataflow-templates-us-central1/latest/PubSub_Subscription_to_BigQuery"
temp_gcs_location = "${google_storage_bucket.temp_storage.url}/temp"
enable_streaming_engine = true
max_workers = 3
# Can't use kms key with streaming mode :-(
# kms_key_name = google_kms_crypto_key.dataflow_key.name
parameters = {
inputSubscription = google_pubsub_subscription.bigquery_etl.id
outputTableSpec = "${google_bigquery_table.pubsub_etl.project}:${google_bigquery_table.pubsub_etl.dataset_id}.${google_bigquery_table.pubsub_etl.table_id}"
}
additional_experiments = [
"enable_streaming_engine",
"enable_windmill_service"
]
}
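
# The streaming job's status can be checked with, e.g.:
#   gcloud dataflow jobs list --region=<region> --status=active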

#################### BigQuery #############################

resource "google_bigquery_dataset" "pubsub_etl" {
  project       = var.project
  dataset_id    = "pubsub_etl"
  friendly_name = "PubSub ETL"
  description   = "Dataset where PubSub ETL data goes."
  location      = "US"
}

resource "google_bigquery_table" "pubsub_etl" {
  project             = var.project
  dataset_id          = google_bigquery_dataset.pubsub_etl.dataset_id
  table_id            = "pubsub_etl"
  deletion_protection = false

  time_partitioning {
    type  = "DAY"
    field = "time"
  }

  # The schema must match the JSON messages published to the topic
  # (see the example message at the top of this file).
  schema = <<EOF
[
  {
    "description": "Time of the event",
    "mode": "NULLABLE",
    "name": "time",
    "type": "DATETIME"
  },
  {
    "description": "Name of the service",
    "mode": "NULLABLE",
    "name": "service",
    "type": "STRING"
  },
  {
    "description": "Text of the log",
    "mode": "NULLABLE",
    "name": "log",
    "type": "STRING"
  }
]
EOF
}
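
# Rows landing in the table can be spot-checked with a query along these
# lines (the project ID placeholder is illustrative):
#   SELECT service, COUNT(*) AS events
#   FROM `<project>.pubsub_etl.pubsub_etl`
#   WHERE time > DATETIME_SUB(CURRENT_DATETIME(), INTERVAL 1 HOUR)
#   GROUP BY service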