Merge branch 'bq'
This commit is contained in:
commit
0596d2f359
@ -6,3 +6,5 @@ A small terraform script to launch GKE with:
|
||||
- GCR for custom docker images
|
||||
- Auto-scaling node pool
|
||||
- K8s database encryption
|
||||
|
||||
Contains some settings that are inadvisable in production (for example, disabling deletion protection). I include them because this project's goal is to provide a template for quickly spinning up a test/dev environment which may involve frequent deletions.
|
||||
|
@ -184,3 +184,13 @@ output "redis_port" {
|
||||
description = "Port for redis database."
|
||||
value = module.redis.redis_port
|
||||
}
|
||||
|
||||
#################### PubSub to BigQuery ###################
|
||||
|
||||
# Provisions the PubSub -> Dataflow -> BigQuery ETL pipeline.
module "bigquery" {
  source = "../modules/bigquery"

  project = var.project
  region  = var.region

  # Passed through so the module can sequence KMS resources after the
  # Cloud KMS API is enabled.
  service_cloudkms = google_project_service.cloudkms
}
|
||||
|
||||
|
174
terraform/modules/bigquery/bigquery.tf
Normal file
174
terraform/modules/bigquery/bigquery.tf
Normal file
@ -0,0 +1,174 @@
|
||||
# Example message:
|
||||
# {"time": "2021-07-20T05:05:47", "service": "foo", "log": "bar"}
|
||||
|
||||
# GCP project that every pipeline resource is created in.
variable "project" {
  type        = string
  description = "Project ID."
}
|
||||
|
||||
# Region used for the KMS key ring and the Dataflow job.
variable "region" {
  type        = string
  description = "Region."
}
|
||||
|
||||
# Receives the google_project_service resource that enables the Cloud
# KMS API, so the key ring below can `depends_on` it and key creation
# is guaranteed to happen after the API is enabled.
variable "service_cloudkms" {
  description = "cloudkms service."
  # A whole resource object is passed in; `any` keeps that flexible
  # while making the previously implicit type explicit.
  type = any
}
|
||||
|
||||
# Look up the project so the Pub/Sub service agent's email can be
# built from its numeric project number (see the IAM binding below).
data "google_project" "project" {
  project_id = var.project
}
|
||||
|
||||
#################### IAM ##################################
|
||||
|
||||
# Grant the Pub/Sub service agent encrypt/decrypt on project KMS keys,
# required before a topic with kms_key_name can be created.
# NOTE(review): google_project_iam_binding is authoritative for this
# role — it removes any other project members holding it. Consider
# google_project_iam_member if other principals may need this role.
resource "google_project_iam_binding" "pubsub_kms" {
  project = var.project
  role    = "roles/cloudkms.cryptoKeyEncrypterDecrypter"

  members = [
    "serviceAccount:service-${data.google_project.project.number}@gcp-sa-pubsub.iam.gserviceaccount.com"
  ]
}
|
||||
|
||||
#################### KMS ##################################
|
||||
|
||||
# Random suffix for the key ring name — GCP key rings can never be
# deleted, so each (re)creation needs a fresh, unique name.
resource "random_id" "bigquery_etl_keyring" {
  byte_length = 4
}
|
||||
|
||||
# CMEK used to encrypt messages on the Pub/Sub topic.
# NOTE(review): no rotation_period is set — consider enabling
# automatic rotation for this symmetric key.
resource "google_kms_crypto_key" "topic_key" {
  name     = "bigquery-etl-topic-key"
  key_ring = google_kms_key_ring.bigquery_etl_keyring.id
}
|
||||
|
||||
# CMEK intended for the Dataflow job. Currently unreferenced: the
# job's kms_key_name is commented out because streaming jobs do not
# support CMEK (see google_dataflow_job.etl_job).
resource "google_kms_crypto_key" "dataflow_key" {
  name     = "bigquery-etl-dataflow-key"
  key_ring = google_kms_key_ring.bigquery_etl_keyring.id
}
|
||||
|
||||
# Key ring holding both pipeline keys. The random suffix lets a
# destroy/re-apply cycle succeed even though key rings are never
# truly deleted in GCP.
resource "google_kms_key_ring" "bigquery_etl_keyring" {
  project  = var.project
  name     = "bigquery-etl-keyring-${random_id.bigquery_etl_keyring.hex}"
  location = var.region

  lifecycle {
    # Deliberately left disabled so test/dev environments can be torn
    # down freely; enable for production use.
    #prevent_destroy = true
  }

  # Make sure the Cloud KMS API is enabled before creating the ring.
  depends_on = [
    var.service_cloudkms
  ]
}
|
||||
|
||||
#################### PubSub ###############################
|
||||
|
||||
# Topic receiving the raw JSON events, encrypted with the CMEK above.
resource "google_pubsub_topic" "bigquery_etl" {
  project      = var.project
  name         = "bigquery-etl"
  kms_key_name = google_kms_crypto_key.topic_key.id

  # The Pub/Sub service agent must hold cryptoKeyEncrypterDecrypter
  # before a CMEK-protected topic can be created.
  depends_on = [
    google_project_iam_binding.pubsub_kms
  ]
}
|
||||
|
||||
# Subscription consumed by the Dataflow job.
resource "google_pubsub_subscription" "bigquery_etl" {
  project = var.project
  name    = "bigquery-etl-sub"
  topic   = google_pubsub_topic.bigquery_etl.name

  message_retention_duration = "1200s" # 20 minutes
  retain_acked_messages      = false
  ack_deadline_seconds       = 20

  # An empty TTL means the subscription never expires from inactivity.
  expiration_policy {
    ttl = ""
  }

  # Back off between 10s and 10 minutes when redelivering messages.
  retry_policy {
    minimum_backoff = "10s"
    maximum_backoff = "600s"
  }
}
|
||||
|
||||
#################### Dataflow #############################
|
||||
|
||||
# Enable the Dataflow API in the target project.
resource "google_project_service" "dataflow" {
  project                    = var.project
  service                    = "dataflow.googleapis.com"
  disable_dependent_services = true
}
|
||||
|
||||
# Scratch bucket for the Dataflow job's temporary files.
resource "google_storage_bucket" "temp_storage" {
  project = var.project
  name    = "${var.project}-bigquery-etl"

  # `location` is required by google provider >= 3.0; "US" matches the
  # multi-region default that older provider versions applied
  # implicitly, so existing deployments are unaffected.
  location = "US"

  # Allow `terraform destroy` to remove the bucket even when it still
  # holds temp objects (consistent with this repo's test/dev goal).
  force_destroy = true
}
|
||||
|
||||
# Streaming Dataflow job running Google's stock
# PubSub_Subscription_to_BigQuery template: reads JSON messages from
# the subscription and appends rows to the BigQuery table.
resource "google_dataflow_job" "etl_job" {
  project = var.project
  name    = "bigquery-etl-job"
  region  = var.region

  # Use the regional template bucket matching var.region instead of a
  # hard-coded us-central1 path, so the module works in any region
  # (identical behavior when var.region is "us-central1").
  template_gcs_path = "gs://dataflow-templates-${var.region}/latest/PubSub_Subscription_to_BigQuery"
  temp_gcs_location = "${google_storage_bucket.temp_storage.url}/temp"

  enable_streaming_engine = true
  max_workers             = 3

  # Can't use kms key with streaming mode :-(
  # kms_key_name = google_kms_crypto_key.dataflow_key.name

  parameters = {
    inputSubscription = google_pubsub_subscription.bigquery_etl.id
    outputTableSpec   = "${google_bigquery_table.pubsub_etl.project}:${google_bigquery_table.pubsub_etl.dataset_id}.${google_bigquery_table.pubsub_etl.table_id}"
  }

  additional_experiments = [
    "enable_streaming_engine",
    "enable_windmill_service"
  ]
}
|
||||
|
||||
#################### BigQuery #############################
|
||||
|
||||
# Dataset that receives the streamed events.
# NOTE(review): location is the US multi-region while other regional
# resources use var.region — confirm this is intentional.
resource "google_bigquery_dataset" "pubsub_etl" {
  project       = var.project
  dataset_id    = "pubsub_etl"
  friendly_name = "PubSub ETL"
  description   = "Dataset where PubSub ETL data goes."
  location      = "US"
}
|
||||
|
||||
# Day-partitioned destination table; field names and types mirror the
# example message documented at the top of this file.
resource "google_bigquery_table" "pubsub_etl" {
  project    = var.project
  dataset_id = google_bigquery_dataset.pubsub_etl.dataset_id
  table_id   = "pubsub_etl"

  # Inadvisable in production; disabled so test/dev teardowns work.
  deletion_protection = false

  time_partitioning {
    type = "DAY"
  }

  schema = <<EOF
[
  {
    "description": "Time of the event",
    "mode": "NULLABLE",
    "name": "time",
    "type": "DATETIME"
  },
  {
    "description": "Name of the service",
    "mode": "NULLABLE",
    "name": "service",
    "type": "STRING"
  },
  {
    "description": "Text of the log",
    "mode": "NULLABLE",
    "name": "log",
    "type": "STRING"
  }
]
EOF
}
|
Loading…
x
Reference in New Issue
Block a user