diff --git a/docker-compose.yml b/docker-compose.yml index 4b31a953fb5932c90bb77fea63742b493be7efa8..1558298ce778e04adb433d1048f7f56ed640d7df 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -99,7 +99,7 @@ services: container_name: grafana-with-datasources environment: GF_SECURITY_ADMIN_USER: monitor - GF_SECURITY_ADMIN_PASSWORD: demo + GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_PASSWORD:-demo} GF_INSTALL_PLUGINS: yesoreyeram-infinity-datasource ports: - "3000:3000" diff --git a/terraform/README.md b/terraform/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a700f88b73870de775b0ae2ce5d8e5aa0f2fcef3 --- /dev/null +++ b/terraform/README.md @@ -0,0 +1,92 @@ +# Terraform deployment modules + +Infrastructure as Code modules for deploying postgres_ai monitoring to cloud providers. + +## Available modules + +### AWS (EC2) +Single EC2 instance deployment with Docker Compose. + +- **Path**: `aws/` +- **Architecture**: Single EC2 instance with Docker Compose +- **Best for**: Small to medium deployments (1-10 databases) +- **Documentation**: [aws/README.md](aws/README.md) + +### GCP (Coming soon) +Deploy to Google Cloud Platform using Compute Engine or Cloud Run. + +### Azure (Coming soon) +Deploy to Microsoft Azure using Virtual Machines or Container Instances. + +## Quick start + +### AWS deployment + +```bash +cd terraform/aws + +# Copy example variables +cp terraform.tfvars.example terraform.tfvars + +# Edit variables with your settings +vim terraform.tfvars + +# Initialize Terraform +terraform init + +# Review the plan +terraform plan + +# Deploy infrastructure (takes 5-10 minutes) +terraform apply +``` + +## Architecture overview + +The AWS deployment creates: + +1. **Compute** + - Single EC2 instance (t3.medium default) + - Ubuntu 22.04 LTS (Jammy) with Docker and Docker Compose + - Systemd service for automatic startup + +2. **Storage** + - EBS volume for persistent data + - Automated snapshots available via AWS Backup + +3. 
**Networking** + - VPC with public subnet + - Security Group with restricted access + - Optional Elastic IP for stable addressing + +4. **Monitoring stack** + - Runs docker-compose from cloned repository + - Grafana accessible on port 3000 + +## Security considerations + +- EC2 instance in public subnet (can be changed to private with bastion) +- Security groups restrict access to SSH and Grafana only +- All data encrypted at rest (EBS encryption) +- Recommended: Use AWS Systems Manager Session Manager instead of SSH +- Recommended: Restrict `allowed_cidr_blocks` to your office/VPN IP + +## Instance types + +Recommended instance types based on workload: + +- **t3.medium**: 2 vCPU, 4 GiB RAM - suitable for 1-3 databases (default) +- **t3.large**: 2 vCPU, 8 GiB RAM - suitable for 3-10 databases +- **t3.xlarge**: 4 vCPU, 16 GiB RAM - suitable for 10+ databases + +Additional options: +- Use Spot Instances for non-critical workloads (subject to interruption) +- Disable Elastic IP if stable address not required + +## Support + +For issues or questions: +- Open an issue on GitLab +- Contact PostgresAI support +- Check documentation at https://postgres.ai + diff --git a/terraform/aws/.gitignore b/terraform/aws/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..359b41638c1c46b39b7c00fd11034d78e8531428 --- /dev/null +++ b/terraform/aws/.gitignore @@ -0,0 +1,33 @@ +# Terraform files +*.tfstate +*.tfstate.* +*.tfvars +!terraform.tfvars.example +.terraform/ +.terraform.lock.hcl +crash.log +override.tf +override.tf.json +*_override.tf +*_override.tf.json +tfplan +plan.log + +# OS files +.DS_Store +Thumbs.db + +# IDE files +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# Backup files +*.bak +*.backup + +# SSH keys +*.pem +*.key diff --git a/terraform/aws/QUICKSTART.md b/terraform/aws/QUICKSTART.md new file mode 100644 index 0000000000000000000000000000000000000000..d6501dcda324d2471b5e58877a622fe9a86e3088 --- /dev/null +++ 
b/terraform/aws/QUICKSTART.md @@ -0,0 +1,94 @@ +# Quick start + +## Prerequisites + +```bash +# Create SSH key +aws ec2 create-key-pair --key-name postgres-ai-key \ + --query 'KeyMaterial' --output text > ~/.ssh/postgres-ai-key.pem +chmod 400 ~/.ssh/postgres-ai-key.pem + +# Configure AWS credentials +aws configure +``` + +## Deploy + +```bash +cd terraform/aws + +# Configure +cp terraform.tfvars.example terraform.tfvars +vim terraform.tfvars # Set ssh_key_name + +# Validate +./validate.sh + +# Deploy +terraform init +terraform plan +terraform apply + +# Get access info +terraform output grafana_url +terraform output ssh_command +``` + +## Access + +```bash +# Grafana dashboard +open $(terraform output -raw grafana_url) +# Login: monitor / demo (or your custom password) + +# SSH +ssh -i ~/.ssh/postgres-ai-key.pem ubuntu@$(terraform output -raw public_ip) +``` + +## Add monitoring instances + +Edit `terraform.tfvars`: + +```hcl +monitoring_instances = [ + { + name = "prod-db" + conn_str = "postgresql://monitor:pass@db.example.com:5432/postgres" + environment = "production" + cluster = "main" + node_name = "primary" + } +] +``` + +Apply changes: +```bash +terraform apply +``` + +## Operations + +```bash +# View logs +ssh ubuntu@IP "sudo cat /var/log/user-data.log" + +# Restart services +ssh ubuntu@IP "sudo systemctl restart postgres-ai" + +# Destroy +terraform destroy +``` + +## Troubleshooting + +```bash +# Check installation log +ssh ubuntu@IP "sudo cat /var/log/user-data.log" + +# Check service status +ssh ubuntu@IP "sudo systemctl status postgres-ai" + +# Check containers +ssh ubuntu@IP "sudo docker ps" +``` + diff --git a/terraform/aws/README.md b/terraform/aws/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ba7ad25afde009cbf34fcf41c52f6efae634a8c7 --- /dev/null +++ b/terraform/aws/README.md @@ -0,0 +1,317 @@ +# AWS deployment + +Single EC2 instance deployment with Docker Compose. 
+ +## Architecture + +Single EC2 instance with Docker Compose. + +Terraform creates: +- VPC with public subnet +- EC2 instance (t3.medium, Ubuntu 22.04 LTS) +- EBS volume (50 GiB gp3, encrypted) +- Security Group (SSH + Grafana ports) +- Elastic IP (optional) + +On first boot, EC2 instance clones this repository and runs `docker-compose up` to start all monitoring services. + +## Quick start + +See [QUICKSTART.md](QUICKSTART.md) for step-by-step guide. + +## Configuration + +### Minimal setup + +```hcl +# terraform.tfvars +ssh_key_name = "postgres-ai-key" + +# Optional: Set custom Grafana password (defaults to 'demo') +# grafana_password = "YourSecurePassword123!" +``` + +### Minimal production setup + +```hcl +# terraform.tfvars + +# REQUIRED PARAMETERS +ssh_key_name = "your-key-name" + +# AWS SETTINGS +aws_region = "us-east-1" +environment = "production" +instance_type = "t3.medium" + +# STORAGE +data_volume_size = 50 # GiB + +# SECURITY (restrict access!) +allowed_ssh_cidr = ["0.0.0.0/0"] # WARNING: Allows access from anywhere +allowed_cidr_blocks = ["0.0.0.0/0"] # WARNING: Allows access from anywhere + +# OPTIONAL PARAMETERS +# grafana_password = "YourSecurePassword123!" # Defaults to 'demo' +# postgres_ai_api_key = "your-api-key" # For uploading reports +# enable_demo_db = false # true for testing +# use_elastic_ip = true # Stable IP address + +monitoring_instances = [ + { + name = "main-db" + conn_str = "postgresql://monitor:pass@db.example.com:5432/postgres" + environment = "production" + cluster = "main" + node_name = "primary" + } +] +``` + +### Full configuration + +```hcl +# AWS +aws_region = "us-east-1" +environment = "production" +instance_type = "t3.medium" + +# Storage +data_volume_size = 50 + +# Security (restrict access in production) +allowed_ssh_cidr = ["203.0.113.0/24"] +allowed_cidr_blocks = ["203.0.113.0/24"] + +# Required +ssh_key_name = "ssh-key" + +# Optional +grafana_password = "SecurePassword123!" 
# Defaults to 'demo' +postgres_ai_api_key = "your-api-key" +enable_demo_db = false +use_elastic_ip = true + +# Monitoring instances +monitoring_instances = [ + { + name = "prod-db" + conn_str = "postgresql://monitor:pass@db.example.com:5432/postgres" + environment = "production" + cluster = "main" + node_name = "primary" + } +] +``` + +## Management + +### SSH access + +```bash +terraform output ssh_command +# Or directly: +ssh -i ~/.ssh/postgres-ai-key.pem ubuntu@$(terraform output -raw public_ip) +``` + +### Service management + +```bash +# On EC2 instance +cd /home/postgres_ai/postgres_ai + +# Status +sudo docker-compose ps + +# Logs +sudo docker-compose logs -f + +# Restart +sudo systemctl restart postgres-ai +``` + +### Add monitoring instance + +Method 1: Update terraform.tfvars and run `terraform apply` + +Method 2: Manual configuration on server: +```bash +ssh ubuntu@your-ip +cd /home/postgres_ai/postgres_ai +sudo -u postgres_ai vim instances.yml +sudo docker-compose restart +``` + +### Backup + +```bash +# Create snapshot +aws ec2 create-snapshot \ + --volume-id $(terraform output -raw data_volume_id) \ + --description "postgres-ai backup $(date +%Y-%m-%d)" +``` + +### System updates + +```bash +ssh ubuntu@your-ip + +# Update OS +sudo apt-get update && sudo apt-get upgrade -y + +# Update Docker images +cd /home/postgres_ai/postgres_ai +sudo docker-compose pull +sudo docker-compose up -d +``` + +## Security + +### Recommendations + +1. Restrict SSH access: +```hcl +allowed_ssh_cidr = ["your.ip.address/32"] +``` + +2. Restrict Grafana access: +```hcl +allowed_cidr_blocks = ["your.office.ip/24"] +``` + +3. Use AWS Systems Manager instead of SSH: +```bash +aws ssm start-session --target $(terraform output -raw instance_id) +``` + +4. Automate backups with AWS Backup or cron. 
+ +## Monitoring + +### CloudWatch metrics + +```bash +# CPU utilization +aws cloudwatch get-metric-statistics \ + --namespace AWS/EC2 \ + --metric-name CPUUtilization \ + --dimensions Name=InstanceId,Value=$(terraform output -raw instance_id) \ + --start-time $(date -u -d '1 hour ago' +%Y-%m-%dT%H:%M:%S) \ + --end-time $(date -u +%Y-%m-%dT%H:%M:%S) \ + --period 300 \ + --statistics Average +``` + +### Disk space + +```bash +ssh ubuntu@your-ip "df -h /data" +``` + +## Troubleshooting + +### Services not starting + +```bash +# Check user-data log +ssh ubuntu@your-ip "sudo cat /var/log/user-data.log" + +# Check Docker +ssh ubuntu@your-ip "sudo systemctl status docker" +ssh ubuntu@your-ip "sudo docker ps -a" +``` + +### No access to Grafana + +```bash +# Check Security Group +aws ec2 describe-security-groups \ + --group-ids $(terraform output -raw security_group_id) + +# Check services +ssh ubuntu@your-ip "sudo docker-compose ps" +``` + +### Disk full + +```bash +# Increase EBS volume size +aws ec2 modify-volume --volume-id VOLUME_ID --size 200 + +# Expand filesystem +ssh ubuntu@your-ip "sudo resize2fs /dev/nvme1n1" +``` + +## Instance sizing + +Choose instance type based on monitoring workload: + +```hcl +instance_type = "t3.medium" # 2 vCPU, 4 GiB RAM (default) +``` + +Suitable for: +- Monitoring 1-3 small databases +- Dev/test environments +- Proof of concept + +```hcl +instance_type = "t3.large" # 2 vCPU, 8 GiB RAM +``` + +Suitable for: +- Monitoring 3-10 databases +- Production environments + +```hcl +instance_type = "t3.xlarge" # 4 vCPU, 16 GiB RAM +``` + +Suitable for: +- Monitoring 10+ databases +- High-frequency metric collection + +## Custom domain + +```bash +# Create A record in Route53 +aws route53 change-resource-record-sets \ + --hosted-zone-id YOUR_ZONE_ID \ + --change-batch '{ + "Changes": [{ + "Action": "CREATE", + "ResourceRecordSet": { + "Name": "monitoring.example.com", + "Type": "A", + "TTL": 300, + "ResourceRecords": [{"Value": "'"$(terraform 
output -raw public_ip)"'"}] + } + }] + }' + +# Configure HTTPS with Let's Encrypt +ssh ubuntu@your-ip +sudo snap install certbot +sudo certbot certonly --standalone -d monitoring.example.com +``` + +## Limitations + +Single AZ deployment: +- No automatic failover +- Manual backups required +- Vertical scaling only +- Suitable for 1-10 databases + +Recovery time: 15-30 minutes (restore from snapshot) + +## Use cases + +This deployment is appropriate for: +- Development and testing environments +- Small to medium workloads (1-10 databases) +- Non-critical monitoring systems +- Budget-constrained projects +- Teams with Linux administration skills + +For production-critical systems requiring high availability, consider managed services (RDS, ECS Fargate) instead. diff --git a/terraform/aws/main.tf b/terraform/aws/main.tf new file mode 100644 index 0000000000000000000000000000000000000000..3c31a67245026c165d21847046d087207d1906cd --- /dev/null +++ b/terraform/aws/main.tf @@ -0,0 +1,195 @@ +terraform { + required_version = ">= 1.5.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } +} + +provider "aws" { + region = var.aws_region + + default_tags { + tags = { + Project = "postgres-ai-monitoring" + Environment = var.environment + ManagedBy = "terraform" + } + } +} + +# Data sources +data "aws_ami" "ubuntu" { + most_recent = true + owners = ["099720109477"] # Canonical + + filter { + name = "name" + values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } + + filter { + name = "root-device-type" + values = ["ebs"] + } +} + +# VPC (simplified - use default or create minimal) +resource "aws_vpc" "main" { + cidr_block = "10.0.0.0/16" + enable_dns_hostnames = true + enable_dns_support = true + + tags = { + Name = "${var.environment}-postgres-ai-vpc" + } +} + +resource "aws_subnet" "main" { + vpc_id = aws_vpc.main.id + cidr_block = "10.0.1.0/24" + 
availability_zone = data.aws_availability_zones.available.names[0] + map_public_ip_on_launch = true + + tags = { + Name = "${var.environment}-postgres-ai-subnet" + } +} + +data "aws_availability_zones" "available" { + state = "available" +} + +resource "aws_internet_gateway" "main" { + vpc_id = aws_vpc.main.id + + tags = { + Name = "${var.environment}-postgres-ai-igw" + } +} + +resource "aws_route_table" "main" { + vpc_id = aws_vpc.main.id + + route { + cidr_block = "0.0.0.0/0" + gateway_id = aws_internet_gateway.main.id + } + + tags = { + Name = "${var.environment}-postgres-ai-rt" + } +} + +resource "aws_route_table_association" "main" { + subnet_id = aws_subnet.main.id + route_table_id = aws_route_table.main.id +} + +# Security Group +resource "aws_security_group" "main" { + name = "${var.environment}-postgres-ai-sg" + description = "Security group for postgres_ai monitoring EC2" + vpc_id = aws_vpc.main.id + + # SSH access + ingress { + description = "SSH" + from_port = 22 + to_port = 22 + protocol = "tcp" + cidr_blocks = var.allowed_ssh_cidr + } + + # Grafana + ingress { + description = "Grafana" + from_port = 3000 + to_port = 3000 + protocol = "tcp" + cidr_blocks = var.allowed_cidr_blocks + } + + # Allow all outbound + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = { + Name = "${var.environment}-postgres-ai-sg" + } +} + +# EBS Volume for data persistence +resource "aws_ebs_volume" "data" { + availability_zone = aws_subnet.main.availability_zone + size = var.data_volume_size + type = var.data_volume_type + encrypted = true + + tags = { + Name = "${var.environment}-postgres-ai-data" + } +} + +# EC2 Instance +resource "aws_instance" "main" { + ami = data.aws_ami.ubuntu.id + instance_type = var.instance_type + subnet_id = aws_subnet.main.id + + vpc_security_group_ids = [aws_security_group.main.id] + + key_name = var.ssh_key_name + + root_block_device { + volume_size = 30 + volume_type = var.root_volume_type + 
encrypted = true + } + + user_data = templatefile("${path.module}/user_data.sh", { + grafana_password = var.grafana_password + postgres_ai_api_key = var.postgres_ai_api_key + monitoring_instances = var.monitoring_instances + enable_demo_db = var.enable_demo_db + }) + + tags = { + Name = "${var.environment}-postgres-ai-monitoring" + } + + lifecycle { + ignore_changes = [user_data] + } +} + +# Attach EBS volume +resource "aws_volume_attachment" "data" { + device_name = "/dev/sdf" + volume_id = aws_ebs_volume.data.id + instance_id = aws_instance.main.id +} + +# Elastic IP (optional, for stable IP) +resource "aws_eip" "main" { + count = var.use_elastic_ip ? 1 : 0 + instance = aws_instance.main.id + domain = "vpc" + + tags = { + Name = "${var.environment}-postgres-ai-eip" + } +} + diff --git a/terraform/aws/outputs.tf b/terraform/aws/outputs.tf new file mode 100644 index 0000000000000000000000000000000000000000..182eaf9b69e761d3577065d970e51a68cbf512b6 --- /dev/null +++ b/terraform/aws/outputs.tf @@ -0,0 +1,67 @@ +output "instance_id" { + description = "EC2 instance ID" + value = aws_instance.main.id +} + +output "data_volume_id" { + description = "EBS data volume ID (for snapshots)" + value = aws_ebs_volume.data.id +} + +output "public_ip" { + description = "Public IP address" + value = var.use_elastic_ip ? aws_eip.main[0].public_ip : aws_instance.main.public_ip +} + +output "grafana_url" { + description = "Grafana dashboard URL" + value = "http://${var.use_elastic_ip ? aws_eip.main[0].public_ip : aws_instance.main.public_ip}:3000" +} + +output "ssh_command" { + description = "SSH command to connect" + value = "ssh -i ~/.ssh/${var.ssh_key_name}.pem ubuntu@${var.use_elastic_ip ? 
aws_eip.main[0].public_ip : aws_instance.main.public_ip}" +} + +output "grafana_credentials" { + description = "Grafana credentials" + value = { + username = "monitor" + password = var.grafana_password + } + sensitive = true +} + +output "deployment_info" { + description = "Deployment information" + value = { + instance_type = var.instance_type + region = var.aws_region + public_ip = var.use_elastic_ip ? aws_eip.main[0].public_ip : aws_instance.main.public_ip + data_volume = "${var.data_volume_size} GiB" + api_key_configured = var.postgres_ai_api_key != "" + monitoring_instances = length(var.monitoring_instances) + demo_mode = var.enable_demo_db + } + sensitive = true +} + +output "next_steps" { + description = "Next steps after deployment" + value = <<-EOT + +Deployment complete + +Grafana URL: http://${var.use_elastic_ip ? aws_eip.main[0].public_ip : aws_instance.main.public_ip}:3000 +Username: monitor +Password: (from terraform.tfvars) + +Monitoring: ${length(var.monitoring_instances)} instance(s) +API key: ${var.postgres_ai_api_key != "" ? "configured" : "not configured"} + +SSH: ssh -i ~/.ssh/${var.ssh_key_name}.pem ubuntu@${var.use_elastic_ip ? 
aws_eip.main[0].public_ip : aws_instance.main.public_ip} +Backup: aws ec2 create-snapshot --volume-id ${aws_ebs_volume.data.id} + +EOT +} + diff --git a/terraform/aws/terraform.tfvars.example b/terraform/aws/terraform.tfvars.example new file mode 100644 index 0000000000000000000000000000000000000000..93d188c497fe3cbd1636dacdadcdd24317ebb50c --- /dev/null +++ b/terraform/aws/terraform.tfvars.example @@ -0,0 +1,113 @@ +# AWS Deployment Configuration +# Copy to terraform.tfvars and customize + +# REQUIRED PARAMETERS +# ------------------------- + +# SSH key for EC2 access (create in AWS Console or CLI) +ssh_key_name = "your-key-name" + + +# AWS SETTINGS +# ------------------------- + +# AWS region +aws_region = "us-east-1" + +# Environment +environment = "production" + +instance_type = "t3.medium" + +# EBS volume size for data (GiB) +data_volume_size = 50 + +# Storage types +# data_volume_type = "gp3" # gp3 (SSD), st1 (HDD throughput optimized, min 125 GiB), sc1 (HDD cold, min 125 GiB) +# root_volume_type = "gp3" # gp3 (SSD), gp2 (older SSD) + + +# SECURITY +# ------------------------- + +# CIDR blocks for SSH access (restrict to your IP in production) +allowed_ssh_cidr = [ + "0.0.0.0/0" # WARNING: Allows access from anywhere + # "203.0.113.0/24" # Replace with your office/VPN IP +] + +# CIDR blocks for Grafana access (restrict to your IP in production) +allowed_cidr_blocks = [ + "0.0.0.0/0" # WARNING: Allows access from anywhere + # "203.0.113.0/24" # Replace with your office/VPN IP +] + +# Allocate Elastic IP for stable address +use_elastic_ip = true + + +# POSTGRESQL MONITORING +# ------------------------- + +# PostgreSQL instances to monitor +monitoring_instances = [ + { + name = "production-db" + conn_str = "postgresql://monitor:password@db.example.com:5432/postgres" + environment = "production" + cluster = "main" + node_name = "primary" + }, + # { + # name = "production-replica" + # conn_str = "postgresql://monitor:password@replica.example.com:5432/postgres" + # 
environment = "production" + # cluster = "main" + # node_name = "replica-1" + # } +] + + +# OPTIONAL PARAMETERS +# ------------------------- + +# PostgresAI API key (for uploading reports to cloud) +# +# How to get API key: +# 1. Register at https://console.postgres.ai +# 2. Go to: Your Organization → Manage → Access Tokens +# 3. Create new token and copy it here +# +# If not set, reports will be generated locally without upload +# postgres_ai_api_key = "your-api-key-here" + +# Grafana admin password (defaults to 'demo') +# grafana_password = "YourSecurePassword123!" + +# Enable demo database (for testing) +# enable_demo_db = false + + +# CONFIGURATION EXAMPLES +# ------------------------- + +# Minimal configuration (for testing): +# ------------------------------------ +# instance_type = "t3.medium" +# data_volume_size = 50 +# enable_demo_db = true +# monitoring_instances = [] + +# Production configuration: +# ------------------------- +# instance_type = "t3.xlarge" +# data_volume_size = 200 +# allowed_ssh_cidr = ["10.0.0.0/8"] +# allowed_cidr_blocks = ["10.0.0.0/8"] +# use_elastic_ip = true + +# Minimal configuration (dev/test): +# ---------------------------------- +# instance_type = "t3.small" # WARNING: May be slow for production use +# data_volume_size = 30 +# use_elastic_ip = false diff --git a/terraform/aws/user_data.sh b/terraform/aws/user_data.sh new file mode 100644 index 0000000000000000000000000000000000000000..bd3257338016f267d7ab36564663d42513790a94 --- /dev/null +++ b/terraform/aws/user_data.sh @@ -0,0 +1,152 @@ +#!/bin/bash +set -e + +# Log everything +exec > >(tee /var/log/user-data.log) +exec 2>&1 + +echo "Starting postgres_ai monitoring installation..." 
+ +# Update system +apt-get update +apt-get upgrade -y + +# Install Docker +curl -fsSL https://get.docker.com -o get-docker.sh +sh get-docker.sh +systemctl enable docker +systemctl start docker + +# Install Docker Compose +curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose +chmod +x /usr/local/bin/docker-compose + +# Create postgres_ai user +useradd -m -s /bin/bash postgres_ai +usermod -aG docker postgres_ai + +# Mount and prepare data volume +if [ ! -d /data ]; then + mkdir -p /data + + # Wait for volume to be attached + sleep 10 + + # Check if volume exists and format if needed + if [ -e /dev/nvme1n1 ]; then + DEVICE=/dev/nvme1n1 + elif [ -e /dev/xvdf ]; then + DEVICE=/dev/xvdf + else + echo "Data volume not found, using root volume" + DEVICE="" + fi + + if [ -n "$DEVICE" ]; then + # Check if filesystem exists + if ! blkid $DEVICE; then + mkfs.ext4 $DEVICE + fi + + # Mount volume + mount $DEVICE /data + + # Add to fstab for persistence + UUID=$(blkid -s UUID -o value $DEVICE) + echo "UUID=$UUID /data ext4 defaults,nofail 0 2" >> /etc/fstab + fi +fi + +# Set permissions +chown -R postgres_ai:postgres_ai /data + +# Clone postgres_ai repository +cd /home/postgres_ai +sudo -u postgres_ai git clone https://gitlab.com/postgres-ai/postgres_ai.git + +# Configure postgres_ai +cd postgres_ai + +# Create configuration +cat > .pgwatch-config < .env < 0 } +cat > instances.yml <<'INSTANCES_EOF' +%{ for instance in monitoring_instances ~} +- name: ${instance.name} + conn_str: ${instance.conn_str} + preset_metrics: full + custom_metrics: + is_enabled: true + group: default + custom_tags: + env: ${instance.environment} + cluster: ${instance.cluster} + node_name: ${instance.node_name} + sink_type: ~sink_type~ +%{ endfor ~} +INSTANCES_EOF +%{ else } +# No monitoring instances configured - will use empty or default config +cat > 
instances.yml <<'INSTANCES_EOF' +# PostgreSQL instances to monitor +# Add your instances using: ./postgres_ai add-instance + +INSTANCES_EOF +%{ endif } + +# Set ownership +chown -R postgres_ai:postgres_ai /home/postgres_ai/postgres_ai + +# Create systemd service +cat > /etc/systemd/system/postgres-ai.service <<'SERVICE_EOF' +[Unit] +Description=Postgres AI Monitoring +After=docker.service +Requires=docker.service + +[Service] +Type=oneshot +RemainAfterExit=yes +WorkingDirectory=/home/postgres_ai/postgres_ai +User=postgres_ai +Group=postgres_ai + +# Start services +ExecStart=/usr/local/bin/docker-compose up -d + +# Stop services +ExecStop=/usr/local/bin/docker-compose down + +[Install] +WantedBy=multi-user.target +SERVICE_EOF + +# Enable and start service +systemctl daemon-reload +systemctl enable postgres-ai +systemctl start postgres-ai + +# Wait for services to be healthy +sleep 30 + +# Reset Grafana admin password to match terraform config +echo "Setting Grafana admin password..." +cd /home/postgres_ai/postgres_ai +docker exec grafana-with-datasources grafana-cli admin reset-admin-password "${grafana_password}" 2>/dev/null || true + +echo "Installation complete!" +echo "Access Grafana at: http://$(curl -s http://169.254.169.254/latest/meta-data/public-ipv4):3000" +echo "Username: monitor" +echo "Password: ${grafana_password}" + diff --git a/terraform/aws/validate.sh b/terraform/aws/validate.sh new file mode 100755 index 0000000000000000000000000000000000000000..d6b4e289da43bb058b8c673a2e6ce28ba56f9241 --- /dev/null +++ b/terraform/aws/validate.sh @@ -0,0 +1,63 @@ +#!/bin/bash +set -e + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +echo "Validating Terraform configuration..." +echo + +# Check terraform +if ! 
command -v terraform &> /dev/null; then + echo -e "${RED}ERROR: Terraform not installed${NC}" + exit 1 +fi +echo -e "${GREEN}OK${NC} Terraform $(terraform version -json | grep -o '"version":"[^"]*' | cut -d'"' -f4)" + +# Check AWS CLI +if command -v aws &> /dev/null && aws sts get-caller-identity &> /dev/null 2>&1; then + ACCOUNT=$(aws sts get-caller-identity --query Account --output text) + echo -e "${GREEN}OK${NC} AWS credentials (Account: $ACCOUNT)" +else + echo -e "${YELLOW}WARN${NC} AWS credentials not configured" +fi + +# Init +terraform init -backend=false > /dev/null 2>&1 || { echo -e "${RED}ERROR: Terraform init failed${NC}"; exit 1; } +echo -e "${GREEN}OK${NC} Terraform init" + +# Validate +terraform validate > /dev/null 2>&1 || { echo -e "${RED}ERROR: Validation failed${NC}"; terraform validate; exit 1; } +echo -e "${GREEN}OK${NC} Configuration valid" + +# Check terraform.tfvars +if [ ! -f "terraform.tfvars" ]; then + echo -e "${RED}ERROR: terraform.tfvars not found${NC}" + echo "Run: cp terraform.tfvars.example terraform.tfvars" + exit 1 +fi + +# Check required variables +grep -q "ssh_key_name.*=" terraform.tfvars && ! grep -q 'ssh_key_name.*=.*""' terraform.tfvars || \ + { echo -e "${RED}ERROR: ssh_key_name not set in terraform.tfvars${NC}"; exit 1; } + +echo -e "${GREEN}OK${NC} Required variables configured" + +# Plan +echo +echo "Running terraform plan..." +if terraform plan -out=tfplan > /tmp/tfplan.log 2>&1; then + RESOURCES=$(terraform show -json tfplan 2>/dev/null | grep -o '"to_create":[0-9]*' | cut -d: -f2) + echo -e "${GREEN}OK${NC} Plan successful (${RESOURCES} resources to create)" +else + echo -e "${RED}ERROR: Plan failed${NC}" + cat /tmp/tfplan.log + exit 1 +fi + +echo +echo "Validation complete. Ready to deploy." 
+echo "Run: terraform apply tfplan" +echo diff --git a/terraform/aws/variables.tf b/terraform/aws/variables.tf new file mode 100644 index 0000000000000000000000000000000000000000..736cf3a72fa7f953b5b35acd3ce4e305d58e1c47 --- /dev/null +++ b/terraform/aws/variables.tf @@ -0,0 +1,91 @@ +variable "aws_region" { + description = "AWS region" + type = string + default = "us-east-1" +} + +variable "environment" { + description = "Environment name" + type = string + default = "production" +} + +variable "instance_type" { + description = "EC2 instance type" + type = string + default = "t3.medium" +} + +variable "data_volume_size" { + description = "Size of EBS data volume in GiB" + type = number + default = 50 +} + +variable "data_volume_type" { + description = "EBS volume type for data disk (gp3 for SSD, st1 for HDD throughput optimized, sc1 for HDD cold)" + type = string + default = "gp3" +} + +variable "root_volume_type" { + description = "EBS volume type for root disk (gp3 for SSD, gp2 for older SSD)" + type = string + default = "gp3" +} + +variable "ssh_key_name" { + description = "Name of SSH key pair for EC2 access" + type = string +} + +variable "allowed_ssh_cidr" { + description = "CIDR blocks allowed for SSH access" + type = list(string) + default = ["0.0.0.0/0"] +} + +variable "allowed_cidr_blocks" { + description = "CIDR blocks allowed for Grafana access" + type = list(string) + default = ["0.0.0.0/0"] +} + +variable "use_elastic_ip" { + description = "Allocate Elastic IP for stable address" + type = bool + default = true +} + +variable "grafana_password" { + description = "Grafana admin password (optional, defaults to 'demo')" + type = string + default = "demo" + sensitive = true +} + +variable "postgres_ai_api_key" { + description = "PostgresAI API key (optional)" + type = string + default = "" + sensitive = true +} + +variable "monitoring_instances" { + description = "PostgreSQL instances to monitor" + type = list(object({ + name = string + conn_str = string 
+ environment = string + cluster = string + node_name = string + })) + default = [] +} + +variable "enable_demo_db" { + description = "Enable demo database" + type = bool + default = false +} +