kb.erickguedes.com
PostgreSQL: Banco Relacional Avançado

Tuning e Monitoramento

Aula 7 de 7

postgresql.conf — Parâmetros Críticos

# postgresql.conf — configurações principais

# ─── Memória ──────────────────────────────────────────────────
shared_buffers = '4GB'              # 25% da RAM total
effective_cache_size = '12GB'       # 75% da RAM total
work_mem = '64MB'                   # por operação de sort/hash
maintenance_work_mem = '1GB'        # VACUUM, CREATE INDEX
wal_buffers = '64MB'                # 16MB default (shared_buffers/32)

# ─── Conexões ────────────────────────────────────────────────
max_connections = 200               # cada conexão ~10MB
superuser_reserved_connections = 5

# ─── Planner ──────────────────────────────────────────────────
random_page_cost = 1.1              # 4.0 para HDD, 1.1 para SSD
effective_io_concurrency = 200      # 1 para HDD, 200 para SSD
default_statistics_target = 500     # 100 default (precisão do planner)

# ─── Checkpoint ──────────────────────────────────────────────
min_wal_size = '2GB'
max_wal_size = '8GB'
checkpoint_completion_target = 0.9  # espalhar write no tempo
checkpoint_timeout = '15min'

# ─── WAL ─────────────────────────────────────────────────────
wal_level = replica
wal_buffers = '64MB'
wal_compression = on                # zstd/pglz (reduz I/O)

# ─── JIT ─────────────────────────────────────────────────────
jit = on                            # compilação JIT (LLVM)
jit_above_cost = 100000             # queries complexas
jit_inline_above_cost = 500000
jit_optimize_above_cost = 500000

Script de cálculo

cat > pg-tune.sh << 'SCRIPT'
#!/bin/bash
# pg-tune.sh: calcula configurações baseadas na RAM
set -euo pipefail

TOTAL_RAM=$(free -g | awk '/^Mem:/ {print $2}')
echo "RAM total: ${TOTAL_RAM}GB"
echo ""
echo "=== RECOMENDAÇÕES ==="
echo "shared_buffers = '$((TOTAL_RAM / 4))GB'"
echo "effective_cache_size = '$((TOTAL_RAM * 3 / 4))GB'"
echo "maintenance_work_mem = '$((TOTAL_RAM / 16 > 1 ? TOTAL_RAM / 16 : 1))GB'"

# work_mem: shared_buffers / max_connections / 16
echo "# work_mem = 'XXMB' (shared_buffers / max_connections / 16)"
echo "# Ajustar work_mem com cuidado (cada query pode usar múltiplas operações)"
SCRIPT

chmod +x pg-tune.sh

pg_stat_statements

-- Habilitar extensão (requer shared_preload_libraries)
-- shared_preload_libraries = 'pg_stat_statements'
-- pg_stat_statements.max = 10000
-- pg_stat_statements.track = all

CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- Top 10 queries por tempo total
SELECT
    queryid,
    ROUND(total_exec_time::NUMERIC, 2) AS total_ms,
    calls,
    ROUND(mean_exec_time::NUMERIC, 2) AS avg_ms,
    ROUND(mean_exec_time::NUMERIC, 2) || 'ms' AS avg,
    ROUND((total_exec_time / SUM(total_exec_time) OVER()) * 100, 2) AS pct_total,
    SUBSTR(query, 1, 80) AS query_preview
FROM pg_stat_statements
WHERE query NOT LIKE '%pg_stat%'
ORDER BY total_exec_time DESC
LIMIT 10;

-- Top por I/O (blk_read_time)
SELECT
    queryid,
    calls,
    ROUND(blk_read_time::NUMERIC, 2) AS read_ms,
    ROUND(blk_write_time::NUMERIC, 2) AS write_ms,
    SUBSTR(query, 1, 80) AS query
FROM pg_stat_statements
ORDER BY blk_read_time DESC
LIMIT 10;

-- Resetar estatísticas
SELECT pg_stat_statements_reset();

Connection Pooling — PgBouncer

# Instalação
sudo apt install pgbouncer -y

# /etc/pgbouncer/pgbouncer.ini
cat << 'EOF' | sudo tee /etc/pgbouncer/pgbouncer.ini
[databases]
meubanco = host=127.0.0.1 port=5432 dbname=meubanco

[pgbouncer]
listen_addr = 0.0.0.0
listen_port = 6432
auth_type = scram-sha-256
auth_file = /etc/pgbouncer/userlist.txt

# Pool modes: session, transaction, statement
pool_mode = transaction
default_pool_size = 25
max_client_conn = 200
max_db_connections = 0
idle_transaction_timeout = 300
server_idle_timeout = 600

# Monitoring
stats_period = 60
log_connections = 1
log_disconnections = 1
EOF

# userlist.txt (SCRAM-SHA-256)
# Formato: "username" "password"
echo '"app_user" "senha123"' | sudo tee /etc/pgbouncer/userlist.txt
echo '"postgres" "admin"' | sudo tee -a /etc/pgbouncer/userlist.txt

# Iniciar
sudo systemctl start pgbouncer
sudo systemctl enable pgbouncer

# Conectar via PgBouncer
psql -h localhost -p 6432 -U app_user -d meubanco

# SHOW commands
psql -h localhost -p 6432 pgbouncer -c "SHOW POOLS;"
psql -h localhost -p 6432 pgbouncer -c "SHOW STATS;"
psql -h localhost -p 6432 pgbouncer -c "SHOW CLIENTS;"
psql -h localhost -p 6432 pgbouncer -c "SHOW SERVERS;"

Monitoring Queries

Conexões Ativas

SELECT
    pid,
    usename,
    application_name,
    client_addr,
    state,
    backend_start,
    query_start,
    state_change,
    wait_event_type,
    wait_event,
    SUBSTR(query, 1, 100) AS query
FROM pg_stat_activity
WHERE state IS NOT NULL
  AND pid <> pg_backend_pid()
ORDER BY query_start DESC;

Locks

-- Bloqueios ativos
SELECT
    blocked_locks.pid AS blocked_pid,
    blocked_activity.usename AS blocked_user,
    blocked_activity.query AS blocked_query,
    blocking_locks.pid AS blocking_pid,
    blocking_activity.usename AS blocking_user,
    blocking_activity.query AS blocking_query
FROM pg_locks blocked_locks
JOIN pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_locks blocking_locks
    ON blocking_locks.locktype = blocked_locks.locktype
    AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database
    AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
    AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
    AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
    AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
    AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
    AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
    AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
    AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
    AND blocking_locks.pid != blocked_locks.pid
JOIN pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;

-- Matar query bloqueante
SELECT pg_terminate_backend(blocking_pid) FROM ...;

Estatísticas de Tabelas

-- Tabelas com mais acesso
SELECT
    schemaname,
    relname,
    seq_scan,
    seq_tup_read,
    idx_scan,
    idx_tup_fetch,
    n_tup_ins AS inserts,
    n_tup_upd AS updates,
    n_tup_del AS deletes,
    n_live_tup AS linhas_vivas,
    n_dead_tup AS linhas_mortas,
    ROUND(n_dead_tup * 100.0 / NULLIF(n_live_tup + n_dead_tup, 0), 2) AS dead_pct,
    last_vacuum,
    last_autovacuum,
    last_analyze,
    last_autoanalyze
FROM pg_stat_user_tables
ORDER BY seq_scan + idx_scan DESC
LIMIT 20;

Estatísticas de Índices

-- Índices não utilizados
SELECT
    schemaname,
    tablename,
    indexname,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch,
    pg_size_pretty(pg_relation_size(indexrelid)) AS size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
  AND indexrelid NOT IN (
      SELECT indexrelid FROM pg_constraint WHERE conindid = indexrelid
  )
ORDER BY pg_relation_size(indexrelid) DESC;

-- Índices vs tabela (scan ratio)
SELECT
    relname AS tabela,
    seq_scan,
    idx_scan,
    ROUND(idx_scan * 100.0 / NULLIF(seq_scan + idx_scan, 0), 2) AS idx_pct
FROM pg_stat_user_tables
WHERE seq_scan + idx_scan > 0
ORDER BY idx_pct;

pg_stat_bgwriter

-- Estatísticas do writer de background
SELECT
    checkpoints_timed,
    checkpoints_req,
    ROUND(checkpoints_req * 100.0 / NULLIF(checkpoints_timed + checkpoints_req, 0), 2) AS req_pct,
    buffers_checkpoint,
    buffers_clean,
    buffers_backend,
    buffers_alloc,
    maxwritten_clean,
    stats_reset
FROM pg_stat_bgwriter;

-- Se checkpoints_req > 30%: aumentar max_wal_size ou checkpoint_timeout

Slow Query Log

# postgresql.conf
log_min_duration_statement = 1000         # log queries > 1s
log_line_prefix = '%t [%p]: %u@%d '      # timestamp, pid, user, db
log_checkpoints = on
log_lock_waits = on                       # log waits > deadlock_timeout
log_temp_files = 0                        # log arquivos temporários
log_autovacuum_min_duration = 1000        # log autovacuum > 1s

auto_explain

# Explicar queries lentas automaticamente
# shared_preload_libraries = 'auto_explain'

# postgresql.conf
auto_explain.log_min_duration = '5s'     # explica queries > 5s
auto_explain.log_analyze = on
auto_explain.log_buffers = on
auto_explain.log_nested_statements = on   # explica funções também
auto_explain.log_timing = on
auto_explain.log_triggers = on

pg_top e pgAdmin

# pg_top: htop para PostgreSQL
sudo apt install ptop -y  # ou pg_top
pg_top -h localhost -U postgres

# Comandos pg_top
#   q: sair
#   p: ordenar por CPU
#   m: ordenar por memória
#   t: ordenar por tempo
#   k: matar query
#   s: alterar delay
#   space: refresh

Lab: Dashboard de Monitoramento

cat > pg-monitor.sh << 'SCRIPT'
#!/bin/bash
set -euo pipefail

PGHOST="${PGHOST:-localhost}"
PGPORT="${PGPORT:-5432}"
PGUSER="${PGUSER:-postgres}"
PGDB="${PGDB:-postgres}"
REFRESH="${REFRESH:-5}"

monitor() {
    while true; do
        clear
        echo "=== PostgreSQL Monitor $(date +%H:%M:%S) ==="
        echo ""

        echo "─── CONEXÕES ATIVAS ───"
        psql -h "$PGHOST" -p "$PGPORT" -U "$PGUSER" -d "$PGDB" -At << SQL
            SELECT
                'Ativas: ' || COUNT(*) ||
                ' | Active: ' || COUNT(*) FILTER (WHERE state = 'active') ||
                ' | Idle: ' || COUNT(*) FILTER (WHERE state = 'idle') ||
                ' | Idle in tx: ' || COUNT(*) FILTER (WHERE state = 'idle in transaction')
            FROM pg_stat_activity
            WHERE pid <> pg_backend_pid();
SQL
        echo ""

        echo "─── TOP 5 QUERIES LENTAS (ativas) ───"
        psql -h "$PGHOST" -p "$PGPORT" -U "$PGUSER" -d "$PGDB" << SQL
            SELECT
                pid,
                now() - pg_stat_activity.query_start AS duration,
                state,
                SUBSTR(query, 1, 60) AS query
            FROM pg_stat_activity
            WHERE state = 'active'
              AND pid <> pg_backend_pid()
              AND query NOT LIKE '%pg_stat%'
            ORDER BY query_start
            LIMIT 5;
SQL
        echo ""

        echo "─── LOCKS ───"
        psql -h "$PGHOST" -p "$PGPORT" -U "$PGUSER" -d "$PGDB" << SQL
            SELECT
                COALESCE(COUNT(*) FILTER (WHERE NOT granted), 0) AS blocked,
                COALESCE(COUNT(*) FILTER (WHERE granted), 0) AS granted
            FROM pg_locks
            WHERE locktype = 'transactionid';
SQL
        echo ""

        echo "─── TAMANHO DO DATABASE ───"
        psql -h "$PGHOST" -p "$PGPORT" -U "$PGUSER" -d "$PGDB" << SQL
            SELECT pg_size_pretty(SUM(pg_database_size(datname))) AS total
            FROM pg_database
            WHERE datistemplate = false;
SQL
        echo ""

        echo "─── CACHE HIT RATIO ───"
        psql -h "$PGHOST" -p "$PGPORT" -U "$PGUSER" -d "$PGDB" -At << SQL
            SELECT
                ROUND(
                    SUM(blks_hit) * 100.0 / NULLIF(SUM(blks_hit + blks_read), 0),
                    2
                ) || '%' AS cache_hit_ratio
            FROM pg_stat_database
            WHERE datname = current_database();
SQL
        echo ""

        echo "─── VACUUM STATUS ───"
        psql -h "$PGHOST" -p "$PGPORT" -U "$PGUSER" -d "$PGDB" << SQL
            SELECT relname, n_dead_tup, last_autovacuum, last_autoanalyze
            FROM pg_stat_user_tables
            WHERE n_dead_tup > 1000
            ORDER BY n_dead_tup DESC
            LIMIT 5;
SQL

        sleep "$REFRESH"
    done
}

monitor
SCRIPT

chmod +x pg-monitor.sh

shared_buffers (25% RAM) e effective_cache_size (75% RAM) são os parâmetros mais impactantes. pg_stat_statements revela queries problemáticas. PgBouncer reduz overhead de conexões. auto_explain captura plans de queries lentas. pg_stat_activity + pg_locks diagnosticam bloqueios.