Prometheus는 이벤트 모니터링 및 알림 등에 사용되는 오픈소스 시계열 DB이다. 라이선스는 Apache License 2.0이다. PromQL을 이용해 데이터에 접근할 수 있다.
# Create the account that will run Prometheus.
useradd haedong

# Create the log directory and the TSDB directory, then hand both trees
# over to that account.
mkdir -p /var/log/prometheus /xvdb/prometheus/tsdb
chown -R haedong /var/log/prometheus /xvdb/prometheus
# Environment variables for the Prometheus account.
sudo -i -u haedong

# The quoted delimiter ('EOF') keeps every $VAR literal, so the profile
# receives the variable references themselves rather than their current values.
sudo tee /home/haedong/.bash_profile <<'EOF'
export HOME=/home/haedong
export PROMETHEUS_HOME=$HOME/prometheus
export PATH=$PATH:$PROMETHEUS_HOME/bin
EOF

# Load the new settings into the current shell.
. ~/.bash_profile
# Download and unpack the release tarball. The version is defined once so an
# upgrade only needs a single edit.
PROMETHEUS_VERSION="2.53.3"
PROMETHEUS_DIST="prometheus-${PROMETHEUS_VERSION}.linux-amd64"
wget "https://github.com/prometheus/prometheus/releases/download/v${PROMETHEUS_VERSION}/${PROMETHEUS_DIST}.tar.gz"
tar -xvzf "${PROMETHEUS_DIST}.tar.gz"

# Install under ~/apps and expose it through a version-independent symlink.
# -p / -sfn keep these steps from failing when they are run a second time.
mkdir -p "$HOME/apps"
mv "$PROMETHEUS_DIST" "$HOME/apps/"
ln -sfn "$HOME/apps/$PROMETHEUS_DIST" "$HOME/prometheus"

# Move the binaries into bin/ and the default configuration into conf/.
mkdir -p "$PROMETHEUS_HOME/bin" "$PROMETHEUS_HOME/conf"
mv "$PROMETHEUS_HOME/prometheus" "$PROMETHEUS_HOME/promtool" "$PROMETHEUS_HOME/bin/"
mv "$PROMETHEUS_HOME/prometheus.yml" "$PROMETHEUS_HOME/conf/"
$PROMETHEUS_HOME/conf/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # Default job
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "K8s Master node status"
    static_configs:
      - targets: ["HOST.DOMAIN.NAME:60001", "IP.ADDR.NUM:60001"]

  # Use this form when the scrape target has TLS, authentication, etc. enabled.
  - job_name: "Node Information https"
    scheme: https
    metrics_path: '/actuator/nodes'
    authorization:
      type: Bearer
      # NOTE: Prometheus does not expand environment variables in this file.
      # Replace the placeholder with the real token, or use `credentials_file`
      # to read the token from a file instead.
      credentials: ${BEARER_TOKEN}
    tls_config:
      # Skips verification of the target's certificate.
      insecure_skip_verify: true
    static_configs:
      - targets: ["HOST.DOMAIN.NAME:7777", "IP.ADDR.NUM:7777"]
$PROMETHEUS_HOME/conf/web.yml
tls_server_config:
  # Certificate and key files for server to use to authenticate to client.
  cert_file: /home/haedong/prometheus/certs/haedongg.net.crt
  key_file: /home/haedong/prometheus/certs/haedongg.net.key
# Omit the --web.config.file flag when TLS is not used.
# --web.enable-lifecycle is added so that configuration changes can be
# applied without restarting the server.
# Expansions are quoted so a path containing whitespace is passed as one word.
nohup "$PROMETHEUS_HOME/bin/prometheus" \
  --config.file="$PROMETHEUS_HOME/conf/prometheus.yml" \
  --web.listen-address="0.0.0.0:9443" \
  --web.config.file="$PROMETHEUS_HOME/conf/web.yml" \
  --storage.tsdb.path=/xvdb/prometheus/tsdb \
  --storage.tsdb.retention.time=30d \
  --storage.tsdb.retention.size=100GB \
  --web.enable-lifecycle \
  --log.level=info \
  > /var/log/prometheus/prometheus.log 2>&1 &
$PROMETHEUS_HOME/prometheus.sh
#!/bin/bash
# Settings used by the start/stop/restart/status functions of this script.

# Installation layout
PROMETHEUS_HOME="/home/haedong/prometheus"
PROMETHEUS_BIN="${PROMETHEUS_HOME}/bin/prometheus"
PROMETHEUS_CONFIG="${PROMETHEUS_HOME}/conf/prometheus.yml"
PROMETHEUS_WEB_CONFIG="${PROMETHEUS_HOME}/conf/web.yml"
PID_FILE="${PROMETHEUS_HOME}/prometheus.pid"

# Storage and logging
TSDB_DIR="/xvdb/prometheus/tsdb"
TSDB_RETENTION_PERIOD="30d"
TSDB_RETENTION_SIZE="100GB"
PROMETHEUS_LOG_DIR="/var/log/prometheus"
#######################################
# Start Prometheus in the background and record its PID.
# Globals:   PID_FILE, PROMETHEUS_BIN, PROMETHEUS_CONFIG,
#            PROMETHEUS_WEB_CONFIG, TSDB_DIR, TSDB_RETENTION_PERIOD,
#            TSDB_RETENTION_SIZE, PROMETHEUS_LOG_DIR (all read)
# Outputs:   progress messages on stdout; server output goes to
#            $PROMETHEUS_LOG_DIR/prometheus.log (truncated on each start)
# Returns:   0 when already running or after launching the server
#######################################
start_prometheus() {
  local pid

  if [[ -f "$PID_FILE" ]]; then
    pid=$(<"$PID_FILE")
    # A PID file alone does not prove the server is alive (crash, reboot),
    # so probe the recorded process with signal 0 before refusing to start.
    if [[ "$pid" =~ ^[0-9]+$ ]] && kill -0 "$pid" 2>/dev/null; then
      echo "Prometheus is already running (PID $pid)."
      return 0
    fi
    # Stale PID file: drop it and continue with a normal start.
    rm -f -- "$PID_FILE"
  fi

  echo "Starting Prometheus..."
  nohup "$PROMETHEUS_BIN" --config.file="$PROMETHEUS_CONFIG" \
    --web.listen-address="0.0.0.0:9443" \
    --web.config.file="$PROMETHEUS_WEB_CONFIG" \
    --storage.tsdb.path="$TSDB_DIR" \
    --storage.tsdb.retention.time="$TSDB_RETENTION_PERIOD" \
    --storage.tsdb.retention.size="$TSDB_RETENTION_SIZE" \
    --web.enable-lifecycle \
    --log.level=info \
    > "$PROMETHEUS_LOG_DIR/prometheus.log" 2>&1 &
  # $! is the PID of the background job that was just launched.
  echo $! > "$PID_FILE"
  echo "Prometheus started with PID $(<"$PID_FILE")."
}
#######################################
# Stop Prometheus with SIGTERM and wait until the process has exited.
# Globals:   PID_FILE (read; the file is removed once the process is gone)
#            PROMETHEUS_STOP_TIMEOUT (read, optional; seconds to wait,
#            default 30)
# Outputs:   progress messages on stdout, failures on stderr
# Returns:   0 when stopped or not running, 1 when the signal could not be
#            sent or the process did not exit within the timeout
#######################################
stop_prometheus() {
  local pid deadline
  local -r timeout="${PROMETHEUS_STOP_TIMEOUT:-30}"

  if [[ ! -f "$PID_FILE" ]]; then
    echo "Prometheus is not running."
    return 0
  fi

  pid=$(<"$PID_FILE")
  # Signal 0 only probes the process. A failure is treated as a stale PID
  # file; this assumes the script runs as the user that started the server.
  if [[ ! "$pid" =~ ^[0-9]+$ ]] || ! kill -0 "$pid" 2>/dev/null; then
    rm -f -- "$PID_FILE"
    echo "Prometheus is not running."
    return 0
  fi

  echo "Stopping Prometheus (PID $pid)..."
  if ! kill "$pid"; then
    echo "Failed to signal Prometheus (PID $pid)." >&2
    return 1
  fi

  # Wait for the process to go away so that a following start does not race
  # with the old instance. The PID file is kept if the wait times out.
  deadline=$(( SECONDS + timeout ))
  while kill -0 "$pid" 2>/dev/null; do
    if (( SECONDS >= deadline )); then
      echo "Prometheus (PID $pid) did not exit within ${timeout}s." >&2
      return 1
    fi
    sleep 0.2
  done

  rm -f -- "$PID_FILE"
  echo "Prometheus stopped."
}
#######################################
# Restart Prometheus: run the stop routine, then the start routine.
# Arguments: none
#######################################
restart_prometheus() {
  stop_prometheus
  start_prometheus
}
#######################################
# Report whether Prometheus is running.
# Globals:   PID_FILE (read)
# Outputs:   one status line on stdout
# Returns:   0 in both cases
#######################################
status_prometheus() {
  local pid

  if [[ -f "$PID_FILE" ]]; then
    pid=$(<"$PID_FILE")
    # The PID file can outlive the process, so confirm the recorded PID is
    # still alive (signal 0 probes without delivering a signal).
    if [[ "$pid" =~ ^[0-9]+$ ]] && kill -0 "$pid" 2>/dev/null; then
      echo "Prometheus is running (PID $pid)."
      return 0
    fi
  fi
  echo "Prometheus is not running."
}
# Entry point: the first argument selects the action. Each valid action maps
# to the function named "<action>_prometheus"; anything else prints the usage
# line and exits with status 1.
case "$1" in
  start | stop | restart | status)
    "${1}_prometheus"
    ;;
  *)
    echo "Usage: $0 {start|stop|restart|status}"
    exit 1
    ;;
esac
# Ask the running server to reload its configuration without a restart
# (the server must have been started with --web.enable-lifecycle).
curl --request POST "https://prometheus.haedongg.net:9443/-/reload"
