Jenkins Monitoring and Observability
Jenkins Monitoring and Observability
Introduction to Monitoring
Why Monitor Jenkins?
- Performance optimization
- Resource utilization
- Capacity planning
- Problem detection
- Security monitoring
Key Metrics
- Build Statistics
- Queue Length
- Resource Usage
- Response Time
- Error Rates
Monitoring Infrastructure
Metrics Collection
# Prometheus Configuration
# Exposes Jenkins metrics at /prometheus, with default, per-build and
# per-pipeline metric groups switched on.
# NOTE(review): key names are kept as written in this guide; confirm them
# against the configuration schema of the installed Prometheus plugin.
jenkins:
  metrics:
    prometheus:
      enabled: true
      path: /prometheus
      defaultMetrics:
        enabled: true
      perBuildMetrics:
        enabled: true
      perPipelineMetrics:
        enabled: true
Grafana Dashboard Setup
# Grafana Dashboard Configuration
# Dashboard provisioning file: registers one file-based provider that loads
# dashboards from /var/lib/grafana/dashboards into the "Jenkins" folder.
apiVersion: 1
providers:
  # All keys below belong to this single list entry.
  - name: 'Jenkins'
    orgId: 1
    folder: 'Jenkins'
    type: file
    options:
      path: /var/lib/grafana/dashboards
Performance Monitoring
System Metrics
// System Metrics Collection
// Declarative pipeline that samples CPU, memory and root-filesystem usage on
// the executing agent and prints the values to the build log.
pipeline {
    agent any
    options {
        timeout(time: 1, unit: 'HOURS')
        timestamps()
    }
    stages {
        stage('Monitor') {
            steps {
                script {
                    // The awk programs are single-quoted for the shell so that
                    // $2/$3/$5 reach awk as field references. Inside shell double
                    // quotes they would be expanded by the shell (to empty
                    // positional parameters) before awk ever ran. The Groovy
                    // strings are double-quoted, hence the \$ escapes.
                    def metrics = [
                        cpu   : sh(script: "top -bn1 | grep 'Cpu(s)' | awk '{print \$2}'", returnStdout: true).trim(),
                        memory: sh(script: "free -m | grep Mem | awk '{print \$3/\$2 * 100}'", returnStdout: true).trim(),
                        // df already appends '%' to the Use% column, so no suffix is added below.
                        disk  : sh(script: "df -h / | tail -1 | awk '{print \$5}'", returnStdout: true).trim()
                    ]
                    echo "System Metrics:\nCPU: ${metrics.cpu}%\nMemory: ${metrics.memory}%\nDisk: ${metrics.disk}"
                }
            }
        }
    }
}
Build Performance
// Build Performance Analysis
// Scripted pipeline that reports how long the current build has been running
// and how long it waited in the queue, flagging builds slower than 5 minutes.
node {
    stage('Analyze Build Performance') {
        // Milliseconds elapsed since the build started executing.
        def buildTime = currentBuild.duration
        // currentBuild exposes no queue-duration property; derive the wait as
        // (time execution started) - (time the build was scheduled).
        def queueTime = currentBuild.startTimeInMillis - currentBuild.timeInMillis
        echo "Build Statistics:"
        echo "Build Time: ${buildTime}ms"
        echo "Queue Time: ${queueTime}ms"
        if (buildTime > 300000) { // 5 minutes
            // 'warning' is not a Pipeline step; 'unstable' logs the message and
            // marks the build UNSTABLE so the slow run is visible in the UI.
            unstable(message: "Build time exceeded threshold")
        }
    }
}
Resource Monitoring
Agent Monitoring
# Agent Resource Monitoring
# Per-agent collectors, each with a warning and an error threshold.
# NOTE(review): units for `interval` and for the thresholds are not stated in
# this guide (memory/diskSpace look like percentages, responseTime like
# milliseconds) — confirm against the consuming tool's schema.
jenkins:
  nodes:
    monitoring:
      interval: 10
      collectors:
        - type: memory
          warning: 85
          error: 95
        - type: diskSpace
          warning: 80
          error: 90
        - type: responseTime
          warning: 5000
          error: 10000
Queue Management
// Queue Monitoring Pipeline
// Declarative pipeline that reads the current build-queue length from the
// controller and e-mails an alert when more than 10 items are waiting.
// NOTE(review): direct use of Jenkins.instance usually requires script
// approval or a trusted (non-sandboxed) script — confirm for your setup.
pipeline {
    agent any
    stages {
        stage('Monitor Queue') {
            steps {
                script {
                    // Read the length in a single expression and keep only the int.
                    // Holding the Queue object in a local variable across steps
                    // (echo/emailext) can fail with NotSerializableException when
                    // the pipeline state is persisted.
                    int queueItems = Jenkins.instance.queue.items.length
                    echo "Current Queue Length: ${queueItems}"
                    if (queueItems > 10) {
                        emailext(
                            subject: "Jenkins Queue Alert",
                            body: "Queue length has exceeded threshold: ${queueItems} items",
                            to: 'admin@example.com'
                        )
                    }
                }
            }
        }
    }
}
Log Management
Log Configuration
# logging.properties
# Route java.util.logging output to a rotating set of files.
handlers=java.util.logging.FileHandler

# Output format and destination.
java.util.logging.FileHandler.formatter=java.util.logging.SimpleFormatter
java.util.logging.FileHandler.pattern=/var/log/jenkins/jenkins.log

# Rotation: number of files to cycle through, and the size limit per file.
java.util.logging.FileHandler.count=10
java.util.logging.FileHandler.limit=50000000
Log Analysis
// Log Analysis Pipeline
// Declarative pipeline that counts ERROR and OutOfMemoryError lines in the
// Jenkins log. More than 100 ERROR lines fails the build; any
// OutOfMemoryError marks it unstable.
pipeline {
    agent any
    stages {
        stage('Analyze Logs') {
            steps {
                script {
                    // grep -c prints the count but exits 1 when the count is 0,
                    // which would make the sh step abort the build in the healthy
                    // case. '|| [ $? -eq 1 ]' turns "no matches" into success
                    // while still failing on real errors (e.g. unreadable file).
                    def errorCount = sh(
                        script: 'grep -c "ERROR" /var/log/jenkins/jenkins.log || [ $? -eq 1 ]',
                        returnStdout: true
                    ).trim().toInteger()
                    if (errorCount > 100) {
                        error "High number of errors detected: ${errorCount}"
                    }
                    // Parse and analyze specific error patterns
                    def outOfMemoryErrors = sh(
                        script: 'grep -c "OutOfMemoryError" /var/log/jenkins/jenkins.log || [ $? -eq 1 ]',
                        returnStdout: true
                    ).trim().toInteger()
                    if (outOfMemoryErrors > 0) {
                        // 'warning' is not a Pipeline step; 'unstable' logs the
                        // message and marks the build UNSTABLE without aborting it.
                        unstable(message: "OutOfMemoryErrors detected: ${outOfMemoryErrors}")
                    }
                }
            }
        }
    }
}
Alerting and Notifications
Alert Configuration
# Alert Configuration
# E-mail and Slack alert targets.
# NOTE(review): `threshold` is nested under `email` here because its values
# (immediate/daily) read as delivery frequencies; the original layout does not
# show the nesting — confirm.
jenkins:
  alerts:
    email:
      recipients: admin@example.com
      threshold:
        error: immediate
        warning: daily
    slack:
      channel: '#jenkins-alerts'
      # Placeholder resolved from the environment/secret store; never commit the real token.
      token: '${SLACK_TOKEN}'
Custom Notifications
// Custom Notification Pipeline
/**
 * Announces a build outcome on Slack and by e-mail.
 *
 * @param status build status label; 'SUCCESS' is shown in green on Slack,
 *               any other value in red
 */
def notifyBuildStatus(String status) {
    // Slack message and e-mail subject use the same headline.
    def headline = "Build ${status}: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]'"

    def slackColor
    if (status == 'SUCCESS') {
        slackColor = 'good'
    } else {
        slackColor = 'danger'
    }
    slackSend(channel: '#jenkins-builds', color: slackColor, message: headline)

    // Continuation lines stay at column 0 so the e-mail body has no leading spaces.
    def details = """Build Status: ${status}
Job: ${env.JOB_NAME}
Build Number: ${env.BUILD_NUMBER}
Build URL: ${env.BUILD_URL}"""
    emailext(
        subject: headline,
        body: details,
        recipientProviders: [[$class: 'DevelopersRecipientProvider']]
    )
}
Visualization and Dashboards
Grafana Dashboard
{
  "dashboard": {
    "id": null,
    "title": "Jenkins Overview",
    "panels": [
      {
        "title": "Build Success Rate",
        "type": "graph",
        "datasource": "Prometheus",
        "targets": [
          { "expr": "jenkins_builds_success_rate", "legendFormat": "Success Rate" }
        ]
      },
      {
        "title": "Queue Length",
        "type": "gauge",
        "datasource": "Prometheus",
        "targets": [
          { "expr": "jenkins_queue_size" }
        ]
      }
    ]
  }
}
Custom Reports
// Custom Reporting Pipeline
// Declarative pipeline that renders an HTML build report into the workspace
// and publishes it on the build page via the HTML Publisher plugin.
pipeline {
    agent any
    stages {
        stage('Generate Report') {
            steps {
                script {
                    def report = generateBuildReport()
                    writeFile file: 'build-report.html', text: report
                    // Parentheses are required: 'publishHTML [ ... ]' is parsed by
                    // Groovy as a subscript on a property named publishHTML, not
                    // as a step invocation.
                    publishHTML(target: [
                        allowMissing: false,
                        alwaysLinkToLastBuild: true,
                        keepAll: true,
                        reportDir: '.',
                        reportFiles: 'build-report.html',
                        reportName: 'Build Performance Report'
                    ])
                }
            }
        }
    }
}
/**
 * Builds a minimal HTML page with the current build's elapsed time and status.
 *
 * @return the HTML document as a string
 */
def generateBuildReport() {
    // Generate HTML report with build statistics.
    // currentResult is used instead of result: result is null while the build
    // is still running, which would render as "Result: null" in the report.
    return """
<html>
<head><title>Build Report</title></head>
<body>
<h1>Build Performance Report</h1>
<p>Build Time: ${currentBuild.duration}ms</p>
<p>Result: ${currentBuild.currentResult}</p>
</body>
</html>
"""
}
Best Practices
Monitoring Checklist
- Set up basic metrics collection
- Configure alerting thresholds
- Implement log rotation
- Create visualization dashboards
- Review metrics regularly
- Automate reporting
- Establish performance baselines
- Plan for capacity growth
Implementation Guide
- Install monitoring plugins
- Configure metrics collection
- Set up dashboards
- Configure alerts
- Implement log management
- Create reports
- Regular maintenance
Hands-on Exercise
Exercise 1: Monitoring Setup
- Install Prometheus plugin
- Configure metrics collection
- Set up Grafana dashboard
- Configure alerts
- Test monitoring system
Exercise 2: Custom Monitoring
- Create custom metrics
- Implement monitoring pipeline
- Set up notifications
- Create custom dashboard
- Test and verify
Assessment
Knowledge Check
- What are key Jenkins metrics to monitor?
- How do you implement metrics collection?
- What are effective alerting strategies?
- How do you analyze performance data?
Practice Tasks
- Set up monitoring
- Configure alerts
- Create dashboards
- Implement reporting
Additional Resources
Documentation
Best Practices
- Regular monitoring
- Proactive alerting
- Performance optimization
- Capacity planning
Next Steps
- Review monitoring concepts
- Practice implementation
- Explore advanced features
- Study real-world scenarios