Kubectl Command Guide

Troubleshooting and Debugging

Pod Troubleshooting

# Attach a busybox debug container to a running pod, targeting one container
kubectl debug --stdin --tty POD_NAME --image=busybox --target=CONTAINER_NAME

# Start a throwaway troubleshooting pod with an interactive shell
kubectl run debug-pod --stdin --tty --rm --image=nicolaka/netshoot -- /bin/bash

# List the events that reference the pod
kubectl get events --field-selector=involvedObject.name=POD_NAME

# Show the pod's full manifest, then a human-readable summary
kubectl get pod POD_NAME --output=yaml
kubectl describe pod POD_NAME

# Read logs from the previous container instance (e.g. after a restart)
kubectl logs --previous POD_NAME
kubectl logs --previous --container=CONTAINER_NAME POD_NAME

# Forward local port 8080 to port 80 of the pod
kubectl port-forward pod/POD_NAME 8080:80

Cluster Diagnostics

# Check cluster health
# NOTE: componentstatuses is deprecated in recent Kubernetes releases; prefer
# the API server health endpoints further down on newer clusters.
kubectl get componentstatuses
# The column spec is single-quoted because the JSONPath filter contains
# characters the shell would otherwise interpret: [ ? ( ) and double quotes.
kubectl get nodes \
    -o custom-columns='NAME:.metadata.name,STATUS:.status.conditions[?(@.type=="Ready")].status'

# Check system pods
kubectl get pods -n kube-system
# Node and pod resource usage
kubectl top nodes
kubectl top pods --all-namespaces

# API server availability
kubectl get --raw /healthz
kubectl get --raw '/readyz?verbose'
kubectl get --raw /metrics

# Check cluster events (oldest first)
kubectl get events --sort-by=.metadata.creationTimestamp

# List pods in any namespace that are neither Running nor Completed
kubectl get pods -o wide --all-namespaces | \
    grep -v Running | \
    grep -v Completed

Network Diagnostics

# Test cluster DNS resolution
# --restart=Never makes this a run-once pod: without it the pod gets
# restartPolicy Always and the container is restarted as soon as the
# one-shot command exits.
# NOTE(review): busybox is pinned to 1.28, presumably because nslookup in
# newer busybox images is unreliable — keep the pin unless verified.
kubectl run test-dns \
    --image=busybox:1.28 \
    --restart=Never \
    --rm -it \
    -- nslookup kubernetes.default

# Check service endpoints
kubectl get endpoints SERVICE_NAME
kubectl describe endpoints SERVICE_NAME

# Test service connectivity (run-once pod, removed afterwards)
kubectl run curl \
    --image=curlimages/curl \
    --restart=Never \
    --rm -it \
    -- curl http://SERVICE_NAME.NAMESPACE.svc.cluster.local

# DNS debugging (interactive shell with DNS tools)
# NOTE(review): this image path comes from older Kubernetes documentation —
# confirm it is still pullable from your registry mirror.
kubectl run dnsutils \
    --image=gcr.io/kubernetes-e2e-test-images/dnsutils:1.3 \
    --restart=Never \
    --rm -it \
    -- bash

Monitoring and Logging

Resource Monitoring

# Monitor resource usage
kubectl top nodes --sort-by=cpu
kubectl top pods --sort-by=memory

# Get metrics for specific pod
kubectl top pod POD_NAME --containers

# Append one timestamped snapshot of pod resource usage to pod_metrics.log.
# A single run records a single point in time; schedule it (cron, or a loop
# with sleep) to build up a history.
# The timestamp is taken once so every line of a snapshot carries the same
# value, and the log file is opened once for the whole loop.
snapshot_time=$(date '+%Y-%m-%d %H:%M:%S')
kubectl top pods --all-namespaces \
    --sort-by=cpu \
    --no-headers | \
    while IFS= read -r line; do
        printf '%s %s\n' "$snapshot_time" "$line"
    done >> pod_metrics.log

Log Collection

# Fetch logs from every container of the pods labelled app=nginx
kubectl logs --selector=app=nginx --all-containers

# Save a pod's logs to a local file
kubectl logs POD_NAME >pod.log

# Follow logs from all matching pods as they are written
kubectl logs --follow --selector=app=nginx --all-containers

# Prefix every log line with its timestamp
kubectl logs --timestamps POD_NAME
 
# Aggregate logs script
# Writes collect-logs.sh, which gathers pod descriptions, per-container logs
# and namespace events into cluster-logs-<timestamp>.tar.gz.
cat <<'EOF' > collect-logs.sh
#!/bin/bash
# Usage: collect-logs.sh NAMESPACE LABEL_SELECTOR
#
# For every pod in NAMESPACE matching LABEL_SELECTOR, saves the output of
# `kubectl describe` and the timestamped logs of each container, then the
# namespace events, and packs everything into
# cluster-logs-<timestamp>.tar.gz in the current directory.
NAMESPACE=$1
LABEL_SELECTOR=$2
OUTPUT_DIR="cluster-logs-$(date +%Y%m%d-%H%M%S)"

mkdir -p "$OUTPUT_DIR" || exit 1

# Collect pod logs
kubectl get pods -n "$NAMESPACE" -l "$LABEL_SELECTOR" -o name | \
while IFS= read -r pod; do
    pod_name=${pod##*/}    # "pod/web-1" -> "web-1"
    mkdir -p "$OUTPUT_DIR/$pod_name"

    # Get pod details
    kubectl describe pod "$pod_name" -n "$NAMESPACE" > \
        "$OUTPUT_DIR/$pod_name/describe.txt"

    # Get container logs.
    # The jsonpath output has no trailing newline, so `read` returns non-zero
    # on the final name even though it filled the variable. The
    # `|| [ -n ... ]` guard keeps the last (or only) container from being
    # skipped.
    kubectl get pod "$pod_name" -n "$NAMESPACE" \
        -o jsonpath='{.spec.containers[*].name}' | \
    tr ' ' '\n' | \
    while IFS= read -r container || [ -n "$container" ]; do
        kubectl logs "$pod_name" -c "$container" -n "$NAMESPACE" \
            --timestamps > \
            "$OUTPUT_DIR/$pod_name/$container.log"
    done
done

# Collect events
kubectl get events -n "$NAMESPACE" \
    --sort-by='.lastTimestamp' > \
    "$OUTPUT_DIR/events.txt"

# Create archive; only remove the raw directory once the archive exists
if tar czf "$OUTPUT_DIR.tar.gz" "$OUTPUT_DIR"; then
    rm -rf "$OUTPUT_DIR"
else
    echo "Failed to create $OUTPUT_DIR.tar.gz; logs left in $OUTPUT_DIR" >&2
    exit 1
fi
EOF

chmod +x collect-logs.sh

Advanced Scripting and Automation

Deployment Automation

# Rolling update script
# Writes rolling-update.sh, which updates a deployment's image, waits for the
# rollout and rolls back if it fails.
cat <<'EOF' > rolling-update.sh
#!/bin/bash
# Usage: rolling-update.sh DEPLOYMENT NEW_IMAGE [NAMESPACE]
#
# Sets every container of DEPLOYMENT to NEW_IMAGE, waits for the rollout to
# finish and undoes it if it does not complete. NAMESPACE defaults to
# "default".
# Exit status: 0 on success, 1 on failure, 2 on bad usage.
if [ "$#" -lt 2 ]; then
    echo "Usage: $0 DEPLOYMENT NEW_IMAGE [NAMESPACE]" >&2
    exit 2
fi

DEPLOYMENT=$1
NEW_IMAGE=$2
NAMESPACE=${3:-default}

# Update image. "*=" targets all containers; it is quoted so the shell never
# expands the asterisk as a filename glob. Stop here on failure: otherwise
# `rollout status` would report the unchanged deployment as a success.
if ! kubectl set image "deployment/$DEPLOYMENT" \
    "*=$NEW_IMAGE" -n "$NAMESPACE"; then
    echo "Failed to update image" >&2
    exit 1
fi

# Watch rollout status and verify deployment
if kubectl rollout status "deployment/$DEPLOYMENT" -n "$NAMESPACE"; then
    echo "Deployment successful"
    # Run tests here
else
    echo "Deployment failed"
    kubectl rollout undo "deployment/$DEPLOYMENT" -n "$NAMESPACE"
    exit 1
fi
EOF

chmod +x rolling-update.sh
 
# Health check script
# Writes health-check.sh, which reports unhealthy pods and pods whose
# resource usage exceeds a threshold.
cat <<'EOF' > health-check.sh
#!/bin/bash
# Usage: health-check.sh [NAMESPACE] [THRESHOLD]
#
# Prints pods in NAMESPACE (default: "default") whose STATUS is neither
# Running nor Completed, then pods whose usage reported by `kubectl top`
# exceeds THRESHOLD (default: 80). THRESHOLD is compared against the raw
# numbers: CPU in millicores ("250m") and memory in Mi ("128Mi"), not
# percentages.
NAMESPACE=${1:-default}
THRESHOLD=${2:-80}

# Prints "<pod> CPU: <value>" and "<pod> Memory: <value>" for every pod above
# THRESHOLD. Returns non-zero if metrics cannot be fetched.
check_resources() {
    local usage
    # For a single namespace `kubectl top pods` prints three columns:
    # NAME  CPU(cores)  MEMORY(bytes). Fetch once and reuse for both checks.
    usage=$(kubectl top pods -n "$NAMESPACE") || return 1

    # Check CPU usage (column 2)
    printf '%s\n' "$usage" | \
    awk -v threshold="$THRESHOLD" 'NR>1 {
        split($2, cpu, "m")
        if (cpu[1] + 0 > threshold + 0) {
            print $1 " CPU: " $2
        }
    }'

    # Check memory usage (column 3)
    printf '%s\n' "$usage" | \
    awk -v threshold="$THRESHOLD" 'NR>1 {
        split($3, mem, "Mi")
        if (mem[1] + 0 > threshold + 0) {
            print $1 " Memory: " $3
        }
    }'
}

# Check pod status (column 3 of `kubectl get pods` is STATUS)
kubectl get pods -n "$NAMESPACE" | \
awk 'NR>1 && $3!="Running" && $3!="Completed" {
    print "Pod " $1 " is in state " $3
}'

# Run resource checks
check_resources
EOF

chmod +x health-check.sh

Backup and Restore

# Backup script
# Writes backup-resources.sh, which dumps every listable resource type to
# YAML and archives the result.
cat <<'EOF' > backup-resources.sh
#!/bin/bash
# Usage: backup-resources.sh
#
# Dumps every resource type that supports "list" to
# k8s-backup-<timestamp>/<type>.yaml and packs the directory into
# k8s-backup-<timestamp>.tar.gz in the current directory.
# NOTE: every listable type is dumped as-is, so sensitive objects end up in
# the archive in readable form — store it accordingly.
BACKUP_DIR="k8s-backup-$(date +%Y%m%d-%H%M%S)"
mkdir -p "$BACKUP_DIR" || exit 1

# backup_resources SCOPE_FLAG [EXTRA_GET_ARG...]
# Dumps each resource type listed by `kubectl api-resources SCOPE_FLAG` to
# $BACKUP_DIR/<type>.yaml. A type that cannot be dumped is reported and its
# partial file removed, so a later restore never applies an empty manifest.
backup_resources() {
    local scope_flag=$1
    shift
    local resource
    while IFS= read -r resource; do
        [ -n "$resource" ] || continue
        echo "Backing up $resource"
        if ! kubectl get "$resource" "$@" -o yaml \
            > "$BACKUP_DIR/$resource.yaml" < /dev/null; then
            echo "Warning: could not back up $resource" >&2
            rm -f "$BACKUP_DIR/$resource.yaml"
        fi
    done < <(kubectl api-resources --verbs=list "$scope_flag" -o name)
}

# Backup all resources
backup_resources --namespaced --all-namespaces

# Backup non-namespaced resources
backup_resources --namespaced=false

# Create archive; only remove the raw directory once the archive exists
if tar czf "$BACKUP_DIR.tar.gz" "$BACKUP_DIR"; then
    rm -rf "$BACKUP_DIR"
else
    echo "Failed to create $BACKUP_DIR.tar.gz; backup left in $BACKUP_DIR" >&2
    exit 1
fi
EOF

chmod +x backup-resources.sh
 
# Restore script
# Writes restore-resources.sh, which applies every manifest from an archive
# produced by the backup script.
cat <<'EOF' > restore-resources.sh
#!/bin/bash
# Usage: restore-resources.sh BACKUP_FILE
#
# Extracts BACKUP_FILE (a .tar.gz whose top-level directory is named after the
# archive) into a scratch directory and runs `kubectl apply` on each *.yaml
# file found there.
# Exit status: 0 if every manifest applied, 1 otherwise.
BACKUP_FILE=$1

if [ ! -f "$BACKUP_FILE" ]; then
    echo "Backup file not found"
    exit 1
fi

# Extract into a private scratch directory so nothing in the caller's
# directory is overwritten or removed; the trap cleans up on every exit path.
WORK_DIR=$(mktemp -d) || exit 1
trap 'rm -rf "$WORK_DIR"' EXIT

if ! tar xzf "$BACKUP_FILE" -C "$WORK_DIR"; then
    echo "Failed to extract $BACKUP_FILE" >&2
    exit 1
fi
BACKUP_DIR="$WORK_DIR/$(basename "$BACKUP_FILE" .tar.gz)"

# Restore resources
status=0
restored=0
for yaml in "$BACKUP_DIR"/*.yaml; do
    [ -e "$yaml" ] || continue    # glob matched nothing
    echo "Restoring $(basename "$yaml")"
    kubectl apply -f "$yaml" || status=1
    restored=$((restored + 1))
done

if [ "$restored" -eq 0 ]; then
    echo "No manifests found in $BACKUP_FILE" >&2
    exit 1
fi
exit "$status"
EOF

chmod +x restore-resources.sh

Best Practices

Security Best Practices

# Pod security context
# The stock "nginx" image expects to start as root and to write to its root
# filesystem, so it crash-loops under this hardened context. The unprivileged
# NGINX image (listens on 8080, keeps its pid and temp files under /tmp) runs
# once /tmp is backed by a writable emptyDir volume.
# The heredoc delimiter is quoted so the manifest is sent to kubectl verbatim.
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: secure-pod
spec:
  securityContext:
    runAsNonRoot: true
    runAsUser: 1000
    fsGroup: 2000
    seccompProfile:
      type: RuntimeDefault
  containers:
  - name: app
    image: nginxinc/nginx-unprivileged
    securityContext:
      allowPrivilegeEscalation: false
      readOnlyRootFilesystem: true
      capabilities:
        drop:
        - ALL
    volumeMounts:
    - name: tmp
      mountPath: /tmp
  volumes:
  - name: tmp
    emptyDir: {}
EOF
 
# Default-deny NetworkPolicy for the current namespace: the empty podSelector
# matches every pod, and both policy types are listed without any allow rules.
kubectl apply --filename=- <<'EOF'
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny
spec:
  podSelector: {}
  policyTypes:
  - Ingress
  - Egress
EOF

Resource Management Best Practices

# Pod with explicit resource requests and limits for CPU and memory
kubectl apply --filename=- <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: resource-pod
spec:
  containers:
  - name: app
    image: nginx
    resources:
      requests:
        memory: "64Mi"
        cpu: "250m"
      limits:
        memory: "128Mi"
        cpu: "500m"
EOF
 
# HorizontalPodAutoscaler for Deployment "app": 2 to 10 replicas, targeting
# 80% average CPU utilization
kubectl apply --filename=- <<'EOF'
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: app-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: app
  minReplicas: 2
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 80
EOF

Remember:

Always use resource requests and limits
Implement proper security contexts
Use network policies
Back up critical resources regularly
Monitor resource usage
Implement proper logging
Use namespaces for isolation
Perform regular security audits
Document all configurations
Use version control for manifests

For detailed information, consult the Kubernetes documentation and the kubectl command reference (kubectl help).

Would you like me to cover any specific aspect in more detail?

cli.wiki

Explorer

Kubectl Command Guide - Part 3

Troubleshooting and Debugging

Pod Troubleshooting

Cluster Diagnostics

Network Diagnostics

Monitoring and Logging

Resource Monitoring

Log Collection

Advanced Scripting and Automation

Deployment Automation

Backup and Restore

Best Practices

Security Best Practices

Resource Management Best Practices

Graph View

Table of Contents

Backlinks