#Ensure each node runs at most one pod of the same service, using pod anti-affinity (keyed on the node hostname); when upgrading, manually bump the number appended to the uptag label
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-test
  labels:
    app: nginx1
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx1
  template:
    metadata:
      labels:
        app: nginx1
        uptag: nginx0
    spec:
      containers:
      - name: nginx-testpod
        image: nginx
        env:
        - name: pwda
          value: xx1122
        - name: xxoxx
          value: pppp123a2
        ports:
        - containerPort: 80
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: uptag
                operator: In
                values:
                - nginx0
            topologyKey: "kubernetes.io/hostname"
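# Quick check (sketch; the file name nginx-anti.yaml is an assumption for the manifest above).
# With requiredDuringSchedulingIgnoredDuringExecution the two replicas must land on
# different nodes; a third replica on a two-node cluster would stay Pending.
kubectl apply -f nginx-anti.yaml
kubectl get pods -l app=nginx1 -o wide   # the NODE column should show distinct nodes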
#nfs-subdir-external-provisioner
#nfs-client-provisioner (retired, the older project)
#  https://github.com/kubernetes-retired/external-storage/tree/master/nfs-client
#nfs-subdir-external-provisioner (newer project, supports pathPattern)
#  https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner
helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/
helm repo update
helm install nfs-subdir-external-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
  --set nfs.server=x.x.x.x \
  --set nfs.path=/exported/path

kubectl create ns nfs-sc-default
helm install nfs-subdir-external-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
  --set storageClass.name=nfs-sc-default \
  --set nfs.server=192.168.3.244 \
  --set nfs.path=/nfs \
  --set storageClass.defaultClass=true \
  -n nfs-sc-default

kubectl get sc
kubectl get pv
kubectl get pvc -A

# The pathPattern parameter of the storageClass can be set so the directory layout
# on the NFS server stays easy to manage (a helm sketch follows at the end of this section)

#test-pvc.yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: test-claim
spec:
  storageClassName: nfs-sc-default
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Mi

#test-pod.yaml
kind: Pod
apiVersion: v1
metadata:
  name: test-pod
spec:
  containers:
  - name: test-pod
    image: busybox:stable
    command:
      - "/bin/sh"
    args:
      - "-c"
      - "touch /mnt/SUCCESS && exit 0 || exit 1"
    volumeMounts:
      - name: nfs-pvc
        mountPath: "/mnt"
  restartPolicy: "Never"
  volumes:
    - name: nfs-pvc
      persistentVolumeClaim:
        claimName: test-claim

#volumeClaimTemplates example
#subPath is optional and can be removed
#test-ss.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: nginx-ss
  labels:
    appEnv: sit
  namespace: default
spec:
  serviceName: nginx-ssa
  replicas: 3
  selector:
    matchLabels:
      appEnv: sit
  template:
    metadata:
      labels:
        appEnv: sit
    spec:
      dnsPolicy: ClusterFirst
      containers:
      - name: nginx
        image: nginx
        ports:
        - containerPort: 8848
        imagePullPolicy: IfNotPresent
        volumeMounts:
        - name: data
          mountPath: /mnt
          subPath: data
  volumeClaimTemplates:
  - metadata:
      name: data
    spec:
      storageClassName: "nfs-sc-default"
      accessModes:
        - "ReadWriteMany"
      resources:
        requests:
          storage: 10Mi

#Appendix: installing the nfs-server itself
#Debian 11 nfs-server install
apt-get install nfs-kernel-server
mkdir /nfs
echo "/nfs *(rw,sync,no_subtree_check,no_root_squash)" >> /etc/exports
systemctl restart nfs-server
systemctl status nfs-server
#nfs-client
apt-get install nfs-common
systemctl enable rpcbind && systemctl start rpcbind
mount 192.168.3.244:/nfs /mnt
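# A sketch of the pathPattern mentioned above: storageClass.pathPattern is a value of the
# nfs-subdir-external-provisioner chart; the template string below is an assumption,
# adjust it to your own directory layout:
helm upgrade nfs-subdir-external-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
  --reuse-values \
  --set storageClass.pathPattern='${.PVC.namespace}/${.PVC.name}' \
  -n nfs-sc-default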
#Exposing metrics-server for external access
#Set the metrics-server service to NodePort
kubectl patch svc -n kube-system metrics-server -p '{"spec": {"type": "NodePort"}}'
#Fetch the token
kubectl get secrets -n kube-system metrics-server-token-zj94q -o jsonpath='{.data.token}' | base64 -d
#Add permission for plain REST requests against /metrics (nonResourceURLs)
#Without it the logs show the error:
#  "message": "forbidden: User \"system:serviceaccount:kube-system:metrics-server\" cannot get path \"/metrics\""
kubectl edit clusterroles.rbac.authorization.k8s.io system:metrics-server
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
#Add the following:
- nonResourceURLs:
  - /metrics
  - /node/metrics
  verbs:
  - get
  - list
#Build the curl request
curl -k -H 'Authorization: Bearer xxxxxxxxxx' https://192.168.3.231:30847/metrics
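# Putting the above together (sketch; the secret name and NodePort come from this
# cluster's own output, substitute your own):
TOKEN=$(kubectl get secrets -n kube-system metrics-server-token-zj94q -o jsonpath='{.data.token}' | base64 -d)
curl -k -H "Authorization: Bearer $TOKEN" https://192.168.3.231:30847/metrics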
#Installing metrics-server
#Upstream: https://github.com/kubernetes-sigs/metrics-server
#Version compatibility:
#  metrics-server 0.6.x   metrics.k8s.io/v1beta1   Kubernetes 1.19+
#  metrics-server 0.5.x   metrics.k8s.io/v1beta1   Kubernetes *1.8+
#1.19+
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/download/v0.6.2/components.yaml
#1.8+
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/download/v0.5.2/components.yaml
#Note: the following needs to be added to the yaml file
containers:
- args:
  - --cert-dir=/tmp
  - --secure-port=4443
  - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
  - --kubelet-use-node-status-port
  - --metric-resolution=15s
  - --kubelet-insecure-tls   # newly added; otherwise you hit "x509: cannot validate certificate" errors
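# Quick verification once the metrics-server pod is Ready (metrics show up after
# roughly one --metric-resolution interval):
kubectl top nodes
kubectl top pods -A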
#Create a serviceaccount and bind it to the cluster-admin cluster role:
kubectl create serviceaccount sa-xxx
kubectl create clusterrolebinding sa-xxx-cluster-admin --clusterrole='cluster-admin' --serviceaccount=default:sa-xxx
#Fetch the matching token at the same time
TOKEN=$(kubectl get secrets -o jsonpath="{.items[?(@.metadata.annotations['kubernetes\.io/service-account\.name']=='sa-xxx')].data.token}" | base64 -d)
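# Note: on Kubernetes 1.24+ a token secret is no longer auto-created for the
# serviceaccount, so the jsonpath lookup above comes back empty; request a
# short-lived token explicitly instead:
TOKEN=$(kubectl create token sa-xxx)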
#Show the detailed curl-level API requests that kubectl performs
kubectl get nodes --v=10
#Inspecting files inside a docker image
#Find the image and its tag
docker images
#Save the image to a tar file
docker save -o /tmp/xx.tar xxxxx:latest
#Copy the saved file to another directory and unpack it, then unpack each layer's tar file
tar xvf xxxxlayer.tar
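# An alternative sketch that skips unpacking layers by hand: create a stopped
# container from the image (the name tmp-inspect is arbitrary) and export its
# merged filesystem as a single tar stream:
docker create --name tmp-inspect xxxxx:latest
docker export tmp-inspect | tar tvf -   # list files; use 'tar xvf -' to extract
docker rm tmp-inspect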
#Force-delete a pod in k8s
kubectl delete -n kube-system pod xxxxx --grace-period=0 --force
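# A sketch for force-deleting every pod stuck in Terminating (parses the STATUS
# column of 'kubectl get pods -A'; review the list before running):
kubectl get pods -A | awk '$4=="Terminating" {print $1, $2}' | \
  while read ns po; do kubectl delete pod -n "$ns" "$po" --grace-period=0 --force; done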
#Calico eBPF mode
#Kernel requirements
#  rhel: 4.19+
#  debian / ubuntu: 5.3+
#Upgrading the kernel on Debian 10
echo "deb http://deb.debian.org/debian buster-backports main" > /etc/apt/sources.list.d/backports.list
apt update
apt -t buster-backports install linux-image-amd64
apt -t buster-backports install linux-headers-amd64
update-grub
init 6
#Upgrading the kernel on CentOS
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
yum install -y https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm
yum --disablerepo="*" --enablerepo="elrepo-kernel" list available
yum --enablerepo=elrepo-kernel install kernel-lt-devel kernel-lt -y
awk -F\' '$1=="menuentry " {print i++ " : " $2}' /etc/grub2.cfg
grub2-set-default 0
grub2-mkconfig -o /boot/grub2/grub.cfg
init 6

#1. Not strictly required
kubectl edit cm -n kube-system calico-config
      "kubernetes": {
          "kubeconfig": "__KUBECONFIG_FILEPATH__"
      },
      # everything below is newly added
      "dataplane": {
          "type": "ebpf"
      },
      "ipip": {
          "enabled": false,
          "mode": "never"
      },
      "features": "route-reflector,bgp,dsr"

#2.
calicoctl patch felixconfiguration default --patch='{"spec": {"bpfKubeProxyIptablesCleanupEnabled": false}}'
calicoctl patch felixconfiguration default --patch='{"spec": {"bpfEnabled": true}}'
calicoctl patch felixconfiguration default --patch='{"spec": {"bpfExternalServiceMode": "DSR"}}'
kubectl patch ds -n kube-system kube-proxy --type merge -p '{"spec":{"template":{"spec":{"nodeSelector":{"non-calico": "true"}}}}}'

#3. xxx.yaml
kind: ConfigMap
apiVersion: v1
metadata:
  name: kubernetes-services-endpoint
  namespace: kube-system
data:
  KUBERNETES_SERVICE_HOST: "192.168.3.221"
  KUBERNETES_SERVICE_PORT: "6443"

#Check whether eBPF was enabled successfully
tc -s qdisc show dev enp1s0 | grep clsact
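# The felix switches set above can also be read back to confirm (sketch):
calicoctl get felixconfiguration default -o yaml | grep -i bpf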
#Save a secret to a file
# the key is alertmanager.yaml.gz, so the decoded bytes still need gunzip to become readable yaml
kubectl get secrets alertmanager-prometheus-kube-prometheus-alertmanager-generated -o json | jq -r '.data."alertmanager.yaml.gz"' | base64 -d | gunzip > alertmanager.yaml
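# Writing an edited config back is the reverse pipeline (sketch: gzip + base64, then patch
# the secret). Caution: this "-generated" secret is maintained by the prometheus-operator
# and may be overwritten; the authoritative config usually lives in the non-generated
# alertmanager secret.
kubectl patch secret alertmanager-prometheus-kube-prometheus-alertmanager-generated \
  -p "{\"data\": {\"alertmanager.yaml.gz\": \"$(gzip -c alertmanager.yaml | base64 -w0)\"}}"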
#Installing prometheus-pushgateway
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
helm install prometheus-pushgateway prometheus-community/prometheus-pushgateway
#Get the grafana password
kubectl get secret --namespace monitoring grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo

#cat prometheus-additional.yaml
- job_name: "pushgateway"
  honor_labels: true
  static_configs:
    - targets: ["prometheus-pushgateway:9091"]

kubectl create secret generic additional-configs --from-file=prometheus-additional.yaml -n default
kubectl patch prometheuses -n default prometheus-kube-prometheus-prometheus --type='json' -p '[{ "op": "add", "path": "/spec/additionalScrapeConfigs", "value": {"name": "additional-configs", "key": "prometheus-additional.yaml"} }]'
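# Smoke test: push a metric using the pushgateway text protocol (sketch; reach port
# 9091 from inside the cluster, or via a port-forward as below):
kubectl port-forward svc/prometheus-pushgateway 9091 &
echo "test_metric 42" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/smoke_test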
#Prometheus installation notes
#Related git repo
#  https://github.com/prometheus-operator/kube-prometheus/releases
#Install with helm
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
helm show values prometheus-community/kube-prometheus-stack > values.yaml
sed -i 's@ruleSelectorNilUsesHelmValues: true@ruleSelectorNilUsesHelmValues: false@' values.yaml
sed -i 's@serviceMonitorSelectorNilUsesHelmValues: true@serviceMonitorSelectorNilUsesHelmValues: false@' values.yaml
sed -i 's@podMonitorSelectorNilUsesHelmValues: true@podMonitorSelectorNilUsesHelmValues: false@' values.yaml
sed -i 's@probeSelectorNilUsesHelmValues: true@probeSelectorNilUsesHelmValues: false@' values.yaml
#helm upgrade --install prometheus prometheus-community/kube-prometheus-stack -f values.yaml   # to update an existing install
helm install prometheus prometheus-community/kube-prometheus-stack -f values.yaml
#or
helm install prometheus prometheus-community/kube-prometheus-stack
kubectl patch Prometheus -n default prometheus-kube-prometheus-prometheus --type merge -p '{"spec": {"podMonitorSelector": null}}'
kubectl patch Prometheus -n default prometheus-kube-prometheus-prometheus --type merge -p '{"spec": {"podMonitorSelector": {}}}'
kubectl patch Prometheus -n default prometheus-kube-prometheus-prometheus --type merge -p '{"spec": {"probeSelector": null}}'
kubectl patch Prometheus -n default prometheus-kube-prometheus-prometheus --type merge -p '{"spec": {"probeSelector": {}}}'
kubectl patch Prometheus -n default prometheus-kube-prometheus-prometheus --type merge -p '{"spec": {"ruleSelector": null}}'
kubectl patch Prometheus -n default prometheus-kube-prometheus-prometheus --type merge -p '{"spec": {"ruleSelector": {}}}'
kubectl patch Prometheus -n default prometheus-kube-prometheus-prometheus --type merge -p '{"spec": {"serviceMonitorSelector": null}}'
kubectl patch Prometheus -n default prometheus-kube-prometheus-prometheus --type merge -p '{"spec": {"serviceMonitorSelector": {}}}'
kubectl delete po prometheus-prometheus-kube-prometheus-prometheus-0

#Set the svc type to NodePort for external access
#grafana default username/password: admin/prom-operator
kubectl patch svc -n default prometheus-kube-prometheus-prometheus -p '{"spec": {"type": "NodePort"}}'
kubectl patch svc -n default prometheus-grafana -p '{"spec": {"type": "NodePort"}}'

#YAML for testing
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app: node-exporter-ext1
    prometheus: ipaas
  name: node-exporter-ext1
spec:
  endpoints:
  - interval: 30s
    port: http-6580
    scheme: http
    path: /actuator/prometheus
  jobLabel: app
  namespaceSelector:
    matchNames:
    - monitoring
  selector: {}

#podm.yaml
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: sb-api
  labels:
    group: sportsbook
spec:
  selector:
    matchLabels:
      app: sportsbook-api
  podMetricsEndpoints:
  - port: web
    path: /actuator/prometheus
    interval: 30s

#app1.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: sportsbook-api
  name: sb-api
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/probe: "true"
    prometheus.io/port: "8080"
    prometheus.io/path: "/actuator/prometheus"
spec:
  replicas: 5
  selector:
    matchLabels:
      app: sportsbook-api
  template:
    metadata:
      labels:
        app: sportsbook-api
    spec:
      containers:
      - image: platardev/springboot-prometheus-micrometer-servicemonitor
        name: sportsbook-api
        imagePullPolicy: IfNotPresent
        ports:
        - name: web
          containerPort: 8080
---
kind: Service
apiVersion: v1
metadata:
  name: sb-api
  labels:
    app: sportsbook-api
spec:
  type: NodePort
  ports:
  - name: web
    port: 8080
    nodePort: 30901
  selector:
    app: sportsbook-api

#Docker Hub has springboot-prometheus images useful for testing
#The springboot metrics path is /actuator/prometheus
#Hub search keyword: springboot-prometheus
#  https://hub.docker.com/search?q=springboot-prometheus

#Misc
#Make kube-proxy reachable as a prometheus target
$ kubectl edit cm/kube-proxy -n kube-system
## Change from
    metricsBindAddress: 127.0.0.1:10249 ### <--- Too secure
## Change to
    metricsBindAddress: 0.0.0.0:10249
$ kubectl delete pod -l k8s-app=kube-proxy -n kube-system
#For the other components, change the bind address from 127.0.0.1 to 0.0.0.0 in
#/etc/kubernetes/manifests/*.yaml; editing the yaml is enough, the static pods
#restart automatically, no manual pod restart needed

#The default account permissions are already in place; if you run into insufficient
#permissions, the following can serve as a reference
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus1
rules:
- apiGroups: [""]
  resources:
  - nodes
  - nodes/metrics
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources:
  - configmaps
  verbs: ["get"]
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses
  verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus1
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus1
subjects:
- kind: ServiceAccount
  name: prometheus-k8s
  namespace: monitoring

#View the logs
kubectl logs -f --tail 10 prometheus-prometheus-kube-prometheus-prometheus-0

#References
#  https://yunlzheng.gitbook.io/prometheus-book/part-iii-prometheus-shi-zhan/readmd/use-prometheus-monitor-kubernetes
#  k8s book: https://www.qikqiak.com/k8s-book/docs/60.Prometheus%20Operator%E9%AB%98%E7%BA%A7%E9%85%8D%E7%BD%AE.html
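# Quick check that the ServiceMonitor/PodMonitor targets above were picked up (sketch):
kubectl port-forward -n default svc/prometheus-kube-prometheus-prometheus 9090
# then open http://127.0.0.1:9090/targets in a browser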
# Under calico, if machines outside the cluster need to reach pod addresses directly by adding routes (without enabling calico's BGP mode), add these kernel parameters
net.ipv4.conf.tunl0.rp_filter = 0
net.ipv4.conf.all.rp_filter = 0
# Reference: https://imroc.cc/kubernetes/tencent/faq/modify-rp-filter-causing-exception.html
# Consider whether to disable rp_filter on the other interfaces as well
net.ipv4.conf.all.rp_filter = 0
net.ipv4.conf.eth0.rp_filter = 0
net.ipv4.conf.default.rp_filter = 0
net.ipv4.conf.lo.rp_filter = 0
net.ipv4.conf.docker0.rp_filter = 0
# Found through repeated comparison experiments; related parameters, for reference and searching
net.ipv4.conf.default.accept_source_route = 1
net.ipv4.conf.default.promote_secondaries = 0
net.ipv4.conf.default.rp_filter = 0
net.ipv4.conf.docker0.accept_source_route = 1
net.ipv4.conf.docker0.promote_secondaries = 0
net.ipv4.conf.docker0.rp_filter = 0
net.ipv4.conf.enp1s0.accept_source_route = 1
net.ipv4.conf.enp1s0.promote_secondaries = 0
net.ipv4.conf.enp1s0.rp_filter = 0
net.ipv4.conf.lo.accept_source_route = 1
net.ipv4.conf.lo.promote_secondaries = 0
net.ipv4.conf.lo.rp_filter = 0
net.ipv4.conf.tunl0.accept_source_route = 1
net.ipv4.conf.tunl0.promote_secondaries = 0
net.ipv4.conf.tunl0.rp_filter = 0
net.ipv4.conf.all.promote_secondaries = 0
net.ipv4.conf.all.rp_filter = 0
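# To persist the rp_filter settings across reboots (sketch; the file name is arbitrary):
cat <<'EOF' > /etc/sysctl.d/99-calico-rpfilter.conf
net.ipv4.conf.tunl0.rp_filter = 0
net.ipv4.conf.all.rp_filter = 0
EOF
sysctl --system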