chenby · 199 posts · 144 comments received
Found 199 posts in category 默认分类
2021-12-30
Kubernetes Core Hands-On (9) --- Ingress
14. Ingress

Check whether ingress-nginx is installed:

[root@k8s-master-node1 ~/yaml/test]# kubectl get pod,svc -n ingress-nginx
NAME                                           READY   STATUS      RESTARTS   AGE
pod/ingress-nginx-admission-create--1-74mtg    0/1     Completed   0          172m
pod/ingress-nginx-admission-patch--1-5qrct     0/1     Completed   0          172m
pod/ingress-nginx-controller-f97bd58b5-vr8c2   1/1     Running     0          172m

NAME                                         TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)                      AGE
service/ingress-nginx-controller             NodePort    10.96.109.80    <none>        80:30127/TCP,443:36903/TCP   172m
service/ingress-nginx-controller-admission   ClusterIP   10.96.215.201   <none>        443/TCP                      172m

If it is not installed, see the official documentation: https://kubernetes.github.io/ingress-nginx/deploy/

Create the test environment:

[root@k8s-master-node1 ~/yaml/test]# vim ingress.yaml
[root@k8s-master-node1 ~/yaml/test]# cat ingress.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hello-server
spec:
  replicas: 2
  selector:
    matchLabels:
      app: hello-server
  template:
    metadata:
      labels:
        app: hello-server
    spec:
      containers:
      - name: hello-server
        image: registry.cn-hangzhou.aliyuncs.com/lfy_k8s_images/hello-server
        ports:
        - containerPort: 9000
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: nginx-demo
  name: nginx-demo
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx-demo
  template:
    metadata:
      labels:
        app: nginx-demo
    spec:
      containers:
      - image: nginx
        name: nginx
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: nginx-demo
  name: nginx-demo
spec:
  selector:
    app: nginx-demo
  ports:
  - port: 8000
    protocol: TCP
    targetPort: 80
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: hello-server
  name: hello-server
spec:
  selector:
    app: hello-server
  ports:
  - port: 8000
    protocol: TCP
    targetPort: 9000

[root@k8s-master-node1 ~/yaml/test]# kubectl apply -f ingress.yaml
deployment.apps/hello-server created
deployment.apps/nginx-demo created
service/nginx-demo created
service/hello-server created

Check the Layer 4 (Service) load balancing and test it:

[root@k8s-master-node1 ~/yaml/test]# kubectl get service
NAME               TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)          AGE
hello-server       ClusterIP   10.96.246.46    <none>        8000/TCP         21s
ingress-demo-app   ClusterIP   10.96.145.40    <none>        80/TCP           3h1m
kubernetes         ClusterIP   10.96.0.1       <none>        443/TCP          3h3m
my-dep             NodePort    10.96.241.162   <none>        8000:32306/TCP   23m
nginx              ClusterIP   None            <none>        80/TCP           115m
nginx-demo         ClusterIP   10.96.162.193   <none>        8000/TCP         21s

[root@k8s-master-node1 ~/yaml/test]# curl -I 10.96.162.193:8000
HTTP/1.1 200 OK
Server: nginx/1.21.4
Date: Wed, 17 Nov 2021 09:49:40 GMT
Content-Type: text/html
Content-Length: 615
Last-Modified: Tue, 02 Nov 2021 14:49:22 GMT
Connection: keep-alive
ETag: "61814ff2-267"
Accept-Ranges: bytes

[root@k8s-master-node1 ~/yaml/test]# curl -I 10.96.246.46:8000
HTTP/1.1 200 OK
Date: Wed, 17 Nov 2021 09:49:52 GMT
Content-Length: 12
Content-Type: text/plain; charset=utf-8

Create the Layer 7 (Ingress) rules:

[root@k8s-master-node1 ~/yaml/test]# vim ingress-7.yaml
[root@k8s-master-node1 ~/yaml/test]# cat ingress-7.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: ingress-host-bar
spec:
  ingressClassName: nginx
  rules:
  - host: "hello.chenby.cn"
    http:
      paths:
      - pathType: Prefix
        path: "/"
        backend:
          service:
            name: hello-server
            port:
              number: 8000
  - host: "demo.chenby.cn"
    http:
      paths:
      - pathType: Prefix
        path: "/nginx"  # Requests are forwarded to the service below; it must be able to handle this path, otherwise it returns 404
        backend:
          service:
            name: nginx-demo  ## e.g. for a Java backend, use path rewriting to strip the nginx prefix
            port:
              number: 8000

[root@k8s-master-node1 ~/yaml/test]# kubectl apply -f ingress-7.yaml
ingress.networking.k8s.io/ingress-host-bar created

[root@k8s-master-node1 ~/yaml/test]# kubectl get ingress
NAME               CLASS    HOSTS                            ADDRESS        PORTS   AGE
ingress-demo-app   <none>   app.demo.com                     192.168.1.62   80      3h14m
ingress-host-bar   nginx    hello.chenby.cn,demo.chenby.cn   192.168.1.62   80      9m50s

[root@k8s-master-node1 ~/yaml/test]# kubectl describe ingress ingress-host-bar
Name:             ingress-host-bar
Namespace:        default
Address:          192.168.1.62
Default backend:  default-http-backend:80 (10.244.2.7:8080)
Rules:
  Host             Path    Backends
  ----             ----    --------
  hello.chenby.cn  /       hello-server:8000 (10.244.2.47:9000,10.244.2.48:9000)
  demo.chenby.cn   /nginx  nginx-demo:8000 (10.244.0.13:80,10.244.1.34:80)
Annotations:       <none>
Events:
  Type    Reason  Age                    From                      Message
  ----    ------  ----                   ----                      -------
  Normal  Sync    6m26s (x2 over 6m50s)  nginx-ingress-controller  Scheduled for sync

Hard-code the host entries on your computer:

[root@k8s-master-node1 ~/yaml/test]# kubectl get service ingress-demo-app
NAME               TYPE        CLUSTER-IP     EXTERNAL-IP   PORT(S)   AGE
ingress-demo-app   ClusterIP   10.96.145.40   <none>        80/TCP    3h15m

10.96.145.40 hello.chenby.cn
10.96.145.40 demo.chenby.cn

Test access:

[root@k8s-master-node1 ~/yaml/test]# curl hello.chenby.cn
Hostname: ingress-demo-app-694bf5d965-8rh7f
IP: 127.0.0.1
IP: 10.244.1.6
RemoteAddr: 192.168.1.61:49809
GET / HTTP/1.1
Host: hello.chenby.cn
User-Agent: curl/7.68.0
Accept: */*

[root@k8s-master-node1 ~/yaml/test]# curl demo.chenby.cn
Hostname: ingress-demo-app-694bf5d965-swkpb
IP: 127.0.0.1
IP: 10.244.2.4
RemoteAddr: 192.168.1.61:57797
GET / HTTP/1.1
Host: demo.chenby.cn
User-Agent: curl/7.68.0
Accept: */*

URL rewriting:

[root@k8s-master-node1 ~/yaml/test]# vim ingress-url.yaml
[root@k8s-master-node1 ~/yaml/test]# cat ingress-url.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /$2
  name: ingress-host-bar
spec:
  ingressClassName: nginx
  rules:
  - host: "hello.chenby.cn"
    http:
      paths:
      - pathType: Prefix
        path: "/"
        backend:
          service:
            name: hello-server
            port:
              number: 8000
  - host: "demo.chenby.cn"
    http:
      paths:
      - pathType: Prefix
        path: "/nginx(/|$)(.*)"  # Requests are forwarded to the service below with the /nginx prefix rewritten away
        backend:
          service:
            name: nginx-demo
            port:
              number: 8000

[root@k8s-master-node1 ~/yaml/test]# kubectl apply -f ingress-url.yaml
ingress.networking.k8s.io/ingress-host-bar created

[root@k8s-master-node1 ~/yaml/test]# curl hello.chenby.cn
Hostname: ingress-demo-app-694bf5d965-8rh7f
IP: 127.0.0.1
IP: 10.244.1.6
RemoteAddr: 192.168.1.61:42303
GET / HTTP/1.1
Host: hello.chenby.cn
User-Agent: curl/7.68.0
Accept: */*

[root@k8s-master-node1 ~/yaml/test]# curl demo.chenby.cn
Hostname: ingress-demo-app-694bf5d965-swkpb
IP: 127.0.0.1
IP: 10.244.2.4
RemoteAddr: 192.168.1.61:1108
GET / HTTP/1.1
Host: demo.chenby.cn
User-Agent: curl/7.68.0
Accept: */*

Rate limiting:

[root@k8s-master-node1 ~/yaml/test]# vim ingress-limit.yaml
[root@k8s-master-node1 ~/yaml/test]# cat ingress-limit.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: ingress-limit-rate
  annotations:
    nginx.ingress.kubernetes.io/limit-rps: "1"
spec:
  ingressClassName: nginx
  rules:
  - host: "haha.chenby.cn"
    http:
      paths:
      - pathType: Exact
        path: "/"
        backend:
          service:
            name: nginx-demo
            port:
              number: 8000

[root@k8s-master-node1 ~/yaml/test]# kubectl apply -f ingress-limit.yaml
ingress.networking.k8s.io/ingress-limit-rate created

[root@k8s-master-node1 ~/yaml/test]# vim /etc/hosts
[root@k8s-master-node1 ~/yaml/test]# curl haha.chenby.cn
Hostname: ingress-demo-app-694bf5d965-8rh7f
IP: 127.0.0.1
IP: 10.244.1.6
RemoteAddr: 192.168.1.61:1676
GET / HTTP/1.1
Host: haha.chenby.cn
User-Agent: curl/7.68.0
Accept: */*

Note: to make the Service reachable from outside the cluster, change its type to NodePort:

[root@k8s-master-node1 ~/yaml/test]# kubectl get service ingress-demo-app
NAME               TYPE        CLUSTER-IP     EXTERNAL-IP   PORT(S)   AGE
ingress-demo-app   ClusterIP   10.96.145.40   <none>        80/TCP    6h27m
[root@k8s-master-node1 ~/yaml/test]# kubectl edit service ingress-demo-app
service/ingress-demo-app edited
[root@k8s-master-node1 ~/yaml/test]# kubectl get service ingress-demo-app
NAME               TYPE       CLUSTER-IP     EXTERNAL-IP   PORT(S)        AGE
ingress-demo-app   NodePort   10.96.145.40   <none>        80:30975/TCP   6h28m
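To make the rewrite and rate-limit rules concrete, here is a hedged sketch of how to exercise them. The example paths are illustrative and not from the transcript above, and the 503 responses are the ingress-nginx default behaviour once limit-rps is exceeded:

# With path: "/nginx(/|$)(.*)" and rewrite-target: /$2, capture group $2
# becomes the upstream path:
#   demo.chenby.cn/nginx         -> /
#   demo.chenby.cn/nginx/        -> /
#   demo.chenby.cn/nginx/a.html  -> /a.html
curl -s http://demo.chenby.cn/nginx/index.html   # nginx-demo sees GET /index.html

# Exercise limit-rps: "1": fire a short burst and print the status codes;
# requests beyond the allowed rate should come back as 503.
for i in $(seq 1 5); do
  curl -s -o /dev/null -w "%{http_code}\n" http://haha.chenby.cn/
done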
December 30, 2021 · 485 reads · 0 comments · 0 likes
2021-12-30
Using GPUs in Docker Containers
Background

Containers encapsulate an application's dependencies to provide reproducible and reliable execution of applications and services without the overhead of a full virtual machine. If you have ever spent a day provisioning a server with the large stack of packages a scientific or deep learning application needs, or spent weeks making sure an application builds and deploys across multiple Linux environments, Docker containers are well worth your time.

Installation

Add the Docker repository:

[root@localhost ~]# sudo yum-config-manager --add-repo=https://download.docker.com/linux/centos/docker-ce.repo
Loaded plugins: fastestmirror, langpacks
adding repo from: https://download.docker.com/linux/centos/docker-ce.repo
grabbing file https://download.docker.com/linux/centos/docker-ce.repo to /etc/yum.repos.d/docker-ce.repo
repo saved to /etc/yum.repos.d/docker-ce.repo

[root@localhost ~]# cat /etc/yum.repos.d/docker-ce.repo
[docker-ce-stable]
name=Docker CE Stable - $basearch
baseurl=https://download.docker.com/linux/centos/$releasever/$basearch/stable
enabled=1
gpgcheck=1
gpgkey=https://download.docker.com/linux/centos/gpg

[docker-ce-stable-debuginfo]
name=Docker CE Stable - Debuginfo $basearch
baseurl=https://download.docker.com/linux/centos/$releasever/debug-$basearch/stable
enabled=0
gpgcheck=1
gpgkey=https://download.docker.com/linux/centos/gpg

[docker-ce-stable-source]
name=Docker CE Stable - Sources
baseurl=https://download.docker.com/linux/centos/$releasever/source/stable
enabled=0
gpgcheck=1
gpgkey=https://download.docker.com/linux/centos/gpg

[docker-ce-test]
name=Docker CE Test - $basearch
baseurl=https://download.docker.com/linux/centos/$releasever/$basearch/test
enabled=0
gpgcheck=1
gpgkey=https://download.docker.com/linux/centos/gpg

[docker-ce-test-debuginfo]
name=Docker CE Test - Debuginfo $basearch
baseurl=https://download.docker.com/linux/centos/$releasever/debug-$basearch/test
enabled=0
gpgcheck=1
gpgkey=https://download.docker.com/linux/centos/gpg

[docker-ce-test-source]
name=Docker CE Test - Sources
baseurl=https://download.docker.com/linux/centos/$releasever/source/test
enabled=0
gpgcheck=1
gpgkey=https://download.docker.com/linux/centos/gpg

[docker-ce-nightly]
name=Docker CE Nightly - $basearch
baseurl=https://download.docker.com/linux/centos/$releasever/$basearch/nightly
enabled=0
gpgcheck=1
gpgkey=https://download.docker.com/linux/centos/gpg

[docker-ce-nightly-debuginfo]
name=Docker CE Nightly - Debuginfo $basearch
baseurl=https://download.docker.com/linux/centos/$releasever/debug-$basearch/nightly
enabled=0
gpgcheck=1
gpgkey=https://download.docker.com/linux/centos/gpg

[docker-ce-nightly-source]
name=Docker CE Nightly - Sources
baseurl=https://download.docker.com/linux/centos/$releasever/source/nightly
enabled=0
gpgcheck=1
gpgkey=https://download.docker.com/linux/centos/gpg

Download the packages:

[root@localhost ~]# cd docker
[root@localhost docker]# repotrack docker-ce

Install Docker and enable it at boot:

[root@localhost docker]# yum install ./*
[root@localhost docker]# systemctl start docker
[root@localhost docker]# systemctl enable docker
Created symlink from /etc/systemd/system/multi-user.target.wants/docker.service to /usr/lib/systemd/system/docker.service.

Configure the nvidia-docker repository:

[root@localhost docker]# distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
>   && curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.repo | sudo tee /etc/yum.repos.d/nvidia-docker.repo

[root@localhost docker]# cat /etc/yum.repos.d/nvidia-docker.repo
[libnvidia-container]
name=libnvidia-container
baseurl=https://nvidia.github.io/libnvidia-container/stable/centos7/$basearch
repo_gpgcheck=1
gpgcheck=0
enabled=1
gpgkey=https://nvidia.github.io/libnvidia-container/gpgkey
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt

[libnvidia-container-experimental]
name=libnvidia-container-experimental
baseurl=https://nvidia.github.io/libnvidia-container/experimental/centos7/$basearch
repo_gpgcheck=1
gpgcheck=0
enabled=0
gpgkey=https://nvidia.github.io/libnvidia-container/gpgkey
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt

[nvidia-container-runtime]
name=nvidia-container-runtime
baseurl=https://nvidia.github.io/nvidia-container-runtime/stable/centos7/$basearch
repo_gpgcheck=1
gpgcheck=0
enabled=1
gpgkey=https://nvidia.github.io/nvidia-container-runtime/gpgkey
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt

[nvidia-container-runtime-experimental]
name=nvidia-container-runtime-experimental
baseurl=https://nvidia.github.io/nvidia-container-runtime/experimental/centos7/$basearch
repo_gpgcheck=1
gpgcheck=0
enabled=0
gpgkey=https://nvidia.github.io/nvidia-container-runtime/gpgkey
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt

[nvidia-docker]
name=nvidia-docker
baseurl=https://nvidia.github.io/nvidia-docker/centos7/$basearch
repo_gpgcheck=1
gpgcheck=0
enabled=1
gpgkey=https://nvidia.github.io/nvidia-docker/gpgkey
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt

Download and install nvidia-docker2 and nvidia-container-toolkit:

[root@localhost ~]# mkdir nvidia-docker2
[root@localhost ~]# cd nvidia-docker2
[root@localhost nvidia-docker2]# yum update -y
[root@localhost nvidia-docker2]# repotrack nvidia-docker2
[root@localhost nvidia-docker2]# yum install ./*
[root@localhost ~]# mkdir nvidia-container-toolkit
[root@localhost ~]# cd nvidia-container-toolkit
[root@localhost nvidia-container-toolkit]# repotrack nvidia-container-toolkit
[root@ai-rd nvidia-container-toolkit]# yum install ./*

Pull the image and save it:

[root@localhost ~]# docker pull nvidia/cuda:11.0-base
11.0-base: Pulling from nvidia/cuda
54ee1f796a1e: Pull complete
f7bfea53ad12: Pull complete
46d371e02073: Pull complete
b66c17bbf772: Pull complete
3642f1a6dfb3: Pull complete
e5ce55b8b4b9: Pull complete
155bc0332b0a: Pull complete
Digest: sha256:774ca3d612de15213102c2dbbba55df44dc5cf9870ca2be6c6e9c627fa63d67a
Status: Downloaded newer image for nvidia/cuda:11.0-base
docker.io/nvidia/cuda:11.0-base

[root@localhost ~]# docker images
REPOSITORY    TAG         IMAGE ID       CREATED         SIZE
nvidia/cuda   11.0-base   2ec708416bb8   15 months ago   122MB

[root@localhost ~]# docker save -o cuda-11.0.tar nvidia/cuda:11.0-base
[root@localhost ~]# ls cuda-11.0.tar
cuda-11.0.tar

Import the image on the server to be tested:

[root@ai-rd cby]# docker load -i cuda-11.0.tar
2ce3c188c38d: Loading layer [==================================================>]  75.23MB/75.23MB
ad44aa179b33: Loading layer [==================================================>]  1.011MB/1.011MB
35a91a75d24b: Loading layer [==================================================>]  15.36kB/15.36kB
a4399aeb9a0e: Loading layer [==================================================>]  3.072kB/3.072kB
fa39d0e9f3dc: Loading layer [==================================================>]  18.84MB/18.84MB
232fb43df6ad: Loading layer [==================================================>]  30.08MB/30.08MB
0da51e35db05: Loading layer [==================================================>]  22.53kB/22.53kB
Loaded image: nvidia/cuda:11.0-base

[root@ai-rd cby]# docker images | grep cuda
nvidia/cuda   11.0-base   2ec708416bb8   15 months ago   122MB

Install and upgrade the kernel packages:

[root@ai-rd cby]# yum install kernel-headers
[root@ai-rd cby]# yum install kernel-devel
[root@ai-rd cby]# yum update kernel*

Blacklist the nouveau module and rebuild the initramfs:

[root@ai-rd cby]# vim /etc/modprobe.d/blacklist-nouveau.conf
[root@ai-rd cby]# cat /etc/modprobe.d/blacklist-nouveau.conf
blacklist nouveau
options nouveau modeset=0

[root@ai-rd cby]# mv /boot/initramfs-$(uname -r).img /boot/initramfs-$(uname -r).img.bak
[root@ai-rd cby]# sudo dracut -v /boot/initramfs-$(uname -r).img $(uname -r)

Download and install the NVIDIA driver:

[root@localhost ~]# wget https://cn.download.nvidia.cn/tesla/450.156.00/NVIDIA-Linux-x86_64-450.156.00.run
[root@ai-rd cby]# chmod +x NVIDIA-Linux-x86_64-450.156.00.run
[root@ai-rd cby]# ./NVIDIA-Linux-x86_64-450.156.00.run

Configure Docker:

[root@ai-rd ~]# vim /etc/docker/daemon.json
[root@ai-rd ~]# cat /etc/docker/daemon.json
{
    "runtimes": {
        "nvidia": {
            "path": "nvidia-container-runtime",
            "runtimeArgs": []
        }
    }
}
[root@ai-rd ~]# systemctl daemon-reload
[root@ai-rd ~]# systemctl restart docker

Test GPU access from inside a container:

[root@ai-rd ~]# sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
Tue Nov 23 06:03:04 2021
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.156.00   Driver Version: 450.156.00   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  Tesla T4            Off  | 00000000:86:00.0 Off |                    0 |
| N/A   90C    P0    34W /  70W |      0MiB / 15109MiB |      6%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
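As a quick sketch of other ways to scope GPU access with the same image (the --gpus flag is part of Docker 19.03+ with the NVIDIA container toolkit; the device index here is only for illustration):

docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi -L        # list every GPU visible in the container
docker run --rm --gpus 1 nvidia/cuda:11.0-base nvidia-smi             # grant any single GPU
docker run --rm --gpus '"device=0"' nvidia/cuda:11.0-base nvidia-smi  # grant only GPU index 0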
December 30, 2021 · 1,065 reads · 0 comments · 0 likes
2021-12-30
Using GPUs in Kubernetes (k8s)
Introduction

Kubernetes includes support for managing AMD and NVIDIA GPUs (graphics processing units) across nodes; this support is currently experimental.

Modify the Docker configuration file:

root@hello:~# cat /etc/docker/daemon.json
{
    "default-runtime": "nvidia",
    "runtimes": {
        "nvidia": {
            "path": "/usr/bin/nvidia-container-runtime",
            "runtimeArgs": []
        }
    },
    "data-root": "/var/lib/docker",
    "exec-opts": ["native.cgroupdriver=systemd"],
    "registry-mirrors": [
        "https://docker.mirrors.ustc.edu.cn",
        "http://hub-mirror.c.163.com"
    ],
    "insecure-registries": ["127.0.0.1/8"],
    "max-concurrent-downloads": 10,
    "live-restore": true,
    "log-driver": "json-file",
    "log-level": "warn",
    "log-opts": {
        "max-size": "50m",
        "max-file": "1"
    },
    "storage-driver": "overlay2"
}
root@hello:~# systemctl daemon-reload
root@hello:~# systemctl start docker

Label the GPU node:

root@hello:~# kubectl label nodes 192.168.1.56 nvidia.com/gpu.present=true
root@hello:~# kubectl get nodes -L nvidia.com/gpu.present
NAME           STATUS                     ROLES    AGE    VERSION   GPU.PRESENT
192.168.1.55   Ready,SchedulingDisabled   master   128m   v1.22.2
192.168.1.56   Ready                      node     127m   v1.22.2   true

Install Helm and deploy the NVIDIA device plugin chart (the helm install below assumes the nvdp chart repository has already been added; its upstream is https://nvidia.github.io/k8s-device-plugin):

root@hello:~# curl https://baltocdn.com/helm/signing.asc | sudo apt-key add -
root@hello:~# sudo apt-get install apt-transport-https --yes
root@hello:~# echo "deb https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
root@hello:~# sudo apt-get update
root@hello:~# sudo apt-get install helm

helm install \
  --version=0.10.0 \
  --generate-name \
  nvdp/nvidia-device-plugin

Check that the node now advertises nvidia.com/gpu resources:

root@hello:~# kubectl describe node 192.168.1.56 | grep nv
                    nvidia.com/gpu.present=true
  nvidia.com/gpu:     1
  nvidia.com/gpu:     1
  kube-system   nvidia-device-plugin-1637728448-fgg2d   0 (0%)   0 (0%)   0 (0%)   0 (0%)   50s
  nvidia.com/gpu     0           0

Pull the image, save it, and load it on the node:

root@hello:~# docker pull registry.cn-beijing.aliyuncs.com/ai-samples/tensorflow:1.5.0-devel-gpu
root@hello:~# docker save -o tensorflow-gpu.tar registry.cn-beijing.aliyuncs.com/ai-samples/tensorflow:1.5.0-devel-gpu
root@hello:~# docker load -i tensorflow-gpu.tar

Create a TensorFlow test Pod:

root@hello:~# vim gpu-test.yaml
root@hello:~# cat gpu-test.yaml
apiVersion: v1
kind: Pod
metadata:
  name: test-gpu
  labels:
    test-gpu: "true"
spec:
  containers:
  - name: training
    image: registry.cn-beijing.aliyuncs.com/ai-samples/tensorflow:1.5.0-devel-gpu
    command:
    - python
    - tensorflow-sample-code/tfjob/docker/mnist/main.py
    - --max_steps=300
    - --data_dir=tensorflow-sample-code/data
    resources:
      limits:
        nvidia.com/gpu: 1
  tolerations:
  - effect: NoSchedule
    operator: Exists

root@hello:~# kubectl apply -f gpu-test.yaml
pod/test-gpu created

Check the logs:

root@hello:~# kubectl logs test-gpu
WARNING:tensorflow:From tensorflow-sample-code/tfjob/docker/mnist/main.py:120: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Future major versions of TensorFlow will allow gradients to flow into the labels input on backprop by default.
See tf.nn.softmax_cross_entropy_with_logits_v2.
2021-11-24 04:38:50.846973: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:895] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-24 04:38:50.847698: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1105] Found device 0 with properties:
name: Tesla T4 major: 7 minor: 5 memoryClockRate(GHz): 1.59
pciBusID: 0000:00:10.0
totalMemory: 14.75GiB freeMemory: 14.66GiB
2021-11-24 04:38:50.847759: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1195] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: Tesla T4, pci bus id: 0000:00:10.0, compute capability: 7.5)
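Before running a full training job, a smaller smoke test can confirm that GPU scheduling works end to end. A minimal sketch, assuming the nvidia/cuda:11.0-base image from the previous post is present on the node (the Pod name is hypothetical):

apiVersion: v1
kind: Pod
metadata:
  name: gpu-smoke-test           # hypothetical name
spec:
  restartPolicy: OnFailure
  containers:
  - name: cuda
    image: nvidia/cuda:11.0-base # image imported in the earlier post
    command: ["nvidia-smi"]
    resources:
      limits:
        nvidia.com/gpu: 1        # requests one whole GPU; this extended resource cannot be overcommitted

If kubectl logs gpu-smoke-test prints the same nvidia-smi table as on the host, the device plugin and container runtime are wired up correctly.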
December 30, 2021 · 863 reads · 0 comments · 0 likes
2021-12-30
Installing JupyterHub and JupyterLab on a Kubernetes (k8s) Cluster
Background

JupyterHub brings the power of notebooks to groups of users. It gives users access to computational environments and resources without burdening them with installation and maintenance tasks. Users, including students, researchers, and data scientists, can get their work done in their own workspaces while shared resources are managed efficiently by system administrators. JupyterHub runs in the cloud or on your own hardware and can serve a pre-configured data science environment to any user in the world. It is customizable and scalable, and it suits small and large teams, academic courses, and large-scale infrastructure alike.

Step 1: set up dynamically provisioned storage; see https://cloud.tencent.com/developer/article/1902519

Step 2: install Helm:

root@hello:~# curl https://baltocdn.com/helm/signing.asc | sudo apt-key add -
root@hello:~# sudo apt-get install apt-transport-https --yes
root@hello:~# echo "deb https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
root@hello:~# sudo apt-get update
root@hello:~# sudo apt-get install helm

Step 3: import the images:

root@hello:~# docker load -i pause-3.5.tar
root@hello:~# docker load -i kube-scheduler.tar

Step 4: install JupyterHub:

helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/
helm repo update
helm upgrade --cleanup-on-fail \
  --install ju jupyterhub/jupyterhub \
  --namespace ju \
  --create-namespace \
  --version=1.2.0 \
  --values config.yaml

Note: config.yaml can be customized; see the comments for details. The version below enables the Lab interface:

root@hello:~# vim config.yaml
root@hello:~# cat config.yaml
# This file can update the JupyterHub Helm chart's default configuration values.
#
# For reference see the configuration reference and default values, but make
# sure to refer to the Helm chart version of interest to you!
#
# Introduction to YAML: https://www.youtube.com/watch?v=cdLNKUoMc6c
# Chart config reference: https://zero-to-jupyterhub.readthedocs.io/en/stable/resources/reference.html
# Chart default values: https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/HEAD/jupyterhub/values.yaml
# Available chart versions: https://jupyterhub.github.io/helm-chart/
#
singleuser:
  defaultUrl: "/lab"
  extraEnv:
    JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp"
#singleuser:
#  defaultUrl: "/lab"
#  extraEnv:
#    JUPYTERHUB_SINGLEUSER_APP: "notebook.notebookapp.NotebookApp"

Step 5: change the proxy-public Service to NodePort:

root@hello:~# kubectl get svc -A
NAMESPACE     NAME                        TYPE           CLUSTER-IP      EXTERNAL-IP   PORT(S)                  AGE
default       kubernetes                  ClusterIP      10.68.0.1       <none>        443/TCP                  16h
ju            hub                         ClusterIP      10.68.60.16     <none>        8081/TCP                 114s
ju            proxy-api                   ClusterIP      10.68.239.54    <none>        8001/TCP                 114s
ju            proxy-public                LoadBalancer   10.68.62.47     <pending>     80:32070/TCP             114s
kube-system   dashboard-metrics-scraper   ClusterIP      10.68.244.241   <none>        8000/TCP                 16h
kube-system   kube-dns                    ClusterIP      10.68.0.2       <none>        53/UDP,53/TCP,9153/TCP   16h
kube-system   kube-dns-upstream           ClusterIP      10.68.221.104   <none>        53/UDP,53/TCP            16h
kube-system   kubernetes-dashboard        NodePort       10.68.206.196   <none>        443:32143/TCP            16h
kube-system   metrics-server              ClusterIP      10.68.1.149     <none>        443/TCP                  16h
kube-system   node-local-dns              ClusterIP      None            <none>        9253/TCP                 16h

root@hello:~# kubectl edit svc proxy-public -n ju
service/proxy-public edited

root@hello:~# kubectl get svc -A
NAMESPACE     NAME                        TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)                  AGE
default       kubernetes                  ClusterIP   10.68.0.1       <none>        443/TCP                  16h
ju            hub                         ClusterIP   10.68.60.16     <none>        8081/TCP                 2m19s
ju            proxy-api                   ClusterIP   10.68.239.54    <none>        8001/TCP                 2m19s
ju            proxy-public                NodePort    10.68.62.47     <none>        80:32070/TCP             2m19s
kube-system   dashboard-metrics-scraper   ClusterIP   10.68.244.241   <none>        8000/TCP                 16h
kube-system   kube-dns                    ClusterIP   10.68.0.2       <none>        53/UDP,53/TCP,9153/TCP   16h
kube-system   kube-dns-upstream           ClusterIP   10.68.221.104   <none>        53/UDP,53/TCP            16h
kube-system   kubernetes-dashboard        NodePort    10.68.206.196   <none>        443:32143/TCP            16h
kube-system   metrics-server              ClusterIP   10.68.1.149     <none>        443/TCP                  16h
kube-system   node-local-dns              ClusterIP   None            <none>        9253/TCP                 16h
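Instead of patching the Service by hand with kubectl edit, the chart can render it as a NodePort directly. A hedged sketch of the equivalent config.yaml addition (key layout per the zero-to-jupyterhub chart reference; verify it against the chart version in use):

proxy:
  service:
    type: NodePort   # default is LoadBalancer, hence the <pending> EXTERNAL-IP above

Re-running the same helm upgrade command with this in config.yaml should keep proxy-public as a NodePort across upgrades instead of reverting to LoadBalancer.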
December 30, 2021 · 1,567 reads · 0 comments · 0 likes
2021-12-30
Kubernetes Core Hands-On (5) --- StatefulSets
7. StatefulSets

A StatefulSet is the workload API object used to manage stateful applications. It manages the deployment and scaling of a set of Pods and provides guarantees about the ordering and uniqueness of those Pods. Like a Deployment, a StatefulSet manages a group of Pods based on an identical container spec. Unlike a Deployment, however, a StatefulSet maintains a sticky identity for each of its Pods: the Pods are created from the same spec but are not interchangeable, and each keeps a persistent identifier across any rescheduling. StatefulSets follow the same controller pattern as other workloads: you define the desired state in a StatefulSet object, and the StatefulSet controller makes whatever updates are needed to reach it.

Using StatefulSets

StatefulSets are valuable for applications that require one or more of the following:

- stable, unique network identifiers;
- stable, persistent storage;
- ordered, graceful deployment and scaling;
- ordered, automated rolling updates.

Here, "stable" means persistence across the whole process of Pod scheduling and rescheduling. If an application does not need stable identifiers or ordered deployment, deletion, and scaling, deploy it with a workload object that provides a set of stateless replicas; a Deployment or ReplicaSet may be better suited to stateless needs.

Limitations

- The storage for a given Pod must either be provisioned by a PersistentVolume provisioner based on the requested storage class, or pre-provisioned by an administrator.
- Deleting or scaling down a StatefulSet will not delete the volumes associated with it. This is done to ensure data safety, which is generally more valuable than automatically purging all related StatefulSet resources.
- StatefulSets currently require a headless Service to be responsible for the network identity of the Pods. You are responsible for creating this Service.
- StatefulSets provide no guarantee that Pods are terminated when a StatefulSet is deleted. To achieve ordered, graceful termination of the Pods, scale the StatefulSet down to 0 before deleting it.
- When using rolling updates with the default Pod management policy (OrderedReady), it is possible to get into a broken state that requires manual intervention to repair.

Example:

[root@k8s-master-node1 ~/yaml/test]# vim statefulsets.yaml
[root@k8s-master-node1 ~/yaml/test]# cat statefulsets.yaml
apiVersion: v1
kind: Service
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  ports:
  - port: 80
    name: web
  clusterIP: None
  selector:
    app: nginx
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: nginx-pvc-0
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 200Mi
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: nginx-pvc-1
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 200Mi
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: nginx-pvc-2
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 200Mi
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: web
spec:
  selector:
    matchLabels:
      app: nginx  # has to match .spec.template.metadata.labels
  serviceName: "nginx"
  replicas: 3  # by default is 1
  template:
    metadata:
      labels:
        app: nginx  # has to match .spec.selector.matchLabels
    spec:
      terminationGracePeriodSeconds: 10
      containers:
      - name: nginx
        image: nginx
        ports:
        - containerPort: 80
          name: web
        volumeMounts:
        - name: www
          mountPath: /usr/share/nginx/html
      volumes:
      - name: www
        persistentVolumeClaim:
          claimName: nginx-pvc-0
      volumes:   # note: duplicate key; YAML keeps only the last volumes mapping
      - name: www
        persistentVolumeClaim:
          claimName: nginx-pvc-1
      volumes:
      - name: www
        persistentVolumeClaim:
          claimName: nginx-pvc-2

Create the StatefulSet:

[root@k8s-master-node1 ~/yaml/test]# kubectl apply -f statefulsets.yaml
service/nginx created
statefulset.apps/web created

Check the Pods:

[root@k8s-master-node1 ~/yaml/test]# kubectl get pod
NAME                                     READY   STATUS    RESTARTS   AGE
ingress-demo-app-694bf5d965-8rh7f        1/1     Running   0          67m
ingress-demo-app-694bf5d965-swkpb        1/1     Running   0          67m
nfs-client-provisioner-dc5789f74-5bznq   1/1     Running   0          52m
web-0                                    1/1     Running   0          93s
web-1                                    1/1     Running   0          85s
web-2                                    1/1     Running   0          66s

Check the StatefulSet:

[root@k8s-master-node1 ~/yaml/test]# kubectl get statefulsets.apps -o wide
NAME   READY   AGE    CONTAINERS   IMAGES
web    3/3     113s   nginx        nginx

Note: this assumes dynamic PV provisioning is already working in the cluster; see https://cloud.tencent.com/developer/article/1902519
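As flagged in the manifest comment above, the Pod spec repeats the volumes: key three times; YAML keeps only the last mapping, so all three replicas end up mounting nginx-pvc-2. The idiomatic StatefulSet pattern is a volumeClaimTemplates block, which stamps out one PVC per replica (www-web-0, www-web-1, www-web-2). A minimal sketch, with the storage class name as a placeholder assumption:

apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: web
spec:
  serviceName: "nginx"
  replicas: 3
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      terminationGracePeriodSeconds: 10
      containers:
      - name: nginx
        image: nginx
        ports:
        - containerPort: 80
          name: web
        volumeMounts:
        - name: www
          mountPath: /usr/share/nginx/html
  volumeClaimTemplates:              # one PVC per replica, named www-web-<ordinal>
  - metadata:
      name: www
    spec:
      accessModes: ["ReadWriteOnce"]
      storageClassName: "nfs-client" # placeholder; use the cluster's actual StorageClass
      resources:
        requests:
          storage: 200Mi

With this layout, each replica keeps its own volume across rescheduling, and the hand-written nginx-pvc-0/1/2 claims are no longer needed.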
December 30, 2021 · 717 reads · 0 comments · 0 likes