开头语
写在前面:如有问题,以你为准。
目前24年应届生,各位大佬轻喷,部分资料与图片来自网络
内容较长,页面右上角目录方便跳转
安全上下文(Security Context)定义 Pod 或 Container 的特权与访问控制设置。
OPA(Open Policy Agent):是一个开源的、通用的策略引擎,把策略当作代码去写。
提供一种高级声明性语言 Rego 来编写策略,并把决策这一步骤从复杂的业务逻辑中解耦出来。
Gatekeeper是基于OPA的一个Kubernetes策略解决方案,可替代PSP或者部分RBAC功能。
当在集群中部署了 Gatekeeper 组件后,APIServer 所有的创建、更新或者删除操作都会触发 Gatekeeper 来处理,如果不满足策略则拒绝。
Gatekeeper类似一个准入控制器,对资源对象向kubernetes api 调用进行过滤
# 项目地址
https://github.com/open-policy-agent/gatekeeper
# 安装地址
https://open-policy-agent.github.io/gatekeeper/website/docs/install/
# 语法介绍地址
https://www.openpolicyagent.org/docs/latest/#rego
https://open-policy-agent.github.io/gatekeeper/website/docs/install/
[root@master opa]# kubectl create -f gatekeeper.yaml
namespace/gatekeeper-system created
resourcequota/gatekeeper-critical-pods created
customresourcedefinition.apiextensions.k8s.io/assign.mutations.gatekeeper.sh created
customresourcedefinition.apiextensions.k8s.io/assignmetadata.mutations.gatekeeper.sh created
customresourcedefinition.apiextensions.k8s.io/configs.config.gatekeeper.sh created
customresourcedefinition.apiextensions.k8s.io/constraintpodstatuses.status.gatekeeper.sh created
customresourcedefinition.apiextensions.k8s.io/constrainttemplatepodstatuses.status.gatekeeper.sh created
customresourcedefinition.apiextensions.k8s.io/constrainttemplates.templates.gatekeeper.sh created
customresourcedefinition.apiextensions.k8s.io/modifyset.mutations.gatekeeper.sh created
customresourcedefinition.apiextensions.k8s.io/mutatorpodstatuses.status.gatekeeper.sh created
customresourcedefinition.apiextensions.k8s.io/providers.externaldata.gatekeeper.sh created
serviceaccount/gatekeeper-admin created
role.rbac.authorization.k8s.io/gatekeeper-manager-role created
clusterrole.rbac.authorization.k8s.io/gatekeeper-manager-role created
rolebinding.rbac.authorization.k8s.io/gatekeeper-manager-rolebinding created
clusterrolebinding.rbac.authorization.k8s.io/gatekeeper-manager-rolebinding created
secret/gatekeeper-webhook-server-cert created
service/gatekeeper-webhook-service created
Warning: spec.template.metadata.annotations[container.seccomp.security.alpha.kubernetes.io/manager]: non-functional in v1.27+; use the "seccompProfile" field instead
deployment.apps/gatekeeper-audit created
deployment.apps/gatekeeper-controller-manager created
mutatingwebhookconfiguration.admissionregistration.k8s.io/gatekeeper-mutating-webhook-configuration created
validatingwebhookconfiguration.admissionregistration.k8s.io/gatekeeper-validating-webhook-configuration created
resource mapping not found for name: "gatekeeper-admin" namespace: "" from "gatekeeper.yaml": no matches for kind "PodSecurityPolicy" in version "policy/v1beta1"
ensure CRDs are installed first
resource mapping not found for name: "gatekeeper-controller-manager" namespace: "gatekeeper-system" from "gatekeeper.yaml": no matches for kind "PodDisruptionBudget" in version "policy/v1beta1"
ensure CRDs are installed first
[root@master opa]# kubectl get pod -n gatekeeper-system
NAME                                             READY   STATUS    RESTARTS   AGE
gatekeeper-audit-5f64f9f48-h4rjc                 1/1     Running   0          6m12s
gatekeeper-controller-manager-64555779db-6q6xv   1/1     Running   0          6m12s
gatekeeper-controller-manager-64555779db-95q8l   1/1     Running   0          6m12s
gatekeeper-controller-manager-64555779db-cg9x2   1/1     Running   0          6m12s
(安装 Gatekeeper 后才会有 ConstraintTemplate 资源对象)
apiVersion: templates.gatekeeper.sh/v1beta1
kind: ConstraintTemplate
metadata:
? name: privileged
spec:
? crd:
??? spec:
????? names:
??????? kind: privileged # 与上面name保持一致
? targets: # 这个字段里面才是定义逻辑的核心
??? - target: admission.k8s.gatekeeper.sh
????? rego: | # 指定rego语言,|是代表多行
??????? package admission # 声明包名
??????? violation[{"msg": msg}] {? # 如果violation为true说明违反约束:下面的逻辑全部通过就是拒绝,没有通过就是放行
????????? containers = input.review.object.spec.template.spec.containers # 导入pod 中 containers 字段
????????? c_name := containers[0].name # 从上面导入的变量中再从中获取name字段?????????
????????? containers[0].securityContext.privileged # 再从上面导入变量中拿到特权字段,来进行判断,如果为True说明启用了这个特权,则违反约束
????????? # 整个violation 返回的就是一个true, 则说明违反约束
????????? msg := sprintf("提示:'%v'容器禁止启用特权!",[c_name]) # 如果违法则打印此消息
??????? }
? containers:
? - image: nginx:1.17.1
??? name: web
??? securityContext:
????? allowPrivilegeEscalation: false # 不允许提权
????? privileged: true? # 开启特权模式
[root@master opa]# kubectl apply -f privileged_tpl.yaml
constrainttemplate.templates.gatekeeper.sh/privileged created
[root@master opa]# kubectl get ConstraintTemplate
NAME???????? AGE
privileged?? 8s
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: privileged # 这个名字指定的是上面创建 ConstraintTemplate 中的name
metadata:
? name: privileged # 这个才是这个约束策略的名字
spec:
? match:? # 要匹配的资源
??? kinds:
????? - apiGroups: ["apps"]
??????? kinds:
??????? - "Deployment"
??????? - "DaemonSet"
??????? - "StatefulSet"
[root@master opa]# kubectl apply -f privileged_constraints.yaml
privileged.constraints.gatekeeper.sh/privileged created
[root@master opa]# kubectl get privileged
NAME???????? AGE
privileged?? 12s
apiVersion: apps/v1
kind: Deployment
metadata:
? labels:
??? app: web
? name: web
spec:
? replicas: 1
? selector:
??? matchLabels:
????? app: web
? strategy: {}
? template:
??? metadata:
????? creationTimestamp: null
????? labels:
??????? app: web
??? spec:
????? containers:
????? - image: busybox:1.30
??????? name: opa-test
??????? command:
??????? - sleep
??????? - 12h
??????? securityContext:
????????? privileged: true
[root@master opa]# kubectl apply -f deploy.yaml
Error from server ([privileged] 提示:'opa-test'容器禁止启用特权!): error when creating "deploy.yaml": admission webhook "validation.gatekeeper.sh" denied the request: [privileged] 提示:'opa-test'容器禁止启用特权!
删除 securityContext 中的 privileged: true 即可创建
判断镜像的开头是否匹配,如果没带域名就是dockerhub的
registry.aliyuncs.com/google_containers/pause???????????????????? 3.9?????? e6f181688397?? 13 months ago?? 744kB
centos??????????????????????????????????????????????????????????? latest??? 5d0da3dc9764?? 2 years ago???? 231MB
apiVersion: templates.gatekeeper.sh/v1beta1
kind: ConstraintTemplate
metadata:
? name: image-check
spec:
? crd:
??? spec:
????? names:
??????? kind: image-check
????? validation:
??????? openAPIV3Schema:
????????? properties:? # 需要满足条件的参数,接收参数的固定语法
??????????? prefix:
????????????? type: string
? targets:
??? - target: admission.k8s.gatekeeper.sh
????? rego: |
??????? package image
??????? violation[{"msg": msg}] {
????????? containers = input.review.object.spec.template.spec.containers
????????? image := containers[0].image
????????? not startswith(image, input.parameters.prefix)
????????? # 镜像地址开头不匹配并取反则为True,说明违反约束
????????? msg := sprintf("提示:'%v'镜像地址不在可信任仓库!", [image])
??????? }
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: image-check
metadata:
? name: image-check
spec:
? match:
??? kinds:
????? - apiGroups: ["apps"]
??????? kinds:
??????? - "Deployment"
??????? - "DaemonSet"
??????? - "StatefulSet"
? parameters:? # 传递给opa的参数,用于匹配镜像开头域名
??? prefix: "snj/"
[root@master opa]# kubectl apply -f image-check_tpl.yaml
constrainttemplate.templates.gatekeeper.sh/image-check created
[root@master opa]# kubectl apply -f image-check_constraints.yaml
image-check.constraints.gatekeeper.sh/image-check created
下面 Deployment 中使用的是 dockerhub 的镜像仓库
[root@master opa]# kubectl create deployment web1 --image=nginx:v1
error: failed to create deployment: admission webhook "validation.gatekeeper.sh" denied the request: [image-check] 提示:'nginx:v1'镜像地址不在可信任仓库!
[root@master opa]# kubectl create deployment web1 --image=snj/nginx:v1
deployment.apps/web1 created
apiVersion: v1
kind: Pod
metadata:
? labels:
??? run: web
? name: web3
? namespace: test
spec:
? securityContext:
??? runAsUser: 1000 # 镜像里必须有这个用户UID
??? fsGroup: 1000 # 数据卷挂载后的目录属组设置为该组
? containers:
? - image: nginx:1.17.1
??? name: web
??? securityContext:
????? allowPrivilegeEscalation: false # 不允许提权
????? privileged: true? # 开启特权模式
????? # 特权模式:在特权模式下运行容器,特权容器中的进程本质上相当于主机上的 root
????? capabilities:
??????? add: ["SYS_ADMIN"]??
????? readOnlyRootFilesystem: true
????? # 只读挂载容器文件系统?????????
apiVersion: v1
kind: Pod
metadata:
? name: securitycontext-capabilities-demo
? namespace: default
spec:
? securityContext:??????????? #pod级别的安全上下文,对内部所有容器均有效
??? runAsUser <integer>?????? #以指定的用户身份运行容器进程,默认由镜像中的USER指定
??? runAsGroup <integer>????? #以指定的用户组运行容器进程,默认使用的组随容器运行时设定
??? supplementalGroups <[] integer>? #为容器中1号进程的用户添加的附加组
??? fsGroup <integer>???????? #为容器中的1号进程附加一个专用组,其功能类似于sgid
??? runAsNonRoot <boolean>??? #是否以非root身份运行
??? seLinuxOptions <object>?? #SELINUX的相关配置
??? sysctls <[] object>?????? #应用到当前pod名称空间级别的sysctl参数设置列表
??? windowsOptions <object>?? #windows容器专用配置
? containers:
? - name: ...
??? image: ...
??? securityContext:????? #容器级别的安全上下文,仅在当前容器生效
????? runAsUser <integer>?????? #以指定的用户身份运行容器进程,默认由镜像中的USER指定
????? runAsGroup <integer>????? #以指定的用户组运行容器进程,默认使用的组随容器运行时设定
????? runAsNonRoot <boolean>??? #是否以非root身份运行
????? allowPrivilegeEscalation <boolean> #是否允许特权升级
????? capabilities <object>???? #为当前容器添加或删除内核能力
??????? add <[] string>???????? #添加由列表定义的各项内核能力
??????? drop <[] string>??????? #移除由列表定义的各项内核能力
????? privileged <boolean>????? #是否允许为特权容器
????? procMount <string>??????? #设置容器procMount类型,默认为DefaultProcMount
????? readOnlyRootFilesystem <boolean>? #是否将根文件系统设置为只读模式
????? seLinuxOptions <object>?? #selinux相关配置
????? windowsOptions <object>?? #windows相关配置
设置启动容器的用户,可以是用户名或 UID,所以只有下面两种写法是正确的:USER <用户名> 或 USER <UID>
FROM python
RUN useradd python
RUN mkdir /data/www -p
COPY . /data/www
RUN chown -R python /data
RUN pip install flask -i https://mirrors.aliyun.com/pypi/simple/ && \
??? pip install prometheus_client -i https://mirrors.aliyun.com/pypi/simple/
WORKDIR /data/www
USER python
CMD python main.py # CMD 执行命令使用的用户是上面 USER 指令指定的用户
因为容器中跑的程序在宿主机中也是一个进程(k8s 可以解决这个问题)
如果使用 root 用户,在宿主机中也就等于root 去执行可能会导致这个程序权限过大,非常不安全
[root@master flask-demo]# docker run -d --name nginx-python nginx/python:v1
[root@master flask-demo]# docker ps -a | grep python
6440126bac3b?? nginx/python:v1???????????????????????????????????? "/bin/sh -c 'python …"?? About a minute ago?? Up About a minute??????????????????? nginx-python
[root@master flask-demo]# docker exec -it 644 /bin/bash
python@6440126bac3b:/data/www$ id
uid=1000(python) gid=1000(python) groups=1000(python)
python@6440126bac3b:/data/www$ ps -ef
UID????????? PID??? PPID? C STIME TTY????????? TIME CMD
python???????? 1?????? 0? 0 10:07 ???????? 00:00:00 /bin/sh -c python main.py
python???????? 6?????? 1? 0 10:07 ???????? 00:00:00 python main.py
python??????? 22?????? 0? 4 10:09 pts/0??? 00:00:00 /bin/bash
python??????? 28????? 22? 0 10:09 pts/0??? 00:00:00 ps -ef
[root@master flask-demo]# ps -ef | grep python
root??????? 1045?????? 1? 0 Oct30 ???????? 00:01:12 /usr/libexec/platform-python -Es /usr/sbin/tuned -l -P
root??????? 3282?????? 1? 0 Oct30 ???????? 00:00:01 /usr/libexec/platform-python /usr/libexec/rhsmd
snj????? 4086721 4086700? 0 05:07 ???????? 00:00:00 /bin/sh -c python main.py
snj????? 4086767 4086721? 0 05:07 ???????? 00:00:00 python main.py
root???? 4090010 3475369? 0 05:08 pts/2??? 00:00:00 grep --color=auto python
containers 下也有针对容器的 securityContext 字段
k8s 解决docker 程序外露问题,因为容器外部是 pod
---
apiVersion: apps/v1 # 版本号
kind: Deployment # 类型
metadata: # 元数据
? name: rq-deployment # deployment的名称
? namespace: test # 命名空间
spec: # 详细描述
? replicas: 2 # 副本数量
? selector: # 选择器,通过它指定该控制器可以管理哪些Pod
??? matchLabels: # Labels匹配规则
????? app: flask-python # 必须与 template 中 Pod 的标签一致
? template: # 模板,当副本数量不足的时候,会根据下面的模板创建Pod副本
??? metadata:
????? labels:
??????? app: flask-python
??? spec:
????? securityContext:
??????? runAsUser: 1000
????? containers:
??????? - name: flask-python # 容器名称
????????? image: flask-python:v1 # 容器需要的镜像地址
[root@master cks]# kubectl apply -f sc-deploy.yaml
deployment.apps/rq-deployment created
[root@master cks]# kubectl get deploy -n test
NAME??????????? READY?? UP-TO-DATE?? AVAILABLE?? AGE
rq-deployment?? 2/2???? 2??????????? 2?????????? 2m37s
[root@master cks]# kubectl get pod -n test
NAME???????????????????????????? READY?? STATUS??? RESTARTS????? AGE
rq-deployment-77b8f8dfbd-6vhvv?? 1/1???? Running?? 0???????????? 2m18s
rq-deployment-77b8f8dfbd-vfvz7?? 1/1???? Running?? 0???????????? 2m19s
web-seccomp????????????????????? 1/1???? Running?? 1 (22h ago)?? 46h
[root@master cks]# id 1000
uid=1000(snj) gid=1000(snj) groups=1000(snj)
[root@master cks]# kubectl exec -n test rq-deployment-77b8f8dfbd-6vhvv -- id
uid=1000(python) gid=1000(python) groups=1000(python)
k8s 解决docker 程序外露问题
[root@master cks]# kubectl exec -n test rq-deployment-77b8f8dfbd-6vhvv -- ps -ef
UID?????????PID???PPID??C STIME TTY??????????TIME CMD
python????????1??????0??0 10:27 ?????????00:00:00 /bin/sh -c python main.py
python????????7??????1??0 10:27 ?????????00:00:00 python main.py
python???????56??????0??0 10:34 ?????????00:00:00 ps -ef
[root@master cks]# ps -ef | grep python
root????????1045???????1??0 Oct30 ?????????00:01:12 /usr/libexec/platform-python -Es /usr/sbin/tuned -l -P
root????????3282???????1??0 Oct30 ?????????00:00:01 /usr/libexec/platform-python /usr/libexec/rhsmd
root?????4187675 3475369??0 05:32 pts/2????00:00:00 grep --color=auto python
apiVersion: v1
kind: Pod
metadata:
? labels:
??? run: sc-cap
? name: sc-cap
? namespace: test
spec:
? containers:
? - image: busybox:1.30
??? name: sc-cap
??? command:
??? - sleep
??? - 12h
??? securityContext:
????? privileged: true? # 开启特权模式
????? # 特权模式:在特权模式下运行容器,特权容器中的进程本质上相当于主机上的 root???????
启用特权模式就意味着,你为容器提供了访问 Linux 内核的所有能力,这是很危险的,为了减少系统调用的供给,可以使用 Capabilities 为容器赋予仅所需的能力。
Linux Capabilities: Capabilities 是一个内核级别的权限,它允许对内核调用权限进行更细粒度的控制,而不是简单地以 root 身份授权。
Capabilities 包括更改文件权限、控制网络子系统和执行系统管理等功能。
在 securityContext 中,可以添加或删除 Capabilities,做到容器精细化权限控制。
apiVersion: v1
kind: Pod
metadata:
? labels:
??? run: sc-cap
? name: sc-cap
? namespace: test
spec:
? containers:
? - image: busybox:1.30
??? name: sc-cap
??? command:
??? - sleep
??? - 12h
??? securityContext:
????? capabilities:
??????? add: ["SYS_ADMIN"]???
??????? # SYS_ADMIN 允许执行系统管理任务,如加载或卸载文件系统、设置磁盘配额等
[root@master ~]#? kubectl run busybox? -n test? --image=busybox:1.30 -- sleep 12h
pod/busybox created
[root@master ~]# kubectl get pod -n test
NAME????????? READY?? STATUS??? RESTARTS????? AGE
busybox?????? 1/1???? Running?? 0???????????? 4s
web-seccomp?? 1/1???? Running?? 1 (23h ago)?? 47h
[root@master ~]# kubectl exec -it -n test busybox /bin/sh
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
/ # mount tmpfs /tmp
mount: permission denied (are you root?)
apiVersion: v1
kind: Pod
metadata:
? labels:
??? run: sc-cap
? name: sc-cap
? namespace: test
spec:
? containers:
? - image: busybox:1.30
??? name: sc-cap
??? command:
??? - sleep
??? - 12h
??? securityContext:
????? privileged: true? # 开启特权模式
????? # 特权模式:在特权模式下运行容器,特权容器中的进程本质上相当于主机上的 root??????
[root@master ~]# kubectl apply -f sc-cap.yaml
pod/sc-cap created
[root@master ~]# kubectl get pod -n test
NAME????????? READY?? STATUS??? RESTARTS????? AGE
busybox?????? 1/1???? Running?? 0???????????? 5m51s
sc-cap??????? 1/1???? Running?? 0???????????? 5s
web-seccomp?? 1/1???? Running?? 1 (24h ago)?? 2d
[root@master ~]# kubectl exec -it -n test sc-cap /bin/sh
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
/ # mount test /test
mount: mounting test on /test failed: No such file or directory
# 原先是 mount: permission denied (are you root?)
apiVersion: v1
kind: Pod
metadata:
? labels:
??? run: sc-cap
? name: sc-cap
? namespace: test
spec:
? containers:
? - image: busybox:1.30
??? name: sc-cap
??? command:
??? - sleep
??? - 12h
??? securityContext:
????? capabilities:
??????? add: ["SYS_ADMIN"]???
??????? # SYS_ADMIN 允许执行系统管理任务,如加载或卸载文件系统、设置磁盘配额等
[root@master ~]# kubectl apply -f sc-cap.yaml
pod/sc-cap created
[root@master ~]# kubectl get pod -n test
NAME????????? READY?? STATUS??? RESTARTS????? AGE
busybox?????? 1/1???? Running?? 0???????????? 9m18s
sc-cap??????? 1/1???? Running?? 0???????????? 5s
web-seccomp?? 1/1???? Running?? 1 (24h ago)?? 2d
[root@master ~]# kubectl exec -it -n test sc-cap /bin/sh
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
/ # mount test /test
mount: mounting test on /test failed: No such file or directory
[root@master ~]# docker run -d centos sleep 24h
Unable to find image 'centos:latest' locally
latest: Pulling from library/centos
a1d0c7532777: Pull complete
Digest: sha256:a27fd8080b517143cbbbab9dfb7c8571c40d67d534bbdee55bd6c473f432b177
Status: Downloaded newer image for centos:latest
c196fab47974a3a2624f6d746847130650040223946b38806e70e420393edc4c
[root@master ~]# docker ps | grep centos
c196fab47974?? centos????????????????????????????????????????????? "sleep 24h"????????????? 2 minutes ago?? Up 2 minutes???????????? lucid_lamarr
[root@master ~]# docker exec -it? c1 /bin/bash
[root@c196fab47974 /]# capsh --print
Current: = cap_chown,cap_dac_override,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_net_bind_service,cap_net_raw,cap_sys_chroot,cap_mknod,cap_audit_write,cap_setfcap+ep
Bounding set =cap_chown,cap_dac_override,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_net_bind_service,cap_net_raw,cap_sys_chroot,cap_mknod,cap_audit_write,cap_setfcap
Ambient set =
Securebits: 00/0x0/1'b0
?secure-noroot: no (unlocked)
?secure-no-suid-fixup: no (unlocked)
?secure-keep-caps: no (unlocked)
?secure-no-ambient-raise: no (unlocked)
uid=0(root)
gid=0(root)
groups=0(root)
# Current 就是所拥有的权限,对比宿主机来说少很多
apiVersion: v1
kind: Pod
metadata:
? labels:
??? run: sc-cap
? name: sc-cap
? namespace: test
spec:
? containers:
? - image: busybox:1.30
??? name: sc-cap
??? command:
??? - sleep
??? - 12h
??? securityContext:
????? readOnlyRootFilesystem: true
测试
[root@master ~]# kubectl apply -f sc-cap.yaml
pod/sc-cap created
[root@master ~]# kubectl get pod -n test
NAME????????? READY?? STATUS??? RESTARTS?????? AGE
busybox?????? 1/1???? Running?? 1 (150m ago)?? 14h
sc-cap??????? 1/1???? Running?? 0????????????? 5s
web-seccomp?? 1/1???? Running?? 2 (14h ago)??? 2d14h
[root@master ~]# kubectl exec -it -n test sc-cap /bin/sh
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
/ # ls
bin?? dev?? etc?? home? proc? root? sys?? tmp?? usr?? var
/ # touch a
touch: a: Read-only file system