Kubernetes Operator开发实战
# Kubernetes Operator开发实战

## 一、Operator概述

Kubernetes Operator是一种软件扩展模式，用于管理复杂的有状态应用。

### 1.1 Operator模式

```
┌─────────────────────────────────────────────────────────────┐
│                          Operator                           │
│  ┌───────────────────────────────────────────────────────┐  │
│  │  Controller ── Watch ── Reconcile ── Update           │  │
│  └───────────────────────────────────────────────────────┘  │
└─────────────────────────────────────────────────────────────┘
                               │
                               ▼
┌─────────────────────────────────────────────────────────────┐
│                       Custom Resource                       │
│                     (定义应用期望状态)                      │
└─────────────────────────────────────────────────────────────┘
```

### 1.2 Operator组成

| 组件 | 说明 |
| --- | --- |
| Custom Resource | 定义应用的自定义资源 |
| Controller | 监听资源变化，维持期望状态 |
| Reconcile Loop | 核心控制循环，持续调谐状态 |

## 二、环境准备

### 2.1 安装Operator SDK

```bash
# 安装Operator SDK
curl -sL https://github.com/operator-framework/operator-sdk/releases/download/v1.32.0/operator-sdk_linux_amd64 -o operator-sdk
chmod +x operator-sdk
sudo mv operator-sdk /usr/local/bin/

# 验证安装
operator-sdk version
```

### 2.2 初始化项目

```bash
# 创建项目
operator-sdk init --domain example.com --repo github.com/example/myapp-operator

# 添加API
operator-sdk create api --group apps --version v1 --kind MyApp --resource --controller
```

## 三、定义Custom Resource

### 3.1 API定义

```go
// MyAppSpec defines the desired state of MyApp
type MyAppSpec struct {
	Replicas *int32 `json:"replicas,omitempty"`
	Image    string `json:"image,omitempty"`
	Port     int32  `json:"port,omitempty"`

	// 自定义配置
	Resources Resources `json:"resources,omitempty"`
	Env       []EnvVar  `json:"env,omitempty"`
}

// MyAppStatus defines the observed state of MyApp
type MyAppStatus struct {
	ReadyReplicas int32              `json:"readyReplicas,omitempty"`
	Phase         string             `json:"phase,omitempty"`
	Conditions    []metav1.Condition `json:"conditions,omitempty"`
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status

// MyApp is the Schema for the myapps API
type MyApp struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   MyAppSpec   `json:"spec,omitempty"`
	Status MyAppStatus `json:"status,omitempty"`
}

// +kubebuilder:object:root=true

// MyAppList contains a list of MyApp
type MyAppList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []MyApp `json:"items"`
}
```

### 3.2 生成CRD

```bash
# 生成CRD
make manifests

# 查看生成的CRD
cat config/crd/bases/apps.example.com_myapps.yaml
```

## 四、Controller实现

### 4.1 Reconcile逻辑

```go
func (r *MyAppReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	log := log.FromContext(ctx)

	// 1. 获取MyApp资源
	var myapp appsv1.MyApp
	if err := r.Get(ctx, req.NamespacedName, &myapp); err != nil {
		if apierrors.IsNotFound(err) {
			return ctrl.Result{}, nil
		}
		log.Error(err, "Unable to fetch MyApp")
		return ctrl.Result{}, err
	}

	// 2. 检查Deployment是否存在
	var deployment appsv1.Deployment
	deploymentName := myapp.Name
	if err := r.Get(ctx, types.NamespacedName{Name: deploymentName, Namespace: myapp.Namespace}, &deployment); err != nil {
		if apierrors.IsNotFound(err) {
			// 创建Deployment
			deployment = r.createDeployment(&myapp)
			if err := r.Create(ctx, &deployment); err != nil {
				log.Error(err, "Failed to create Deployment")
				return ctrl.Result{}, err
			}
			return ctrl.Result{Requeue: true}, nil
		}
		log.Error(err, "Unable to fetch Deployment")
		return ctrl.Result{}, err
	}

	// 3. 检查副本数是否匹配
	if *deployment.Spec.Replicas != *myapp.Spec.Replicas {
		deployment.Spec.Replicas = myapp.Spec.Replicas
		if err := r.Update(ctx, &deployment); err != nil {
			log.Error(err, "Failed to update Deployment")
			return ctrl.Result{}, err
		}
		return ctrl.Result{Requeue: true}, nil
	}

	// 4. 更新状态
	r.updateStatus(ctx, &myapp, &deployment)

	return ctrl.Result{}, nil
}
```

### 4.2 创建Deployment

```go
func (r *MyAppReconciler) createDeployment(myapp *appsv1.MyApp) appsv1.Deployment {
	labels := map[string]string{
		"app": myapp.Name,
	}

	return appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      myapp.Name,
			Namespace: myapp.Namespace,
			OwnerReferences: []metav1.OwnerReference{
				*metav1.NewControllerRef(myapp, appsv1.GroupVersion.WithKind("MyApp")),
			},
		},
		Spec: appsv1.DeploymentSpec{
			Replicas: myapp.Spec.Replicas,
			Selector: &metav1.LabelSelector{
				MatchLabels: labels,
			},
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: labels,
				},
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{{
						Name:  myapp.Name,
						Image: myapp.Spec.Image,
						Ports: []corev1.ContainerPort{{
							ContainerPort: myapp.Spec.Port,
						}},
						Resources: myapp.Spec.Resources,
						Env:       myapp.Spec.Env,
					}},
				},
			},
		},
	}
}
```

### 4.3 更新状态

```go
func (r *MyAppReconciler) updateStatus(ctx context.Context, myapp *appsv1.MyApp, deployment *appsv1.Deployment) {
	readyReplicas := deployment.Status.ReadyReplicas
	phase := "Pending"

	if readyReplicas == *deployment.Spec.Replicas {
		phase = "Ready"
	} else if readyReplicas > 0 {
		phase = "Partial"
	}

	myapp.Status.ReadyReplicas = readyReplicas
	myapp.Status.Phase = phase

	if err := r.Status().Update(ctx, myapp); err != nil {
		log.FromContext(ctx).Error(err, "Failed to update MyApp status")
	}
}
```

### 4.4 Setup With Manager

```go
func (r *MyAppReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&appsv1.MyApp{}).
		Owns(&appsv1.Deployment{}).
		Owns(&corev1.Service{}).
		Complete(r)
}
```

## 五、测试Operator

### 5.1 创建测试CR

```yaml
apiVersion: apps.example.com/v1
kind: MyApp
metadata:
  name: myapp-sample
spec:
  replicas: 3
  image: nginx:latest
  port: 80
  resources:
    requests:
      memory: "128Mi"
      cpu: "100m"
    limits:
      memory: "256Mi"
      cpu: "200m"
  env:
    - name: ENV
      value: "production"
```

### 5.2 部署Operator

```bash
# 部署CRD
kubectl apply -f config/crd/bases/apps.example.com_myapps.yaml

# 部署Operator
make install
make deploy IMG=myapp-operator:latest

# 创建示例资源
kubectl apply -f config/samples/apps_v1_myapp.yaml
```

### 5.3 验证部署

```bash
# 查看Operator日志
kubectl logs -n myapp-operator-system -l control-plane=controller-manager

# 查看MyApp状态
kubectl get myapps
kubectl describe myapp myapp-sample

# 查看创建的资源
kubectl get deployments
kubectl get pods
```

## 六、高级功能

### 6.1 事件处理

```go
func (r *MyAppReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	log := log.FromContext(ctx)

	var myapp appsv1.MyApp
	if err := r.Get(ctx, req.NamespacedName, &myapp); err != nil {
		return ctrl.Result{}, client.IgnoreNotFound(err)
	}

	// 记录事件
	r.Recorder.Event(&myapp, corev1.EventTypeNormal, "Reconciling", "Starting reconciliation")

	// ... 业务逻辑 ...

	r.Recorder.Event(&myapp, corev1.EventTypeNormal, "Reconciled", "Reconciliation completed")

	return ctrl.Result{}, nil
}
```

### 6.2 Finalizer处理

```go
func (r *MyAppReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	var myapp appsv1.MyApp
	if err := r.Get(ctx, req.NamespacedName, &myapp); err != nil {
		return ctrl.Result{}, client.IgnoreNotFound(err)
	}

	// 检查是否正在删除
	if myapp.GetDeletionTimestamp() != nil {
		// 清理资源
		if err := r.cleanupResources(ctx, &myapp); err != nil {
			return ctrl.Result{}, err
		}

		// 移除finalizer
		myapp.Finalizers = removeString(myapp.Finalizers, "myapp.finalizer")
		if err := r.Update(ctx, &myapp); err != nil {
			return ctrl.Result{}, err
		}
		return ctrl.Result{}, nil
	}

	// 添加finalizer
	if !containsString(myapp.Finalizers, "myapp.finalizer") {
		myapp.Finalizers = append(myapp.Finalizers, "myapp.finalizer")
		if err := r.Update(ctx, &myapp); err != nil {
			return ctrl.Result{}, err
		}
	}

	// ... 业务逻辑 ...
}
```

### 6.3 状态条件

```go
func (r *MyAppReconciler) updateStatusConditions(ctx context.Context, myapp *appsv1.MyApp, ready bool) {
	conditionType := appsv1.MyAppReady
	status := metav1.ConditionFalse
	if ready {
		status = metav1.ConditionTrue
	}

	r.Status().Patch(ctx, myapp, client.MergeFrom(myapp.DeepCopy()))

	myapp.Status.Conditions = append(myapp.Status.Conditions, metav1.Condition{
		Type:               string(conditionType),
		Status:             status,
		LastTransitionTime: metav1.Now(),
		Reason:             "Reconciled",
		Message:            "MyApp is ready",
	})
}
```

## 七、部署与分发

### 7.1 构建镜像

```bash
# 构建镜像
make docker-build IMG=myapp-operator:latest

# 推送镜像
make docker-push IMG=myapp-operator:latest
```

### 7.2 Helm Chart

```yaml
apiVersion: v2
name: myapp-operator
description: A Helm chart for MyApp Operator
type: application
version: 0.1.0
appVersion: "1.0"

dependencies:
  - name: cert-manager
    version: v1.13.0
    repository: https://charts.jetstack.io
    condition: cert-manager.enabled

templates:
  - deployment.yaml
  - service.yaml
  - rbac.yaml
```

### 7.3 OLM部署

```bash
# 创建Catalog
operator-sdk olm install

# 打包Operator
operator-sdk bundle create --image-builder docker --directory deploy/olm-catalog/myapp-operator

# 推送Bundle
docker push myapp-operator-bundle:latest

# 订阅Operator
kubectl apply -f deploy/olm-catalog/myapp-operator/subscription.yaml
```

## 八、最佳实践

### 8.1 设计原则

- **幂等性**：Reconcile应该是幂等的
- **重试机制**：使用Result.Requeue进行重试
- **错误处理**：区分可重试和不可重试错误
- **状态管理**：合理使用Status字段

### 8.2 性能优化

```go
// 使用FieldSelector减少监听范围
func (r *MyAppReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&appsv1.MyApp{}).
		WithEventFilter(predicate.GenerationChangedPredicate{}).
		Complete(r)
}
```

### 8.3 测试策略

```go
func TestMyAppReconciler(t *testing.T) {
	tests := []struct {
		name    string
		setup   func(*envtest.Environment)
		wantErr bool
	}{
		{
			name: "create myapp",
			setup: func(env *envtest.Environment) {
				// 测试设置
			},
			wantErr: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// 测试逻辑
		})
	}
}
```

## 九、总结

开发Kubernetes Operator需要以下步骤：

1. **定义CRD**：使用kubebuilder定义自定义资源
2. **实现Controller**：编写Reconcile逻辑
3. **处理状态**：更新资源状态
4. **测试验证**：单元测试和集成测试
5. **部署分发**：构建镜像和Chart

Operator模式是管理复杂应用的最佳实践，通过声明式API提供一致的管理体验。