@@ -798,24 +798,121 @@ func (ClusterHandler *GCPClusterHandler) UpgradeCluster(clusterIID irs.IID, newV
798798 hiscallInfo := GetCallLogScheme (ClusterHandler .Region , call .CLUSTER , clusterIID .NameId , "UpgradeCluster()" )
799799
800800 parent := getParentClusterAtContainer (projectID , zone , clusterIID .NameId )
801- rb := & container.UpdateMasterRequest {
802- MasterVersion : newVersion ,
801+
802+ // 현재 클러스터 버전 확인
803+ currentCluster , err := ClusterHandler .ContainerClient .Projects .Locations .Clusters .Get (parent ).Do ()
804+ if err != nil {
805+ return clusterInfo , fmt .Errorf ("Failed to get current cluster version: %v" , err )
803806 }
804807
805- start := call .Start ()
806- op , err := ClusterHandler .ContainerClient .Projects .Locations .Clusters .UpdateMaster (parent , rb ).Do ()
807- hiscallInfo .ElapsedTime = call .Elapsed (start )
808+ // 이미 원하는 버전으로 업그레이드되었다면 노드 풀만 업그레이드
809+ if currentCluster .CurrentMasterVersion == newVersion {
810+ cblogger .Info (fmt .Sprintf ("Control plane is already at version %s, skipping control plane upgrade" , newVersion ))
811+ } else {
812+ // 기존 컨트롤 플레인 업그레이드 로직
813+ rb := & container.UpdateMasterRequest {
814+ MasterVersion : newVersion ,
815+ }
816+
817+ start := call .Start ()
818+ op , err := ClusterHandler .ContainerClient .Projects .Locations .Clusters .UpdateMaster (parent , rb ).Do ()
819+ hiscallInfo .ElapsedTime = call .Elapsed (start )
820+ if err != nil {
821+ err := fmt .Errorf ("Failed to UpgradeCluster: %v" , err )
822+ cblogger .Error (err )
823+ return clusterInfo , err
824+ }
825+ cblogger .Debug (op )
826+
827+ // WaitContainerOperationDone 함수 사용 (20분 타임아웃)
828+ operationErr := WaitContainerOperationDone (ClusterHandler .ContainerClient , projectID , region , zone , op .Name , GCP_CONTAINER_OPERATION_UPDATE_CLUSTER , 1200 )
829+ if operationErr != nil {
830+ cblogger .Error (operationErr )
831+ return clusterInfo , operationErr
832+ }
833+
834+ // 컨트롤 플레인 업그레이드 후 클러스터 상태 확인
835+ updatedCluster , err := ClusterHandler .ContainerClient .Projects .Locations .Clusters .Get (parent ).Do ()
836+ if err != nil {
837+ return clusterInfo , fmt .Errorf ("Failed to get cluster after master upgrade: %v" , err )
838+ }
839+
840+ // 컨트롤 플레인 버전이 실제로 업그레이드 되었는지 확인
841+ if updatedCluster .CurrentMasterVersion != newVersion {
842+ return clusterInfo , fmt .Errorf ("Control plane upgrade not complete. Current version: %s, Expected: %s" ,
843+ updatedCluster .CurrentMasterVersion , newVersion )
844+ }
845+ cblogger .Info (fmt .Sprintf ("Control plane upgraded successfully to version %s" , newVersion ))
846+ }
847+
848+ // 업그레이드 재시도 로직
849+ maxRetries := 10
850+ retryInterval := 120
851+ backoffFactor := 1.5
852+
853+ currentInterval := retryInterval
854+ for i := 0 ; i < maxRetries ; i ++ {
855+ hasActive , err := ClusterHandler .hasActiveOperations (projectID , zone , clusterIID .NameId )
856+ if err != nil {
857+ return clusterInfo , err
858+ }
859+
860+ if ! hasActive {
861+ break // 진행 중인 작업이 없으면 계속 진행
862+ }
863+
864+ if i == maxRetries - 1 {
865+ return clusterInfo , fmt .Errorf ("Cluster has active operations after %d retries" , maxRetries )
866+ }
867+
868+ cblogger .Info (fmt .Sprintf ("Cluster has active operations, waiting %d seconds before retry (%d/%d)" ,
869+ currentInterval , i + 1 , maxRetries ))
870+ time .Sleep (time .Duration (currentInterval ) * time .Second )
871+
872+ // 지수 백오프 적용
873+ currentInterval = int (float64 (currentInterval ) * backoffFactor )
874+ }
875+
876+ // 노드풀 리스트 조회
877+ cblogger .Info (fmt .Sprintf ("Fetching node pools for cluster: %s" , clusterIID .NameId ))
878+
879+ nodePools , err := ClusterHandler .ContainerClient .Projects .Locations .Clusters .NodePools .List (parent ).Do ()
808880 if err != nil {
809- err := fmt .Errorf ("Failed to UpgradeCluster : %v" , err )
881+ err := fmt .Errorf ("Failed to list Node Pools: %v" , err )
810882 cblogger .Error (err )
811- return clusterInfo , err
883+ return clusterInfo , err // 노드풀 리스트 조회 오류 시 clusterInfo 반환
812884 }
813- cblogger .Debug (op )
814885
815- operationErr := WaitContainerOperationFail (ClusterHandler .ContainerClient , projectID , region , zone , op .Name , GCP_CONTAINER_OPERATION_UPDATE_CLUSTER )
816- if operationErr != nil {
817- cblogger .Error (err )
818- return clusterInfo , err
886+ // Worker Node(노드풀) 업그레이드 기능
887+ for _ , nodePool := range nodePools .NodePools { // 각 노드풀을 순회하며 업그레이드
888+ // cblogger.Info(fmt.Sprintf("Upgrading Node Pool: %s", nodePool.Name))
889+ cblogger .Info (fmt .Sprintf ("Upgrading Node Pool: %s to version %s" , nodePool .Name , newVersion ))
890+
891+ nodePoolParent := fmt .Sprintf ("projects/%s/locations/%s/clusters/%s/nodePools/%s" , projectID , zone , clusterIID .NameId , nodePool .Name )
892+ nodePoolRequest := & container.UpdateNodePoolRequest {
893+ NodeVersion : newVersion , // 각 노드풀의 버전 변경 요청
894+ }
895+
896+ // 업그레이드 요청 전에 로그 추가
897+ cblogger .Info (fmt .Sprintf ("Sending upgrade request for Node Pool: %s with path: %s" , nodePool .Name , nodePoolParent ))
898+
899+ nodeOp , err := ClusterHandler .ContainerClient .Projects .Locations .Clusters .NodePools .Update (nodePoolParent , nodePoolRequest ).Do ()
900+ if err != nil {
901+ err := fmt .Errorf ("Failed to Upgrade Node Pool: %v" , err )
902+ cblogger .Error (err )
903+ return clusterInfo , err // 노드풀 업그레이드 실패 시 clusterInfo 반환
904+ }
905+
906+ // GCP 업그레이드 요청 완료 로그
907+ cblogger .Info (fmt .Sprintf ("Upgrade request sent for Node Pool: %s, Operation Name: %s" , nodePool .Name , nodeOp .Name ))
908+
909+ // WaitContainerOperationDone 함수 사용 (20분 타임아웃)
910+ operationErr := WaitContainerOperationDone (ClusterHandler .ContainerClient , projectID , region , zone , nodeOp .Name , GCP_CONTAINER_OPERATION_UPGRADE_NODES , 1200 )
911+ if operationErr != nil {
912+ return clusterInfo , operationErr
913+ }
914+
915+ cblogger .Info (fmt .Sprintf ("Node Pool %s upgrade completed" , nodePool .Name ))
819916 }
820917
821918 return ClusterHandler .GetCluster (clusterIID )
@@ -1393,3 +1490,25 @@ func (ClusterHandler *GCPClusterHandler) ListIID() ([]*irs.IID, error) {
13931490 }
13941491 return iidList , nil
13951492}
1493+
1494+ // 클러스터에 진행 중인 작업이 있는지 확인하는 함수
1495+ func (ClusterHandler * GCPClusterHandler ) hasActiveOperations (projectID , zone , clusterName string ) (bool , error ) {
1496+ listOperationsParent := fmt .Sprintf ("projects/%s/locations/%s" , projectID , zone )
1497+
1498+ // 진행 중인 작업 목록 조회
1499+ operations , err := ClusterHandler .ContainerClient .Projects .Locations .Operations .List (listOperationsParent ).Do ()
1500+ if err != nil {
1501+ return false , err
1502+ }
1503+
1504+ // 클러스터와 관련된 진행 중인 작업 검색
1505+ clusterPattern := fmt .Sprintf ("/clusters/%s/" , clusterName )
1506+ for _ , op := range operations .Operations {
1507+ if strings .Contains (op .TargetLink , clusterPattern ) && op .Status != "DONE" {
1508+ cblogger .Info (fmt .Sprintf ("Found active operation: %s, status: %s" , op .Name , op .Status ))
1509+ return true , nil
1510+ }
1511+ }
1512+
1513+ return false , nil
1514+ }
0 commit comments