Skip to content

Commit

Permalink
frontend: Add CheckForConflict method
Browse files Browse the repository at this point in the history
Returns a "409 Conflict" error response if the provisioning state
of the resource or any parent resources (in the same namespace)
should block a request from proceeding.

Furthermore, if a DELETE request is accepted, mark any active
operation on the resource as canceled. (Cluster Service will
handle the actual cancellation of operations.)
  • Loading branch information
Matthew Barnes committed Oct 28, 2024
1 parent 35fe99a commit 571b2e1
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 5 deletions.
43 changes: 41 additions & 2 deletions frontend/pkg/frontend/frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"strconv"
"strings"
"sync/atomic"
"time"

cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
"golang.org/x/sync/errgroup"
Expand Down Expand Up @@ -408,6 +409,14 @@ func (f *Frontend) ArmResourceCreateOrUpdate(writer http.ResponseWriter, request
doc = database.NewResourceDocument(resourceID)
}

// CheckForProvisioningStateConflict does not log conflict errors
// but does log unexpected errors like database failures.
cloudError := f.CheckForProvisioningStateConflict(ctx, operationRequest, doc)
if cloudError != nil {
arm.WriteCloudError(writer, cloudError)
return
}

body, err := BodyFromContext(ctx)
if err != nil {
f.logger.Error(err.Error())
Expand All @@ -420,7 +429,7 @@ func (f *Frontend) ArmResourceCreateOrUpdate(writer http.ResponseWriter, request
return
}

cloudError := versionedRequestCluster.ValidateStatic(versionedCurrentCluster, updating, request.Method)
cloudError = versionedRequestCluster.ValidateStatic(versionedCurrentCluster, updating, request.Method)
if cloudError != nil {
f.logger.Error(cloudError.Error())
arm.WriteCloudError(writer, cloudError)
Expand Down Expand Up @@ -563,14 +572,44 @@ func (f *Frontend) ArmResourceDelete(writer http.ResponseWriter, request *http.R
return
}

operationRequest := database.OperationRequestDelete

// CheckForProvisioningStateConflict does not log conflict errors
// but does log unexpected errors like database failures.
cloudError := f.CheckForProvisioningStateConflict(ctx, operationRequest, doc)
if cloudError != nil {
arm.WriteCloudError(writer, cloudError)
return
}

err = f.clusterServiceClient.DeleteCSCluster(ctx, doc.InternalID)
if err != nil {
f.logger.Error(fmt.Sprintf("failed to delete cluster %s: %v", resourceID, err))
arm.WriteInternalServerError(writer)
return
}

operationDoc, err := f.StartOperation(writer, request, doc, database.OperationRequestDelete)
// Deletion is underway; mark any active operation as canceled.
if doc.ActiveOperationID != "" {
updated, err := f.dbClient.UpdateOperationDoc(ctx, doc.ActiveOperationID, func(updateDoc *database.OperationDocument) bool {
if updateDoc.Status != arm.ProvisioningStateCanceled {
updateDoc.LastTransitionTime = time.Now()
updateDoc.Status = arm.ProvisioningStateCanceled
return true
}
return false
})
if err != nil {
f.logger.Error(err.Error())
arm.WriteInternalServerError(writer)
return
}
if updated {
f.logger.Info(fmt.Sprintf("canceled operation '%s'", doc.ActiveOperationID))
}
}

operationDoc, err := f.StartOperation(writer, request, doc, operationRequest)
if err != nil {
f.logger.Error(fmt.Sprintf("failed to write operation document: %v", err))
arm.WriteInternalServerError(writer)
Expand Down
52 changes: 52 additions & 0 deletions frontend/pkg/frontend/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"net/http"
"strings"

cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
ocmerrors "github.com/openshift-online/ocm-sdk-go/errors"
Expand All @@ -17,6 +18,57 @@ import (
"github.com/Azure/ARO-HCP/internal/database"
)

// CheckForProvisioningStateConflict returns a "409 Conflict" error response if the
// provisioning state of the resource is non-terminal, or any of its parent resources
// within the same provider namespace are in a "Deleting" state.
func (f *Frontend) CheckForProvisioningStateConflict(ctx context.Context, operationRequest database.OperationRequest, doc *database.ResourceDocument) *arm.CloudError {
switch operationRequest {
case database.OperationRequestCreate:
// Resource must already exist for there to be a conflict.
case database.OperationRequestDelete:
if doc.ProvisioningState == arm.ProvisioningStateDeleting {
return arm.NewCloudError(
http.StatusConflict,
arm.CloudErrorCodeConflict,
doc.Key.String(),
"Resource is already deleting")
}
case database.OperationRequestUpdate:
if !doc.ProvisioningState.IsTerminal() {
return arm.NewCloudError(
http.StatusConflict,
arm.CloudErrorCodeConflict,
doc.Key.String(),
"Cannot update resource while resource is %s",
strings.ToLower(string(doc.ProvisioningState)))
}
}

parent := doc.Key.GetParent()

// ResourceType casing is preserved for parents in the same namespace.
for parent.ResourceType.Namespace == doc.Key.ResourceType.Namespace {
parentDoc, err := f.dbClient.GetResourceDoc(ctx, parent)
if err != nil {
f.logger.Error(err.Error())
return arm.NewInternalServerError()
}

if parentDoc.ProvisioningState == arm.ProvisioningStateDeleting {
return arm.NewCloudError(
http.StatusConflict,
arm.CloudErrorCodeConflict,
doc.Key.String(),
"Cannot %s resource while parent resource is deleting",
strings.ToLower(string(operationRequest)))
}

parent = parent.GetParent()
}

return nil
}

func (f *Frontend) MarshalResource(ctx context.Context, resourceID *arm.ResourceID, versionedInterface api.Version) ([]byte, *arm.CloudError) {
var responseBody []byte

Expand Down
43 changes: 41 additions & 2 deletions frontend/pkg/frontend/node_pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"maps"
"net/http"
"time"

cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"

Expand Down Expand Up @@ -123,6 +124,14 @@ func (f *Frontend) CreateOrUpdateNodePool(writer http.ResponseWriter, request *h
doc = database.NewResourceDocument(resourceID)
}

// CheckForProvisioningStateConflict does not log conflict errors
// but does log unexpected errors like database failures.
cloudError := f.CheckForProvisioningStateConflict(ctx, operationRequest, doc)
if cloudError != nil {
arm.WriteCloudError(writer, cloudError)
return
}

body, err := BodyFromContext(ctx)
if err != nil {
f.logger.Error(err.Error())
Expand All @@ -135,7 +144,7 @@ func (f *Frontend) CreateOrUpdateNodePool(writer http.ResponseWriter, request *h
return
}

cloudError := versionedRequestNodePool.ValidateStatic(versionedCurrentNodePool, updating, request.Method)
cloudError = versionedRequestNodePool.ValidateStatic(versionedCurrentNodePool, updating, request.Method)
if cloudError != nil {
f.logger.Error(cloudError.Error())
arm.WriteCloudError(writer, cloudError)
Expand Down Expand Up @@ -281,14 +290,44 @@ func (f *Frontend) DeleteNodePool(writer http.ResponseWriter, request *http.Requ
return
}

operationRequest := database.OperationRequestDelete

// CheckForProvisioningStateConflict does not log conflict errors
// but does log unexpected errors like database failures.
cloudError := f.CheckForProvisioningStateConflict(ctx, operationRequest, doc)
if cloudError != nil {
arm.WriteCloudError(writer, cloudError)
return
}

err = f.clusterServiceClient.DeleteCSNodePool(ctx, doc.InternalID)
if err != nil {
f.logger.Error(fmt.Sprintf("failed to delete node pool %s: %v", resourceID, err))
arm.WriteInternalServerError(writer)
return
}

operationDoc, err := f.StartOperation(writer, request, doc, database.OperationRequestDelete)
// Deletion is underway; mark any active operation as canceled.
if doc.ActiveOperationID != "" {
updated, err := f.dbClient.UpdateOperationDoc(ctx, doc.ActiveOperationID, func(updateDoc *database.OperationDocument) bool {
if updateDoc.Status != arm.ProvisioningStateCanceled {
updateDoc.LastTransitionTime = time.Now()
updateDoc.Status = arm.ProvisioningStateCanceled
return true
}
return false
})
if err != nil {
f.logger.Error(err.Error())
arm.WriteInternalServerError(writer)
return
}
if updated {
f.logger.Info(fmt.Sprintf("canceled operation '%s'", doc.ActiveOperationID))
}
}

operationDoc, err := f.StartOperation(writer, request, doc, operationRequest)
if err != nil {
f.logger.Error(fmt.Sprintf("failed to write operation document: %v", err))
arm.WriteInternalServerError(writer)
Expand Down
10 changes: 9 additions & 1 deletion internal/database/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,22 @@ type OperationDocument struct {
func NewOperationDocument(request OperationRequest) *OperationDocument {
now := time.Now().UTC()

return &OperationDocument{
doc := &OperationDocument{
BaseDocument: newBaseDocument(),
PartitionKey: operationsPartitionKey,
Request: request,
StartTime: now,
LastTransitionTime: now,
Status: arm.ProvisioningStateAccepted,
}

// When deleting, set Status directly to ProvisioningStateDeleting
// so any further deletion requests are rejected with 409 Conflict.
if request == OperationRequestDelete {
doc.Status = arm.ProvisioningStateDeleting
}

return doc
}

// ToStatus converts an OperationDocument to the ARM operation status format.
Expand Down

0 comments on commit 571b2e1

Please sign in to comment.