Commit 20f98f3

DRA: update helper packages

Publishing ResourceSlices now supports network-attached devices and the new v1alpha3 API. The logic for splitting up across different slices is missing.

1 parent 91d7882 commit 20f98f3

5 files changed: +235 additions, -377 deletions


staging/src/k8s.io/dynamic-resource-allocation/controller/controller.go

Lines changed: 47 additions & 81 deletions
@@ -62,20 +62,6 @@ type Controller interface {
 
 // Driver provides the actual allocation and deallocation operations.
 type Driver interface {
-	// GetClassParameters is called to retrieve the parameter object
-	// referenced by a class. The content should be validated now if
-	// possible. class.Parameters may be nil.
-	//
-	// The caller wraps the error to include the parameter reference.
-	GetClassParameters(ctx context.Context, class *resourceapi.ResourceClass) (interface{}, error)
-
-	// GetClaimParameters is called to retrieve the parameter object
-	// referenced by a claim. The content should be validated now if
-	// possible. claim.Spec.Parameters may be nil.
-	//
-	// The caller wraps the error to include the parameter reference.
-	GetClaimParameters(ctx context.Context, claim *resourceapi.ResourceClaim, class *resourceapi.ResourceClass, classParameters interface{}) (interface{}, error)
-
 	// Allocate is called when all same-driver ResourceClaims for Pod are ready
 	// to be allocated. The selectedNode is empty for ResourceClaims with immediate
 	// allocation, in which case the resource driver decides itself where
@@ -136,11 +122,9 @@ type Driver interface {
 // ClaimAllocation represents information about one particular
 // pod.Spec.ResourceClaim entry.
 type ClaimAllocation struct {
-	PodClaimName string
-	Claim *resourceapi.ResourceClaim
-	Class *resourceapi.ResourceClass
-	ClaimParameters interface{}
-	ClassParameters interface{}
+	PodClaimName string
+	Claim *resourceapi.ResourceClaim
+	DeviceClasses map[string]*resourceapi.DeviceClass
 
 	// UnsuitableNodes needs to be filled in by the driver when
 	// Driver.UnsuitableNodes gets called.
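With the parameter fields gone, a driver now receives the resolved classes through the DeviceClasses map, keyed by class name. A minimal, hypothetical driver-side sketch of consuming it (the helper function and its suitability check are illustrative, not part of this commit):

package example

import (
	"k8s.io/dynamic-resource-allocation/controller"
)

// requestsResolvable walks the claim's device requests and checks that the
// controller resolved a DeviceClass for each of them. A real driver would
// additionally inspect class.Spec (selectors, vendor configuration) here.
func requestsResolvable(ca *controller.ClaimAllocation) bool {
	for _, req := range ca.Claim.Spec.Devices.Requests {
		class, ok := ca.DeviceClasses[req.DeviceClassName]
		if !ok || class == nil {
			return false
		}
	}
	return true
}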
@@ -162,15 +146,12 @@ type controller struct {
 	driver Driver
 	setReservedFor bool
 	kubeClient kubernetes.Interface
-	claimNameLookup *resourceclaim.Lookup
 	queue workqueue.TypedRateLimitingInterface[string]
 	eventRecorder record.EventRecorder
-	rcLister resourcelisters.ResourceClassLister
-	rcSynced cache.InformerSynced
+	dcLister resourcelisters.DeviceClassLister
 	claimCache cache.MutationCache
 	schedulingCtxLister resourcelisters.PodSchedulingContextLister
-	claimSynced cache.InformerSynced
-	schedulingCtxSynced cache.InformerSynced
+	synced []cache.InformerSynced
 }
 
 // TODO: make it configurable
@@ -184,10 +165,9 @@ func New(
 	kubeClient kubernetes.Interface,
 	informerFactory informers.SharedInformerFactory) Controller {
 	logger := klog.LoggerWithName(klog.FromContext(ctx), "resource controller")
-	rcInformer := informerFactory.Resource().V1alpha3().ResourceClasses()
+	dcInformer := informerFactory.Resource().V1alpha3().DeviceClasses()
 	claimInformer := informerFactory.Resource().V1alpha3().ResourceClaims()
 	schedulingCtxInformer := informerFactory.Resource().V1alpha3().PodSchedulingContexts()
-	claimNameLookup := resourceclaim.NewNameLookup(kubeClient)
 
 	eventBroadcaster := record.NewBroadcaster(record.WithContext(ctx))
 	go func() {
@@ -228,15 +208,16 @@ func New(
 		driver: driver,
 		setReservedFor: true,
 		kubeClient: kubeClient,
-		claimNameLookup: claimNameLookup,
-		rcLister: rcInformer.Lister(),
-		rcSynced: rcInformer.Informer().HasSynced,
+		dcLister: dcInformer.Lister(),
 		claimCache: claimCache,
-		claimSynced: claimInformer.Informer().HasSynced,
 		schedulingCtxLister: schedulingCtxInformer.Lister(),
-		schedulingCtxSynced: schedulingCtxInformer.Informer().HasSynced,
 		queue: queue,
 		eventRecorder: eventRecorder,
+		synced: []cache.InformerSynced{
+			dcInformer.Informer().HasSynced,
+			claimInformer.Informer().HasSynced,
+			schedulingCtxInformer.Informer().HasSynced,
+		},
 	}
 
 	loggerV6 := logger.V(6)
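For context, a hedged sketch of how a control-plane DRA driver might wire this controller up. The New signature is assumed from the parameters visible in this hunk (context, driver name, Driver implementation, client, shared informer factory); the driver name and worker count are placeholders.

package example

import (
	"context"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/dynamic-resource-allocation/controller"
)

// startController wires the helper controller into a driver binary.
// "dra.example.com" is a hypothetical driver name.
func startController(ctx context.Context, kubeClient kubernetes.Interface, driver controller.Driver) {
	informerFactory := informers.NewSharedInformerFactory(kubeClient, 0 /* no resync */)
	ctrl := controller.New(ctx, "dra.example.com", driver, kubeClient, informerFactory)

	// Start the informers the controller registered (DeviceClasses, ResourceClaims,
	// PodSchedulingContexts); Run then waits for their caches to sync and processes
	// work items until ctx is cancelled.
	informerFactory.Start(ctx.Done())
	ctrl.Run(5)
}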
@@ -341,7 +322,7 @@ func (ctrl *controller) Run(workers int) {
 
 	stopCh := ctrl.ctx.Done()
 
-	if !cache.WaitForCacheSync(stopCh, ctrl.rcSynced, ctrl.claimSynced, ctrl.schedulingCtxSynced) {
+	if !cache.WaitForCacheSync(stopCh, ctrl.synced...) {
 		ctrl.logger.Error(nil, "Cannot sync caches")
 		return
 	}
@@ -471,20 +452,19 @@ func (ctrl *controller) syncClaim(ctx context.Context, claim *resourceapi.Resour
 		if claim.Status.Allocation != nil {
 			// Allocation was completed. Deallocate before proceeding.
 			if err := ctrl.driver.Deallocate(ctx, claim); err != nil {
-				return fmt.Errorf("deallocate: %v", err)
+				return fmt.Errorf("deallocate: %w", err)
 			}
 			claim.Status.Allocation = nil
-			claim.Status.DriverName = ""
 			claim.Status.DeallocationRequested = false
 			claim, err = ctrl.kubeClient.ResourceV1alpha3().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
 			if err != nil {
-				return fmt.Errorf("remove allocation: %v", err)
+				return fmt.Errorf("remove allocation: %w", err)
 			}
 			ctrl.claimCache.Mutation(claim)
 		} else {
 			// Ensure that there is no on-going allocation.
 			if err := ctrl.driver.Deallocate(ctx, claim); err != nil {
-				return fmt.Errorf("stop allocation: %v", err)
+				return fmt.Errorf("stop allocation: %w", err)
 			}
 		}
 
@@ -493,15 +473,15 @@ func (ctrl *controller) syncClaim(ctx context.Context, claim *resourceapi.Resour
 			claim.Status.DeallocationRequested = false
 			claim, err = ctrl.kubeClient.ResourceV1alpha3().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
 			if err != nil {
-				return fmt.Errorf("remove deallocation: %v", err)
+				return fmt.Errorf("remove deallocation: %w", err)
 			}
 			ctrl.claimCache.Mutation(claim)
 		}
 
 		claim.Finalizers = ctrl.removeFinalizer(claim.Finalizers)
 		claim, err = ctrl.kubeClient.ResourceV1alpha3().ResourceClaims(claim.Namespace).Update(ctx, claim, metav1.UpdateOptions{})
 		if err != nil {
-			return fmt.Errorf("remove finalizer: %v", err)
+			return fmt.Errorf("remove finalizer: %w", err)
 		}
 		ctrl.claimCache.Mutation(claim)
 	}
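The repeated %v to %w change in these hunks is more than cosmetic: %w keeps the wrapped error in the chain so callers can still match it with errors.Is / errors.As. A small self-contained illustration (the sentinel error is made up for the example):

package main

import (
	"errors"
	"fmt"
)

var errQuota = errors.New("quota exceeded")

func main() {
	// %w preserves the error chain: errors.Is still finds the sentinel.
	wrapped := fmt.Errorf("remove allocation: %w", errQuota)
	fmt.Println(errors.Is(wrapped, errQuota)) // true

	// %v flattens the error into text: the chain is lost.
	flattened := fmt.Errorf("remove allocation: %v", errQuota)
	fmt.Println(errors.Is(flattened, errQuota)) // false
}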
@@ -519,24 +499,6 @@ func (ctrl *controller) syncClaim(ctx context.Context, claim *resourceapi.Resour
 	return nil
 }
 
-func (ctrl *controller) getParameters(ctx context.Context, claim *resourceapi.ResourceClaim, class *resourceapi.ResourceClass, notifyClaim bool) (claimParameters, classParameters interface{}, err error) {
-	classParameters, err = ctrl.driver.GetClassParameters(ctx, class)
-	if err != nil {
-		ctrl.eventRecorder.Event(class, v1.EventTypeWarning, "Failed", err.Error())
-		err = fmt.Errorf("class parameters %s: %v", class.ParametersRef, err)
-		return
-	}
-	claimParameters, err = ctrl.driver.GetClaimParameters(ctx, claim, class, classParameters)
-	if err != nil {
-		if notifyClaim {
-			ctrl.eventRecorder.Event(claim, v1.EventTypeWarning, "Failed", err.Error())
-		}
-		err = fmt.Errorf("claim parameters %s: %v", claim.Spec.ParametersRef, err)
-		return
-	}
-	return
-}
-
 // allocateClaims filters list of claims, keeps those needing allocation and asks driver to do the allocations.
 // Driver is supposed to write the AllocationResult and Error field into argument claims slice.
 func (ctrl *controller) allocateClaims(ctx context.Context, claims []*ClaimAllocation, selectedNode string, selectedUser *resourceapi.ResourceClaimConsumerReference) {
@@ -572,7 +534,7 @@ func (ctrl *controller) allocateClaims(ctx context.Context, claims []*ClaimAlloc
 		claim, err = ctrl.kubeClient.ResourceV1alpha3().ResourceClaims(claim.Namespace).Update(ctx, claim, metav1.UpdateOptions{})
 		if err != nil {
 			logger.Error(err, "add finalizer", "claim", claim.Name)
-			claimAllocation.Error = fmt.Errorf("add finalizer: %v", err)
+			claimAllocation.Error = fmt.Errorf("add finalizer: %w", err)
 			// Do not save claim to ask for Allocate from Driver.
 			continue
 		}
@@ -602,14 +564,14 @@ func (ctrl *controller) allocateClaims(ctx context.Context, claims []*ClaimAlloc
 		logger.V(5).Info("successfully allocated", "claim", klog.KObj(claimAllocation.Claim))
 		claim := claimAllocation.Claim.DeepCopy()
 		claim.Status.Allocation = claimAllocation.Allocation
-		claim.Status.DriverName = ctrl.name
+		claim.Status.Allocation.Controller = ctrl.name
 		if selectedUser != nil && ctrl.setReservedFor {
 			claim.Status.ReservedFor = append(claim.Status.ReservedFor, *selectedUser)
 		}
 		logger.V(6).Info("Updating claim after allocation", "claim", claim)
 		claim, err := ctrl.kubeClient.ResourceV1alpha3().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
 		if err != nil {
-			claimAllocation.Error = fmt.Errorf("add allocation: %v", err)
+			claimAllocation.Error = fmt.Errorf("add allocation: %w", err)
 			continue
 		}
 
@@ -619,7 +581,7 @@ func (ctrl *controller) allocateClaims(ctx context.Context, claims []*ClaimAlloc
 }
 
 func (ctrl *controller) checkPodClaim(ctx context.Context, pod *v1.Pod, podClaim v1.PodResourceClaim) (*ClaimAllocation, error) {
-	claimName, mustCheckOwner, err := ctrl.claimNameLookup.Name(pod, &podClaim)
+	claimName, mustCheckOwner, err := resourceclaim.Name(pod, &podClaim)
 	if err != nil {
 		return nil, err
 	}
@@ -642,26 +604,30 @@ func (ctrl *controller) checkPodClaim(ctx context.Context, pod *v1.Pod, podClaim
 		// need to be done for the claim either.
 		return nil, nil
 	}
-	class, err := ctrl.rcLister.Get(claim.Spec.ResourceClassName)
-	if err != nil {
-		return nil, err
-	}
-	if class.DriverName != ctrl.name {
+	if claim.Spec.Controller != ctrl.name {
 		return nil, nil
 	}
-	// Check parameters. Record event to claim and pod if parameters are invalid.
-	claimParameters, classParameters, err := ctrl.getParameters(ctx, claim, class, true)
-	if err != nil {
-		ctrl.eventRecorder.Event(pod, v1.EventTypeWarning, "Failed", fmt.Sprintf("claim %v: %v", claim.Name, err.Error()))
-		return nil, err
+
+	// Sanity checks and preparations...
+	ca := &ClaimAllocation{
+		PodClaimName: podClaim.Name,
+		Claim: claim,
+		DeviceClasses: make(map[string]*resourceapi.DeviceClass),
+	}
+	for _, request := range claim.Spec.Devices.Requests {
+		if request.DeviceClassName == "" {
+			// Some unknown request. Abort!
+			return nil, fmt.Errorf("claim %s: unknown request type in request %s", klog.KObj(claim), request.Name)
+		}
+		deviceClassName := request.DeviceClassName
+		class, err := ctrl.dcLister.Get(deviceClassName)
+		if err != nil {
+			return nil, fmt.Errorf("claim %s: request %s: class %s: %w", klog.KObj(claim), request.Name, deviceClassName, err)
+		}
+		ca.DeviceClasses[deviceClassName] = class
 	}
-	return &ClaimAllocation{
-		PodClaimName: podClaim.Name,
-		Claim: claim,
-		Class: class,
-		ClaimParameters: claimParameters,
-		ClassParameters: classParameters,
-	}, nil
+
+	return ca, nil
 }
 
 // syncPodSchedulingContext determines which next action may be needed for a PodSchedulingContext object
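To see what the rewritten checkPodClaim expects, here is a hypothetical v1alpha3 claim it would pick up: Spec.Controller names this driver and every device request carries a DeviceClassName that the DeviceClass lister can resolve. All names are placeholders, not taken from this commit.

package example

import (
	resourceapi "k8s.io/api/resource/v1alpha3"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// exampleClaim is an illustrative claim only.
var exampleClaim = &resourceapi.ResourceClaim{
	ObjectMeta: metav1.ObjectMeta{Name: "gpu-claim", Namespace: "default"},
	Spec: resourceapi.ResourceClaimSpec{
		// Must match the controller's driver name, otherwise checkPodClaim
		// returns nil and leaves the claim to another driver.
		Controller: "dra.example.com",
		Devices: resourceapi.DeviceClaim{
			Requests: []resourceapi.DeviceRequest{
				// Each request needs a DeviceClassName so the loop above can
				// fetch the class via dcLister and record it in DeviceClasses.
				{Name: "gpu", DeviceClassName: "example-gpu-class"},
			},
		},
	},
}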
@@ -709,7 +675,7 @@ func (ctrl *controller) syncPodSchedulingContexts(ctx context.Context, schedulin
 	for _, podClaim := range pod.Spec.ResourceClaims {
 		delayed, err := ctrl.checkPodClaim(ctx, pod, podClaim)
 		if err != nil {
-			return fmt.Errorf("pod claim %s: %v", podClaim.Name, err)
+			return fmt.Errorf("pod claim %s: %w", podClaim.Name, err)
 		}
 		if delayed == nil {
 			// Nothing to do for it. This can change, so keep checking.
@@ -739,7 +705,7 @@ func (ctrl *controller) syncPodSchedulingContexts(ctx context.Context, schedulin
 	}
 	if len(schedulingCtx.Spec.PotentialNodes) > 0 {
 		if err := ctrl.driver.UnsuitableNodes(ctx, pod, claims, potentialNodes); err != nil {
-			return fmt.Errorf("checking potential nodes: %v", err)
+			return fmt.Errorf("checking potential nodes: %w", err)
 		}
 	}
 	logger.V(5).Info("pending pod claims", "claims", claims, "selectedNode", selectedNode)
@@ -772,7 +738,7 @@ func (ctrl *controller) syncPodSchedulingContexts(ctx context.Context, schedulin
 			allErrors = append(allErrors, delayed.Error)
 		} else {
 			// Include claim name, it's not in the underlying error.
-			allErrors = append(allErrors, fmt.Errorf("claim %s: %v", delayed.Claim.Name, delayed.Error))
+			allErrors = append(allErrors, fmt.Errorf("claim %s: %w", delayed.Claim.Name, delayed.Error))
 		}
 	}
 }
@@ -807,7 +773,7 @@ func (ctrl *controller) syncPodSchedulingContexts(ctx context.Context, schedulin
 	if modified {
 		logger.V(6).Info("Updating pod scheduling with modified unsuitable nodes", "podSchedulingCtx", schedulingCtx)
 		if _, err := ctrl.kubeClient.ResourceV1alpha3().PodSchedulingContexts(schedulingCtx.Namespace).UpdateStatus(ctx, schedulingCtx, metav1.UpdateOptions{}); err != nil {
-			return fmt.Errorf("update unsuitable node status: %v", err)
+			return fmt.Errorf("update unsuitable node status: %w", err)
 		}
 	}
 