Skip to content

Commit 9b1107b

Browse files
committed
Add concurrency controls for ResourcePools
1 parent 3882000 commit 9b1107b

10 files changed

+560
-15
lines changed

helm/crds/resourcehandles.yaml

+9
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,18 @@ spec:
1616
subresources:
1717
status: {}
1818
additionalPrinterColumns:
19+
- name: Provider
20+
type: string
21+
jsonPath: .spec.provider.name
1922
- name: Pool
2023
type: string
2124
jsonPath: .spec.resourcePool.name
25+
- name: Healthy
26+
type: boolean
27+
jsonPath: .status.healthy
28+
- name: Ready
29+
type: boolean
30+
jsonPath: .status.ready
2231
- name: Claim Namespace
2332
type: string
2433
jsonPath: .spec.resourceClaim.namespace

helm/crds/resourcepools.yaml

+36
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,18 @@ spec:
1616
subresources:
1717
status: {}
1818
additionalPrinterColumns:
19+
- name: ResourceProvider
20+
jsonPath: .spec.provider.name
21+
type: string
1922
- name: Min
2023
type: integer
2124
jsonPath: .spec.minAvailable
25+
- name: Available
26+
type: integer
27+
jsonPath: .status.resourceHandleCount.available
28+
- name: Ready
29+
type: integer
30+
jsonPath: .status.resourceHandleCount.ready
2231
- name: Age
2332
type: date
2433
jsonPath: .metadata.creationTimestamp
@@ -48,6 +57,10 @@ spec:
4857
description: ResourcePool specification
4958
type: object
5059
properties:
60+
deleteUnhealthyResourceHandles:
61+
description: >-
62+
If set then any unbound ResourceHandle that fails health check will be automatically deleted.
63+
type: boolean
5164
lifespan:
5265
description: >-
5366
Lifespan configuration for ResourceHandle provisioned by the ResourcePool.
@@ -81,6 +94,11 @@ spec:
8194
Ex: "3d" for 3 days.
8295
type: string
8396
pattern: ^[0-9]+[smhd]$
97+
maxUnready:
98+
description: >-
99+
Maximum number of resource handles that do not pass readiness check.
100+
type: integer
101+
minimum: 0
84102
minAvailable:
85103
description: >-
86104
Minimum number of unclaimed resource handles to maintain for the
@@ -175,3 +193,21 @@ spec:
175193
description: Kopf status
176194
type: object
177195
x-kubernetes-preserve-unknown-fields: true
196+
resourceHandleCount:
197+
type: object
198+
properties:
199+
available:
200+
type: integer
201+
ready:
202+
type: integer
203+
resourceHandles:
204+
type: array
205+
items:
206+
type: object
207+
properties:
208+
healthy:
209+
type: boolean
210+
name:
211+
type: string
212+
ready:
213+
type: boolean

helm/templates/crds/resourcehandles.yaml

+9
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,18 @@ spec:
1717
subresources:
1818
status: {}
1919
additionalPrinterColumns:
20+
- name: Provider
21+
type: string
22+
jsonPath: .spec.provider.name
2023
- name: Pool
2124
type: string
2225
jsonPath: .spec.resourcePool.name
26+
- name: Healthy
27+
type: boolean
28+
jsonPath: .status.healthy
29+
- name: Ready
30+
type: boolean
31+
jsonPath: .status.ready
2332
- name: Claim Namespace
2433
type: string
2534
jsonPath: .spec.resourceClaim.namespace

helm/templates/crds/resourcepools.yaml

+36
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,18 @@ spec:
1717
subresources:
1818
status: {}
1919
additionalPrinterColumns:
20+
- name: ResourceProvider
21+
jsonPath: .spec.provider.name
22+
type: string
2023
- name: Min
2124
type: integer
2225
jsonPath: .spec.minAvailable
26+
- name: Available
27+
type: integer
28+
jsonPath: .status.resourceHandleCount.available
29+
- name: Ready
30+
type: integer
31+
jsonPath: .status.resourceHandleCount.ready
2332
- name: Age
2433
type: date
2534
jsonPath: .metadata.creationTimestamp
@@ -49,6 +58,10 @@ spec:
4958
description: ResourcePool specification
5059
type: object
5160
properties:
61+
deleteUnhealthyResourceHandles:
62+
description: >-
63+
If set then any unbound ResourceHandle that fails health check will be automatically deleted.
64+
type: boolean
5265
lifespan:
5366
description: >-
5467
Lifespan configuration for ResourceHandle provisioned by the ResourcePool.
@@ -82,6 +95,11 @@ spec:
8295
Ex: "3d" for 3 days.
8396
type: string
8497
pattern: ^[0-9]+[smhd]$
98+
maxUnready:
99+
description: >-
100+
Maximum number of resource handles that do not pass readiness check.
101+
type: integer
102+
minimum: 0
85103
minAvailable:
86104
description: >-
87105
Minimum number of unclaimed resource handles to maintain for the
@@ -158,4 +176,22 @@ spec:
158176
description: Kopf status
159177
type: object
160178
x-kubernetes-preserve-unknown-fields: true
179+
resourceHandleCount:
180+
type: object
181+
properties:
182+
available:
183+
type: integer
184+
ready:
185+
type: integer
186+
resourceHandles:
187+
type: array
188+
items:
189+
type: object
190+
properties:
191+
healthy:
192+
type: boolean
193+
name:
194+
type: string
195+
ready:
196+
type: boolean
161197
{{- end -}}

operator/poolboy_templating.py

+7
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,13 @@ def seconds_to_interval(seconds:int) -> str:
126126
# name: alice
127127
type_filter_match_re = re.compile(r'^{{(?!.*{{).*\| *(bool|float|int|object) *}}$')
128128

129+
def check_condition(condition, template_style='jinja2', variables=None):
    """Evaluate a condition expression by rendering it through jinja2process.

    The condition text is wrapped in parentheses before the ``| bool`` filter
    is appended. Jinja2 filters bind tighter than comparison and logical
    operators, so without the parentheses a condition such as ``a == 'x'``
    would be rendered as ``a == ('x' | bool)`` rather than ``(a == 'x') | bool``.

    Args:
        condition: Expression source text, without surrounding ``{{ }}``.
        template_style: Passed through unchanged to jinja2process.
        variables: Mapping passed to jinja2process. When omitted (or None) a
            fresh empty dict is passed, avoiding a shared mutable default.

    Returns:
        Whatever jinja2process returns for the generated template.
    """
    return jinja2process(
        template="{{ (" + condition + ") | bool}}",
        template_style=template_style,
        variables={} if variables is None else variables,
    )
135+
129136
def j2now(utc=False, fmt=None):
130137
dt = datetime.now(timezone.utc if utc else None)
131138
return dt.strftime(fmt) if fmt else dt

operator/resourcehandle.py

+135-6
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ async def bind_handle_to_claim(
7575

7676
# Loop through unbound instances to find best match
7777
for resource_handle in cls.unbound_instances.values():
78+
# Skip unhealthy
79+
if resource_handle.is_healthy == False:
80+
continue
81+
7882
# Honor explicit pool requests
7983
if resource_claim.resource_pool_name \
8084
and resource_claim.resource_pool_name != resource_handle.resource_pool_name:
@@ -86,6 +90,16 @@ async def bind_handle_to_claim(
8690
continue
8791

8892
diff_count = 0
93+
94+
# Prefer handles with known healthy status
95+
if resource_handle.is_healthy == None:
96+
diff_count += 0.1
97+
# Prefer handles that are ready
98+
if resource_handle.is_ready == False:
99+
diff_count += 0.01
100+
elif resource_handle.is_ready == None:
101+
diff_count += 0.001
102+
89103
is_match = True
90104
handle_resources = resource_handle.resources
91105
if len(resource_claim_resources) < len(handle_resources):
@@ -131,7 +145,7 @@ async def bind_handle_to_claim(
131145
matched_resource_handles.append((diff_count, resource_handle))
132146

133147
# Bind the oldest ResourceHandle with the smallest difference score
134-
matched_resource_handles.sort(key=lambda item: f"{item[0]:09d} {item[1].creation_timestamp}")
148+
matched_resource_handles.sort(key=lambda item: f"{item[0]:012.3f} {item[1].creation_timestamp}")
135149
for matched_resource_handle_item in matched_resource_handles:
136150
matched_resource_handle = matched_resource_handle_item[1]
137151
patch = [
@@ -597,13 +611,21 @@ def is_deleting(self) -> bool:
597611
def is_from_resource_pool(self) -> bool:
598612
return 'resourcePool' in self.spec
599613

614+
@property
def is_healthy(self) -> Optional[bool]:
    """Value stored under 'healthy' in this object's status, or None when the key is absent."""
    health_flag = self.status.get('healthy')
    return health_flag
617+
600618
@property
def is_past_lifespan_end(self) -> bool:
    """True when lifespan_end_datetime is set and earlier than the current UTC time.

    Returns False when lifespan_end_datetime is falsy (no lifespan end known).
    """
    lifespan_end = self.lifespan_end_datetime
    if lifespan_end:
        return lifespan_end < datetime.now(timezone.utc)
    return False
606624

625+
@property
def is_ready(self) -> Optional[bool]:
    """Value stored under 'ready' in this object's status, or None when the key is absent."""
    ready_flag = self.status.get('ready')
    return ready_flag
628+
607629
@property
608630
def lifespan_end_datetime(self) -> Any:
609631
timestamp = self.lifespan_end_timestamp
@@ -1129,18 +1151,125 @@ async def update_status(self,
11291151
"path": "/status",
11301152
"value": {},
11311153
})
1154+
if not 'resources' in self.status:
1155+
patch.append({
1156+
"op": "add",
1157+
"path": "/status/resources",
1158+
"value": [],
1159+
})
11321160

11331161
resources = deepcopy(self.resources)
11341162
resource_states = await self.get_resource_states(logger=logger)
11351163
for idx, state in enumerate(resource_states):
11361164
resources[idx]['state'] = state
1165+
if len(self.status_resources) < idx:
1166+
patch.append({
1167+
"op": "add",
1168+
"path": f"/status/resources/{idx}",
1169+
"value": {},
1170+
})
11371171

1138-
#for resource in resources:
1139-
# if
1140-
# await resourceprovider.ResourceProvider.get(resource['provider']['name'])
1172+
overall_ready = True
1173+
overall_healthy = True
11411174

1142-
# FIXME - add healthy
1143-
# FIXME - add ready
1175+
for idx, resource in enumerate(resources):
1176+
if resource['state']:
1177+
resource_provider = await resourceprovider.ResourceProvider.get(resource['provider']['name'])
1178+
resource_healthy = resource_provider.check_health(
1179+
logger = logger,
1180+
resource_handle = self,
1181+
resource_state = resource['state'],
1182+
)
1183+
if resource_healthy:
1184+
resource_ready = resource_provider.check_readiness(
1185+
logger = logger,
1186+
resource_handle = self,
1187+
resource_state = resource['state'],
1188+
)
1189+
else:
1190+
resource_ready = False
1191+
else:
1192+
resource_healthy = None
1193+
resource_ready = False
1194+
1195+
# If the resource is not healthy then it is overall unhealthy.
1196+
# If the resource health is unknown then the overall health is unknown unless it is unhealthy.
1197+
if resource_healthy == False:
1198+
overall_healthy = False
1199+
elif resource_healthy == None:
1200+
if overall_healthy:
1201+
overall_healthy = None
1202+
1203+
if resource_ready == False:
1204+
overall_ready = False
1205+
elif resource_ready == None:
1206+
if overall_ready:
1207+
overall_ready = None
1208+
1209+
if len(self.status_resources) <= idx:
1210+
if resource_healthy != None:
1211+
patch.append({
1212+
"op": "add",
1213+
"path": f"/status/resources/{idx}/healthy",
1214+
"value": resource_healthy,
1215+
})
1216+
if resource_ready != None:
1217+
patch.append({
1218+
"op": "add",
1219+
"path": f"/status/resources/{idx}/ready",
1220+
"value": resource_ready,
1221+
})
1222+
else:
1223+
if resource_healthy == None:
1224+
if 'healthy' in self.status_resources[idx]:
1225+
patch.append({
1226+
"op": "remove",
1227+
"path": f"/status/resources/{idx}/healthy",
1228+
})
1229+
elif resource_healthy != self.status_resources[idx].get('healthy'):
1230+
patch.append({
1231+
"op": "add",
1232+
"path": f"/status/resources/{idx}/healthy",
1233+
"value": resource_healthy,
1234+
})
1235+
if resource_ready == None:
1236+
if 'ready' in self.status_resources[idx]:
1237+
patch.append({
1238+
"op": "remove",
1239+
"path": f"/status/resources/{idx}/ready",
1240+
})
1241+
elif resource_ready != self.status_resources[idx].get('ready'):
1242+
patch.append({
1243+
"op": "add",
1244+
"path": f"/status/resources/{idx}/ready",
1245+
"value": resource_ready,
1246+
})
1247+
1248+
if overall_healthy == None:
1249+
if 'healthy' in self.status:
1250+
patch.append({
1251+
"op": "remove",
1252+
"path": f"/status/healthy",
1253+
})
1254+
elif overall_healthy != self.status.get('healthy'):
1255+
patch.append({
1256+
"op": "add",
1257+
"path": f"/status/healthy",
1258+
"value": overall_healthy,
1259+
})
1260+
1261+
if overall_ready == None:
1262+
if 'ready' in self.status:
1263+
patch.append({
1264+
"op": "remove",
1265+
"path": f"/status/ready",
1266+
})
1267+
elif overall_ready != self.status.get('ready'):
1268+
patch.append({
1269+
"op": "add",
1270+
"path": f"/status/ready",
1271+
"value": overall_ready,
1272+
})
11441273

11451274
resource_provider = await self.get_resource_provider()
11461275
if resource_provider.status_summary_template:

0 commit comments

Comments
 (0)