Skip to content

Commit 7e147fc

Browse files
committed
added sub experiments support, fixed some examples
added ray example. fixed utc datetime issue. fixed account order issue. added sub experiments examples.
1 parent 4accd26 commit 7e147fc

File tree

14 files changed

+272
-120
lines changed

14 files changed

+272
-120
lines changed

deepkit/__init__.py

+29-11
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,41 @@ def log(s):
1717
deepkit.globals.last_logs.write(s)
1818

1919

20-
def experiment(project=None, account=None, new=False) -> Experiment:
20+
def experiment(project=None, account=None) -> Experiment:
2121
"""
22+
Per default this method returns a singleton.
23+
24+
If you start an experiment using the Deepkit cli (`deepkit run`) or the Deepkit app, the experiment
25+
is created beforehand and this method picks it up. If an experiment is run without cli or app,
26+
then this method creates a new one. In any case, this method always returns the same instance, so
27+
you don't strictly need to save or pass around its return value.
28+
29+
If you want to create new sub experiments you should use:
30+
31+
import deepkit
32+
root_experiment = deepkit.experiment()
33+
sub_experiment = root_experiment.create_sub_experiment()
34+
35+
This will _always_ create a new child experiment. In this case, make sure to call `experiment.done()`,
36+
(or abort, crashed, failed) manually to end the created experiment and pass around the created experiment
37+
instance manually (since it's not tracked).
38+
2239
:param project: If the current folder is not linked and you don't specify a project here, an error is raised since
2340
Deepkit isn't able to know to which project the experiments data should be sent.
24-
:param account: Per default the account linked to this folder is used (see `deepkit link`),
41+
:param account: Per default the first account linked to this folder is used (see `deepkit link` or `deepkit-sdk auth -l`),
2542
this is on a new system `localhost`.
2643
You can overwrite which account is used by specifying the name here (see `deepkit id` for
2744
available accounts in your system).
28-
:param new: Per default this method returns a singleton. Force a new experiment creation with new=True.
2945
:return:
3046
"""
3147
"""
3248
:return: returns either a new experiment or the last created one.
3349
"""
34-
if deepkit.globals.last_experiment and new is False:
35-
return deepkit.globals.last_experiment
50+
if not deepkit.globals.last_experiment:
51+
deepkit.globals.last_experiment = Experiment(project=project, account=account, monitoring=True,
52+
try_pick_up=True)
3653

37-
return Experiment(ExperimentOptions(project=project, account=account))
54+
return deepkit.globals.last_experiment
3855

3956

4057
if deepkit.utils.in_self_execution():
@@ -84,8 +101,8 @@ def login(
84101
):
85102
"""
86103
In environments (like Jupyter Notebooks/Google Colab) where it's not possible to use the Deepkit CLI to authenticate
87-
with a Deepkit server (deepkit auth) or where "deepkit run" is not used, it's required to provide an access_key
88-
directly. Either by specifying one or by providing username/password.
104+
with a Deepkit server (deepkit auth) or where "deepkit run" is not used, it's required to provide an access-key
105+
or login via username/password.
89106
90107
It's important to call this method BEFORE deepkit.experiment() is called.
91108
"""
@@ -108,8 +125,9 @@ def login(
108125
access_key = access_key_map[cache_key]
109126
else:
110127
print("No access_key provided. Please provide username and password.")
111-
print(f"Note: You can create an access_key directly in the CLI using `deepkit access_key {host} --port {port}`")
112-
client = Client(ExperimentOptions())
128+
print(
129+
f"Note: You can create an access_key directly in the CLI using `deepkit access-key {host} --port {port}`")
130+
client = Client()
113131
client.host = host
114132
client.port = port
115133
client.ssl = ssl
@@ -122,7 +140,7 @@ def login(
122140
if not access_key:
123141
raise Exception("Credentials check failed")
124142

125-
print("Login successful.")
143+
print("Login successful. Access key is " + access_key)
126144
access_key_map[cache_key] = access_key
127145

128146
os.environ['DEEPKIT_HOST'] = host

deepkit/client.py

+64-35
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,17 @@
55
import sys
66
import threading
77
from asyncio import Future
8-
from datetime import datetime
8+
import datetime
99
from enum import Enum
10-
from typing import Dict, List, Optional
11-
import numpy as np
10+
from typing import Dict, Optional
1211

12+
import numpy as np
1313
import websockets
1414
from rx.subject import BehaviorSubject
1515

1616
import deepkit.globals
1717
from deepkit.home import get_home_config
18-
from deepkit.model import ExperimentOptions, FolderLink
18+
from deepkit.model import FolderLink
1919

2020

2121
def is_in_directory(filepath, directory):
@@ -31,10 +31,15 @@ def json_converter(obj):
3131
return int(obj)
3232
elif isinstance(obj, np.floating):
3333
return float(obj)
34+
elif isinstance(obj, np.float):
35+
return float(obj)
3436
elif isinstance(obj, np.ndarray):
3537
return obj.tolist()
3638
elif isinstance(obj, datetime.datetime):
37-
return obj.__str__()
39+
# we assume all datetime instances are UTC
40+
return obj.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
41+
else:
42+
return str(obj)
3843

3944

4045
class JobStatus(Enum):
@@ -47,23 +52,36 @@ class JobStatus(Enum):
4752
class Client(threading.Thread):
4853
connection: websockets.WebSocketClientProtocol
4954

50-
def __init__(self, options: ExperimentOptions):
55+
def __init__(self, project: Optional[str] = None,
56+
account: Optional[str] = None,
57+
try_pick_up=False,
58+
parent_experiment=None,
59+
silent=False):
5160
self.connected = BehaviorSubject(False)
52-
self.options: ExperimentOptions = options
61+
self.project = project
62+
self.account = account
63+
self.parent_experiment = parent_experiment
64+
self.silent = silent
5365

5466
self.host = os.environ.get('DEEPKIT_HOST', '127.0.0.1')
5567
self.socket_path = os.environ.get('DEEPKIT_SOCKET', None)
5668
self.ssl = os.environ.get('DEEPKIT_SSL', '0') is '1'
5769
self.port = int(os.environ.get('DEEPKIT_PORT', '8960'))
58-
self.job_token = os.environ.get('DEEPKIT_JOB_ACCESSTOKEN', None)
59-
self.job_id = os.environ.get('DEEPKIT_JOB_ID', None)
6070

71+
self.job_token = None
72+
self.job_id = None
73+
74+
if try_pick_up:
75+
# is set by Deepkit cli
76+
self.job_token = os.environ.get('DEEPKIT_JOB_ACCESSTOKEN', None)
77+
self.job_id = os.environ.get('DEEPKIT_JOB_ID', None)
78+
79+
# is set by deepkit.login()
6180
self.token = os.environ.get('DEEPKIT_ACCESSTOKEN', None)
6281

6382
self.result_status = None
6483

6584
self.message_id = 0
66-
self.account = 'localhost'
6785
self.callbacks: Dict[int, asyncio.Future] = {}
6886
self.subscriber: Dict[int, any] = {}
6987
self.stopping = False
@@ -102,24 +120,25 @@ def shutdown(self):
102120
async def stop_and_sync(self):
103121
self.stopping = True
104122

105-
if deepkit.utils.in_self_execution():
123+
if deepkit.utils.in_self_execution() or self.result_status:
106124
# only when we are in self execution do we set status, time stamps etc
107-
# otherwise the CLI is doing that and the server.
125+
# otherwise the CLI is doing that and the server. Or when
126+
# the experiment set result_state explicitly.
108127

109128
# done = 150, //when all tasks are done
110129
# aborted = 200, //when at least one task aborted
111130
# failed = 250, //when at least one task failed
112131
# crashed = 300, //when at least one task crashed
113132
self.patches['status'] = 150
114-
self.patches['ended'] = datetime.now().isoformat()
115-
self.patches['tasks.main.ended'] = datetime.now().isoformat()
133+
self.patches['ended'] = datetime.datetime.utcnow()
134+
self.patches['tasks.main.ended'] = datetime.datetime.utcnow()
116135

117136
# done = 500,
118137
# aborted = 550,
119138
# failed = 600,
120139
# crashed = 650,
121140
self.patches['tasks.main.status'] = 500
122-
self.patches['tasks.main.instances.0.ended'] = datetime.now().isoformat()
141+
self.patches['tasks.main.instances.0.ended'] = datetime.datetime.utcnow()
123142

124143
# done = 500,
125144
# aborted = 550,
@@ -338,12 +357,12 @@ async def send_messages(self, connection):
338357
try:
339358
j = json.dumps(m, default=json_converter)
340359
except TypeError as e:
341-
print('Could not send message since JSON error', e, m)
360+
print('Could not send message since JSON error', e, m, file=sys.stderr)
342361
continue
343362
await connection.send(j)
344363
self.queue.remove(m)
345364
except Exception as e:
346-
print("Failed sending, exit send_messages")
365+
print("Failed sending, exit send_messages", file=sys.stderr)
347366
raise e
348367

349368
if len(self.patches) > 0:
@@ -363,7 +382,7 @@ async def send_messages(self, connection):
363382
send
364383
],
365384
'timeout': 60
366-
}))
385+
}, default=json_converter))
367386

368387
for i in send.keys():
369388
if self.patches[i] == send[i]:
@@ -396,7 +415,7 @@ async def handle_messages(self, connection):
396415
del self.callbacks[res['id']]
397416

398417
if not self.stopping:
399-
print("Deepkit: lost connection. reconnect ...")
418+
self.log("Deepkit: lost connection. reconnect ...")
400419
self.connecting = self.loop.create_future()
401420
self.connected.on_next(False)
402421
self.loop.create_task(self._connect())
@@ -411,7 +430,7 @@ async def _connected(self, id: str, token: str):
411430
self.connection = await websockets.connect(url)
412431
except Exception as e:
413432
# try again later
414-
print('Unable to connect', e)
433+
self.log('Unable to connect', e)
415434
await asyncio.sleep(1)
416435
self.loop.create_task(self._connect())
417436
return
@@ -430,7 +449,7 @@ async def _connected(self, id: str, token: str):
430449
}
431450
}, lock=False)
432451

433-
await self.connection.send(json.dumps(message))
452+
await self.connection.send(json.dumps(message, default=json_converter))
434453

435454
res = await self.callbacks[message['id']]
436455
if not res['result'] or res['result'] is not True:
@@ -440,7 +459,7 @@ async def _connected(self, id: str, token: str):
440459

441460
self.connecting.set_result(True)
442461
if self.connections > 0:
443-
print("Deepkit: Reconnected.")
462+
self.log("Deepkit: Reconnected.")
444463

445464
self.connected.on_next(True)
446465
self.connections += 1
@@ -469,20 +488,21 @@ async def _connect(self):
469488
link: Optional[FolderLink] = None
470489

471490
user_token = self.token
472-
account_name = 'dynamic'
491+
account_name = 'none'
473492

474493
if not user_token:
475494
config = get_home_config()
476495
# when no user_token is given (via deepkit.login() for example)
477496
# we need to find the host, port, token from the user config in ~/.deepkit/config
478-
if self.options.account:
479-
account_config = config.get_account_for_name(self.options.account)
480-
elif not self.options.project:
497+
if not self.account and not self.project:
498+
# neither account nor project given: resolve both from the folder link of sys.path[0]
481499
link = config.get_folder_link_of_directory(sys.path[0])
482500
account_config = config.get_account_for_id(link.accountId)
501+
elif self.account and not self.project:
502+
account_config = config.get_account_for_name(self.account)
483503
else:
484-
# default to localhost
485-
account_config = config.get_account_for_name('localhost')
504+
# default to first account configured
505+
account_config = config.get_first_account()
486506

487507
account_name = account_config.name
488508
self.host = account_config.host
@@ -496,7 +516,7 @@ async def _connect(self):
496516
self.connection = await websockets.connect(url)
497517
except Exception as e:
498518
self.offline = True
499-
print(f"Deepkit: App not started or server not reachable. Monitoring disabled. {e}")
519+
print(f"Deepkit: App not started or server not reachable. Monitoring disabled. {e}", file=sys.stderr)
500520
self.connecting.set_result(False)
501521
return
502522

@@ -513,24 +533,30 @@ async def _connect(self):
513533
if not res['result']:
514534
raise Exception('Login invalid')
515535

536+
project_name = ''
516537
if link:
538+
project_name = link.name
517539
projectId = link.projectId
518540
else:
519-
if not self.options.project:
520-
raise Exception('No project defined. Please use project="project-name"'
541+
if not self.project:
542+
raise Exception('No project defined. Please use project="project-name" '
521543
'to specify which project to use.')
522544

523-
project = await self._action('app', 'getProjectForPublicName', [self.options.project], lock=False)
545+
project = await self._action('app', 'getProjectForPublicName', [self.project], lock=False)
546+
524547
if not project:
525548
raise Exception(
526-
f'No project found for name {self.options.project}. Make sure it exists before using it. '
549+
f'No project found for name {self.project}. Make sure it exists before using it. '
527550
f'Do you use the correct account? (used {account_name})')
528-
551+
project_name = project['name']
529552
projectId = project['id']
530553

531-
job = await self._action('app', 'createJob', [projectId],
554+
job = await self._action('app', 'createJob', [projectId, self.parent_experiment],
532555
lock=False)
533556

557+
prefix = "Sub experiment" if self.parent_experiment else "Experiment"
558+
self.log(f"{prefix} #{job['number']} created in project {project_name} using account {account_name}")
559+
534560
deepkit.globals.loaded_job_config = job['config']['config']
535561
self.job_token = await self._action('app', 'getJobAccessToken', [job['id']], lock=False)
536562
self.job_id = job['id']
@@ -542,3 +568,6 @@ async def _connect(self):
542568
self.connecting.set_exception(e)
543569

544570
self.queue = queue_copy + self.queue
571+
572+
def log(self, *message: str):
573+
if not self.silent: print(*message)

0 commit comments

Comments
 (0)