Skip to content

Commit

Permalink
add more logs to debug
Browse files: browse the repository at this point in the history
  • Loading branch information
asauray committed Oct 16, 2019
1 parent aa3aedf commit 224ad71
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 46 deletions.
84 changes: 43 additions & 41 deletions clipper_admin/clipper_admin/nomad/mgmt_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,57 +6,59 @@ def mgmt_job_prefix(cluster_name):

""" Nomad payload to deploy a new mgmt """
def mgmt_deployment(job_id, datacenters, cluster_name, image, redis_ip, redis_port, num_replicas):
job = { 'Job': {
'ID': job_id,
'Datacenters': datacenters,
'Type': 'service',
'TaskGroups': [
job = {
'Job':
{
'Name': nomad_job_prefix(cluster_name),
'Count': num_replicas,
'Tasks': [
'ID': job_id,
'Datacenters': datacenters,
'Type': 'service',
'TaskGroups': [
{
'Name': mgmt_job_prefix(cluster_name),
'Driver': 'docker',
'Config': {
'args': [
"--redis_ip={}".format(redis_ip or os.environ('REDIS_SERVICE_IP')), # If redis_service_host == None, default to env var
"--redis_port={}".format(redis_port or os.environ('REDIS_SERVICE_PORT') or True)
],
'image': image,
'port_map': [
{'http': 1338}
]
},
'Resources': {
'CPU': 500,
'MemoryMB': 256,
'Networks': [
{
'DynamicPorts': [{'Label': 'http', 'Value': 1338}]
}
]
},
'Services': [
'Name': nomad_job_prefix(cluster_name),
'Count': num_replicas,
'Tasks': [
{
'Name': '{}-mgmt'.format(nomad_job_prefix(cluster_name)),
'Tags': ['machine-learning', 'model', 'clipper', 'mgmt'],
'PortLabel': 'http',
'Checks': [
'Name': mgmt_job_prefix(cluster_name),
'Driver': 'docker',
'Config': {
'args': [
"--redis_ip={}".format(redis_ip or os.environ('REDIS_SERVICE_IP')), # If redis_service_host == None, default to env var
"--redis_port={}".format(redis_port or os.environ('REDIS_SERVICE_PORT') or True)
],
'image': image,
'port_map': [
{'http': 1338}
]
},
'Resources': {
'CPU': 500,
'MemoryMB': 256,
'Networks': [
{
'DynamicPorts': [{'Label': 'http', 'Value': 1338}]
}
]
},
'Services': [
{
'Name': 'alive',
'Type': 'tcp',
'interval': 1000000000000,
'timeout': 20000000000
'Name': '{}-mgmt'.format(nomad_job_prefix(cluster_name)),
'Tags': ['machine-learning', 'model', 'clipper', 'mgmt'],
'PortLabel': 'http',
'Checks': [
{
'Name': 'alive',
'Type': 'tcp',
'interval': 3000000000,
'timeout': 2000000000
}
]
}
]
}
]
}
]
}
]

}
}
}
return job
4 changes: 2 additions & 2 deletions clipper_admin/clipper_admin/nomad/model_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ def model_deployment(job_id, datacenters, cluster_name, name, version, input_typ
{
'Name': 'alive',
'Type': 'tcp',
'interval': 1000000000000,
'timeout': 20000000000
'interval': 3000000000,
'timeout': 2000000000
}
]
}
Expand Down
7 changes: 4 additions & 3 deletions clipper_admin/clipper_admin/nomad/nomad_container_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def connect(self):

def deploy_model(self, name, version, input_type, image, num_replicas=1):
check_name = model_check_name(self.cluster_name, name, version)
job_id = 'clipper-model-{}-{}'.format(name, version)
job_id = '{}-{}-{}'.format(model_job_prefix(self.cluster_name), name, version)
self.nomad.job.register_job(
job_id,
model_deployment(job_id, self.datacenters, self.cluster_name, name, version, input_type, image, num_replicas)
Expand Down Expand Up @@ -287,7 +287,9 @@ def stop_models(self, models):
raise e

def stop_all_model_containers(self):
print('model job prefix {}', model_job_prefix(self.cluster_name))
jobs = self.nomad.jobs.get_jobs(prefix=model_job_prefix(self.cluster_name))
print('jobs: {}', jobs)
for job in jobs:
self.logger.warning('nomad job below')
self.logger.warning(job)
Expand Down Expand Up @@ -315,8 +317,7 @@ def get_query_addr(self):
query_ip, query_port= self.dns.resolveSRV(check_name)
self.query_ip = query_ip
self.query_port = query_port
#return '{}:{}'.format(query_ip, query_port)
return '10.65.30.43:29293'
return '{}:{}'.format(query_ip, query_port)
except NXDOMAIN:
return ''

Expand Down

0 comments on commit 224ad71

Please sign in to comment.