dockerjob.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. # Copyright 2015 gRPC authors.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Helpers to run docker instances as jobs."""
  15. from __future__ import print_function
  16. import json
  17. import os
  18. import subprocess
  19. import sys
  20. import tempfile
  21. import time
  22. import uuid
  23. sys.path.append(os.path.dirname(os.path.abspath(__file__)))
  24. import jobset
# Module-wide writable handle on the null device. The helpers below pass it as
# stdout/stderr of docker CLI invocations so their output is discarded.
_DEVNULL = open(os.devnull, 'w')
  26. def random_name(base_name):
  27. """Randomizes given base name."""
  28. return '%s_%s' % (base_name, uuid.uuid4())
  29. def docker_kill(cid):
  30. """Kills a docker container. Returns True if successful."""
  31. return subprocess.call(['docker', 'kill', str(cid)],
  32. stdin=subprocess.PIPE,
  33. stdout=_DEVNULL,
  34. stderr=subprocess.STDOUT) == 0
  35. def docker_mapped_port(cid, port, timeout_seconds=15):
  36. """Get port mapped to internal given internal port for given container."""
  37. started = time.time()
  38. while time.time() - started < timeout_seconds:
  39. try:
  40. output = subprocess.check_output('docker port %s %s' % (cid, port),
  41. stderr=_DEVNULL,
  42. shell=True).decode()
  43. return int(output.split(':', 2)[1])
  44. except subprocess.CalledProcessError as e:
  45. pass
  46. raise Exception('Failed to get exposed port %s for container %s.' %
  47. (port, cid))
  48. def docker_ip_address(cid, timeout_seconds=15):
  49. """Get port mapped to internal given internal port for given container."""
  50. started = time.time()
  51. while time.time() - started < timeout_seconds:
  52. cmd = 'docker inspect %s' % cid
  53. try:
  54. output = subprocess.check_output(cmd, stderr=_DEVNULL,
  55. shell=True).decode()
  56. json_info = json.loads(output)
  57. assert len(json_info) == 1
  58. out = json_info[0]['NetworkSettings']['IPAddress']
  59. if not out:
  60. continue
  61. return out
  62. except subprocess.CalledProcessError as e:
  63. pass
  64. raise Exception(
  65. 'Non-retryable error: Failed to get ip address of container %s.' % cid)
  66. def wait_for_healthy(cid, shortname, timeout_seconds):
  67. """Wait timeout_seconds for the container to become healthy"""
  68. started = time.time()
  69. while time.time() - started < timeout_seconds:
  70. try:
  71. output = subprocess.check_output([
  72. 'docker', 'inspect', '--format="{{.State.Health.Status}}"', cid
  73. ],
  74. stderr=_DEVNULL).decode()
  75. if output.strip('\n') == 'healthy':
  76. return
  77. except subprocess.CalledProcessError as e:
  78. pass
  79. time.sleep(1)
  80. raise Exception('Timed out waiting for %s (%s) to pass health check' %
  81. (shortname, cid))
  82. def finish_jobs(jobs, suppress_failure=True):
  83. """Kills given docker containers and waits for corresponding jobs to finish"""
  84. for job in jobs:
  85. job.kill(suppress_failure=suppress_failure)
  86. while any(job.is_running() for job in jobs):
  87. time.sleep(1)
  88. def image_exists(image):
  89. """Returns True if given docker image exists."""
  90. return subprocess.call(['docker', 'inspect', image],
  91. stdin=subprocess.PIPE,
  92. stdout=_DEVNULL,
  93. stderr=subprocess.STDOUT) == 0
  94. def remove_image(image, skip_nonexistent=False, max_retries=10):
  95. """Attempts to remove docker image with retries."""
  96. if skip_nonexistent and not image_exists(image):
  97. return True
  98. for attempt in range(0, max_retries):
  99. if subprocess.call(['docker', 'rmi', '-f', image],
  100. stdin=subprocess.PIPE,
  101. stdout=_DEVNULL,
  102. stderr=subprocess.STDOUT) == 0:
  103. return True
  104. time.sleep(2)
  105. print('Failed to remove docker image %s' % image)
  106. return False
  107. class DockerJob:
  108. """Encapsulates a job"""
  109. def __init__(self, spec):
  110. self._spec = spec
  111. self._job = jobset.Job(spec,
  112. newline_on_success=True,
  113. travis=True,
  114. add_env={})
  115. self._container_name = spec.container_name
  116. def mapped_port(self, port):
  117. return docker_mapped_port(self._container_name, port)
  118. def ip_address(self):
  119. return docker_ip_address(self._container_name)
  120. def wait_for_healthy(self, timeout_seconds):
  121. wait_for_healthy(self._container_name, self._spec.shortname,
  122. timeout_seconds)
  123. def kill(self, suppress_failure=False):
  124. """Sends kill signal to the container."""
  125. if suppress_failure:
  126. self._job.suppress_failure_message()
  127. return docker_kill(self._container_name)
  128. def is_running(self):
  129. """Polls a job and returns True if given job is still running."""
  130. return self._job.state() == jobset._RUNNING