search.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. # Copyright 2019 the gRPC authors.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """A search algorithm over the space of all bytestrings."""
  15. from __future__ import absolute_import
  16. from __future__ import division
  17. from __future__ import print_function
  18. import base64
  19. import hashlib
  20. import itertools
  21. import logging
  22. import struct
  23. from examples.python.cancellation import hash_name_pb2
  24. _LOGGER = logging.getLogger(__name__)
  25. _BYTE_MAX = 255
  26. def _get_hamming_distance(a, b):
  27. """Calculates hamming distance between strings of equal length."""
  28. distance = 0
  29. for char_a, char_b in zip(a, b):
  30. if char_a != char_b:
  31. distance += 1
  32. return distance
  33. def _get_substring_hamming_distance(candidate, target):
  34. """Calculates the minimum hamming distance between between the target
  35. and any substring of the candidate.
  36. Args:
  37. candidate: The string whose substrings will be tested.
  38. target: The target string.
  39. Returns:
  40. The minimum Hamming distance between candidate and target.
  41. """
  42. min_distance = None
  43. if len(target) > len(candidate):
  44. raise ValueError("Candidate must be at least as long as target.")
  45. for i in range(len(candidate) - len(target) + 1):
  46. distance = _get_hamming_distance(candidate[i:i + len(target)].lower(),
  47. target.lower())
  48. if min_distance is None or distance < min_distance:
  49. min_distance = distance
  50. return min_distance
  51. def _get_hash(secret):
  52. hasher = hashlib.sha1()
  53. hasher.update(secret)
  54. return base64.b64encode(hasher.digest()).decode('ascii')
  55. class ResourceLimitExceededError(Exception):
  56. """Signifies the request has exceeded configured limits."""
  57. def _bytestrings_of_length(length):
  58. """Generates a stream containing all bytestrings of a given length.
  59. Args:
  60. length: A positive integer length.
  61. Yields:
  62. All bytestrings of length `length`.
  63. """
  64. for digits in itertools.product(range(_BYTE_MAX), repeat=length):
  65. yield b''.join(struct.pack('B', i) for i in digits)
  66. def _all_bytestrings():
  67. """Generates a stream containing all possible bytestrings.
  68. This generator does not terminate.
  69. Yields:
  70. All bytestrings in ascending order of length.
  71. """
  72. for bytestring in itertools.chain.from_iterable(
  73. _bytestrings_of_length(length) for length in itertools.count()):
  74. yield bytestring
  75. def search(target,
  76. ideal_distance,
  77. stop_event,
  78. maximum_hashes,
  79. interesting_hamming_distance=None):
  80. """Find candidate strings.
  81. Search through the space of all bytestrings, in order of increasing length,
  82. indefinitely, until a hash with a Hamming distance of `maximum_distance` or
  83. less has been found.
  84. Args:
  85. target: The search string.
  86. ideal_distance: The desired Hamming distance.
  87. stop_event: An event indicating whether the RPC should terminate.
  88. maximum_hashes: The maximum number of hashes to check before stopping.
  89. interesting_hamming_distance: If specified, strings with a Hamming
  90. distance from the target below this value will be yielded.
  91. Yields:
  92. Instances of HashNameResponse. The final entry in the stream will be of
  93. `maximum_distance` Hamming distance or less from the target string,
  94. while all others will be of less than `interesting_hamming_distance`.
  95. Raises:
  96. ResourceLimitExceededError: If the computation exceeds `maximum_hashes`
  97. iterations.
  98. """
  99. hashes_computed = 0
  100. for secret in _all_bytestrings():
  101. if stop_event.is_set():
  102. return
  103. candidate_hash = _get_hash(secret)
  104. distance = _get_substring_hamming_distance(candidate_hash, target)
  105. if interesting_hamming_distance is not None and distance <= interesting_hamming_distance:
  106. # Surface interesting candidates, but don't stop.
  107. yield hash_name_pb2.HashNameResponse(
  108. secret=base64.b64encode(secret),
  109. hashed_name=candidate_hash,
  110. hamming_distance=distance)
  111. elif distance <= ideal_distance:
  112. # Yield ideal candidate and end the stream.
  113. yield hash_name_pb2.HashNameResponse(
  114. secret=base64.b64encode(secret),
  115. hashed_name=candidate_hash,
  116. hamming_distance=distance)
  117. return
  118. hashes_computed += 1
  119. if hashes_computed == maximum_hashes:
  120. raise ResourceLimitExceededError()