// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/container/internal/raw_hash_set.h"

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <deque>
#include <numeric>
#include <random>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "absl/base/internal/raw_logging.h"
#include "absl/container/internal/hash_function_defaults.h"
#include "absl/strings/str_format.h"
#include "benchmark/benchmark.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace container_internal {

struct RawHashSetTestOnlyAccess {
  template <typename C>
  static auto GetSlots(const C& c) -> decltype(c.slots_) {
    return c.slots_;
  }
};

namespace {

struct IntPolicy {
  using slot_type = int64_t;
  using key_type = int64_t;
  using init_type = int64_t;

  static void construct(void*, int64_t* slot, int64_t v) { *slot = v; }
  static void destroy(void*, int64_t*) {}
  static void transfer(void*, int64_t* new_slot, int64_t* old_slot) {
    *new_slot = *old_slot;
  }

  static int64_t& element(slot_type* slot) { return *slot; }

  template <class F>
  static auto apply(F&& f, int64_t x) -> decltype(std::forward<F>(f)(x, x)) {
    return std::forward<F>(f)(x, x);
  }
};
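
// Illustrative sketch, not part of the original suite: raw_hash_set invokes a
// policy's apply() with a callable that receives the key (and, for map-like
// policies, the mapped value) extracted from an initializer. For IntPolicy the
// key and the value are the same int64_t, so apply(f, x) is simply f(x, x).
// The helper name below is hypothetical.
[[maybe_unused]] int64_t ExampleApplyKey(int64_t v) {
  return IntPolicy::apply([](int64_t key, int64_t) { return key; }, v);
}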

class StringPolicy {
  template <class F, class K, class V,
            class = typename std::enable_if<
                std::is_convertible<const K&, absl::string_view>::value>::type>
  decltype(std::declval<F>()(
      std::declval<const absl::string_view&>(), std::piecewise_construct,
      std::declval<std::tuple<K>>(),
      std::declval<V>())) static apply_impl(F&& f,
                                            std::pair<std::tuple<K>, V> p) {
    const absl::string_view& key = std::get<0>(p.first);
    return std::forward<F>(f)(key, std::piecewise_construct,
                              std::move(p.first), std::move(p.second));
  }

 public:
  struct slot_type {
    struct ctor {};

    template <class... Ts>
    slot_type(ctor, Ts&&... ts) : pair(std::forward<Ts>(ts)...) {}

    std::pair<std::string, std::string> pair;
  };

  using key_type = std::string;
  using init_type = std::pair<std::string, std::string>;

  template <class allocator_type, class... Args>
  static void construct(allocator_type* alloc, slot_type* slot, Args... args) {
    std::allocator_traits<allocator_type>::construct(
        *alloc, slot, typename slot_type::ctor(), std::forward<Args>(args)...);
  }

  template <class allocator_type>
  static void destroy(allocator_type* alloc, slot_type* slot) {
    std::allocator_traits<allocator_type>::destroy(*alloc, slot);
  }

  template <class allocator_type>
  static void transfer(allocator_type* alloc, slot_type* new_slot,
                       slot_type* old_slot) {
    construct(alloc, new_slot, std::move(old_slot->pair));
    destroy(alloc, old_slot);
  }

  static std::pair<std::string, std::string>& element(slot_type* slot) {
    return slot->pair;
  }

  template <class F, class... Args>
  static auto apply(F&& f, Args&&... args)
      -> decltype(apply_impl(std::forward<F>(f),
                             PairArgs(std::forward<Args>(args)...))) {
    return apply_impl(std::forward<F>(f),
                      PairArgs(std::forward<Args>(args)...));
  }
};
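
// For orientation (a sketch of the call path, not new machinery): a call such
// as StringTable::emplace("k", "v") routes through StringPolicy::apply, which
// uses PairArgs to split the arguments into key and value tuples, then
// apply_impl invokes the table's callable as
// f(key_view, std::piecewise_construct, key_tuple, value_tuple).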

struct StringHash : container_internal::hash_default_hash<absl::string_view> {
  using is_transparent = void;
};
struct StringEq : std::equal_to<absl::string_view> {
  using is_transparent = void;
};

struct StringTable
    : raw_hash_set<StringPolicy, StringHash, StringEq, std::allocator<int>> {
  using Base = typename StringTable::raw_hash_set;
  StringTable() {}
  using Base::Base;
};

struct IntTable
    : raw_hash_set<IntPolicy, container_internal::hash_default_hash<int64_t>,
                   std::equal_to<int64_t>, std::allocator<int64_t>> {
  using Base = typename IntTable::raw_hash_set;
  IntTable() {}
  using Base::Base;
};
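
// Illustrative sketch, not part of the original suite: a minimal lookup
// benchmark showing how the IntTable fixture above is typically driven. The
// benchmark name and its argument are hypothetical.
void BM_ExampleFindHit(benchmark::State& state) {
  IntTable t;
  for (int64_t i = 0; i < state.range(0); ++i) t.insert(i);
  int64_t key = 0;
  for (auto _ : state) {
    // Cycle through keys that are all present, so every find() is a hit.
    ::benchmark::DoNotOptimize(t.find(key));
    key = (key + 1) % state.range(0);
  }
}
BENCHMARK(BM_ExampleFindHit)->Arg(1024);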

struct string_generator {
  template <class RNG>
  std::string operator()(RNG& rng) const {
    // Generate `size` printable-ASCII characters.
    std::string res;
    res.resize(size);
    std::uniform_int_distribution<uint32_t> printable_ascii(0x20, 0x7E);
    std::generate(res.begin(), res.end(), [&] { return printable_ascii(rng); });
    return res;
  }

  size_t size;
};

// Model a cache in steady state.
//
// On a table of size N, keep deleting the LRU entry and adding a random one.
void BM_CacheInSteadyState(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;
  std::deque<std::string> keys;
  while (t.size() < state.range(0)) {
    auto x = t.emplace(gen(rng), gen(rng));
    if (x.second) keys.push_back(x.first->first);
  }
  ABSL_RAW_CHECK(state.range(0) >= 10, "");
  while (state.KeepRunning()) {
    // Some cache hits.
    std::deque<std::string>::const_iterator it;
    for (int i = 0; i != 90; ++i) {
      if (i % 10 == 0) it = keys.end();
      ::benchmark::DoNotOptimize(t.find(*--it));
    }
    // Some cache misses.
    for (int i = 0; i != 10; ++i) ::benchmark::DoNotOptimize(t.find(gen(rng)));
    ABSL_RAW_CHECK(t.erase(keys.front()), keys.front().c_str());
    keys.pop_front();
    while (true) {
      auto x = t.emplace(gen(rng), gen(rng));
      if (x.second) {
        keys.push_back(x.first->first);
        break;
      }
    }
  }
  state.SetItemsProcessed(state.iterations());
  state.SetLabel(absl::StrFormat("load_factor=%.2f", t.load_factor()));
}

template <typename Benchmark>
void CacheInSteadyStateArgs(Benchmark* bm) {
  // The default.
  const float max_load_factor = 0.875;
  // When the cache is at the steady state, the probe sequence will equal
  // capacity if there is no reclamation of deleted slots. Pick a number large
  // enough to make the benchmark slow for that case.
  const size_t capacity = 1 << 10;
  // Check kNumPoints data points to cover load factors from max_load_factor/2
  // up to just under max_load_factor (roughly [0.44, 0.83)).
  const size_t kNumPoints = 10;
  for (size_t i = 0; i != kNumPoints; ++i)
    bm->Arg(std::ceil(
        capacity * (max_load_factor + i * max_load_factor / kNumPoints) / 2));
}
BENCHMARK(BM_CacheInSteadyState)->Apply(CacheInSteadyStateArgs);
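
// Worked example of the argument computation above, with capacity = 1024 and
// max_load_factor = 0.875: i = 0 gives ceil(1024 * 0.875 / 2) = 448 elements,
// while i = 9 gives ceil(1024 * (0.875 + 0.7875) / 2) = 852, so the table
// sizes sweep roughly 44% to 83% of the nominal capacity.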

void BM_EndComparison(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;
  while (t.size() < state.range(0)) {
    t.emplace(gen(rng), gen(rng));
  }

  for (auto _ : state) {
    for (auto it = t.begin(); it != t.end(); ++it) {
      benchmark::DoNotOptimize(it);
      benchmark::DoNotOptimize(t);
      benchmark::DoNotOptimize(it != t.end());
    }
  }
}
BENCHMARK(BM_EndComparison)->Arg(400);

void BM_CopyCtor(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  while (t.size() < state.range(0)) {
    t.emplace(dist(rng));
  }

  for (auto _ : state) {
    IntTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtor)->Range(128, 4096);

void BM_CopyAssign(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  while (t.size() < state.range(0)) {
    t.emplace(dist(rng));
  }

  IntTable t2;
  for (auto _ : state) {
    t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyAssign)->Range(128, 4096);

void BM_RangeCtor(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  std::vector<int> values;
  const size_t desired_size = state.range(0);
  while (values.size() < desired_size) {
    values.emplace_back(dist(rng));
  }

  for (auto unused : state) {
    IntTable t{values.begin(), values.end()};
    benchmark::DoNotOptimize(t);
  }
}
BENCHMARK(BM_RangeCtor)->Range(128, 65536);

void BM_NoOpReserveIntTable(benchmark::State& state) {
  IntTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveIntTable);

void BM_NoOpReserveStringTable(benchmark::State& state) {
  StringTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveStringTable);

void BM_ReserveIntTable(benchmark::State& state) {
  int reserve_size = state.range(0);
  for (auto _ : state) {
    state.PauseTiming();
    IntTable t;
    state.ResumeTiming();
    benchmark::DoNotOptimize(t);
    t.reserve(reserve_size);
  }
}
BENCHMARK(BM_ReserveIntTable)->Range(128, 4096);

void BM_ReserveStringTable(benchmark::State& state) {
  int reserve_size = state.range(0);
  for (auto _ : state) {
    state.PauseTiming();
    StringTable t;
    state.ResumeTiming();
    benchmark::DoNotOptimize(t);
    t.reserve(reserve_size);
  }
}
BENCHMARK(BM_ReserveStringTable)->Range(128, 4096);

// Like std::iota, except that ctrl_t doesn't support operator++.
template <typename CtrlIter>
void Iota(CtrlIter begin, CtrlIter end, int value) {
  for (; begin != end; ++begin, ++value) {
    *begin = static_cast<ctrl_t>(value);
  }
}
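
// For context (based on the ctrl_t encoding in raw_hash_set.h): negative
// control bytes mark special states (kEmpty, kDeleted, kSentinel), while
// values in [0, 127] are the H2 hash bits of a full slot. Starting the Iota
// sequences below at a small negative value therefore fills each group with a
// mix of special and full control bytes.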

void BM_Group_Match(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  h2_t h = 1;
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(h);
    ::benchmark::DoNotOptimize(g.Match(h));
  }
}
BENCHMARK(BM_Group_Match);

void BM_Group_MatchEmpty(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) ::benchmark::DoNotOptimize(g.MatchEmpty());
}
BENCHMARK(BM_Group_MatchEmpty);

void BM_Group_MatchEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) ::benchmark::DoNotOptimize(g.MatchEmptyOrDeleted());
}
BENCHMARK(BM_Group_MatchEmptyOrDeleted);

void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state)
    ::benchmark::DoNotOptimize(g.CountLeadingEmptyOrDeleted());
}
BENCHMARK(BM_Group_CountLeadingEmptyOrDeleted);

void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  Iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state) ::benchmark::DoNotOptimize(*g.MatchEmptyOrDeleted());
}
BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted);

void BM_DropDeletes(benchmark::State& state) {
  constexpr size_t capacity = (1 << 20) - 1;
  std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth);
  ctrl[capacity] = ctrl_t::kSentinel;
  std::vector<ctrl_t> pattern = {ctrl_t::kEmpty,   static_cast<ctrl_t>(2),
                                 ctrl_t::kDeleted, static_cast<ctrl_t>(2),
                                 ctrl_t::kEmpty,   static_cast<ctrl_t>(1),
                                 ctrl_t::kDeleted};
  for (size_t i = 0; i != capacity; ++i) {
    ctrl[i] = pattern[i % pattern.size()];
  }
  while (state.KeepRunning()) {
    state.PauseTiming();
    std::vector<ctrl_t> ctrl_copy = ctrl;
    state.ResumeTiming();
    ConvertDeletedToEmptyAndFullToDeleted(ctrl_copy.data(), capacity);
    ::benchmark::DoNotOptimize(ctrl_copy[capacity]);
  }
}
BENCHMARK(BM_DropDeletes);
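
// A note on what BM_DropDeletes exercises (a summary, going by the semantics
// the function name implies): ConvertDeletedToEmptyAndFullToDeleted rewrites
// the control array so that kDeleted slots become kEmpty and full slots become
// kDeleted, which is the first step of rehashing in place without growing the
// table.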

}  // namespace
}  // namespace container_internal
ABSL_NAMESPACE_END
}  // namespace absl

// These methods are here to make it easy to examine the assembly for targeted
// parts of the API.
auto CodegenAbslRawHashSetInt64Find(absl::container_internal::IntTable* table,
                                    int64_t key) -> decltype(table->find(key)) {
  return table->find(key);
}

bool CodegenAbslRawHashSetInt64FindNeEnd(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->find(key) != table->end();
}

auto CodegenAbslRawHashSetInt64Insert(absl::container_internal::IntTable* table,
                                      int64_t key)
    -> decltype(table->insert(key)) {
  return table->insert(key);
}

bool CodegenAbslRawHashSetInt64Contains(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->contains(key);
}

void CodegenAbslRawHashSetInt64Iterate(
    absl::container_internal::IntTable* table) {
  for (auto x : *table) benchmark::DoNotOptimize(x);
}
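
// The variable below pins the Codegen* functions: taking their addresses and
// passing the tuple through DoNotOptimize ODR-uses them, so the compiler and
// linker keep their definitions around for inspection.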
int odr =
    (::benchmark::DoNotOptimize(std::make_tuple(
         &CodegenAbslRawHashSetInt64Find, &CodegenAbslRawHashSetInt64FindNeEnd,
         &CodegenAbslRawHashSetInt64Insert,
         &CodegenAbslRawHashSetInt64Contains,
         &CodegenAbslRawHashSetInt64Iterate)),
     1);