memutil_benchmark.cc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. // Copyright 2018 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "absl/strings/internal/memutil.h"
  15. #include <algorithm>
  16. #include <cstdlib>
  17. #include "benchmark/benchmark.h"
  18. #include "absl/strings/ascii.h"
  19. // We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab.
  20. // That gives us:
  21. // - an easy search: 'b'
  22. // - a medium search: 'ab'. That means every letter is a possible match.
  23. // - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haytack)
  24. // We benchmark case-sensitive and case-insensitive versions of
  25. // three memmem implementations:
  26. // - memmem() from memutil.h
  27. // - search() from STL
  28. // - memmatch(), a custom implementation using memchr and memcmp.
  29. // Here are sample results:
  30. //
  31. // Run on (12 X 3800 MHz CPU s)
  32. // CPU Caches:
  33. // L1 Data 32K (x6)
  34. // L1 Instruction 32K (x6)
  35. // L2 Unified 256K (x6)
  36. // L3 Unified 15360K (x1)
  37. // ----------------------------------------------------------------
  38. // Benchmark Time CPU Iterations
  39. // ----------------------------------------------------------------
  40. // BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s
  41. // BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s
  42. // BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s
  43. // BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s
  44. // BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s
  45. // BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s
  46. // BM_Search 1610 ns 1609 ns 431321 5.78672GB/s
  47. // BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s
  48. // BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s
  49. // BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s
  50. // BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s
  51. // BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s
  52. // BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s
  53. // BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s
  54. // BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s
  55. // BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s
  56. // BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s
  57. // BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s
  58. // BM_MemmemStartup 5 ns 5 ns 150821500
  59. // BM_SearchStartup 5 ns 5 ns 150644203
  60. // BM_MemmatchStartup 7 ns 7 ns 97068802
  61. //
  62. // Conclusions:
  63. //
  64. // The following recommendations are based on the sample results above. However,
  65. // we have found that the performance of STL search can vary significantly
  66. // depending on compiler and standard library implementation. We recommend you
  67. // run the benchmarks for yourself on relevant platforms.
  68. //
  69. // If you need case-insensitive, STL search is slightly better than memmem for
  70. // all cases.
  71. //
  72. // Case-sensitive is more subtle:
  73. // Custom memmatch is _very_ fast at scanning, so if you have very few possible
  74. // matches in your haystack, that's the way to go. Performance drops
  75. // significantly with more matches.
  76. //
  77. // STL search is slightly faster than memmem in the medium and pathological
  78. // benchmarks. However, the performance of memmem is currently more dependable
  79. // across platforms and build configurations.
  80. namespace {
  81. constexpr int kHaystackSize = 10000;
  82. constexpr int64_t kHaystackSize64 = kHaystackSize;
  83. const char* MakeHaystack() {
  84. char* haystack = new char[kHaystackSize];
  85. for (int i = 0; i < kHaystackSize - 1; ++i) haystack[i] = 'a';
  86. haystack[kHaystackSize - 1] = 'b';
  87. return haystack;
  88. }
  89. const char* const kHaystack = MakeHaystack();
  90. void BM_Memmem(benchmark::State& state) {
  91. for (auto _ : state) {
  92. benchmark::DoNotOptimize(
  93. absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1));
  94. }
  95. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  96. }
  97. BENCHMARK(BM_Memmem);
  98. void BM_MemmemMedium(benchmark::State& state) {
  99. for (auto _ : state) {
  100. benchmark::DoNotOptimize(
  101. absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2));
  102. }
  103. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  104. }
  105. BENCHMARK(BM_MemmemMedium);
  106. void BM_MemmemPathological(benchmark::State& state) {
  107. for (auto _ : state) {
  108. benchmark::DoNotOptimize(absl::strings_internal::memmem(
  109. kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
  110. kHaystackSize - kHaystackSize / 2));
  111. }
  112. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  113. }
  114. BENCHMARK(BM_MemmemPathological);
  115. void BM_Memcasemem(benchmark::State& state) {
  116. for (auto _ : state) {
  117. benchmark::DoNotOptimize(
  118. absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1));
  119. }
  120. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  121. }
  122. BENCHMARK(BM_Memcasemem);
  123. void BM_MemcasememMedium(benchmark::State& state) {
  124. for (auto _ : state) {
  125. benchmark::DoNotOptimize(
  126. absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2));
  127. }
  128. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  129. }
  130. BENCHMARK(BM_MemcasememMedium);
  131. void BM_MemcasememPathological(benchmark::State& state) {
  132. for (auto _ : state) {
  133. benchmark::DoNotOptimize(absl::strings_internal::memcasemem(
  134. kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
  135. kHaystackSize - kHaystackSize / 2));
  136. }
  137. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  138. }
  139. BENCHMARK(BM_MemcasememPathological);
  140. bool case_eq(const char a, const char b) {
  141. return absl::ascii_tolower(a) == absl::ascii_tolower(b);
  142. }
  143. void BM_Search(benchmark::State& state) {
  144. for (auto _ : state) {
  145. benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
  146. kHaystack + kHaystackSize - 1,
  147. kHaystack + kHaystackSize));
  148. }
  149. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  150. }
  151. BENCHMARK(BM_Search);
  152. void BM_SearchMedium(benchmark::State& state) {
  153. for (auto _ : state) {
  154. benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
  155. kHaystack + kHaystackSize - 2,
  156. kHaystack + kHaystackSize));
  157. }
  158. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  159. }
  160. BENCHMARK(BM_SearchMedium);
  161. void BM_SearchPathological(benchmark::State& state) {
  162. for (auto _ : state) {
  163. benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
  164. kHaystack + kHaystackSize / 2,
  165. kHaystack + kHaystackSize));
  166. }
  167. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  168. }
  169. BENCHMARK(BM_SearchPathological);
  170. void BM_Searchcase(benchmark::State& state) {
  171. for (auto _ : state) {
  172. benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
  173. kHaystack + kHaystackSize - 1,
  174. kHaystack + kHaystackSize, case_eq));
  175. }
  176. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  177. }
  178. BENCHMARK(BM_Searchcase);
  179. void BM_SearchcaseMedium(benchmark::State& state) {
  180. for (auto _ : state) {
  181. benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
  182. kHaystack + kHaystackSize - 2,
  183. kHaystack + kHaystackSize, case_eq));
  184. }
  185. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  186. }
  187. BENCHMARK(BM_SearchcaseMedium);
  188. void BM_SearchcasePathological(benchmark::State& state) {
  189. for (auto _ : state) {
  190. benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
  191. kHaystack + kHaystackSize / 2,
  192. kHaystack + kHaystackSize, case_eq));
  193. }
  194. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  195. }
  196. BENCHMARK(BM_SearchcasePathological);
  197. char* memcasechr(const char* s, int c, size_t slen) {
  198. c = absl::ascii_tolower(c);
  199. for (; slen; ++s, --slen) {
  200. if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s);
  201. }
  202. return nullptr;
  203. }
  204. const char* memcasematch(const char* phaystack, size_t haylen,
  205. const char* pneedle, size_t neelen) {
  206. if (0 == neelen) {
  207. return phaystack; // even if haylen is 0
  208. }
  209. if (haylen < neelen) return nullptr;
  210. const char* match;
  211. const char* hayend = phaystack + haylen - neelen + 1;
  212. while ((match = static_cast<char*>(
  213. memcasechr(phaystack, pneedle[0], hayend - phaystack)))) {
  214. if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0)
  215. return match;
  216. else
  217. phaystack = match + 1;
  218. }
  219. return nullptr;
  220. }
  221. void BM_Memmatch(benchmark::State& state) {
  222. for (auto _ : state) {
  223. benchmark::DoNotOptimize(
  224. absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1));
  225. }
  226. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  227. }
  228. BENCHMARK(BM_Memmatch);
  229. void BM_MemmatchMedium(benchmark::State& state) {
  230. for (auto _ : state) {
  231. benchmark::DoNotOptimize(
  232. absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2));
  233. }
  234. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  235. }
  236. BENCHMARK(BM_MemmatchMedium);
  237. void BM_MemmatchPathological(benchmark::State& state) {
  238. for (auto _ : state) {
  239. benchmark::DoNotOptimize(absl::strings_internal::memmatch(
  240. kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
  241. kHaystackSize - kHaystackSize / 2));
  242. }
  243. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  244. }
  245. BENCHMARK(BM_MemmatchPathological);
  246. void BM_Memcasematch(benchmark::State& state) {
  247. for (auto _ : state) {
  248. benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1));
  249. }
  250. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  251. }
  252. BENCHMARK(BM_Memcasematch);
  253. void BM_MemcasematchMedium(benchmark::State& state) {
  254. for (auto _ : state) {
  255. benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2));
  256. }
  257. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  258. }
  259. BENCHMARK(BM_MemcasematchMedium);
  260. void BM_MemcasematchPathological(benchmark::State& state) {
  261. for (auto _ : state) {
  262. benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize,
  263. kHaystack + kHaystackSize / 2,
  264. kHaystackSize - kHaystackSize / 2));
  265. }
  266. state.SetBytesProcessed(kHaystackSize64 * state.iterations());
  267. }
  268. BENCHMARK(BM_MemcasematchPathological);
  269. void BM_MemmemStartup(benchmark::State& state) {
  270. for (auto _ : state) {
  271. benchmark::DoNotOptimize(absl::strings_internal::memmem(
  272. kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
  273. }
  274. }
  275. BENCHMARK(BM_MemmemStartup);
  276. void BM_SearchStartup(benchmark::State& state) {
  277. for (auto _ : state) {
  278. benchmark::DoNotOptimize(
  279. std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize,
  280. kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize));
  281. }
  282. }
  283. BENCHMARK(BM_SearchStartup);
  284. void BM_MemmatchStartup(benchmark::State& state) {
  285. for (auto _ : state) {
  286. benchmark::DoNotOptimize(absl::strings_internal::memmatch(
  287. kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
  288. }
  289. }
  290. BENCHMARK(BM_MemmatchStartup);
  291. } // namespace