escaping.cc 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. // Copyright 2020 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "absl/strings/internal/escaping.h"
  15. #include "absl/base/internal/endian.h"
  16. #include "absl/base/internal/raw_logging.h"
  17. namespace absl {
  18. ABSL_NAMESPACE_BEGIN
  19. namespace strings_internal {
  // Base64 alphabet used for encoding: index i (0..63) maps to the i-th
  // character below ('A'-'Z', 'a'-'z', '0'-'9', then '+' and '/').
  const char kBase64Chars[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";
  // Returns the number of characters produced by Base64-encoding `input_len`
  // bytes. When `do_padding` is true, the count includes the '=' characters
  // that round a trailing partial group up to four characters.
  size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
    // Every complete three-byte group of input becomes four output characters.
    const size_t whole_groups = input_len / 3;
    const size_t tail_bytes = input_len % 3;
    size_t escaped_len = whole_groups * 4;

    // (from https://tools.ietf.org/html/rfc3548)
    // When fewer than 24 input bits remain at the end, zero bits are added on
    // the right to form an integral number of 6-bit groups, and '=' is used to
    // pad the final quantum. Only three cases can arise:
    switch (tail_bytes) {
      case 1:
        // Exactly 8 bits remain: two characters, followed by two '=' padding
        // characters when padding is requested.
        escaped_len += do_padding ? 4 : 2;
        break;
      case 2:
        // Exactly 16 bits remain: three characters, followed by one '='
        // padding character when padding is requested.
        escaped_len += do_padding ? 4 : 3;
        break;
      default:
        // The input is an integral multiple of 24 bits: the output is already
        // a multiple of four characters and needs no padding.
        break;
    }

    assert(escaped_len >= input_len);  // make sure we didn't overflow
    return escaped_len;
  }
  64. size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
  65. size_t szdest, const char* base64,
  66. bool do_padding) {
  67. static const char kPad64 = '=';
  68. if (szsrc * 4 > szdest * 3) return 0;
  69. char* cur_dest = dest;
  70. const unsigned char* cur_src = src;
  71. char* const limit_dest = dest + szdest;
  72. const unsigned char* const limit_src = src + szsrc;
  73. // Three bytes of data encodes to four characters of cyphertext.
  74. // So we can pump through three-byte chunks atomically.
  75. if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3.
  76. while (cur_src < limit_src - 3) { // While we have >= 32 bits.
  77. uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
  78. cur_dest[0] = base64[in >> 18];
  79. in &= 0x3FFFF;
  80. cur_dest[1] = base64[in >> 12];
  81. in &= 0xFFF;
  82. cur_dest[2] = base64[in >> 6];
  83. in &= 0x3F;
  84. cur_dest[3] = base64[in];
  85. cur_dest += 4;
  86. cur_src += 3;
  87. }
  88. }
  89. // To save time, we didn't update szdest or szsrc in the loop. So do it now.
  90. szdest = limit_dest - cur_dest;
  91. szsrc = limit_src - cur_src;
  92. /* now deal with the tail (<=3 bytes) */
  93. switch (szsrc) {
  94. case 0:
  95. // Nothing left; nothing more to do.
  96. break;
  97. case 1: {
  98. // One byte left: this encodes to two characters, and (optionally)
  99. // two pad characters to round out the four-character cypherblock.
  100. if (szdest < 2) return 0;
  101. uint32_t in = cur_src[0];
  102. cur_dest[0] = base64[in >> 2];
  103. in &= 0x3;
  104. cur_dest[1] = base64[in << 4];
  105. cur_dest += 2;
  106. szdest -= 2;
  107. if (do_padding) {
  108. if (szdest < 2) return 0;
  109. cur_dest[0] = kPad64;
  110. cur_dest[1] = kPad64;
  111. cur_dest += 2;
  112. szdest -= 2;
  113. }
  114. break;
  115. }
  116. case 2: {
  117. // Two bytes left: this encodes to three characters, and (optionally)
  118. // one pad character to round out the four-character cypherblock.
  119. if (szdest < 3) return 0;
  120. uint32_t in = absl::big_endian::Load16(cur_src);
  121. cur_dest[0] = base64[in >> 10];
  122. in &= 0x3FF;
  123. cur_dest[1] = base64[in >> 4];
  124. in &= 0x00F;
  125. cur_dest[2] = base64[in << 2];
  126. cur_dest += 3;
  127. szdest -= 3;
  128. if (do_padding) {
  129. if (szdest < 1) return 0;
  130. cur_dest[0] = kPad64;
  131. cur_dest += 1;
  132. szdest -= 1;
  133. }
  134. break;
  135. }
  136. case 3: {
  137. // Three bytes left: same as in the big loop above. We can't do this in
  138. // the loop because the loop above always reads 4 bytes, and the fourth
  139. // byte is past the end of the input.
  140. if (szdest < 4) return 0;
  141. uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1);
  142. cur_dest[0] = base64[in >> 18];
  143. in &= 0x3FFFF;
  144. cur_dest[1] = base64[in >> 12];
  145. in &= 0xFFF;
  146. cur_dest[2] = base64[in >> 6];
  147. in &= 0x3F;
  148. cur_dest[3] = base64[in];
  149. cur_dest += 4;
  150. szdest -= 4;
  151. break;
  152. }
  153. default:
  154. // Should not be reached: blocks of 4 bytes are handled
  155. // in the while loop before this switch statement.
  156. ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
  157. break;
  158. }
  159. return (cur_dest - dest);
  160. }
  161. } // namespace strings_internal
  162. ABSL_NAMESPACE_END
  163. } // namespace absl