// parse-data-url.js (10 KB)
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", {
  3. value: true
  4. });
  5. exports.default = parseDataUrl;
  /**
   * Strips HTTP whitespace (space, tab, LF, CR) from both ends of a string.
   * Form feed is deliberately NOT treated as whitespace here.
   *
   * Implemented as two index scans instead of regular expressions: a
   * trailing-whitespace pattern without a start anchor backtracks
   * quadratically on a long interior run of whitespace, and the strings
   * trimmed here come from caller-supplied data URLs.
   *
   * @param {string} string - Text to trim.
   * @returns {string} The text without leading or trailing HTTP whitespace.
   */
  const removeLeadingAndTrailingHTTPWhitespace = string => {
    const httpWhitespace = " \t\n\r";
    let start = 0;
    let end = string.length;
    while (start < end && httpWhitespace.includes(string[start])) {
      start += 1;
    }
    while (end > start && httpWhitespace.includes(string[end - 1])) {
      end -= 1;
    }
    return string.slice(start, end);
  };
  /**
   * Strips HTTP whitespace (space, tab, LF, CR) from the end of a string.
   * Leading whitespace and form feeds are left alone.
   *
   * Uses a backwards index scan instead of a regular expression: a
   * trailing-whitespace pattern without a start anchor backtracks
   * quadratically when a long whitespace run is followed by another
   * character, and the values trimmed here come from caller-supplied data URLs.
   *
   * @param {string} string - Text to trim.
   * @returns {string} The text without trailing HTTP whitespace.
   */
  const removeTrailingHTTPWhitespace = string => {
    const httpWhitespace = " \t\n\r";
    let end = string.length;
    while (end > 0 && httpWhitespace.includes(string[end - 1])) {
      end -= 1;
    }
    return string.slice(0, end);
  };
  /**
   * Tells whether a value is exactly one HTTP whitespace character.
   *
   * @param {*} char - Usually a one-character string; anything else yields false.
   * @returns {boolean} True only for " ", "\t", "\n" or "\r".
   */
  const isHTTPWhitespaceChar = char => {
    // `switch` compares with strict equality, so non-strings never match.
    switch (char) {
      case " ":
      case "\t":
      case "\n":
      case "\r":
        return true;
      default:
        return false;
    }
  };
  /**
   * Checks that a string is made up only of HTTP token code points
   * (ASCII letters, digits and the punctuation listed in the pattern).
   * The empty string passes.
   *
   * @param {string} string - Candidate token.
   * @returns {boolean} True when every character is an HTTP token code point.
   */
  const solelyContainsHTTPTokenCodePoints = string => {
    const httpTokenPattern = /^[-!#$%&'*+.^_`|~A-Za-z0-9]*$/;
    return httpTokenPattern.test(string);
  };
  /**
   * Checks that a string is made up only of HTTP quoted-string token code
   * points: tab, U+0020 through U+007E, and U+0080 through U+00FF.
   * The empty string passes.
   *
   * @param {string} string - Candidate parameter value.
   * @returns {boolean} True when every character is in the allowed ranges.
   */
  const soleyContainsHTTPQuotedStringTokenCodePoints = string => {
    const quotedStringTokenPattern = /^[\t\u0020-\u007E\u0080-\u00FF]*$/;
    return quotedStringTokenPattern.test(string);
  };
  /**
   * Lowercases the ASCII letters A-Z of a string and leaves every other
   * character, including non-ASCII uppercase letters, untouched.
   *
   * @param {string} string - Text to convert.
   * @returns {string} The ASCII-lowercased text.
   */
  const asciiLowercase = string => {
    const lowerOneLetter = upper => upper.toLowerCase();
    return string.replace(/[A-Z]/g, lowerOneLetter);
  };
  /**
   * Collects the contents of an HTTP quoted string.
   *
   * `position` is expected to point at the opening quote; that character is
   * skipped without being checked. Characters are copied until an unescaped
   * `"` or the end of the input. A backslash escapes the next character; a
   * backslash that is the very last character is kept literally.
   *
   * @param {string} input - The text being parsed.
   * @param {number} position - Index of the opening quote.
   * @returns {[string, number]} The unescaped contents and the index just past
   *   the closing quote (or past the end of what was consumed).
   */
  const collectAnHTTPQuotedString = (input, position) => {
    let cursor = position + 1;
    let collected = "";
    while (cursor < input.length) {
      const current = input[cursor];
      cursor += 1;
      if (current === '"') {
        // Closing quote: already consumed, nothing to append.
        break;
      }
      if (current !== "\\") {
        collected += current;
        continue;
      }
      if (cursor >= input.length) {
        // Dangling backslash at the end of the input is kept as-is.
        collected += "\\";
        break;
      }
      // Escaped character: take it verbatim.
      collected += input[cursor];
      cursor += 1;
    }
    return [collected, cursor];
  };
  /**
   * Tells whether a byte value is the ASCII code of a hexadecimal digit.
   *
   * @param {number} c - Byte value (may be undefined when read past the end).
   * @returns {boolean} True for the codes of 0-9, A-F and a-f.
   */
  function isASCIIHex(c) {
    const isDigit = c >= 0x30 && c <= 0x39;
    const isUpperHexLetter = c >= 0x41 && c <= 0x46;
    const isLowerHexLetter = c >= 0x61 && c <= 0x66;
    return isDigit || isUpperHexLetter || isLowerHexLetter;
  }
  /**
   * Percent-decodes a byte sequence.
   *
   * Every `%` byte followed by two ASCII hex digits is replaced by the byte
   * those digits encode; any other byte, including a `%` that is not followed
   * by two hex digits, is copied through unchanged.
   *
   * @param {Uint8Array} input - Bytes to decode (indexable, with `byteLength`).
   * @returns {Uint8Array} A new array holding the decoded bytes.
   */
  function percentDecodeBytes(input) {
    const total = input.byteLength;
    // Decoding never grows the data, so the input size is a safe upper bound.
    const decoded = new Uint8Array(total);
    let written = 0;
    let read = 0;
    while (read < total) {
      const current = input[read];
      const isEscape = current === 0x25 && isASCIIHex(input[read + 1]) && isASCIIHex(input[read + 2]);
      if (isEscape) {
        decoded[written] = parseInt(String.fromCodePoint(input[read + 1], input[read + 2]), 16);
        read += 3;
      } else {
        decoded[written] = current;
        read += 1;
      }
      written += 1;
    }
    return decoded.slice(0, written);
  }
  /**
   * The base64 alphabet in value order: the index of a character in this
   * string is the six-bit number that character stands for.
   */
  const characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  /**
   * Maps a base64 character to its six-bit value.
   *
   * @param {string} chr - A single base64 alphabet character.
   * @returns {number|undefined} The value 0-63, or undefined when the
   *   character is not found in the alphabet (nothing is thrown).
   */
  function atobLookup(chr) {
    const sixBitValue = characters.indexOf(chr);
    if (sixBitValue === -1) {
      // eslint-disable-next-line no-undefined
      return undefined;
    }
    return sixBitValue;
  }
  /**
   * Forgiving base64 decoder modelled on the HTML/Infra `atob()` algorithm,
   * except that invalid input yields `null` instead of throwing.
   *
   * @param {*} input - Value to decode; it is stringified first.
   * @returns {string|null} A string with one character per decoded byte
   *   (code units 0-255), or null when the input is not valid base64.
   */
  function atob(input) {
    /* eslint-disable no-bitwise */
    // Stringify via template literal, then drop all ASCII whitespace.
    const stripped = `${input}`.replace(/[ \t\n\f\r]/g, "");
    // Padding is only removed when the length is a multiple of four.
    const data = stripped.length % 4 === 0 ? stripped.replace(/==?$/, "") : stripped;
    // A remainder of one can never be valid, nor can any character outside
    // the base64 alphabet (which includes any leftover "=").
    const hasImpossibleLength = data.length % 4 === 1;
    if (hasImpossibleLength || /[^+/0-9A-Za-z]/.test(data)) {
      return null;
    }

    const decodedChars = [];
    // Bits are shifted in six at a time; bitCount tracks how many are pending.
    let bitBuffer = 0;
    let bitCount = 0;
    for (const symbol of data) {
      bitBuffer = (bitBuffer << 6) | atobLookup(symbol);
      bitCount += 6;
      if (bitCount === 24) {
        // Four symbols collected: emit them as three big-endian bytes.
        decodedChars.push(
          String.fromCharCode((bitBuffer >> 16) & 0xff),
          String.fromCharCode((bitBuffer >> 8) & 0xff),
          String.fromCharCode(bitBuffer & 0xff)
        );
        bitBuffer = 0;
        bitCount = 0;
      }
    }

    // Leftover bits: 12 bits carry one byte (drop 4), 18 bits carry two (drop 2).
    if (bitCount === 12) {
      decodedChars.push(String.fromCharCode(bitBuffer >> 4));
    } else if (bitCount === 18) {
      const twoBytes = bitBuffer >> 2;
      decodedChars.push(
        String.fromCharCode((twoBytes >> 8) & 0xff),
        String.fromCharCode(twoBytes & 0xff)
      );
    }
    /* eslint-enable no-bitwise */
    return decodedChars.join("");
  }
  /**
   * Parses a `data:` URL into its media type and decoded body.
   *
   * @param {string} stringInput - The URL text.
   * @returns {{type: (string|undefined), subtype: (string|undefined),
   *   parameters: Map<string, string>, isBase64: boolean, body: Buffer}|null}
   *   `null` when the input is not a parseable URL, is not a `data:` URL, has
   *   no comma separating media type from body, or is flagged base64 but does
   *   not decode. Otherwise a record whose `type`/`subtype` stay `undefined`
   *   when the media type is absent or malformed.
   */
  function parseDataUrl(stringInput) {
    let url;
    try {
      url = new URL(stringInput);
    } catch (error) {
      return null;
    }
    if (url.protocol !== "data:") {
      return null;
    }
    // Drop the fragment, then cut off the leading "data:" (5 characters).
    url.hash = "";
    const input = url.toString().substring(5);

    // Everything before the first comma is the media type; a comma is mandatory.
    const commaIndex = input.indexOf(",");
    if (commaIndex === -1) {
      return null;
    }
    let mediaType = input
      .slice(0, commaIndex)
      .replace(/^[ \t\n\f\r]+/, "")
      .replace(/[ \t\n\f\r]+$/, "");

    const encodedBody = input.slice(commaIndex + 1);
    let body = Buffer.from(percentDecodeBytes(Buffer.from(encodedBody, "utf-8")));

    // Can't use /i regexp flag because it isn't restricted to ASCII.
    const base64Match = /(.*); *[Bb][Aa][Ss][Ee]64$/.exec(mediaType);
    const isBase64 = base64Match !== null;
    if (isBase64) {
      const decoded = atob(body.toString("binary"));
      if (decoded === null) {
        return null;
      }
      body = Buffer.from(decoded, "binary");
      // Keep only the part of the media type in front of the ";base64" marker.
      mediaType = base64Match[1];
    }

    // A media type that starts with parameters gets text/plain as its essence.
    if (mediaType.startsWith(";")) {
      mediaType = `text/plain ${mediaType}`;
    }

    const result = {
      // eslint-disable-next-line no-undefined
      type: undefined,
      // eslint-disable-next-line no-undefined
      subtype: undefined,
      parameters: new Map(),
      isBase64,
      body
    };
    if (mediaType === "") {
      return result;
    }

    const mime = removeLeadingAndTrailingHTTPWhitespace(mediaType);

    // type: everything up to the "/" (which must be present).
    const slashIndex = mime.indexOf("/");
    const type = slashIndex === -1 ? mime : mime.slice(0, slashIndex);
    if (slashIndex === -1 || type.length === 0 || !solelyContainsHTTPTokenCodePoints(type)) {
      return result;
    }

    // subtype: from just after the "/" up to the first ";" or the end.
    const semicolonIndex = mime.indexOf(";", slashIndex + 1);
    let cursor = semicolonIndex === -1 ? mime.length : semicolonIndex;
    const subtype = removeTrailingHTTPWhitespace(mime.slice(slashIndex + 1, cursor));
    if (subtype.length === 0 || !solelyContainsHTTPTokenCodePoints(subtype)) {
      return result;
    }
    result.type = asciiLowercase(type);
    result.subtype = asciiLowercase(subtype);

    // Advances `cursor` to the next character contained in `stopChars` (or to
    // the end) and returns the text that was passed over.
    const takeUntil = stopChars => {
      const start = cursor;
      while (cursor < mime.length && !stopChars.includes(mime[cursor])) {
        cursor += 1;
      }
      return mime.slice(start, cursor);
    };

    // Parameters: each iteration starts with `cursor` on a ";".
    while (cursor < mime.length) {
      cursor += 1;
      while (isHTTPWhitespaceChar(mime[cursor])) {
        cursor += 1;
      }
      const parameterName = asciiLowercase(takeUntil(";="));
      if (cursor < mime.length) {
        if (mime[cursor] === ";") {
          // Name without a value: ignore it.
          // eslint-disable-next-line no-continue
          continue;
        }
        // Skip past "="
        cursor += 1;
      }

      let parameterValue;
      if (mime[cursor] === '"') {
        [parameterValue, cursor] = collectAnHTTPQuotedString(mime, cursor);
        // Anything between the closing quote and the next ";" is discarded.
        takeUntil(";");
      } else {
        parameterValue = removeTrailingHTTPWhitespace(takeUntil(";"));
        if (parameterValue === "") {
          // eslint-disable-next-line no-continue
          continue;
        }
      }

      // Only the first occurrence of a well-formed parameter is kept.
      const isStorable =
        parameterName.length > 0 &&
        solelyContainsHTTPTokenCodePoints(parameterName) &&
        soleyContainsHTTPQuotedStringTokenCodePoints(parameterValue) &&
        !result.parameters.has(parameterName);
      if (isStorable) {
        result.parameters.set(parameterName, parameterValue);
      }
    }
    return result;
  }