mdb_load.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. /* mdb_load.c - memory-mapped database load tool */
  2. /*
  3. * Copyright 2011-2021 Howard Chu, Symas Corp.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted only as authorized by the OpenLDAP
  8. * Public License.
  9. *
  10. * A copy of this license is available in the file LICENSE in the
  11. * top-level directory of the distribution or, alternatively, at
  12. * <http://www.OpenLDAP.org/license.html>.
  13. */
  14. #include <stdio.h>
  15. #include <stdlib.h>
  16. #include <errno.h>
  17. #include <string.h>
  18. #include <ctype.h>
  19. #include <unistd.h>
  20. #include "lmdb.h"
  21. #define PRINT 1
  22. #define NOHDR 2
  23. static int mode;
  24. static char *subname = NULL;
  25. static size_t lineno;
  26. static int version;
  27. static int flags;
  28. static char *prog;
  29. static int Eof;
  30. static MDB_envinfo info;
  31. static MDB_val kbuf, dbuf;
  32. static MDB_val k0buf;
  33. #ifdef _WIN32
  34. #define Z "I"
  35. #else
  36. #define Z "z"
  37. #endif
  38. #define STRLENOF(s) (sizeof(s)-1)
  39. typedef struct flagbit {
  40. int bit;
  41. char *name;
  42. int len;
  43. } flagbit;
  44. #define S(s) s, STRLENOF(s)
  45. flagbit dbflags[] = {
  46. { MDB_REVERSEKEY, S("reversekey") },
  47. { MDB_DUPSORT, S("dupsort") },
  48. { MDB_INTEGERKEY, S("integerkey") },
  49. { MDB_DUPFIXED, S("dupfixed") },
  50. { MDB_INTEGERDUP, S("integerdup") },
  51. { MDB_REVERSEDUP, S("reversedup") },
  52. { 0, NULL, 0 }
  53. };
  54. static void readhdr(void)
  55. {
  56. char *ptr;
  57. flags = 0;
  58. while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) {
  59. lineno++;
  60. if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) {
  61. version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION="));
  62. if (version > 3) {
  63. fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n",
  64. prog, lineno, version);
  65. exit(EXIT_FAILURE);
  66. }
  67. } else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) {
  68. break;
  69. } else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) {
  70. if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print")))
  71. mode |= PRINT;
  72. else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) {
  73. fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n",
  74. prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT="));
  75. exit(EXIT_FAILURE);
  76. }
  77. } else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) {
  78. ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
  79. if (ptr) *ptr = '\0';
  80. if (subname) free(subname);
  81. subname = strdup((char *)dbuf.mv_data+STRLENOF("database="));
  82. } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) {
  83. if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) {
  84. fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n",
  85. prog, lineno, (char *)dbuf.mv_data+STRLENOF("type="));
  86. exit(EXIT_FAILURE);
  87. }
  88. } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) {
  89. int i;
  90. ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
  91. if (ptr) *ptr = '\0';
  92. i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr);
  93. if (i != 1) {
  94. fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n",
  95. prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr="));
  96. exit(EXIT_FAILURE);
  97. }
  98. } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) {
  99. int i;
  100. ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
  101. if (ptr) *ptr = '\0';
  102. i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize);
  103. if (i != 1) {
  104. fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n",
  105. prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize="));
  106. exit(EXIT_FAILURE);
  107. }
  108. } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) {
  109. int i;
  110. ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
  111. if (ptr) *ptr = '\0';
  112. i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders);
  113. if (i != 1) {
  114. fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n",
  115. prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders="));
  116. exit(EXIT_FAILURE);
  117. }
  118. } else {
  119. int i;
  120. for (i=0; dbflags[i].bit; i++) {
  121. if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) &&
  122. ((char *)dbuf.mv_data)[dbflags[i].len] == '=') {
  123. flags |= dbflags[i].bit;
  124. break;
  125. }
  126. }
  127. if (!dbflags[i].bit) {
  128. ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size);
  129. if (!ptr) {
  130. fprintf(stderr, "%s: line %" Z "d: unexpected format\n",
  131. prog, lineno);
  132. exit(EXIT_FAILURE);
  133. } else {
  134. *ptr = '\0';
  135. fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n",
  136. prog, lineno, (char *)dbuf.mv_data);
  137. }
  138. }
  139. }
  140. }
  141. }
  142. static void badend(void)
  143. {
  144. fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n",
  145. prog, lineno);
  146. }
  /* Value of a single hex digit character.
   * Masking with 0x4f removes the bits that distinguish '0'..'9' from their
   * numeric value and folds lower case onto upper case; letters keep bit
   * 0x40, and subtracting 55 maps 'A' (0x41) to 10.
   */
  static int hexdigit(unsigned char ch)
  {
      int v = ch & 0x4f;

      return (v & 0x40) ? v - 55 : v;
  }

  /* Combine the two hex digit characters at c2[0] and c2[1] into one byte
   * value, high nibble first. The characters are not validated here.
   */
  static int unhex(unsigned char *c2)
  {
      return (hexdigit(c2[0]) << 4) | hexdigit(c2[1]);
  }
  160. static int readline(MDB_val *out, MDB_val *buf)
  161. {
  162. unsigned char *c1, *c2, *end;
  163. size_t len, l2;
  164. int c;
  165. if (!(mode & NOHDR)) {
  166. c = fgetc(stdin);
  167. if (c == EOF) {
  168. Eof = 1;
  169. return EOF;
  170. }
  171. if (c != ' ') {
  172. lineno++;
  173. if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
  174. badend:
  175. Eof = 1;
  176. badend();
  177. return EOF;
  178. }
  179. if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END")))
  180. return EOF;
  181. goto badend;
  182. }
  183. }
  184. if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
  185. Eof = 1;
  186. return EOF;
  187. }
  188. lineno++;
  189. c1 = buf->mv_data;
  190. len = strlen((char *)c1);
  191. l2 = len;
  192. /* Is buffer too short? */
  193. while (c1[len-1] != '\n') {
  194. buf->mv_data = realloc(buf->mv_data, buf->mv_size*2);
  195. if (!buf->mv_data) {
  196. Eof = 1;
  197. fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n",
  198. prog, lineno);
  199. return EOF;
  200. }
  201. c1 = buf->mv_data;
  202. c1 += l2;
  203. if (fgets((char *)c1, buf->mv_size+1, stdin) == NULL) {
  204. Eof = 1;
  205. badend();
  206. return EOF;
  207. }
  208. buf->mv_size *= 2;
  209. len = strlen((char *)c1);
  210. l2 += len;
  211. }
  212. c1 = c2 = buf->mv_data;
  213. len = l2;
  214. c1[--len] = '\0';
  215. end = c1 + len;
  216. if (mode & PRINT) {
  217. while (c2 < end) {
  218. if (*c2 == '\\') {
  219. if (c2[1] == '\\') {
  220. *c1++ = *c2;
  221. } else {
  222. if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) {
  223. Eof = 1;
  224. badend();
  225. return EOF;
  226. }
  227. *c1++ = unhex(++c2);
  228. }
  229. c2 += 2;
  230. } else {
  231. /* copies are redundant when no escapes were used */
  232. *c1++ = *c2++;
  233. }
  234. }
  235. } else {
  236. /* odd length not allowed */
  237. if (len & 1) {
  238. Eof = 1;
  239. badend();
  240. return EOF;
  241. }
  242. while (c2 < end) {
  243. if (!isxdigit(*c2) || !isxdigit(c2[1])) {
  244. Eof = 1;
  245. badend();
  246. return EOF;
  247. }
  248. *c1++ = unhex(c2);
  249. c2 += 2;
  250. }
  251. }
  252. c2 = out->mv_data = buf->mv_data;
  253. out->mv_size = c1 - c2;
  254. return 0;
  255. }
  256. static void usage(void)
  257. {
  258. fprintf(stderr, "usage: %s [-V] [-a] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog);
  259. exit(EXIT_FAILURE);
  260. }
  261. static int greater(const MDB_val *a, const MDB_val *b)
  262. {
  263. return 1;
  264. }
  265. int main(int argc, char *argv[])
  266. {
  267. int i, rc;
  268. MDB_env *env;
  269. MDB_txn *txn;
  270. MDB_cursor *mc;
  271. MDB_dbi dbi;
  272. char *envname;
  273. int envflags = MDB_NOSYNC, putflags = 0;
  274. int dohdr = 0, append = 0;
  275. MDB_val prevk;
  276. prog = argv[0];
  277. if (argc < 2) {
  278. usage();
  279. }
  280. /* -a: append records in input order
  281. * -f: load file instead of stdin
  282. * -n: use NOSUBDIR flag on env_open
  283. * -s: load into named subDB
  284. * -N: use NOOVERWRITE on puts
  285. * -T: read plaintext
  286. * -V: print version and exit
  287. */
  288. while ((i = getopt(argc, argv, "af:ns:NTV")) != EOF) {
  289. switch(i) {
  290. case 'V':
  291. printf("%s\n", MDB_VERSION_STRING);
  292. exit(0);
  293. break;
  294. case 'a':
  295. append = 1;
  296. break;
  297. case 'f':
  298. if (freopen(optarg, "r", stdin) == NULL) {
  299. fprintf(stderr, "%s: %s: reopen: %s\n",
  300. prog, optarg, strerror(errno));
  301. exit(EXIT_FAILURE);
  302. }
  303. break;
  304. case 'n':
  305. envflags |= MDB_NOSUBDIR;
  306. break;
  307. case 's':
  308. subname = strdup(optarg);
  309. break;
  310. case 'N':
  311. putflags = MDB_NOOVERWRITE|MDB_NODUPDATA;
  312. break;
  313. case 'T':
  314. mode |= NOHDR | PRINT;
  315. break;
  316. default:
  317. usage();
  318. }
  319. }
  320. if (optind != argc - 1)
  321. usage();
  322. dbuf.mv_size = 4096;
  323. dbuf.mv_data = malloc(dbuf.mv_size);
  324. if (!(mode & NOHDR))
  325. readhdr();
  326. envname = argv[optind];
  327. rc = mdb_env_create(&env);
  328. if (rc) {
  329. fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc));
  330. return EXIT_FAILURE;
  331. }
  332. mdb_env_set_maxdbs(env, 2);
  333. if (info.me_maxreaders)
  334. mdb_env_set_maxreaders(env, info.me_maxreaders);
  335. if (info.me_mapsize)
  336. mdb_env_set_mapsize(env, info.me_mapsize);
  337. if (info.me_mapaddr)
  338. envflags |= MDB_FIXEDMAP;
  339. rc = mdb_env_open(env, envname, envflags, 0664);
  340. if (rc) {
  341. fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
  342. goto env_close;
  343. }
  344. kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2;
  345. kbuf.mv_data = malloc(kbuf.mv_size * 2);
  346. k0buf.mv_size = kbuf.mv_size;
  347. k0buf.mv_data = (char *)kbuf.mv_data + kbuf.mv_size;
  348. prevk.mv_data = k0buf.mv_data;
  349. while(!Eof) {
  350. MDB_val key, data;
  351. int batch = 0;
  352. flags = 0;
  353. int appflag;
  354. if (!dohdr) {
  355. dohdr = 1;
  356. } else if (!(mode & NOHDR))
  357. readhdr();
  358. rc = mdb_txn_begin(env, NULL, 0, &txn);
  359. if (rc) {
  360. fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
  361. goto env_close;
  362. }
  363. rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi);
  364. if (rc) {
  365. fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc));
  366. goto txn_abort;
  367. }
  368. prevk.mv_size = 0;
  369. if (append) {
  370. mdb_set_compare(txn, dbi, greater);
  371. if (flags & MDB_DUPSORT)
  372. mdb_set_dupsort(txn, dbi, greater);
  373. }
  374. rc = mdb_cursor_open(txn, dbi, &mc);
  375. if (rc) {
  376. fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
  377. goto txn_abort;
  378. }
  379. while(1) {
  380. rc = readline(&key, &kbuf);
  381. if (rc) /* rc == EOF */
  382. break;
  383. rc = readline(&data, &dbuf);
  384. if (rc) {
  385. fprintf(stderr, "%s: line %" Z "d: failed to read key value\n", prog, lineno);
  386. goto txn_abort;
  387. }
  388. if (append) {
  389. appflag = MDB_APPEND;
  390. if (flags & MDB_DUPSORT) {
  391. if (prevk.mv_size == key.mv_size && !memcmp(prevk.mv_data, key.mv_data, key.mv_size))
  392. appflag = MDB_CURRENT|MDB_APPENDDUP;
  393. else {
  394. memcpy(prevk.mv_data, key.mv_data, key.mv_size);
  395. prevk.mv_size = key.mv_size;
  396. }
  397. }
  398. } else {
  399. appflag = 0;
  400. }
  401. rc = mdb_cursor_put(mc, &key, &data, putflags|appflag);
  402. if (rc == MDB_KEYEXIST && putflags)
  403. continue;
  404. if (rc) {
  405. fprintf(stderr, "mdb_cursor_put failed, error %d %s\n", rc, mdb_strerror(rc));
  406. goto txn_abort;
  407. }
  408. batch++;
  409. if (batch == 100) {
  410. rc = mdb_txn_commit(txn);
  411. if (rc) {
  412. fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n",
  413. prog, lineno, mdb_strerror(rc));
  414. goto env_close;
  415. }
  416. rc = mdb_txn_begin(env, NULL, 0, &txn);
  417. if (rc) {
  418. fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
  419. goto env_close;
  420. }
  421. rc = mdb_cursor_open(txn, dbi, &mc);
  422. if (rc) {
  423. fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
  424. goto txn_abort;
  425. }
  426. if (appflag & MDB_APPENDDUP) {
  427. MDB_val k, d;
  428. mdb_cursor_get(mc, &k, &d, MDB_LAST);
  429. }
  430. batch = 0;
  431. }
  432. }
  433. rc = mdb_txn_commit(txn);
  434. txn = NULL;
  435. if (rc) {
  436. fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n",
  437. prog, lineno, mdb_strerror(rc));
  438. goto env_close;
  439. }
  440. mdb_dbi_close(env, dbi);
  441. }
  442. txn_abort:
  443. mdb_txn_abort(txn);
  444. env_close:
  445. mdb_env_close(env);
  446. return rc ? EXIT_FAILURE : EXIT_SUCCESS;
  447. }