//
// dbx2txt.exe - Convert Outlook Express dbx email files to plain text.
//
// - Tested against 150 DBX files; over 1 GB of Outlook Express 5.0 email.
//   Seems to work on Outlook Express 6 also.
//
// - Hints on DBX file format obtained via hex dumps and Google.
//
// - There is some error checking but no fancy error recovery.
//
// 03-Jan-2005 /tvb
//

// NOTE(review): the header names were missing from the reviewed copy of this
// file; the list below is rebuilt from the identifiers the code uses
// (FILE/printf, exit/atoi, strcmp/strlen, time_t/ctime, assert, _stat) plus
// <stdint.h> for the fixed-width typedefs. Confirm against the original.
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/stat.h>

// Fixed-width integers used to overlay on-disk DBX structures.
typedef int32_t int32;
typedef int64_t int64;

int verbose = 1;        // Nonzero: show running message count on stderr.
int debug = 0;          // Debug level, 0 (off) to 5 (max).

// DEBUG(...) prints only when debug is nonzero.
// WARNING(...) counts the warning in Sum.warnings and prints it on stdout.
#define DEBUG debug && printf
#define WARNING ++Sum.warnings, printf

char *dbxpath;          // Input file name (argv[1]).
FILE *dbxfile;          // Input stream, opened "rb".
char *outpath;          // Output file name (argv[2]); "-" means stdout.
FILE *output;           // Output stream.
int first_message, last_message;    // Optional message range; 0 = no limit.

// Running totals reported at the end of the run.
struct {
    long headers;       // Header blocks read.
    long messages;      // Mail messages seen.
    long bytes;         // Bytes of message text read.
    long warnings;      // Calls to WARNING.
} Sum;

#define MAGIC 0xfe12adcf        // Expected first word of a DBX file.
#define MB (1024 * 1024)

//
// Define help message.
//

char Usage[] = {
    "Usage: dbx2txt input.dbx output.txt [start] [end] [debug]\n"
    "\n"
    "Options:\n"
    "- single message number\n"
    "- starting and ending message numbers\n"
    "- debug level: 0 (off) to 5 (max)\n"
    "\n"
    "Examples:\n"
    " dbx2txt Inbox.dbx inbox.txt\n"
    " dbx2txt Inbox.dbx m87.txt 87\n"
    " dbx2txt Inbox.dbx - | grep cesium\n"
    " dbx2txt Inbox.dbx - 1 10 | more\n"
    " dbx2txt Inbox.dbx NUL 1 0 5 > debug.out\n"
};

//
// Define macro to read input file at given offset.
//
// Seeks dbxfile to `offset` and reads exactly `size` bytes into `buf`.
// Any seek or read failure is fatal: a diagnostic goes to stderr and the
// program exits with status 1.
//
// Wrapped in do/while(0) so that "DBX_READ(...);" is a single statement
// and is safe inside an unbraced if/else. Arguments are evaluated more
// than once; do not pass expressions with side effects.
//

#define DBX_READ(offset,buf,size) \
    do { \
        if ((fseek(dbxfile, (long) (offset), SEEK_SET) != 0) || \
            (fread((buf), (size_t) (size), 1, dbxfile) != 1)) { \
            fprintf(stderr, "\n*** Fatal error while processing: %s\n", dbxpath); \
            fprintf(stderr, "*** fseek/fread failed, source %s, line %d\n", __FILE__, __LINE__); \
            fprintf(stderr, "*** Seek offset %ld (0x%.8lx)", \
                    (long) (offset), (unsigned long) (offset)); \
            fprintf(stderr, ", Read size %ld (0x%.8lx)\n", \
                    (long) (size), (unsigned long) (size)); \
            exit(1); \
        } \
    } while (0)

void list_header (int offset, int parent);
void mail_header (int offset);
void mail_message (long offset);

//
// Rough notes on DBX format (struct's defined in code below):
//
// Root - Fixed location block with pointers to first header.
// Head - Linked list of headers and base of variable length List[] array. // List - 3-word element with mail pointer and/or nested header pointer. // Mail - One per mail message and base of variable length Info[] array. // Info - 1-word element containing 1-byte key and 3-byte value. // Keys - Describes various parts of a mail message; sender, date, body, etc. // Text - Linked list of segments of message text. // void main (int argc, char *argv[]) { double pct; struct _stat st; struct { int32 magic[4]; int32 pad1[45]; int32 count; int32 pad2[7]; int32 offset; int32 pad3[70]; } Root; assert(sizeof Root == 512); // Get input and output filenames. if (argc > 2) { dbxpath = argv[1]; outpath = argv[2]; } else { fprintf(stderr, Usage); exit(0); } // Get optional message range and debug level. if (argc > 3) { first_message = last_message = atoi(argv[3]); verbose = 0; if (argc > 4) { last_message = atoi(argv[4]); } } if (argc > 5) { debug = atoi(argv[5]); } // Open output file (or stdout). if (strcmp(outpath, "-") == 0) { output = stdout; } else { output = fopen(outpath, "wb"); if (output == NULL) { fprintf(stderr, "%s: Open failed\n", outpath); exit(1); } } // Open dbx file, check signature, and find first header block. 
dbxfile = fopen(dbxpath, "rb"); if (dbxfile == NULL) { fprintf(stderr, "%s: Open failed\n", dbxpath); exit(1); } DBX_READ(0, &Root, sizeof Root); if (Root.magic[0] != MAGIC) { fprintf(stderr, "%s: Not in DBX format (magic 0x%.8lx != 0x%.8lx)\n", dbxpath, Root.magic[0], MAGIC); exit(1); } if ((Root.offset == 0) != (Root.count == 0)) { WARNING("%s: Root.offset %x && Root.count %x\n", dbxpath, Root.offset, Root.count); } if (Root.offset && Root.count) { list_header(Root.offset, 0); } if (Root.count != Sum.messages) { WARNING("%s: Root.count %d != Sum.messages %d\n", dbxpath, Root.count, Sum.messages); } if (_fstat(_fileno(dbxfile), &st) == 0 && st.st_size != 0) { pct = 100.0 * (double) Sum.bytes / (double) st.st_size; } else { pct = 0.0; } printf("%3ld header blocks, %4ld messages, %7.3lf MB, %3.0lf%%, %s", Sum.headers, Sum.messages, (double) Sum.bytes / MB, pct, dbxpath); if (Sum.warnings) { printf(" -- %ld warnings", Sum.warnings); } printf("\n"); fclose(dbxfile); fclose(output); } // // Process nested or linked list of arrays of message pointers. // void list_header (int offset, int parent) { int i, n; struct { int32 self; int32 zero; int32 next; int32 back; int32 count; int32 int6; } Head; do { Sum.headers += 1; // Read 6-word header. DBX_READ(offset, &Head, sizeof Head); if (debug >= 1) { printf("\n"); printf("%.8lx: header::\n", offset); printf(" Head.self %.8lx\n", Head.self); printf(" Head.zero %.8lx\n", Head.zero); printf(" Head.next %.8lx\n", Head.next); printf(" Head.back %.8lx\n", Head.back); printf(" Head.count %.8lx\n", Head.count); printf(" Head.int6 %.8lx\n", Head.int6); } if (Head.self != offset) { WARNING("%s: Head.self (%.8lx) != offset (%.8lx)\n", dbxpath, Head.self, offset); } if (Head.back != parent) { WARNING("%s: Head.back (%.8lx) != parent (%.8lx)\n", dbxpath, Head.back, parent); } if (Head.zero != 0) { WARNING("%s: Head.zero (%.8lx) != 0\n", dbxpath, Head.zero); } offset += sizeof Head; // Read array of 3-word message descriptors. 
n = (Head.count >> 8) & 0xff; for (i = 0; i < n; i += 1) { struct { int32 mail; int32 head; int32 int3; } List; DBX_READ(offset, &List, sizeof List); if (debug >= 2) { printf("%.8lx: list:: index %d / %d\n", offset, i, n); printf(" List.mail %.8lx\n", List.mail); printf(" List.head %.8lx\n", List.head); printf(" List.int3 %.8lx\n", List.int3); } if (List.mail) { mail_header(List.mail); } if (List.head) { list_header(List.head, Head.self); } offset += sizeof List; } parent = Head.self; offset = Head.next; } while (offset != 0); } // // Process one mail message. // void mail_header (int offset) { int i, n; int indirect; int32 date_offset, sender_offset, message_offset; struct { int32 self; int32 int2; int32 count; } Mail; Sum.messages += 1; if (verbose) { fprintf(stderr, "\r%d\r", Sum.messages); } if (first_message && Sum.messages < first_message) { return; } if (last_message && Sum.messages > last_message) { exit(0); } DBX_READ(offset, &Mail, sizeof Mail); if (debug >= 3) { printf("%.8lx: mail::\n", offset); printf(" Mail.self %.8lx\n", Mail.self); printf(" Mail.int2 %.8lx\n", Mail.int2); printf(" Mail.count %.8lx\n", Mail.count); } if (Mail.self != offset) { WARNING("%s: Mail.self (%.8lx) != offset (%.8lx)\n", dbxpath, Mail.self, offset); } offset += sizeof Mail; // Search components of the message for sender, date, and body. date_offset = 0; sender_offset = 0; message_offset = 0; indirect = 0; n = (Mail.count >> 16) & 0xff; if (n) { if (debug >= 4) { printf("%.8lx: keys:: count %d", offset, n); } for (i = 0; i < n; i += 1) { struct { unsigned key : 8; unsigned value : 24; } Info; DBX_READ(offset, &Info, sizeof Info); if (debug >= 4) { printf("%s%.6x-%.2x ", (i%6)? 
"":"\n ", Info.value, Info.key); } switch (Info.key) { case 0x0E : sender_offset = Info.value; break; case 0x12 : date_offset = Info.value; break; case 0x04 : indirect = 1; case 0x84 : message_offset = Info.value; break; } offset += sizeof Info; } if (debug >= 4) { printf("\n"); } } if (sender_offset == 0) { WARNING("%s: Message %ld: No sender address\n", dbxpath, Sum.messages); } else if (date_offset == 0) { WARNING("%s: Message %ld: Missing time stamp\n", dbxpath, Sum.messages); } else if (message_offset == 0) { WARNING("%s: Message %ld: Empty message text\n", dbxpath, Sum.messages); } // Create the special "From " line at top of email message. if (sender_offset && date_offset) { char sender[16]; time_t t32; // unix 1-Jan-1970 epoch, 1 s units int64 t64; // win32 1-Jan-1601 epoch, 100 ns units // Read null terminated sender string in chunks. fputs("From ", output); n = sizeof sender - 1; sender[n] = '\0'; do { DBX_READ(offset + sender_offset, sender, n); sender_offset += n; fputs(sender, output); } while (n - strlen(sender) == 0); fputs(" ", output); // Read Win32 FILETIME and convert to UNIX time_t/ctime. DBX_READ(offset + date_offset, &t64, sizeof t64); t64 /= (int64) 10000000; // 100 ns to s t64 -= (int64) 11644473600; // 134774 days t32 = (time_t) t64; fputs(ctime(&t32), output); } else { fputs("From -\n", output); } if (message_offset) { if (indirect) { DBX_READ(offset + message_offset, &message_offset, sizeof (int32)); } mail_message(message_offset); fputc('\n', output); } } // // Extract pieces of message, one linked text buffer at a time. 
// void mail_message (long offset) { char buf[512]; char c; int i, n; int resid; struct { int32 self; int32 size; int32 count; int32 next; } Text; do { DBX_READ(offset, &Text, sizeof Text); if (debug >= 5) { printf("\n%.8lx: text::\n", offset); printf(" Text.self %.8lx\n", Text.self); printf(" Text.size %.8lx\n", Text.size); printf(" Text.count %.8lx\n", Text.count); printf(" Text.next %.8lx\n", Text.next); } offset += sizeof Text; resid = Text.count; // N.B. Text amount may be more or less than buffer size. do { n = min(resid, sizeof buf); DBX_READ(offset, buf, n); for (i = 0; i < n; i += 1) { c = buf[i]; if (c != '\r') { fputc(c, output); } } offset += n; Sum.bytes += n; resid -= n; } while (resid > 0); offset = Text.next; } while (offset != 0); if (c != '\n') { WARNING("%s: Message %ld: Missing final newline\n", dbxpath, Sum.messages); fputc('\n', output); } }