diff --git a/tools/gpgparsemail.c b/tools/gpgparsemail.c index b12209755..c5f4e6cd0 100644 --- a/tools/gpgparsemail.c +++ b/tools/gpgparsemail.c @@ -1,816 +1,816 @@ /* gpgparsemail.c - Standalone crypto mail parser * Copyright (C) 2004, 2007 Free Software Foundation, Inc. * * This file is part of GnuPG. * * GnuPG is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * GnuPG is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ /* This utility prints an RFC822, possible MIME structured, message in an annotated format with the first column having an indicator for the content of the line. Several options are available to scrutinize the message. S/MIME and OpenPGP support is included. */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "rfc822parse.h" #define PGM "gpgparsemail" /* Option flags. */ static int verbose; static int debug; static int opt_crypto; /* Decrypt or verify messages. */ static int opt_no_header; /* Don't output the header lines. */ /* Structure used to communicate with the parser callback. */ struct parse_info_s { int show_header; /* Show the header lines. */ int show_data; /* Show the data lines. */ unsigned int skip_show; /* Temporary disable above for these number of lines. */ int show_data_as_note; /* The next data line should be shown as a note. */ int show_boundary; int nesting_level; int is_pkcs7; /* Old style S/MIME message. 
*/ int smfm_state; /* State of PGP/MIME or S/MIME parsing. */ int is_smime; /* This is S/MIME and not PGP/MIME. */ const char *signing_protocol; const char *signing_protocol_2; /* there are two ways to present PKCS7 */ int hashing_level; /* The nesting level we are hashing. */ int hashing; FILE *hash_file; FILE *sig_file; /* Signature part with MIME or full pkcs7 data if IS_PCKS7 is set. */ int verify_now; /* Flag set when all signature data is available. */ }; /* Print diagnostic message and exit with failure. */ static void die (const char *format, ...) { va_list arg_ptr; fflush (stdout); fprintf (stderr, "%s: ", PGM); va_start (arg_ptr, format); vfprintf (stderr, format, arg_ptr); va_end (arg_ptr); putc ('\n', stderr); exit (1); } /* Print diagnostic message. */ static void err (const char *format, ...) { va_list arg_ptr; fflush (stdout); fprintf (stderr, "%s: ", PGM); va_start (arg_ptr, format); vfprintf (stderr, format, arg_ptr); va_end (arg_ptr); putc ('\n', stderr); } static void * xmalloc (size_t n) { void *p = malloc (n); if (!p) die ("out of core: %s", strerror (errno)); return p; } /* static void * */ /* xcalloc (size_t n, size_t m) */ /* { */ /* void *p = calloc (n, m); */ /* if (!p) */ /* die ("out of core: %s", strerror (errno)); */ /* return p; */ /* } */ /* static void * */ /* xrealloc (void *old, size_t n) */ /* { */ /* void *p = realloc (old, n); */ /* if (!p) */ /* die ("out of core: %s", strerror (errno)); */ /* return p; */ /* } */ /* static char * */ /* xstrdup (const char *string) */ /* { */ /* void *p = malloc (strlen (string)+1); */ /* if (!p) */ /* die ("out of core: %s", strerror (errno)); */ /* strcpy (p, string); */ /* return p; */ /* } */ #ifndef HAVE_STPCPY static char * stpcpy (char *a,const char *b) { while (*b) *a++ = *b++; *a = 0; return (char*)a; } #endif static int run_gnupg (int smime, int sig_fd, int data_fd, int *close_list) { int rp[2]; pid_t pid; int i, c, is_status; unsigned int pos; char status_buf[10]; FILE *fp; if (pipe 
(rp) == -1) die ("error creating a pipe: %s", strerror (errno)); pid = fork (); if (pid == -1) die ("error forking process: %s", strerror (errno)); if (!pid) { /* Child. */ char data_fd_buf[50]; int fd; /* Connect our signature fd to stdin. */ if (sig_fd != 0) { if (dup2 (sig_fd, 0) == -1) die ("dup2 stdin failed: %s", strerror (errno)); } /* Keep our data fd and format it for gpg/gpgsm use. */ if (data_fd == -1) *data_fd_buf = 0; else sprintf (data_fd_buf, "-&%d", data_fd); /* Send stdout to the bit bucket. */ fd = open ("/dev/null", O_WRONLY); if (fd == -1) die ("can't open '/dev/null': %s", strerror (errno)); if (fd != 1) { if (dup2 (fd, 1) == -1) die ("dup2 stderr failed: %s", strerror (errno)); } /* Connect stderr to our pipe. */ if (rp[1] != 2) { if (dup2 (rp[1], 2) == -1) die ("dup2 stderr failed: %s", strerror (errno)); } /* Close other files. */ for (i=0; (fd=close_list[i]) != -1; i++) if (fd > 2 && fd != data_fd) close (fd); errno = 0; if (smime) execlp ("gpgsm", "gpgsm", "--enable-special-filenames", "--status-fd", "2", "--assume-base64", "--verify", "--", "-", data_fd == -1? NULL : data_fd_buf, NULL); else execlp ("gpg", "gpg", "--enable-special-filenames", "--status-fd", "2", "--verify", "--debug=512", "--", "-", data_fd == -1? NULL : data_fd_buf, NULL); die ("failed to exec the crypto command: %s", strerror (errno)); } /* Parent. 
*/ close (rp[1]); fp = fdopen (rp[0], "r"); if (!fp) die ("can't fdopen pipe for reading: %s", strerror (errno)); pos = 0; is_status = 0; assert (sizeof status_buf > 9); while ((c=getc (fp)) != EOF) { if (pos < 9) status_buf[pos] = c; else { if (pos == 9) { is_status = !memcmp (status_buf, "[GNUPG:] ", 9); if (is_status) fputs ( "c ", stdout); else if (verbose) fputs ( "# ", stdout); fwrite (status_buf, 9, 1, stdout); } putchar (c); } if (c == '\n') { if (verbose && pos < 9) { fputs ( "# ", stdout); fwrite (status_buf, pos+1, 1, stdout); } pos = 0; } else pos++; } if (pos) { if (verbose && pos < 9) { fputs ( "# ", stdout); fwrite (status_buf, pos+1, 1, stdout); } putchar ('\n'); } fclose (fp); while ( (i=waitpid (pid, NULL, 0)) == -1 && errno == EINTR) ; if (i == -1) die ("waiting for child failed: %s", strerror (errno)); return 0; } /* Verify the signature in the current temp files. */ static void verify_signature (struct parse_info_s *info) { int close_list[10]; if (info->is_pkcs7) { assert (!info->hash_file); assert (info->sig_file); rewind (info->sig_file); } else { assert (info->hash_file); assert (info->sig_file); rewind (info->hash_file); rewind (info->sig_file); } /* printf ("# Begin hashed data\n"); */ /* while ( (c=getc (info->hash_file)) != EOF) */ /* putchar (c); */ /* printf ("# End hashed data signature\n"); */ /* printf ("# Begin signature\n"); */ /* while ( (c=getc (info->sig_file)) != EOF) */ /* putchar (c); */ /* printf ("# End signature\n"); */ /* rewind (info->hash_file); */ /* rewind (info->sig_file); */ close_list[0] = -1; run_gnupg (info->is_smime, fileno (info->sig_file), info->hash_file ? fileno (info->hash_file) : -1, close_list); } /* Prepare for a multipart/signed. 
FIELD_CTX is the parsed context of the content-type header.*/ static void mime_signed_begin (struct parse_info_s *info, rfc822parse_t msg, rfc822parse_field_t field_ctx) { const char *s; (void)msg; s = rfc822parse_query_parameter (field_ctx, "protocol", 1); if (s) { printf ("h signed.protocol: %s\n", s); if (!strcmp (s, "application/pgp-signature")) { if (info->smfm_state) err ("note: ignoring nested PGP/MIME or S/MIME signature"); else { info->smfm_state = 1; info->is_smime = 0; info->signing_protocol = "application/pgp-signature"; info->signing_protocol_2 = NULL; } } else if (!strcmp (s, "application/pkcs7-signature") || !strcmp (s, "application/x-pkcs7-signature")) { if (info->smfm_state) err ("note: ignoring nested PGP/MIME or S/MIME signature"); else { info->smfm_state = 1; info->is_smime = 1; info->signing_protocol = "application/pkcs7-signature"; info->signing_protocol_2 = "application/x-pkcs7-signature"; } } else if (verbose) printf ("# this protocol is not supported\n"); } } /* Prepare for a multipart/encrypted. FIELD_CTX is the parsed context of the content-type header.*/ static void mime_encrypted_begin (struct parse_info_s *info, rfc822parse_t msg, rfc822parse_field_t field_ctx) { const char *s; (void)info; (void)msg; s = rfc822parse_query_parameter (field_ctx, "protocol", 0); if (s) printf ("h encrypted.protocol: %s\n", s); } /* Prepare for old-style pkcs7 messages. */ static void pkcs7_begin (struct parse_info_s *info, rfc822parse_t msg, rfc822parse_field_t field_ctx) { const char *s; (void)msg; s = rfc822parse_query_parameter (field_ctx, "name", 0); if (s) printf ("h pkcs7.name: %s\n", s); if (info->is_pkcs7) err ("note: ignoring nested pkcs7 data"); else { info->is_pkcs7 = 1; if (opt_crypto) { assert (!info->sig_file); info->sig_file = tmpfile (); if (!info->sig_file) die ("error creating temp file: %s", strerror (errno)); } } } /* Print the event received by the parser for debugging as comment line. 
*/ static void show_event (rfc822parse_event_t event) { const char *s; switch (event) { case RFC822PARSE_OPEN: s= "Open"; break; case RFC822PARSE_CLOSE: s= "Close"; break; case RFC822PARSE_CANCEL: s= "Cancel"; break; case RFC822PARSE_T2BODY: s= "T2Body"; break; case RFC822PARSE_FINISH: s= "Finish"; break; case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break; case RFC822PARSE_LEVEL_DOWN: s= "Level_Down"; break; case RFC822PARSE_LEVEL_UP: s= "Level_Up"; break; case RFC822PARSE_BOUNDARY: s= "Boundary"; break; case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break; case RFC822PARSE_BEGIN_HEADER: s= "Begin_Header"; break; case RFC822PARSE_PREAMBLE: s= "Preamble"; break; case RFC822PARSE_EPILOGUE: s= "Epilogue"; break; default: s= "[unknown event]"; break; } printf ("# *** got RFC822 event %s\n", s); } /* This function is called by the parser to communicate events. This callback comminucates with the main program using a structure passed in OPAQUE. Should return 0 or set errno and return -1. */ static int message_cb (void *opaque, rfc822parse_event_t event, rfc822parse_t msg) { struct parse_info_s *info = opaque; if (debug) show_event (event); if (event == RFC822PARSE_BEGIN_HEADER || event == RFC822PARSE_T2BODY) { /* We need to check here whether to start collecting signed data because attachments might come without header lines and thus we won't see the BEGIN_HEADER event. */ if (info->smfm_state == 1) { printf ("c begin_hash\n"); info->hashing = 1; info->hashing_level = info->nesting_level; info->smfm_state++; if (opt_crypto) { assert (!info->hash_file); info->hash_file = tmpfile (); if (!info->hash_file) die ("failed to create temporary file: %s", strerror (errno)); } } } if (event == RFC822PARSE_OPEN) { /* Initialize for a new message. 
*/ info->show_header = 1; } else if (event == RFC822PARSE_T2BODY) { rfc822parse_field_t ctx; ctx = rfc822parse_parse_field (msg, "Content-Type", -1); if (ctx) { const char *s1, *s2; s1 = rfc822parse_query_media_type (ctx, &s2); if (s1) { printf ("h media: %*s%s %s\n", info->nesting_level*2, "", s1, s2); if (info->smfm_state == 3) { char *buf = xmalloc (strlen (s1) + strlen (s2) + 2); strcpy (stpcpy (stpcpy (buf, s1), "/"), s2); assert (info->signing_protocol); if (strcmp (buf, info->signing_protocol) && (!info->signing_protocol_2 || strcmp (buf,info->signing_protocol_2))) err ("invalid %s structure; expected %s%s%s, found '%s'", info->is_smime? "S/MIME":"PGP/MIME", info->signing_protocol, info->signing_protocol_2 ? " or " : "", info->signing_protocol_2 ? info->signing_protocol_2:"", buf); else { printf ("c begin_signature\n"); info->smfm_state++; if (opt_crypto) { assert (!info->sig_file); info->sig_file = tmpfile (); if (!info->sig_file) die ("error creating temp file: %s", strerror (errno)); } } free (buf); } else if (!strcmp (s1, "multipart")) { if (!strcmp (s2, "signed")) mime_signed_begin (info, msg, ctx); else if (!strcmp (s2, "encrypted")) mime_encrypted_begin (info, msg, ctx); } else if (!strcmp (s1, "application") && (!strcmp (s2, "pkcs7-mime") || !strcmp (s2, "x-pkcs7-mime"))) pkcs7_begin (info, msg, ctx); } else printf ("h media: %*s none\n", info->nesting_level*2, ""); rfc822parse_release_field (ctx); } else printf ("h media: %*stext plain [assumed]\n", info->nesting_level*2, ""); info->show_header = 0; info->show_data = 1; info->skip_show = 1; } else if (event == RFC822PARSE_PREAMBLE) info->show_data_as_note = 1; else if (event == RFC822PARSE_LEVEL_DOWN) { printf ("b down\n"); info->nesting_level++; } else if (event == RFC822PARSE_LEVEL_UP) { printf ("b up\n"); if (info->nesting_level) info->nesting_level--; else err ("invalid structure (bad nesting level)"); } else if (event == RFC822PARSE_BOUNDARY || event == RFC822PARSE_LAST_BOUNDARY) { 
info->show_data = 0; info->show_boundary = 1; if (event == RFC822PARSE_BOUNDARY) { info->show_header = 1; info->skip_show = 1; printf ("b part\n"); } else printf ("b last\n"); if (info->smfm_state == 2 && info->nesting_level == info->hashing_level) { printf ("c end_hash\n"); info->smfm_state++; info->hashing = 0; } else if (info->smfm_state == 4) { printf ("c end_signature\n"); info->verify_now = 1; } } return 0; } /* Read a message from FP and process it according to the global options. */ static void parse_message (FILE *fp) { char line[5000]; size_t length; rfc822parse_t msg; unsigned int lineno = 0; int no_cr_reported = 0; struct parse_info_s info; memset (&info, 0, sizeof info); msg = rfc822parse_open (message_cb, &info); if (!msg) die ("can't open parser: %s", strerror (errno)); /* Fixme: We should not use fgets because it can't cope with embedded nul characters. */ while (fgets (line, sizeof (line), fp)) { lineno++; if (lineno == 1 && !strncmp (line, "From ", 5)) continue; /* We better ignore a leading From line. */ length = strlen (line); if (length && line[length - 1] == '\n') line[--length] = 0; else err ("line number %u too long or last line not terminated", lineno); if (length && line[length - 1] == '\r') line[--length] = 0; else if (verbose && !no_cr_reported) { err ("non canonical ended line detected (line %u)", lineno); no_cr_reported = 1; } if (rfc822parse_insert (msg, line, length)) - die ("parser failed: %s", strerror (errno)); + die ("parser failed"); if (info.hashing) { /* Delay hashing of the CR/LF because the last line ending belongs to the next boundary. 
*/ if (debug) printf ("# hashing %s'%s'\n", info.hashing==2?"CR,LF+":"", line); if (opt_crypto) { if (info.hashing == 2) fputs ("\r\n", info.hash_file); fputs (line, info.hash_file); if (ferror (info.hash_file)) die ("error writing to temporary file: %s", strerror (errno)); } info.hashing = 2; } if (info.sig_file && opt_crypto) { if (info.verify_now) { verify_signature (&info); if (info.hash_file) fclose (info.hash_file); info.hash_file = NULL; fclose (info.sig_file); info.sig_file = NULL; info.smfm_state = 0; info.is_smime = 0; info.is_pkcs7 = 0; } else { fputs (line, info.sig_file); fputs ("\r\n", info.sig_file); if (ferror (info.sig_file)) die ("error writing to temporary file: %s", strerror (errno)); } } if (info.show_boundary) { if (!opt_no_header) printf (":%s\n", line); info.show_boundary = 0; } if (info.skip_show) info.skip_show--; else if (info.show_data) { if (info.show_data_as_note) { if (verbose) printf ("# DATA: %s\n", line); info.show_data_as_note = 0; } else printf (" %s\n", line); } else if (info.show_header && !opt_no_header) printf (".%s\n", line); } if (info.sig_file && opt_crypto && info.is_pkcs7) { verify_signature (&info); fclose (info.sig_file); info.sig_file = NULL; info.is_pkcs7 = 0; } rfc822parse_close (msg); } int main (int argc, char **argv) { int last_argc = -1; if (argc) { argc--; argv++; } while (argc && last_argc != argc ) { last_argc = argc; if (!strcmp (*argv, "--")) { argc--; argv++; break; } else if (!strcmp (*argv, "--help")) { puts ( "Usage: " PGM " [OPTION] [FILE]\n" "Parse a mail message into an annotated format.\n\n" " --crypto decrypt or verify messages\n" " --no-header don't output the header lines\n" " --verbose enable extra informational output\n" " --debug enable additional debug output\n" " --help display this help and exit\n\n" "With no FILE, or when FILE is -, read standard input.\n\n" "WARNING: This tool is under development.\n" " The semantics may change without notice\n\n" "Report bugs to ."); exit (0); } else if 
(!strcmp (*argv, "--verbose")) { verbose = 1; argc--; argv++; } else if (!strcmp (*argv, "--debug")) { verbose = debug = 1; argc--; argv++; } else if (!strcmp (*argv, "--crypto")) { opt_crypto = 1; argc--; argv++; } else if (!strcmp (*argv, "--no-header")) { opt_no_header = 1; argc--; argv++; } } if (argc > 1) die ("usage: " PGM " [OPTION] [FILE] (try --help for more information)\n"); signal (SIGPIPE, SIG_IGN); if (argc && strcmp (*argv, "-")) { FILE *fp = fopen (*argv, "rb"); if (!fp) die ("can't open '%s': %s", *argv, strerror (errno)); parse_message (fp); fclose (fp); } else parse_message (stdin); return 0; } /* Local Variables: compile-command: "gcc -Wall -Wno-pointer-sign -g -o gpgparsemail rfc822parse.c gpgparsemail.c" End: */ diff --git a/tools/rfc822parse.c b/tools/rfc822parse.c index f1e95bd34..0280796fe 100644 --- a/tools/rfc822parse.c +++ b/tools/rfc822parse.c @@ -1,1331 +1,1337 @@ /* rfc822parse.c - Simple mail and MIME parser * Copyright (C) 1999, 2000 Werner Koch, Duesseldorf * Copyright (C) 2003, 2004 g10 Code GmbH * * This file is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This file is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, see . */ /* According to RFC822 binary zeroes are allowed at many places. We do * not handle this correct especially in the field parsing code. It * should be easy to fix and the API provides a interfaces which * returns the length but in addition makes sure that returned strings * are always ended by a \0. 
* * Furthermore, the case of field names is changed and thus it is not * always a good idea to use these modified header * lines (e.g. signatures may break). */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include "rfc822parse.h" /* All valid characters in a header name. */ #define HEADER_NAME_CHARS ("abcdefghijklmnopqrstuvwxyz" \ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ "-01234567890") enum token_type { tSPACE, tATOM, tQUOTED, tDOMAINLIT, tSPECIAL }; /* For now we directly use our TOKEN as the parse context */ typedef struct rfc822parse_field_context *TOKEN; struct rfc822parse_field_context { TOKEN next; enum token_type type; struct { unsigned int cont:1; unsigned int lowered:1; } flags; /*TOKEN owner_pantry; */ char data[1]; }; struct hdr_line { struct hdr_line *next; int cont; /* This is a continuation of the previous line. */ unsigned char line[1]; }; typedef struct hdr_line *HDR_LINE; struct part { struct part *right; /* The next part. */ struct part *down; /* A contained part. */ HDR_LINE hdr_lines; /* Header lines os that part. */ HDR_LINE *hdr_lines_tail; /* Helper for adding lines. */ char *boundary; /* Only used in the first part. */ }; typedef struct part *part_t; struct rfc822parse_context { rfc822parse_cb_t callback; void *callback_value; int callback_error; int in_body; int in_preamble; /* Whether we are before the first boundary. */ part_t parts; /* The tree of parts. */ part_t current_part; /* Whom we are processing (points into parts). */ const char *boundary; /* Current boundary. 
*/ }; static HDR_LINE find_header (rfc822parse_t msg, const char *name, int which, HDR_LINE * rprev); static size_t length_sans_trailing_ws (const unsigned char *line, size_t len) { const unsigned char *p, *mark; size_t n; for (mark=NULL, p=line, n=0; n < len; n++, p++) { if (strchr (" \t\r\n", *p )) { if( !mark ) mark = p; } else mark = NULL; } if (mark) return mark - line; return len; } static void lowercase_string (unsigned char *string) { for (; *string; string++) if (*string >= 'A' && *string <= 'Z') *string = *string - 'A' + 'a'; } static int my_toupper (int c) { if (c >= 'a' && c <= 'z') c &= ~0x20; return c; } /* This is the same as ascii_strcasecmp. */ static int my_strcasecmp (const char *a, const char *b) { if (a == b) return 0; for (; *a && *b; a++, b++) { if (*a != *b && my_toupper(*a) != my_toupper(*b)) break; } return *a == *b? 0 : (my_toupper (*a) - my_toupper (*b)); } #ifndef HAVE_STPCPY static char * my_stpcpy (char *a,const char *b) { while (*b) *a++ = *b++; *a = 0; return (char*)a; } #define stpcpy my_stpcpy #endif /* If a callback has been registered, call it for the event of type EVENT. */ static int do_callback (rfc822parse_t msg, rfc822parse_event_t event) { int rc; if (!msg->callback || msg->callback_error) return 0; rc = msg->callback (msg->callback_value, event, msg); if (rc) msg->callback_error = rc; return rc; } static part_t new_part (void) { part_t part; part = calloc (1, sizeof *part); if (part) { part->hdr_lines_tail = &part->hdr_lines; } return part; } static void release_part (part_t part) { part_t tmp; HDR_LINE hdr, hdr2; for (; part; part = tmp) { tmp = part->right; if (part->down) release_part (part->down); for (hdr = part->hdr_lines; hdr; hdr = hdr2) { hdr2 = hdr->next; free (hdr); } free (part->boundary); free (part); } } static void release_handle_data (rfc822parse_t msg) { release_part (msg->parts); msg->parts = NULL; msg->current_part = NULL; msg->boundary = NULL; } /* Check that the header name is valid. 
We allow all lower and
 * uppercase letters and, except for the first character, digits and
 * the dash.  The check stops at the first colon or at string end.
 * Returns true if the name is valid.  */
int
rfc822_valid_header_name_p (const char *name)
{
  const char *colon;
  size_t namelen;

  /* The name ends at the first colon or, lacking one, at the NUL.  */
  colon = strchr (name, ':');
  if (colon)
    namelen = colon - name;
  else
    namelen = strlen (name);

  if (!namelen)
    return 0;  /* Empty name.  */
  if (strspn (name, HEADER_NAME_CHARS) != namelen)
    return 0;  /* Contains a character outside the allowed set.  */
  if (strchr ("-0123456789", *name))
    return 0;  /* Must not start with a dash or a digit.  */

  return 1;
}


/* Transform a header NAME into a standard capitalized format: the
 * first letter of each dash separated word is made uppercase and all
 * other letters lowercase.  Conversion stops at the colon.  A NAME
 * which equals "MIME-Version" regardless of case is replaced by
 * exactly that spelling.  NAME is modified in place.  */
void
rfc822_capitalize_header_name (char *name)
{
  unsigned char *cursor;
  int at_word_start;

  /* Special cases first.  */
  if (!my_strcasecmp (name, "MIME-Version"))
    {
      strcpy (name, "MIME-Version");
      return;
    }

  /* Regular cases.  */
  at_word_start = 1;
  for (cursor = (unsigned char *) name; *cursor && *cursor != ':'; cursor++)
    {
      unsigned char ch = *cursor;

      if (ch == '-')
        {
          at_word_start = 1;  /* The next character starts a new word.  */
          continue;
        }

      if (at_word_start)
        {
          if (ch >= 'a' && ch <= 'z')
            *cursor = ch - 'a' + 'A';
          at_word_start = 0;
        }
      else if (ch >= 'A' && ch <= 'Z')
        *cursor = ch - 'A' + 'a';
    }
}


/* Create a new parsing context for an entire rfc822 message and
   return it.  CB and CB_VALUE may be given to callback for certain
   events.  NULL is returned on error with errno set appropriately.
*/ rfc822parse_t rfc822parse_open (rfc822parse_cb_t cb, void *cb_value) { rfc822parse_t msg = calloc (1, sizeof *msg); if (msg) { msg->parts = msg->current_part = new_part (); if (!msg->parts) { free (msg); msg = NULL; } else { msg->callback = cb; msg->callback_value = cb_value; if (do_callback (msg, RFC822PARSE_OPEN)) { release_handle_data (msg); free (msg); msg = NULL; } } } return msg; } void rfc822parse_cancel (rfc822parse_t msg) { if (msg) { do_callback (msg, RFC822PARSE_CANCEL); release_handle_data (msg); free (msg); } } void rfc822parse_close (rfc822parse_t msg) { if (msg) { do_callback (msg, RFC822PARSE_CLOSE); release_handle_data (msg); free (msg); } } static part_t find_parent (part_t tree, part_t target) { part_t part; for (part = tree->down; part; part = part->right) { if (part == target) return tree; /* Found. */ if (part->down) { part_t tmp = find_parent (part, target); if (tmp) return tmp; } } return NULL; } static void set_current_part_to_parent (rfc822parse_t msg) { part_t parent; assert (msg->current_part); parent = find_parent (msg->parts, msg->current_part); if (!parent) return; /* Already at the top. */ #ifndef NDEBUG { part_t part; for (part = parent->down; part; part = part->right) if (part == msg->current_part) break; assert (part); } #endif msg->current_part = parent; parent = find_parent (msg->parts, parent); msg->boundary = parent? parent->boundary: NULL; } /**************** * We have read in all header lines and are about to receive the body * part. The delimiter line has already been processed. * * FIXME: we's better return an error in case of memory failures. */ static int transition_to_body (rfc822parse_t msg) { rfc822parse_field_t ctx; int rc; rc = do_callback (msg, RFC822PARSE_T2BODY); if (!rc) { /* Store the boundary if we have multipart type. 
*/ ctx = rfc822parse_parse_field (msg, "Content-Type", -1); if (ctx) { const char *s; s = rfc822parse_query_media_type (ctx, NULL); if (s && !strcmp (s,"multipart")) { s = rfc822parse_query_parameter (ctx, "boundary", 0); if (s) { - assert (!msg->current_part->boundary); + if (msg->current_part->boundary) + return -1; + msg->current_part->boundary = malloc (strlen (s) + 1); if (msg->current_part->boundary) { part_t part; strcpy (msg->current_part->boundary, s); msg->boundary = msg->current_part->boundary; part = new_part (); if (!part) { int save_errno = errno; rfc822parse_release_field (ctx); errno = save_errno; return -1; } rc = do_callback (msg, RFC822PARSE_LEVEL_DOWN); - assert (!msg->current_part->down); + if (msg->current_part->down) + return -1; msg->current_part->down = part; msg->current_part = part; msg->in_preamble = 1; } } } rfc822parse_release_field (ctx); } } return rc; } /* We have just passed a MIME boundary and need to prepare for new part. headers. */ static int transition_to_header (rfc822parse_t msg) { part_t part; - assert (msg->current_part); - assert (!msg->current_part->right); + if (!(msg->current_part + && !msg->current_part->right)) + return -1; part = new_part (); if (!part) return -1; msg->current_part->right = part; msg->current_part = part; return 0; } static int insert_header (rfc822parse_t msg, const unsigned char *line, size_t length) { HDR_LINE hdr; - assert (msg->current_part); + if (!msg->current_part) + return -1; + if (!length) { msg->in_body = 1; return transition_to_body (msg); } if (!msg->current_part->hdr_lines) do_callback (msg, RFC822PARSE_BEGIN_HEADER); length = length_sans_trailing_ws (line, length); hdr = malloc (sizeof (*hdr) + length); if (!hdr) return -1; hdr->next = NULL; hdr->cont = (*line == ' ' || *line == '\t'); memcpy (hdr->line, line, length); hdr->line[length] = 0; /* Make it a string. */ /* Transform a field name into canonical format. 
*/ if (!hdr->cont && strchr (line, ':')) rfc822_capitalize_header_name (hdr->line); *msg->current_part->hdr_lines_tail = hdr; msg->current_part->hdr_lines_tail = &hdr->next; /* Lets help the caller to prevent mail loops and issue an event for * every Received header. */ if (length >= 9 && !memcmp (line, "Received:", 9)) do_callback (msg, RFC822PARSE_RCVD_SEEN); return 0; } /**************** * Note: We handle the body transparent to allow binary zeroes in it. */ static int insert_body (rfc822parse_t msg, const unsigned char *line, size_t length) { int rc = 0; if (length > 2 && *line == '-' && line[1] == '-' && msg->boundary) { size_t blen = strlen (msg->boundary); if (length == blen + 2 && !memcmp (line+2, msg->boundary, blen)) { rc = do_callback (msg, RFC822PARSE_BOUNDARY); msg->in_body = 0; if (!rc && !msg->in_preamble) rc = transition_to_header (msg); msg->in_preamble = 0; } else if (length == blen + 4 && line[length-2] =='-' && line[length-1] == '-' && !memcmp (line+2, msg->boundary, blen)) { rc = do_callback (msg, RFC822PARSE_LAST_BOUNDARY); msg->boundary = NULL; /* No current boundary anymore. */ set_current_part_to_parent (msg); /* Fixme: The next should actually be send right before the next boundary, so that we can mark the epilogue. */ if (!rc) rc = do_callback (msg, RFC822PARSE_LEVEL_UP); } } if (msg->in_preamble && !rc) rc = do_callback (msg, RFC822PARSE_PREAMBLE); return rc; } /* Insert the next line into the parser. Return 0 on success or true on error with errno set appropriately. */ int rfc822parse_insert (rfc822parse_t msg, const unsigned char *line, size_t length) { return (msg->in_body ? insert_body (msg, line, length) : insert_header (msg, line, length)); } /* Tell the parser that we have finished the message. */ int rfc822parse_finish (rfc822parse_t msg) { return do_callback (msg, RFC822PARSE_FINISH); } /**************** * Get a copy of a header line. The line is returned as one long * string with LF to separate the continuation line. 
Caller must free * the return buffer. WHICH may be used to enumerate over all lines. * Wildcards are allowed. This function works on the current headers; * i.e. the regular mail headers or the MIME headers of the current * part. * * WHICH gives the mode: * -1 := Take the last occurrence * n := Take the n-th one. * * Returns a newly allocated buffer or NULL on error. errno is set in * case of a memory failure or set to 0 if the requested field is not * available. * * If VALUEOFF is not NULL it will receive the offset of the first non * space character in the value part of the line (i.e. after the first * colon). */ char * rfc822parse_get_field (rfc822parse_t msg, const char *name, int which, size_t *valueoff) { HDR_LINE h, h2; char *buf, *p; size_t n; h = find_header (msg, name, which, NULL); if (!h) { errno = 0; return NULL; /* no such field */ } n = strlen (h->line) + 1; for (h2 = h->next; h2 && h2->cont; h2 = h2->next) n += strlen (h2->line) + 1; buf = p = malloc (n); if (buf) { p = stpcpy (p, h->line); *p++ = '\n'; for (h2 = h->next; h2 && h2->cont; h2 = h2->next) { p = stpcpy (p, h2->line); *p++ = '\n'; } p[-1] = 0; } if (valueoff) { p = strchr (buf, ':'); if (!p) *valueoff = 0; /* Oops: should never happen. */ else { p++; while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') p++; *valueoff = p - buf; } } return buf; } /**************** * Enumerate all header. Caller has to provide the address of a pointer * which has to be initialized to NULL, the caller should then never change this * pointer until he has closed the enumeration by passing again the address * of the pointer but with msg set to NULL. * The function returns pointers to all the header lines or NULL when * all lines have been enumerated or no headers are available. */ const char * rfc822parse_enum_header_lines (rfc822parse_t msg, void **context) { HDR_LINE l; if (!msg) /* Close. */ return NULL; if (*context == msg || !msg->current_part) return NULL; l = *context ? 
(HDR_LINE) *context : msg->current_part->hdr_lines; if (l) { *context = l->next ? (void *) (l->next) : (void *) msg; return l->line; } *context = msg; /* Mark end of list. */ return NULL; } /**************** * Find a header field. If the Name does end in an asterisk this is meant * to be a wildcard. * * which -1 : Retrieve the last field * >0 : Retrieve the n-th field * RPREV may be used to return the predecessor of the returned field; * which may be NULL for the very first one. It has to be initialized * to either NULL in which case the search start at the first header line, * or it may point to a headerline, where the search should start */ static HDR_LINE find_header (rfc822parse_t msg, const char *name, int which, HDR_LINE *rprev) { HDR_LINE hdr, prev = NULL, mark = NULL; unsigned char *p; size_t namelen, n; int found = 0; int glob = 0; if (!msg->current_part) return NULL; namelen = strlen (name); if (namelen && name[namelen - 1] == '*') { namelen--; glob = 1; } hdr = msg->current_part->hdr_lines; if (rprev && *rprev) { /* spool forward to the requested starting place. * we cannot simply set this as we have to return * the previous list element too */ for (; hdr && hdr != *rprev; prev = hdr, hdr = hdr->next) ; } for (; hdr; prev = hdr, hdr = hdr->next) { if (hdr->cont) continue; if (!(p = strchr (hdr->line, ':'))) continue; /* invalid header, just skip it. */ n = p - hdr->line; if (!n) continue; /* invalid name */ if ((glob ? 
(namelen <= n) : (namelen == n)) && !memcmp (hdr->line, name, namelen)) { found++; if (which == -1) mark = hdr; else if (found == which) { if (rprev) *rprev = prev; return hdr; } } } if (mark && rprev) *rprev = prev; return mark; } static const char * skip_ws (const char *s) { while (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n') s++; return s; } static void release_token_list (TOKEN t) { while (t) { TOKEN t2 = t->next; /* fixme: If we have owner_pantry, put the token back to * this pantry so that it can be reused later */ free (t); t = t2; } } static TOKEN new_token (enum token_type type, const char *buf, size_t length) { TOKEN t; /* fixme: look through our pantries to find a suitable * token for reuse */ t = malloc (sizeof *t + length); if (t) { t->next = NULL; t->type = type; memset (&t->flags, 0, sizeof (t->flags)); t->data[0] = 0; if (buf) { memcpy (t->data, buf, length); t->data[length] = 0; /* Make sure it is a C string. */ } else t->data[0] = 0; } return t; } static TOKEN append_to_token (TOKEN old, const char *buf, size_t length) { size_t n = strlen (old->data); TOKEN t; t = malloc (sizeof *t + n + length); if (t) { t->next = old->next; t->type = old->type; t->flags = old->flags; memcpy (t->data, old->data, n); memcpy (t->data + n, buf, length); t->data[n + length] = 0; old->next = NULL; release_token_list (old); } return t; } /* Parse a field into tokens as defined by rfc822. 
*/
/* HDR is the first line of the header field; directly following
   continuation lines are parsed as part of the field.  Comments in
   parentheses are skipped, quoted strings and domain literals are
   returned without their delimiters, and each run of invalid
   characters yields a single tSPACE token.  For the Content-Type,
   Content-Transfer-Encoding and Content-Disposition headers a
   different set of delimiters is used.

   Returns the list of tokens, which the caller has to release using
   release_token_list.  NULL is returned with errno set to 0 if HDR
   is NULL, the line has no valid field name or the field has no
   tokens at all; NULL is also returned if an allocation failed.  */
static TOKEN
parse_field (HDR_LINE hdr)
{
  static const char specials[] = "<>@.,;:\\[]\"()";
  static const char specials2[] = "<>@.,;:";
  static const char tspecials[] = "/?=<>@,;:\\[]\"()";
  static const char tspecials2[] = "/?=<>@.,;:";  /* FIXME: really
                                                     include '.'?*/
  static struct
  {
    const unsigned char *name;
    size_t namelen;
  } tspecial_header[] = {
    { "Content-Type", 12},
    { "Content-Transfer-Encoding", 25},
    { "Content-Disposition", 19},
    { NULL, 0}
  };
  const char *delimiters;
  const char *delimiters2;
  const unsigned char *line, *s, *s2;
  size_t n;
  int i, invalid = 0;
  TOKEN t, tok, *tok_tail;

  errno = 0;
  if (!hdr)
    return NULL;

  tok = NULL;
  tok_tail = &tok;

  line = hdr->line;
  if (!(s = strchr (line, ':')))
    return NULL; /* oops */

  n = s - line;
  if (!n)
    return NULL; /* oops: invalid name */

  delimiters = specials;
  delimiters2 = specials2;
  for (i = 0; tspecial_header[i].name; i++)
    {
      if (n == tspecial_header[i].namelen
          && !memcmp (line, tspecial_header[i].name, n))
        {
          delimiters = tspecials;
          delimiters2 = tspecials2;
          break;
        }
    }

  s++; /* Move over the colon. */
  for (;;)
    {
      while (!*s)
        {
          if (!hdr->next || !hdr->next->cont)
            return tok; /* Ready.  */

          /* Next item is a header continuation line.  */
          hdr = hdr->next;
          s = hdr->line;
        }

      if (*s == '(')
        {
          /* A comment: skip it, taking care of nesting and of
             quoted strings inside the comment.  */
          int level = 1;
          int in_quote = 0;

          invalid = 0;
          for (s++;; s++)
            {
              while (!*s)
                {
                  if (!hdr->next || !hdr->next->cont)
                    goto oparen_out;
                  /* Next item is a header continuation line.  */
                  hdr = hdr->next;
                  s = hdr->line;
                }

              if (in_quote)
                {
                  if (*s == '\"')
                    in_quote = 0;
                  else if (*s == '\\' && s[1])	/* what about continuation? */
                    s++;
                }
              else if (*s == ')')
                {
                  if (!--level)
                    break;
                }
              else if (*s == '(')
                level++;
              else if (*s == '\"')
                in_quote = 1;
            }
        oparen_out:
          if (!*s)
            ; /* Actually this is an error, but we don't care about it. */
          else
            s++;
        }
      else if (*s == '\"' || *s == '[')
        {
          /* We do not check for non-allowed nesting of domainliterals */
          int term = *s == '\"' ? '\"' : ']';

          invalid = 0;
          s++;
          t = NULL;

          for (;;)
            {
              TOKEN grown;

              for (s2 = s; *s2; s2++)
                {
                  if (*s2 == term)
                    break;
                  else if (*s2 == '\\' && s2[1]) /* what about continuation? */
                    s2++;
                }

              grown = (t ? append_to_token (t, s, s2 - s)
                       : new_token (term == '\"'? tQUOTED : tDOMAINLIT,
                                    s, s2 - s));
              if (!grown)
                {
                  /* T has not yet been linked into TOK and a failed
                     append leaves it allocated; release it here so
                     that it does not leak.  */
                  int save = errno;
                  release_token_list (t);
                  errno = save;
                  goto failure;
                }
              t = grown;

              if (*s2 || !hdr->next || !hdr->next->cont)
                break;
              /* Next item is a header continuation line.  */
              hdr = hdr->next;
              s = hdr->line;
            }
          *tok_tail = t;
          tok_tail = &t->next;
          s = s2;
          if (*s)
            s++; /* skip the delimiter */
        }
      else if ((s2 = strchr (delimiters2, *s)))
        {
          /* Special characters which are not handled above. */
          invalid = 0;
          t = new_token (tSPECIAL, s, 1);
          if (!t)
            goto failure;
          *tok_tail = t;
          tok_tail = &t->next;
          s++;
        }
      else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
        {
          invalid = 0;
          s = skip_ws (s + 1);
        }
      else if (*s > 0x20 && !(*s & 128))
        {
          /* Atom. */
          invalid = 0;
          for (s2 = s + 1; *s2 > 0x20
                 && !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
            ;
          t = new_token (tATOM, s, s2 - s);
          if (!t)
            goto failure;
          *tok_tail = t;
          tok_tail = &t->next;
          s = s2;
        }
      else
        {
          /* Invalid character. */
          if (!invalid)
            {
              /* For parsing we assume only one space. */
              t = new_token (tSPACE, NULL, 0);
              if (!t)
                goto failure;
              *tok_tail = t;
              tok_tail = &t->next;
              invalid = 1;
            }
          s++;
        }
    }
  /*NOTREACHED*/

 failure:
  {
    int save = errno;
    release_token_list (tok);
    errno = save;
  }
  return NULL;
}


/****************
 * Find and parse a header field.
 * WHICH indicates what to do if there are multiple instances of the same
 * field (like "Received"); the following values are defined:
 *  -1 := Take the last occurrence
 *   0 := Reserved
 *   n := Take the n-th one.
 * Returns a handle for further operations on the parse context of the field
 * or NULL if the field was not found.
*/
rfc822parse_field_t
rfc822parse_parse_field (rfc822parse_t msg, const char *name, int which)
{
  HDR_LINE hdr;

  if (!which)
    return NULL;  /* 0 is reserved.  */

  hdr = find_header (msg, name, which, NULL);
  if (!hdr)
    return NULL;
  return parse_field (hdr);
}

/* Release the parse context CTX as returned by
   rfc822parse_parse_field.  CTX may be NULL.  */
void
rfc822parse_release_field (rfc822parse_field_t ctx)
{
  if (ctx)
    release_token_list (ctx);
}



/****************
 * Check whether T points to a parameter.
 * A parameter starts with a semicolon and it is assumed that t
 * points to exactly this one.
 */
static int
is_parameter (TOKEN t)
{
  t = t->next;
  if (!t || t->type != tATOM)
    return 0;
  t = t->next;
  if (!t || !(t->type == tSPECIAL && t->data[0] == '='))
    return 0;
  t = t->next;
  if (!t)
    return 1; /* We assume that an non existing value is an empty one. */
  return t->type == tQUOTED || t->type == tATOM;
}

/*
   Some header (Content-type) have a special syntax where attribute=value
   pairs are used after a leading semicolon.  The parse_field code
   knows about these fields and changes the parsing to the one defined
   in RFC2045.
   Returns a pointer to the value which is valid as long as the
   parse context is valid; NULL is returned in case that attr is not
   defined in the header, a missing value is represented by an empty string.

   With LOWER_VALUE set to true, a matching field value will be
   lowercased.

   Note, that ATTR should be lowercase.  The attribute names stored
   in CTX are lowercased in place while searching.
 */
const char *
rfc822parse_query_parameter (rfc822parse_field_t ctx, const char *attr,
                             int lower_value)
{
  TOKEN t, a;

  for (t = ctx; t; t = t->next)
    {
      /* skip to the next semicolon */
      for (; t && !(t->type == tSPECIAL && t->data[0] == ';'); t = t->next)
        ;
      if (!t)
        return NULL;
      if (is_parameter (t))
        { /* Look closer. */
          a = t->next; /* We know that this is an atom */
          if ( !a->flags.lowered )
            {
              lowercase_string (a->data);
              a->flags.lowered = 1;
            }
          if (!strcmp (a->data, attr))
            { /* found */
              t = a->next->next;
              /* Either T is now an atom, a quoted string or NULL in
               * which case we return an empty string. */

              if ( lower_value && t && !t->flags.lowered )
                {
                  lowercase_string (t->data);
                  t->flags.lowered = 1;
                }
              return t ? t->data : "";
            }
        }
    }
  return NULL;
}

/****************
 * This function may be used for the Content-Type header to figure out
 * the media type and subtype.  Note, that the returned strings are
 * guaranteed to be lowercase as required by MIME.
 *
 * Returns: a pointer to the media type and if subtype is not NULL,
 *          a pointer to the subtype.  NULL is returned if CTX is NULL
 *          or does not start with "type/subtype"; SUBTYPE is not
 *          changed in this case.
 */
const char *
rfc822parse_query_media_type (rfc822parse_field_t ctx, const char **subtype)
{
  TOKEN t = ctx;
  const char *type;

  /* An empty token list is possible for a header without a value.  */
  if (!t || t->type != tATOM)
    return NULL;
  if (!t->flags.lowered)
    {
      lowercase_string (t->data);
      t->flags.lowered = 1;
    }
  type = t->data;
  t = t->next;
  if (!t || t->type != tSPECIAL || t->data[0] != '/')
    return NULL;
  t = t->next;
  if (!t || t->type != tATOM)
    return NULL;

  if (subtype)
    {
      if (!t->flags.lowered)
        {
          lowercase_string (t->data);
          t->flags.lowered = 1;
        }
      *subtype = t->data;
    }
  return type;
}





#ifdef TESTING

/* Internal debug function to print the structure of the message.
   With PART being NULL the dump starts at the top level parts of MSG
   and a heading is printed first.  INDENT is the current nesting
   depth.  */
static void
dump_structure (rfc822parse_t msg, part_t part, int indent)
{
  if (!part)
    {
      printf ("*** Structure of this message:\n");
      part = msg->parts;
    }

  for (; part; part = part->right)
    {
      rfc822parse_field_t ctx;
      part_t save_part; /* ugly hack - we should have a function to
                           get part information. */
      const char *s;

      /* The field lookup works on the current part only, thus we
         temporarily switch to the part to be dumped.  */
      save_part = msg->current_part;
      msg->current_part = part;
      ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
      msg->current_part = save_part;
      if (ctx)
        {
          const char *s1, *s2;

          s1 = rfc822parse_query_media_type (ctx, &s2);
          if (s1)
            printf ("*** %*s %s/%s", indent*2, "", s1, s2);
          else
            printf ("*** %*s [not found]", indent*2, "");

          s = rfc822parse_query_parameter (ctx, "boundary", 0);
          if (s)
            printf (" (boundary=\"%s\")", s);
          rfc822parse_release_field (ctx);
        }
      else
        printf ("*** %*s text/plain [assumed]", indent*2, "");
      putchar('\n');

      if (part->down)
        dump_structure (msg, part->down, indent + 1);
    }
}



/* Print the value of the parameter NAME from the parsed field CTX if
   it is available.  Nothing is printed if CTX is NULL.  */
static void
show_param (rfc822parse_field_t ctx, const char *name)
{
  const char *s;

  if (!ctx)
    return;
  s = rfc822parse_query_parameter (ctx, name, 0);
  if (s)
    printf ("*** %s: '%s'\n", name, s);
}



/* Print a line with the name of the parser EVENT.  */
static void
show_event (rfc822parse_event_t event)
{
  const char *s;

  switch (event)
    {
    case RFC822PARSE_OPEN: s= "Open"; break;
    case RFC822PARSE_CLOSE: s= "Close"; break;
    case RFC822PARSE_CANCEL: s= "Cancel"; break;
    case RFC822PARSE_T2BODY: s= "T2Body"; break;
    case RFC822PARSE_FINISH: s= "Finish"; break;
    case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break;
    case RFC822PARSE_LEVEL_DOWN: s= "Level_Down"; break;
    case RFC822PARSE_LEVEL_UP: s= "Level_Up"; break;
    case RFC822PARSE_BOUNDARY: s= "Boundary"; break;
    case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break;
    case RFC822PARSE_BEGIN_HEADER: s= "Begin_Header"; break;
    case RFC822PARSE_PREAMBLE: s= "Preamble"; break;
    case RFC822PARSE_EPILOGUE: s= "Epilogue"; break;
    default: s= "***invalid event***"; break;
    }
  printf ("*** got RFC822 event %s\n", s);
}

/* Parser callback used by the test driver: print each EVENT and, at
   the transition to the body, all header lines and the media type of
   the current part.  Always returns 0.  */
static int
msg_cb (void *dummy_arg, rfc822parse_event_t event, rfc822parse_t msg)
{
  show_event (event);
  if (event == RFC822PARSE_T2BODY)
    {
      rfc822parse_field_t ctx;
      void *ectx;
      const char *line;

      for (ectx=NULL; (line = rfc822parse_enum_header_lines (msg, &ectx)); )
        {
          printf ("*** HDR: %s\n", line);
        }
      rfc822parse_enum_header_lines (NULL, &ectx); /* Close enumerator. */

      ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
      if (ctx)
        {
          const char *s1, *s2;
          s1 = rfc822parse_query_media_type (ctx, &s2);
          if (s1)
            printf ("*** media: '%s/%s'\n", s1, s2);
          else
            printf ("*** media: [not found]\n");
          show_param (ctx, "boundary");
          show_param (ctx, "protocol");
          rfc822parse_release_field (ctx);
        }
      else
        printf ("*** media: text/plain [assumed]\n");

    }


  return 0;
}



/* Test driver: feed stdin line by line into the parser, with a
   trailing LF and CR removed from each line, and finally dump the
   structure of the message.  */
int
main (int argc, char **argv)
{
  char line[5000];
  size_t length;
  rfc822parse_t msg;

  msg = rfc822parse_open (msg_cb, NULL);
  if (!msg)
    abort ();

  while (fgets (line, sizeof (line), stdin))
    {
      length = strlen (line);
      if (length && line[length - 1] == '\n')
        line[--length] = 0;
      if (length && line[length - 1] == '\r')
        line[--length] = 0;
      if (rfc822parse_insert (msg, line, length))
        abort ();
    }

  dump_structure (msg, NULL, 0);

  rfc822parse_close (msg);
  return 0;
}
#endif

/*
Local Variables:
compile-command: "gcc -Wall -Wno-pointer-sign -g -DTESTING -o rfc822parse rfc822parse.c"
End:
*/