Page Menu
Home
GnuPG
Search
Configure Global Search
Log In
Files
F35221062
rfc822parse.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Size
30 KB
Subscribers
None
rfc822parse.c
View Options
/* rfc822parse.c - Simple mail and MIME parser
* Copyright (C) 1999, 2000 Werner Koch, Duesseldorf
* Copyright (C) 2003, 2004 g10 Code GmbH
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This file is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <https://www.gnu.org/licenses/>.
*/
/* According to RFC822 binary zeroes are allowed at many places.  We do
 * not handle this correctly, especially in the field parsing code.  It
 * should be easy to fix, and the API provides an interface which
 * returns the length but in addition makes sure that returned strings
 * are always terminated by a \0.
 *
 * Furthermore, the case of field names is changed and thus it is not
 * always a good idea to use these modified header
 * lines (e.g. signatures may break).
 */
#ifdef HAVE_CONFIG_H
#include
<config.h>
#endif
#include
<stdlib.h>
#include
<stdio.h>
#include
<string.h>
#include
<errno.h>
#include
<stdarg.h>
#include
<assert.h>
#include
"rfc822parse.h"
/* The set of characters accepted anywhere in a header field name:
 * ASCII letters, the dash and the decimal digits.  The additional
 * restriction on the first character is enforced by
 * rfc822_valid_header_name_p.  */
#define HEADER_NAME_CHARS                                     \
  ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"     \
   "-01234567890")
/* The kinds of tokens produced when a header field value is split up
 * by parse_field.  */
enum token_type
  {
    tSPACE,      /* Placeholder emitted for a run of invalid characters. */
    tATOM,       /* A run of ordinary characters.                        */
    tQUOTED,     /* The content of a "..." quoted string.                */
    tDOMAINLIT,  /* The content of a [...] domain literal.               */
    tSPECIAL     /* A single delimiter character.                        */
  };
/* A TOKEN is one node of a singly linked list of tokens.  For now the
 * head of such a list is used directly as the parse context of a
 * field.  */
typedef struct rfc822parse_field_context *TOKEN;

struct rfc822parse_field_context
{
  struct rfc822parse_field_context *next;  /* Next token or NULL.  */
  enum token_type type;                    /* Kind of this token.  */
  struct
  {
    unsigned int cont:1;     /* Not referenced by the code in this file.  */
    unsigned int lowered:1;  /* DATA has already been lowercased.  */
  } flags;
  /*TOKEN owner_pantry; */
  char data[1];              /* The token text as a C string; the
                                allocation is extended by the length
                                of the text.  */
};
/* One physical header line.  The lines of a part form a singly linked
 * list in the order they were inserted.  */
struct hdr_line
{
  struct hdr_line *next;  /* Next line or NULL.  */
  int cont;               /* This is a continuation of the previous
                             line.  */
  unsigned char line[1];  /* The line as a C string; the allocation
                             is extended by the length of the line.  */
};

typedef struct hdr_line *HDR_LINE;
/* A node in the tree of message parts.  */
struct part
{
  struct part *right;        /* The next part.  */
  struct part *down;         /* A contained part.  */
  HDR_LINE hdr_lines;        /* Header lines of that part.  */
  HDR_LINE *hdr_lines_tail;  /* Helper for adding lines: address of
                                the link that receives the next
                                line.  */
  char *boundary;            /* Only used in the first part.  */
};

typedef struct part *part_t;
/* The state of one parser instance.  */
struct rfc822parse_context
{
  rfc822parse_cb_t callback;  /* Event callback or NULL.  */
  void *callback_value;       /* Passed as first argument to CALLBACK.  */
  int callback_error;         /* First non-zero value returned by
                                 CALLBACK; once set no further events
                                 are delivered.  */
  int in_body;                /* True while body lines are expected.  */
  int in_preamble;            /* Whether we are before the first
                                 boundary.  */
  part_t parts;               /* The tree of parts.  */
  part_t current_part;        /* Whom we are processing (points into
                                 PARTS).  */
  const char *boundary;       /* Current boundary.  */
};
/* Forward declaration: find_header is defined further below but is
 * already needed by rfc822parse_get_field.  */
static HDR_LINE find_header (rfc822parse_t msg, const char *name,
                             int which, HDR_LINE *rprev);
/* Return the length of the LEN bytes at LINE after removing any
 * trailing run of whitespace.  Space, TAB, CR and LF count as
 * whitespace; a NUL byte is treated the same way.  LINE itself is not
 * modified.  */
static size_t
length_sans_trailing_ws (const unsigned char *line, size_t len)
{
  /* Walk backwards from the end and drop bytes as long as they are
     whitespace.  */
  while (len)
    {
      unsigned char c = line[len - 1];

      if (c != ' ' && c != '\t' && c != '\r' && c != '\n' && c != '\0')
        break;
      len--;
    }
  return len;
}
/* Convert all ASCII uppercase letters of the C string STRING to
 * lowercase, in place.  All other bytes are left untouched.  */
static void
lowercase_string (unsigned char *string)
{
  unsigned char *cursor = string;

  while (*cursor)
    {
      if (*cursor >= 'A' && *cursor <= 'Z')
        *cursor += 'a' - 'A';
      cursor++;
    }
}
/* Return the uppercase variant of C if C is an ASCII lowercase
 * letter; any other value is returned unchanged.  */
static int
my_toupper (int c)
{
  return (c >= 'a' && c <= 'z') ? c - ('a' - 'A') : c;
}
/* Compare the strings A and B ignoring the case of ASCII letters.
 * Returns 0 if they are equal, otherwise the difference of the
 * uppercased characters at the first mismatch.  This is the same as
 * ascii_strcasecmp.  */
static int
my_strcasecmp (const char *a, const char *b)
{
  if (a == b)
    return 0;

  /* Advance while both strings continue and the current characters
     agree, either literally or after uppercasing.  */
  while (*a && *b
         && (*a == *b || my_toupper (*a) == my_toupper (*b)))
    {
      a++;
      b++;
    }

  if (*a == *b)
    return 0;
  return my_toupper (*a) - my_toupper (*b);
}
#ifndef HAVE_STPCPY
/* Replacement for stpcpy: copy the string B including its terminating
 * NUL to A and return a pointer to that NUL within A.  */
static char *
my_stpcpy (char *a, const char *b)
{
  size_t n = strlen (b);

  memcpy (a, b, n + 1);
  return a + n;
}
#define stpcpy my_stpcpy
#endif
/* If a callback has been registered and no earlier callback has
 * failed, call it for the event of type EVENT.  A non-zero return
 * value of the callback is remembered in MSG and also returned; in all
 * other cases 0 is returned.  */
static int
do_callback (rfc822parse_t msg, rfc822parse_event_t event)
{
  int err = 0;

  if (msg->callback && !msg->callback_error)
    {
      err = msg->callback (msg->callback_value, event, msg);
      if (err)
        msg->callback_error = err;
    }
  return err;
}
/* Allocate a new zero-initialized part with an empty header list.
 * Returns NULL if the allocation failed.  */
static part_t
new_part (void)
{
  part_t fresh = calloc (1, sizeof *fresh);

  if (!fresh)
    return NULL;

  /* With no lines stored yet, the first line goes to the list head.  */
  fresh->hdr_lines_tail = &fresh->hdr_lines;
  return fresh;
}
/* Free PART, all parts to its right and, recursively, all parts
 * contained in them, including their header lines and boundary
 * strings.  PART may be NULL.  */
static void
release_part (part_t part)
{
  while (part)
    {
      part_t sibling = part->right;
      HDR_LINE line = part->hdr_lines;

      if (part->down)
        release_part (part->down);

      while (line)
        {
          HDR_LINE following = line->next;

          free (line);
          line = following;
        }

      free (part->boundary);
      free (part);
      part = sibling;
    }
}
/* Free the entire tree of parts of MSG and reset all pointers into
 * that tree.  MSG itself is not freed.  */
static void
release_handle_data (rfc822parse_t msg)
{
  part_t tree = msg->parts;

  msg->boundary = NULL;
  msg->current_part = NULL;
  msg->parts = NULL;
  release_part (tree);
}
/* Check that the header name is valid.  Lowercase and uppercase ASCII
 * letters are allowed everywhere; digits and the dash are allowed
 * everywhere except as the first character.  The check stops at the
 * first colon or at the end of the string.  An empty name is invalid.
 * Returns true if the name is valid.  */
int
rfc822_valid_header_name_p (const char *name)
{
  /* Length of the name proper, i.e. up to a colon or the end.  */
  size_t namelen = strcspn (name, ":");

  if (!namelen)
    return 0;
  if (strspn (name, HEADER_NAME_CHARS) != namelen)
    return 0;
  if (*name == '-' || (*name >= '0' && *name <= '9'))
    return 0;
  return 1;
}
/* Transform a header NAME in place into a standard capitalized
 * format: the first letter and each letter following a dash are
 * uppercased, all other letters are lowercased.  Conversion stops at
 * the colon or at the end of the string.
 *
 * As a special case a NAME which, as a whole string, equals
 * "MIME-Version" (ignoring case) is replaced by exactly that spelling.
 * Note that this special case does not match if NAME is followed by a
 * colon or a value.  */
void
rfc822_capitalize_header_name (char *name)
{
  unsigned char *p;
  int at_word_start = 1;

  /* Special cases first.  */
  if (!my_strcasecmp (name, "MIME-Version"))
    {
      strcpy (name, "MIME-Version");
      return;
    }

  /* Regular cases.  */
  for (p = (unsigned char *)name; *p && *p != ':'; p++)
    {
      if (*p == '-')
        {
          at_word_start = 1;
          continue;
        }

      if (at_word_start)
        {
          if (*p >= 'a' && *p <= 'z')
            *p -= 'a' - 'A';
          at_word_start = 0;
        }
      else if (*p >= 'A' && *p <= 'Z')
        *p += 'a' - 'A';
    }
}
/* Create a new parsing context for an entire rfc822 message and
 * return it.  CB and CB_VALUE may be given to receive callbacks for
 * certain events; the first event delivered is RFC822PARSE_OPEN and a
 * non-zero return value of that callback makes this function fail.
 * NULL is returned on error with errno set appropriately.  */
rfc822parse_t
rfc822parse_open (rfc822parse_cb_t cb, void *cb_value)
{
  rfc822parse_t msg;

  msg = calloc (1, sizeof *msg);
  if (!msg)
    return NULL;

  /* The tree starts with a single part for the top level headers.  */
  msg->parts = msg->current_part = new_part ();
  if (!msg->parts)
    {
      free (msg);
      return NULL;
    }

  msg->callback = cb;
  msg->callback_value = cb_value;
  if (do_callback (msg, RFC822PARSE_OPEN))
    {
      release_handle_data (msg);
      free (msg);
      return NULL;
    }

  return msg;
}
/* Abandon the parser MSG: deliver the RFC822PARSE_CANCEL event and
 * free all resources.  MSG may be NULL.  */
void
rfc822parse_cancel (rfc822parse_t msg)
{
  if (!msg)
    return;

  do_callback (msg, RFC822PARSE_CANCEL);
  release_handle_data (msg);
  free (msg);
}
/* Close the parser MSG: deliver the RFC822PARSE_CLOSE event and free
 * all resources.  MSG may be NULL.  */
void
rfc822parse_close (rfc822parse_t msg)
{
  if (!msg)
    return;

  do_callback (msg, RFC822PARSE_CLOSE);
  release_handle_data (msg);
  free (msg);
}
/* Search the parts below TREE for TARGET and return the part which
 * directly contains TARGET; this is either TREE itself or one of its
 * descendants.  Returns NULL if TARGET is not found below TREE.  */
static part_t
find_parent (part_t tree, part_t target)
{
  part_t child;

  for (child = tree->down; child; child = child->right)
    {
      part_t hit;

      if (child == target)
        return tree; /* Found. */

      hit = child->down ? find_parent (child, target) : NULL;
      if (hit)
        return hit;
    }
  return NULL;
}
/* Make the part containing the current part the new current part and
 * set the current boundary to the one stored in the part containing
 * that new current part (or to NULL if there is none).  Nothing is
 * changed if the current part has no parent.  */
static void
set_current_part_to_parent (rfc822parse_t msg)
{
  part_t up, grand;

  assert (msg->current_part);
  up = find_parent (msg->parts, msg->current_part);
  if (up)
    {
#ifndef NDEBUG
      {
        /* Cross-check that the current part really is a child of UP.  */
        part_t kid = up->down;

        while (kid && kid != msg->current_part)
          kid = kid->right;
        assert (kid);
      }
#endif
      msg->current_part = up;

      grand = find_parent (msg->parts, up);
      if (grand)
        msg->boundary = grand->boundary;
      else
        msg->boundary = NULL;
    }
  /* else: Already at the top. */
}
/****************
 * We have read in all header lines and are about to receive the body
 * part.  The delimiter line has already been processed.
 *
 * The RFC822PARSE_T2BODY event is delivered first.  If the headers of
 * the current part carry a Content-Type with the media type
 * "multipart" and a boundary parameter, the boundary is stored in the
 * current part, a new contained part is created and made the current
 * one, the RFC822PARSE_LEVEL_DOWN event is delivered and the parser
 * enters the preamble state.
 *
 * Returns 0 on success, the non-zero value of a failing callback, or
 * -1 with errno set if memory for the boundary or the new part could
 * not be allocated.
 */
static int
transition_to_body (rfc822parse_t msg)
{
  rfc822parse_field_t ctx;
  const char *s;
  part_t part;
  int rc;
  int save_errno;

  rc = do_callback (msg, RFC822PARSE_T2BODY);
  if (rc)
    return rc;

  /* Store the boundary if we have multipart type.  */
  ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
  if (!ctx)
    return 0;

  s = rfc822parse_query_media_type (ctx, NULL);
  if (s && !strcmp (s, "multipart"))
    s = rfc822parse_query_parameter (ctx, "boundary", 0);
  else
    s = NULL;
  if (!s)
    {
      /* Not a multipart or no boundary given: nothing to set up.  */
      rfc822parse_release_field (ctx);
      return 0;
    }

  /* S points into CTX, thus copy it before CTX is released.  */
  assert (!msg->current_part->boundary);
  msg->current_part->boundary = malloc (strlen (s) + 1);
  if (!msg->current_part->boundary)
    goto out_of_core;
  strcpy (msg->current_part->boundary, s);
  msg->boundary = msg->current_part->boundary;

  part = new_part ();
  if (!part)
    goto out_of_core;

  rc = do_callback (msg, RFC822PARSE_LEVEL_DOWN);
  assert (!msg->current_part->down);
  msg->current_part->down = part;
  msg->current_part = part;
  msg->in_preamble = 1;

  rfc822parse_release_field (ctx);
  return rc;

 out_of_core:
  /* Releasing the field may clobber errno, thus save it.  */
  save_errno = errno;
  rfc822parse_release_field (ctx);
  errno = save_errno;
  return -1;
}
/* We have just passed a MIME boundary and need to prepare for the
 * headers of a new part: a new part is appended to the right of the
 * current part and made the current one.  Returns 0 on success or -1
 * if the new part could not be allocated.  */
static int
transition_to_header (rfc822parse_t msg)
{
  part_t fresh;

  assert (msg->current_part);
  assert (!msg->current_part->right);

  fresh = new_part ();
  if (fresh)
    {
      msg->current_part->right = fresh;
      msg->current_part = fresh;
      return 0;
    }
  return -1;
}
/* Process the header line LINE of LENGTH bytes.  An empty line
 * (LENGTH of 0) ends the headers and switches to the body.  Any other
 * line is stored, with trailing whitespace removed, in the header
 * list of the current part; a line starting with a space or a TAB is
 * marked as a continuation line.  The field name of a line which is
 * not a continuation and which contains a colon is brought into
 * canonical capitalization.
 *
 * The RFC822PARSE_BEGIN_HEADER event is issued before the first line
 * of a part is stored and the RFC822PARSE_RCVD_SEEN event for each
 * "Received:" line; the return values of these callbacks are not
 * propagated.
 *
 * Returns 0 on success, -1 if out of memory, or the result of the
 * transition to the body.  */
static int
insert_header (rfc822parse_t msg, const unsigned char *line, size_t length)
{
  HDR_LINE hdr;

  assert (msg->current_part);
  if (!length)
    {
      msg->in_body = 1;
      return transition_to_body (msg);
    }

  if (!msg->current_part->hdr_lines)
    do_callback (msg, RFC822PARSE_BEGIN_HEADER);

  length = length_sans_trailing_ws (line, length);
  hdr = malloc (sizeof (*hdr) + length);
  if (!hdr)
    return -1;
  hdr->next = NULL;
  hdr->cont = (*line == ' ' || *line == '\t');
  memcpy (hdr->line, line, length);
  hdr->line[length] = 0; /* Make it a string. */

  /* Transform a field name into canonical format.  The colon is
     searched for in our own copy because only that one is guaranteed
     to be NUL terminated; LINE is merely a buffer of LENGTH bytes.  */
  if (!hdr->cont && strchr ((const char *)hdr->line, ':'))
    rfc822_capitalize_header_name ((char *)hdr->line);

  *msg->current_part->hdr_lines_tail = hdr;
  msg->current_part->hdr_lines_tail = &hdr->next;

  /* Lets help the caller to prevent mail loops and issue an event for
   * every Received header.  The canonicalized copy is checked so that
   * the field name is matched regardless of the case used by the
   * sender.  */
  if (length >= 9 && !memcmp (hdr->line, "Received:", 9))
    do_callback (msg, RFC822PARSE_RCVD_SEEN);
  return 0;
}
/****************
 * Process the body line LINE of LENGTH bytes.  The line is only
 * inspected for being a delimiter built from the current boundary:
 *
 *  "--" BOUNDARY       Issue RFC822PARSE_BOUNDARY, leave the body
 *                      state and, unless we were in the preamble,
 *                      prepare a new part for the following headers.
 *  "--" BOUNDARY "--"  Issue RFC822PARSE_LAST_BOUNDARY, move up to
 *                      the parent part and issue RFC822PARSE_LEVEL_UP.
 *
 * While in the preamble the RFC822PARSE_PREAMBLE event is issued for
 * each line.  Returns 0 or the first error encountered.
 *
 * Note: We handle the body transparently to allow binary zeroes in it.
 */
static int
insert_body (rfc822parse_t msg, const unsigned char *line, size_t length)
{
  enum { NO_DELIM, PART_DELIM, CLOSE_DELIM } kind = NO_DELIM;
  int rc = 0;

  /* Classify the line.  */
  if (length > 2 && line[0] == '-' && line[1] == '-' && msg->boundary)
    {
      size_t blen = strlen (msg->boundary);

      if (length == blen + 2)
        {
          if (!memcmp (line + 2, msg->boundary, blen))
            kind = PART_DELIM;
        }
      else if (length == blen + 4
               && line[length - 2] == '-' && line[length - 1] == '-'
               && !memcmp (line + 2, msg->boundary, blen))
        kind = CLOSE_DELIM;
    }

  /* Act on it.  */
  switch (kind)
    {
    case PART_DELIM:
      rc = do_callback (msg, RFC822PARSE_BOUNDARY);
      msg->in_body = 0;
      if (!rc && !msg->in_preamble)
        rc = transition_to_header (msg);
      msg->in_preamble = 0;
      break;

    case CLOSE_DELIM:
      rc = do_callback (msg, RFC822PARSE_LAST_BOUNDARY);
      msg->boundary = NULL; /* No current boundary anymore. */
      set_current_part_to_parent (msg);

      /* Fixme: The next should actually be sent right before the
         next boundary, so that we can mark the epilogue. */
      if (!rc)
        rc = do_callback (msg, RFC822PARSE_LEVEL_UP);
      break;

    default:
      break;
    }

  if (msg->in_preamble && !rc)
    rc = do_callback (msg, RFC822PARSE_PREAMBLE);

  return rc;
}
/* Insert the next line into the parser.  LINE is a buffer of LENGTH
 * bytes.  Depending on the parser state the line is processed as a
 * body line or as a header line.  Return 0 on success or true on
 * error with errno set appropriately.  */
int
rfc822parse_insert (rfc822parse_t msg, const unsigned char *line, size_t length)
{
  if (msg->in_body)
    return insert_body (msg, line, length);
  return insert_header (msg, line, length);
}
/* Tell the parser that we have finished the message.  This delivers
 * the RFC822PARSE_FINISH event and returns the result of doing so.  */
int
rfc822parse_finish (rfc822parse_t msg)
{
  int rc = do_callback (msg, RFC822PARSE_FINISH);

  return rc;
}
/****************
 * Get a copy of a header line.  The line is returned as one long
 * string with LF to separate the continuation lines.  Caller must free
 * the returned buffer.  WHICH may be used to enumerate over all lines.
 * Wildcards are allowed.  This function works on the current headers;
 * i.e. the regular mail headers or the MIME headers of the current
 * part.
 *
 * WHICH gives the mode:
 *  -1 := Take the last occurrence
 *   n := Take the n-th one.
 *
 * Returns a newly allocated buffer or NULL on error.  errno is set in
 * case of a memory failure or set to 0 if the requested field is not
 * available.
 *
 * If VALUEOFF is not NULL it will receive the offset of the first non
 * space character in the value part of the line (i.e. after the first
 * colon).  VALUEOFF is not touched if NULL is returned.
 */
char *
rfc822parse_get_field (rfc822parse_t msg, const char *name, int which,
                       size_t *valueoff)
{
  HDR_LINE h, h2;
  char *buf, *p;
  size_t n;

  h = find_header (msg, name, which, NULL);
  if (!h)
    {
      errno = 0;
      return NULL; /* no such field */
    }

  /* Each line needs room for its text plus one byte for the LF or,
     for the last line, the terminating NUL.  */
  n = strlen ((const char *)h->line) + 1;
  for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
    n += strlen ((const char *)h2->line) + 1;

  buf = p = malloc (n);
  if (!buf)
    return NULL; /* Out of memory; BUF must not be used below.  */

  p = stpcpy (p, (const char *)h->line);
  *p++ = '\n';
  for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
    {
      p = stpcpy (p, (const char *)h2->line);
      *p++ = '\n';
    }
  p[-1] = 0; /* Replace the last LF by the terminator.  */

  if (valueoff)
    {
      p = strchr (buf, ':');
      if (!p)
        *valueoff = 0; /* Oops: should never happen. */
      else
        {
          p++;
          while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
            p++;
          *valueoff = p - buf;
        }
    }

  return buf;
}
/****************
 * Enumerate all header lines of the current part.  The caller has to
 * provide the address of a pointer which has to be initialized to
 * NULL; the caller should then never change this pointer until the
 * enumeration has been closed by passing the address of the pointer
 * again but with MSG set to NULL.
 * The function returns pointers to all the header lines, one per
 * call, or NULL when all lines have been enumerated or no headers are
 * available.
 */
const char *
rfc822parse_enum_header_lines (rfc822parse_t msg, void **context)
{
  HDR_LINE cur;

  /* A NULL MSG closes the enumeration; a context equal to MSG marks
     an exhausted list.  */
  if (!msg || *context == msg || !msg->current_part)
    return NULL;

  cur = *context;
  if (!cur)
    cur = msg->current_part->hdr_lines;  /* First call.  */

  if (!cur)
    {
      *context = msg; /* Mark end of list. */
      return NULL;
    }

  /* Remember where to continue; MSG serves as the end marker.  */
  if (cur->next)
    *context = cur->next;
  else
    *context = msg;
  return (const char *)cur->line;
}
/****************
 * Find a header field in the headers of the current part.  If NAME
 * ends in an asterisk this is meant to be a wildcard: all field names
 * starting with the string before the asterisk match.  The comparison
 * is done bytewise against the stored field names.  Continuation lines
 * and lines without a valid field name are skipped.
 *
 *  which  -1 : Retrieve the last field
 *         >0 : Retrieve the n-th field
 *
 * RPREV may be used to return the predecessor of the returned field;
 * which may be NULL for the very first one.  It has to be initialized
 * to either NULL, in which case the search starts at the first header
 * line, or it may point to a header line where the search should
 * start.  RPREV is only updated if a field is returned.
 *
 * Returns the header line or NULL if no matching field was found.
 */
static HDR_LINE
find_header (rfc822parse_t msg, const char *name, int which, HDR_LINE *rprev)
{
  HDR_LINE hdr, prev = NULL, mark = NULL, mark_prev = NULL;
  const char *colon;
  size_t namelen, n;
  int found = 0;
  int glob = 0;

  if (!msg->current_part)
    return NULL;

  namelen = strlen (name);
  if (namelen && name[namelen - 1] == '*')
    {
      namelen--;
      glob = 1;
    }

  hdr = msg->current_part->hdr_lines;
  if (rprev && *rprev)
    {
      /* spool forward to the requested starting place.
       * we cannot simply set this as we have to return
       * the previous list element too */
      for (; hdr && hdr != *rprev; prev = hdr, hdr = hdr->next)
        ;
    }

  for (; hdr; prev = hdr, hdr = hdr->next)
    {
      if (hdr->cont)
        continue;
      colon = strchr ((const char *)hdr->line, ':');
      if (!colon)
        continue; /* invalid header, just skip it. */
      n = colon - (const char *)hdr->line;
      if (!n)
        continue; /* invalid name */
      if ((glob ? (namelen <= n) : (namelen == n))
          && !memcmp (hdr->line, name, namelen))
        {
          found++;
          if (which == -1)
            {
              /* Remember the latest match along with its own
                 predecessor; PREV keeps moving until the end of the
                 list and thus can't be used after the loop.  */
              mark = hdr;
              mark_prev = prev;
            }
          else if (found == which)
            {
              if (rprev)
                *rprev = prev;
              return hdr;
            }
        }
    }
  if (mark && rprev)
    *rprev = mark_prev;
  return mark;
}
/* Return a pointer to the first character of the string S which is
 * neither a space, a TAB, a CR nor an LF.  This may be the
 * terminating NUL.  */
static const char *
skip_ws (const char *s)
{
  return s + strspn (s, " \t\r\n");
}
/* Free the token T and all tokens linked after it.  T may be NULL.  */
static void
release_token_list (TOKEN t)
{
  TOKEN following;

  for (; t; t = following)
    {
      following = t->next;
      /* fixme: If we have owner_pantry, put the token back to
       * this pantry so that it can be reused later */
      free (t);
    }
}
/* Allocate a new unlinked token of type TYPE with all flags cleared.
 * If BUF is not NULL the first LENGTH bytes of it are stored as the
 * text of the token; otherwise the text is the empty string.  Returns
 * NULL if the allocation failed.  */
static TOKEN
new_token (enum token_type type, const char *buf, size_t length)
{
  TOKEN tok;

  /* fixme: look through our pantries to find a suitable
   * token for reuse */
  tok = malloc (sizeof *tok + length);
  if (!tok)
    return NULL;

  tok->next = NULL;
  tok->type = type;
  memset (&tok->flags, 0, sizeof (tok->flags));
  if (buf)
    memcpy (tok->data, buf, length);
  tok->data[buf ? length : 0] = 0; /* Make sure it is a C string. */

  return tok;
}
/* Append the LENGTH bytes at BUF to the text of the token OLD and
 * return the resulting token, which keeps the type, the flags and the
 * successor of OLD.  BUF must not point into OLD.
 *
 * OLD is consumed in any case: on success the returned token replaces
 * it (and may or may not have the same address); if memory could not
 * be allocated, OLD and the tokens linked after it are released and
 * NULL is returned with errno left as set by the failed allocation.
 * Thus the caller must not use OLD after this call.  */
static TOKEN
append_to_token (TOKEN old, const char *buf, size_t length)
{
  size_t n = strlen (old->data);
  TOKEN t;

  t = realloc (old, sizeof *t + n + length);
  if (!t)
    {
      /* realloc leaves OLD untouched on failure.  The caller loses
         its reference by storing our return value, thus we need to
         release OLD here to avoid a memory leak.  */
      int save = errno;

      release_token_list (old);
      errno = save;
      return NULL;
    }

  memcpy (t->data + n, buf, length);
  t->data[n + length] = 0;
  return t;
}
/*
   Parse a field into tokens as defined by rfc822.

   HDR is the first line of the field; continuation lines linked after
   it are parsed as part of the same field.  The field name and the
   colon are not part of the result.  The value is split like this:

     - Comments in parentheses are skipped.  They may be nested and
       may contain quoted strings.
     - A quoted string or a domain literal yields one tQUOTED or
       tDOMAINLIT token holding the text between the delimiters.  A
       backslash protects the following character from being taken as
       the closing delimiter; the backslash itself is kept in the text.
     - One of the characters from the second delimiter set yields a
       tSPECIAL token holding just that character.
     - Whitespace separates tokens but yields no token.
     - A run of other printable 7 bit characters yields a tATOM token.
     - A run of any other characters yields one single tSPACE token
       with empty text.

   For the fields listed in tspecial_header the tspecials delimiter
   sets are used instead of the specials sets.  The field name is
   compared bytewise against that list.

   Returns the list of tokens.  NULL is returned with errno set to 0
   if HDR is NULL, if the line has no colon or an empty name, and also
   if the value did not yield any token.  If a token could not be
   allocated, all tokens collected so far are released and NULL is
   returned with errno as left by the failed allocation.
 */
static TOKEN
parse_field (HDR_LINE hdr)
{
  /* Characters terminating an atom (first set) and characters which
     are returned as tSPECIAL tokens (second set).  */
  static const char specials[] = "<>@.,;:\\[]\"()";
  static const char specials2[] = "<>@.,;:";
  /* Same as above but for the fields listed in tspecial_header.  */
  static const char tspecials[] = "/?=<>@,;:\\[]\"()";
  static const char tspecials2[] = "/?=<>@.,;:"; /* FIXME: really
                                                    include '.'?*/
  /* The fields parsed with the tspecials sets, along with the length
     of their names.  The table is terminated by a NULL name.  */
  static struct
  {
    const unsigned char *name;
    size_t namelen;
  } tspecial_header[] = {
    { "Content-Type", 12},
    { "Content-Transfer-Encoding", 25},
    { "Content-Disposition", 19},
    { NULL, 0}
  };
  const char *delimiters;   /* Set of characters terminating an atom.  */
  const char *delimiters2;  /* Set of characters yielding tSPECIAL.  */
  const unsigned char *line, *s, *s2;
  size_t n;
  int i, invalid = 0;  /* INVALID: the last thing seen was an invalid
                          character for which a tSPACE has already
                          been emitted.  */
  TOKEN t, tok, *tok_tail;  /* TOK is the head of the result list and
                               TOK_TAIL the link receiving the next
                               token.  */

  errno = 0;
  if (!hdr)
    return NULL;

  tok = NULL;
  tok_tail = &tok;

  line = hdr->line;
  if (!(s = strchr (line, ':')))
    return NULL; /* oops */

  n = s - line;  /* Length of the field name.  */
  if (!n)
    return NULL; /* oops: invalid name */

  /* Select the delimiter sets depending on the field name.  */
  delimiters = specials;
  delimiters2 = specials2;
  for (i = 0; tspecial_header[i].name; i++)
    {
      if (n == tspecial_header[i].namelen
          && !memcmp (line, tspecial_header[i].name, n))
        {
          delimiters = tspecials;
          delimiters2 = tspecials2;
          break;
        }
    }

  s++; /* Move over the colon. */
  for (;;)
    {
      /* At the end of a line either continue with the continuation
         line or return what we have.  */
      while (!*s)
        {
          if (!hdr->next || !hdr->next->cont)
            return tok; /* Ready. */

          /* Next item is a header continuation line. */
          hdr = hdr->next;
          s = hdr->line;
        }

      if (*s == '(')
        {
          /* A comment: skip up to the matching closing parenthesis.  */
          int level = 1;     /* Nesting depth of the parentheses.  */
          int in_quote = 0;  /* Inside a quoted string; parentheses
                                are not counted there.  */

          invalid = 0;
          for (s++;; s++)
            {
              while (!*s)
                {
                  if (!hdr->next || !hdr->next->cont)
                    goto oparen_out;
                  /* Next item is a header continuation line. */
                  hdr = hdr->next;
                  s = hdr->line;
                }

              if (in_quote)
                {
                  if (*s == '\"')
                    in_quote = 0;
                  else if (*s == '\\' && s[1]) /* what about continuation? */
                    s++;
                }
              else if (*s == ')')
                {
                  if (!--level)
                    break;
                }
              else if (*s == '(')
                level++;
              else if (*s == '\"')
                in_quote = 1;
            }
        oparen_out:
          if (!*s)
            ; /* Actually this is an error, but we don't care about it. */
          else
            s++;  /* Move over the closing parenthesis.  */
        }
      else if (*s == '\"' || *s == '[')
        {
          /* We do not check for non-allowed nesting of domainliterals */
          int term = *s == '\"' ? '\"' : ']';  /* The closing delimiter.  */
          invalid = 0;
          s++;
          t = NULL;

          /* Collect the text up to the closing delimiter.  Each pass
             of this loop handles the text found on one line.  */
          for (;;)
            {
              for (s2 = s; *s2; s2++)
                {
                  if (*s2 == term)
                    break;
                  else if (*s2 == '\\' && s2[1]) /* what about continuation? */
                    s2++;
                }

              /* The first chunk creates the token, the chunks from
                 continuation lines are appended to it.  */
              t = (t
                   ? append_to_token (t, s, s2 - s)
                   : new_token (term == '\"' ? tQUOTED : tDOMAINLIT, s, s2 - s));
              if (!t)
                goto failure;

              /* Stop if the delimiter has been found or if there is
                 no continuation line.  */
              if (*s2 || !hdr->next || !hdr->next->cont)
                break;
              /* Next item is a header continuation line. */
              hdr = hdr->next;
              s = hdr->line;
            }
          *tok_tail = t;
          tok_tail = &t->next;
          s = s2;
          if (*s)
            s++; /* skip the delimiter */
        }
      else if ((s2 = strchr (delimiters2, *s)))
        {
          /* Special characters which are not handled above. */
          invalid = 0;
          t = new_token (tSPECIAL, s, 1);
          if (!t)
            goto failure;
          *tok_tail = t;
          tok_tail = &t->next;
          s++;
        }
      else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
        {
          invalid = 0;
          s = skip_ws (s + 1);
        }
      else if (*s > 0x20 && !(*s & 128))
        {
          /* Atom. */
          invalid = 0;
          /* Find the end of the atom: a character which is not a
             printable 7 bit character or which is a delimiter.  */
          for (s2 = s + 1; *s2 > 0x20
                 && !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
            ;
          t = new_token (tATOM, s, s2 - s);
          if (!t)
            goto failure;
          *tok_tail = t;
          tok_tail = &t->next;
          s = s2;
        }
      else
        {
          /* Invalid character. */
          if (!invalid)
            {
              /* For parsing we assume only one space. */
              t = new_token (tSPACE, NULL, 0);
              if (!t)
                goto failure;
              *tok_tail = t;
              tok_tail = &t->next;
              invalid = 1;
            }
          s++;
        }
    }
  /*NOTREACHED*/

 failure:
  {
    /* Releasing the tokens may clobber errno, thus save it.  */
    int save = errno;
    release_token_list (tok);
    errno = save;
  }
  return NULL;
}
/****************
 * Find and parse a header field of the current part.
 * WHICH indicates what to do if there are multiple instances of the
 * same field (like "Received"); the following values are defined:
 *  -1 := Take the last occurrence
 *   0 := Reserved
 *   n := Take the n-th one.
 * Returns a handle for further operations on the parse context of the
 * field or NULL if the field was not found or could not be parsed.
 * NULL is also returned for the reserved value 0.
 */
rfc822parse_field_t
rfc822parse_parse_field (rfc822parse_t msg, const char *name, int which)
{
  HDR_LINE hdr = which ? find_header (msg, name, which, NULL) : NULL;

  return hdr ? parse_field (hdr) : NULL;
}
/* Release the parse context CTX as returned by
 * rfc822parse_parse_field.  CTX may be NULL.  */
void
rfc822parse_release_field (rfc822parse_field_t ctx)
{
  if (!ctx)
    return;
  release_token_list (ctx);
}
/****************
 * Check whether T points to a parameter.
 * A parameter starts with a semicolon and it is assumed that T
 * points to exactly this one.  The semicolon needs to be followed by
 * an atom, the special character '=' and then either nothing, a
 * quoted string or an atom.  Returns true for a parameter.
 */
static int
is_parameter (TOKEN t)
{
  TOKEN attr, equals, value;

  attr = t->next;
  if (!attr || attr->type != tATOM)
    return 0;

  equals = attr->next;
  if (!equals || equals->type != tSPECIAL || equals->data[0] != '=')
    return 0;

  value = equals->next;
  if (!value)
    return 1; /* We assume that a non-existing value is an empty one. */

  return value->type == tQUOTED || value->type == tATOM;
}
/*
   Some headers (Content-type) have a special syntax where
   attribute=value pairs are used after a leading semicolon.  The
   parse_field code knows about these fields and changes the parsing
   to the one defined in RFC2045.

   Returns a pointer to the value of the first parameter named ATTR.
   The pointer is valid as long as the parse context CTX is valid.
   NULL is returned in case that ATTR is not defined in the header; a
   missing value is represented by an empty string.

   With LOWER_VALUE set to true, the value of a matching parameter
   will be lowercased.  The names of all parameters inspected during
   the search are lowercased in any case.

   Note, that ATTR should be lowercase.
 */
const char *
rfc822parse_query_parameter (rfc822parse_field_t ctx, const char *attr,
                             int lower_value)
{
  TOKEN semi, name, value;

  for (semi = ctx; semi; semi = semi->next)
    {
      /* Only a semicolon can start a parameter.  */
      if (semi->type != tSPECIAL || semi->data[0] != ';')
        continue;
      if (!is_parameter (semi))
        continue;

      name = semi->next; /* We know that this is an atom */
      if (!name->flags.lowered)
        {
          lowercase_string (name->data);
          name->flags.lowered = 1;
        }
      if (strcmp (name->data, attr))
        continue;

      /* Found.  Skip the '='; VALUE is now either an atom, a quoted
       * string or NULL in which case we return an empty string. */
      value = name->next->next;
      if (!value)
        return "";
      if (lower_value && !value->flags.lowered)
        {
          lowercase_string (value->data);
          value->flags.lowered = 1;
        }
      return value->data;
    }

  return NULL;
}
/****************
 * This function may be used for the Content-Type header to figure out
 * the media type and subtype.  CTX must not be NULL.  The field needs
 * to start with an atom, a slash and another atom; otherwise NULL is
 * returned and SUBTYPE is not changed.  Note, that the returned media
 * type is lowercased as required by MIME; the subtype is lowercased
 * if it is requested.
 *
 * Returns: a pointer to the media type and if SUBTYPE is not NULL,
 * a pointer to the subtype.
 */
const char *
rfc822parse_query_media_type (rfc822parse_field_t ctx, const char **subtype)
{
  TOKEN major = ctx;
  TOKEN slash, minor;

  if (major->type != tATOM)
    return NULL;
  if (!major->flags.lowered)
    {
      lowercase_string (major->data);
      major->flags.lowered = 1;
    }

  slash = major->next;
  if (!slash || slash->type != tSPECIAL || slash->data[0] != '/')
    return NULL;

  minor = slash->next;
  if (!minor || minor->type != tATOM)
    return NULL;

  if (subtype)
    {
      if (!minor->flags.lowered)
        {
          lowercase_string (minor->data);
          minor->flags.lowered = 1;
        }
      *subtype = minor->data;
    }
  return major->data;
}
#ifdef TESTING
/* Internal debug function to print the structure of the message.
 * PART and all parts to its right are printed with an indentation
 * level of INDENT; contained parts are printed recursively with an
 * increased level.  If PART is NULL a heading is printed and the
 * output starts at the top of the tree of MSG.  */
static void
dump_structure (rfc822parse_t msg, part_t part, int indent)
{
  part_t node = part;

  if (!node)
    {
      printf ("*** Structure of this message:\n");
      node = msg->parts;
    }

  while (node)
    {
      part_t saved_current = msg->current_part;
      rfc822parse_field_t ctx;

      /* ugly hack - we should have a function to get part
         information.  The field lookup works on the current part,
         thus we temporarily switch to NODE.  */
      msg->current_part = node;
      ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
      msg->current_part = saved_current;

      if (!ctx)
        printf ("*** %*s text/plain [assumed]", indent*2, "");
      else
        {
          const char *major, *minor, *bnd;

          major = rfc822parse_query_media_type (ctx, &minor);
          if (major)
            printf ("*** %*s %s/%s", indent*2, "", major, minor);
          else
            printf ("*** %*s [not found]", indent*2, "");

          bnd = rfc822parse_query_parameter (ctx, "boundary", 0);
          if (bnd)
            printf (" (boundary=\"%s\")", bnd);
          rfc822parse_release_field (ctx);
        }
      putchar ('\n');

      if (node->down)
        dump_structure (msg, node->down, indent + 1);

      node = node->right;
    }
}
/* Print the value of the parameter NAME of the parsed field CTX.
 * Nothing is printed if CTX is NULL or the parameter is not
 * defined.  */
static void
show_param (rfc822parse_field_t ctx, const char *name)
{
  const char *value;

  value = ctx ? rfc822parse_query_parameter (ctx, name, 0) : NULL;
  if (value)
    printf ("*** %s: '%s'\n", name, value);
}
/* Return a printable name for EVENT.  */
static const char *
event_name (rfc822parse_event_t event)
{
  switch (event)
    {
    case RFC822PARSE_OPEN:          return "Open";
    case RFC822PARSE_CLOSE:         return "Close";
    case RFC822PARSE_CANCEL:        return "Cancel";
    case RFC822PARSE_T2BODY:        return "T2Body";
    case RFC822PARSE_FINISH:        return "Finish";
    case RFC822PARSE_RCVD_SEEN:     return "Rcvd_Seen";
    case RFC822PARSE_LEVEL_DOWN:    return "Level_Down";
    case RFC822PARSE_LEVEL_UP:      return "Level_Up";
    case RFC822PARSE_BOUNDARY:      return "Boundary";
    case RFC822PARSE_LAST_BOUNDARY: return "Last_Boundary";
    case RFC822PARSE_BEGIN_HEADER:  return "Begin_Header";
    case RFC822PARSE_PREAMBLE:      return "Preamble";
    case RFC822PARSE_EPILOGUE:      return "Epilogue";
    default:                        return "***invalid event***";
    }
}

/* Print a diagnostic line telling which EVENT has been received.  */
static void
show_event (rfc822parse_event_t event)
{
  printf ("*** got RFC822 event %s\n", event_name (event));
}
/* The callback used by the test program.  Each event is printed.  At
 * the transition to the body all header lines of the current part and
 * the media type along with the boundary and protocol parameters of
 * its Content-Type are printed as well.  DUMMY_ARG is not used.
 * Always returns 0.  */
static int
msg_cb (void *dummy_arg, rfc822parse_event_t event, rfc822parse_t msg)
{
  rfc822parse_field_t ctx;
  void *ectx = NULL;
  const char *line;
  const char *s1, *s2;

  show_event (event);
  if (event != RFC822PARSE_T2BODY)
    return 0;

  while ((line = rfc822parse_enum_header_lines (msg, &ectx)))
    printf ("*** HDR: %s\n", line);
  rfc822parse_enum_header_lines (NULL, &ectx); /* Close enumerator. */

  ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
  if (!ctx)
    {
      printf ("*** media: text/plain [assumed]\n");
      return 0;
    }

  s1 = rfc822parse_query_media_type (ctx, &s2);
  if (s1)
    printf ("*** media: '%s/%s'\n", s1, s2);
  else
    printf ("*** media: [not found]\n");
  show_param (ctx, "boundary");
  show_param (ctx, "protocol");
  rfc822parse_release_field (ctx);

  return 0;
}
/* Test driver: read a message from stdin, feed it line by line into
 * the parser and finally print the structure of the message.  The
 * program aborts if the parser can't be created or a line can't be
 * inserted.  ARGC and ARGV are not used.  */
int
main (int argc, char **argv)
{
  char line[5000];
  rfc822parse_t msg = rfc822parse_open (msg_cb, NULL);

  if (!msg)
    abort ();

  while (fgets (line, sizeof (line), stdin))
    {
      size_t length = strlen (line);
      const char *eol;

      /* Strip a trailing LF and then a trailing CR.  */
      for (eol = "\n\r"; *eol; eol++)
        {
          if (length && line[length - 1] == *eol)
            line[--length] = 0;
        }

      if (rfc822parse_insert (msg, (const unsigned char *)line, length))
        abort ();
    }

  dump_structure (msg, NULL, 0);

  rfc822parse_close (msg);
  return 0;
}
#endif
/*
Local Variables:
compile-command: "gcc -Wall -Wno-pointer-sign -g -DTESTING -o rfc822parse rfc822parse.c"
End:
*/
File Metadata
Details
Attached
Mime Type
text/x-c
Expires
Tue, Feb 3, 11:31 PM (19 h, 36 m)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
38/a6/94a2c5c0a9995b453274c0b62809
Attached To
rG GnuPG
Event Timeline
Log In to Comment