10 #ifndef __PION_HTTP_PARSER_HEADER__
11 #define __PION_HTTP_PARSER_HEADER__
14 #include <boost/noncopyable.hpp>
15 #include <boost/function/function2.hpp>
16 #include <boost/logic/tribool.hpp>
17 #include <boost/system/error_code.hpp>
18 #include <boost/thread/once.hpp>
19 #include <pion/config.hpp>
20 #include <pion/logger.hpp>
21 #include <pion/http/message.hpp>
23 #ifndef BOOST_SYSTEM_NOEXCEPT
24 #define BOOST_SYSTEM_NOEXCEPT BOOST_NOEXCEPT
40 private boost::noncopyable
53 ERROR_METHOD_CHAR = 1,
64 ERROR_HEADER_NAME_SIZE,
65 ERROR_HEADER_VALUE_SIZE,
66 ERROR_INVALID_CONTENT_LENGTH,
68 ERROR_MISSING_CHUNK_DATA,
69 ERROR_MISSING_HEADER_DATA,
70 ERROR_MISSING_TOO_MUCH_CONTENT,
75 :
public boost::system::error_category
78 const char *name() const BOOST_SYSTEM_NOEXCEPT
{
    // Fixed label reported for this error category.
    return "parser";
}
/// Translates a parser error value into its human-readable description.
/// @param ev  numeric error value (compared against the ERROR_* enumerators)
/// @return    the description text for that value
79 std::string
message(
int ev)
const {
81 case ERROR_METHOD_CHAR:
82 return "invalid method character";
83 case ERROR_METHOD_SIZE:
84 return "method exceeds maximum size";
86 return "invalid URI character";
// NOTE(review): the case label for this entry is not visible in this view;
// by position it is the URI size limit, so the text names the URI — confirm.
88 return "URI exceeds maximum size";
89 case ERROR_QUERY_CHAR:
90 return "invalid query string character";
91 case ERROR_QUERY_SIZE:
92 return "query string exceeds maximum size";
93 case ERROR_VERSION_EMPTY:
94 return "HTTP version undefined";
95 case ERROR_VERSION_CHAR:
96 return "invalid version character";
97 case ERROR_STATUS_EMPTY:
98 return "HTTP status undefined";
99 case ERROR_STATUS_CHAR:
100 return "invalid status character";
101 case ERROR_HEADER_CHAR:
102 return "invalid header character";
103 case ERROR_HEADER_NAME_SIZE:
104 return "header name exceeds maximum size";
105 case ERROR_HEADER_VALUE_SIZE:
106 return "header value exceeds maximum size";
107 case ERROR_INVALID_CONTENT_LENGTH:
108 return "invalid Content-Length header";
109 case ERROR_CHUNK_CHAR:
110 return "invalid chunk character";
111 case ERROR_MISSING_HEADER_DATA:
112 return "missing header data";
113 case ERROR_MISSING_CHUNK_DATA:
114 return "missing chunk data";
115 case ERROR_MISSING_TOO_MUCH_CONTENT:
116 return "missing too much content";
// Generic text returned when none of the cases above produced a result.
118 return "parser error";
129 parser(
const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
130 : m_logger(PION_GET_LOGGER(
"pion.http.parser")), m_is_request(is_request),
131 m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
132 m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
133 m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
134 m_bytes_content_remaining(0), m_bytes_content_read(0),
135 m_bytes_last_read(0), m_bytes_total_read(0),
136 m_max_content_length(max_content_length),
137 m_parse_headers_only(false), m_save_raw_headers(false)
154 boost::tribool parse(
http::message& http_msg, boost::system::error_code& ec);
168 boost::tribool parse_missing_data(
http::message& http_msg, std::size_t len,
169 boost::system::error_code& ec);
186 m_read_end_ptr = ptr + len;
195 inline void load_read_pos(
const char *&read_ptr,
const char *&read_end_ptr)
const {
196 read_ptr = m_read_ptr;
197 read_end_ptr = m_read_end_ptr;
209 if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
211 m_message_parse_state = PARSE_END;
230 boost::system::error_code ec;
231 finish_header_parsing(http_msg, ec);
236 m_message_parse_state = PARSE_START;
237 m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
238 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
240 m_status_message.erase();
243 m_query_string.erase();
244 m_raw_headers.erase();
245 m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
249 inline bool eof(void) const
{
    // A NULL read pointer counts as end-of-input.
    if (m_read_ptr == NULL)
        return true;
    // Otherwise input is exhausted once the cursor is no longer before the end marker.
    return !(m_read_ptr < m_read_end_ptr);
}
252 inline std::size_t bytes_available(void) const
{
    // Nothing is left to read once eof() reports the buffer as consumed.
    if (eof())
        return 0;
    // Distance from the cursor to the end marker (last byte + 1).
    return static_cast<std::size_t>(m_read_end_ptr - m_read_ptr);
}
255 inline std::size_t gcount(void) const
{
    // Byte count recorded for the most recent parse operation.
    return m_bytes_last_read;
}
312 static bool parse_uri(
const std::string& uri, std::string& proto,
313 std::string& host, boost::uint16_t& port, std::string& path,
326 static bool parse_url_encoded(ihash_multimap& dict,
327 const char *ptr,
const std::size_t len);
340 static bool parse_multipart_form_data(ihash_multimap& dict,
341 const std::string& content_type,
342 const char *ptr,
const std::size_t len);
355 static bool parse_cookie_header(ihash_multimap& dict,
356 const char *ptr,
const std::size_t len,
357 bool set_cookie_header);
370 const std::string& cookie_header,
bool set_cookie_header)
372 return parse_cookie_header(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header);
385 const std::string& query)
387 return parse_url_encoded(dict, query.c_str(), query.size());
401 const std::string& content_type,
402 const std::string& form_data)
404 return parse_multipart_form_data(dict, content_type, form_data.c_str(), form_data.size());
419 boost::tribool finish_header_parsing(
http::message& http_msg,
420 boost::system::error_code& ec);
431 static bool parse_forwarded_for(
const std::string& header, std::string& public_ip);
436 return *m_error_category_ptr;
457 boost::tribool parse_headers(
http::message& http_msg, boost::system::error_code& ec);
464 void update_message_with_header_data(
http::message& http_msg)
const;
478 boost::system::error_code& ec);
492 boost::system::error_code& ec);
508 static void compute_msg_status(
http::message& http_msg,
bool msg_parsed_ok);
517 ec = boost::system::error_code(static_cast<int>(ev), get_error_category());
521 static void create_error_category(
void);
525 inline static bool is_char(
int c);
526 inline static bool is_control(
int c);
527 inline static bool is_special(
int c);
528 inline static bool is_digit(
int c);
529 inline static bool is_hex_digit(
int c);
530 inline static bool is_cookie_attribute(
const std::string& name,
bool set_cookie_header);
580 enum message_parse_state_t {
581 PARSE_START, PARSE_HEADERS, PARSE_FOOTERS, PARSE_CONTENT,
582 PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
587 enum header_parse_state_t {
588 PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
589 PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
590 PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
591 PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
592 PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
593 PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
594 PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
595 PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
596 PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
597 PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
602 enum chunk_parse_state_t {
603 PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE,
604 PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE,
605 PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
606 PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK,
607 PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
608 PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK,
609 PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
614 message_parse_state_t m_message_parse_state;
617 header_parse_state_t m_headers_parse_state;
620 chunk_parse_state_t m_chunked_content_parse_state;
623 payload_handler_t m_payload_handler;
626 boost::uint16_t m_status_code;
629 std::string m_status_message;
632 std::string m_method;
635 std::string m_resource;
638 std::string m_query_string;
641 std::string m_raw_headers;
644 std::string m_header_name;
647 std::string m_header_value;
650 std::string m_chunk_size_str;
653 std::size_t m_size_of_current_chunk;
656 std::size_t m_bytes_read_in_current_chunk;
659 std::size_t m_bytes_content_remaining;
662 std::size_t m_bytes_content_read;
665 std::size_t m_bytes_last_read;
668 std::size_t m_bytes_total_read;
671 std::size_t m_max_content_length;
674 bool m_parse_headers_only;
677 bool m_save_raw_headers;
680 static error_category_t * m_error_category_ptr;
683 static boost::once_flag m_instance_flag;
/// returns true if c is in the range [0, 127]
689 inline bool parser::is_char(
int c)
691 return(c >= 0 && c <= 127);
/// returns true if c is in the range [0, 31] or is equal to 127
694 inline bool parser::is_control(
int c)
696 return( (c >= 0 && c <= 31) || c == 127);
699 inline bool parser::is_special(
int c)
702 case '(':
case ')':
case '<':
case '>':
case '@':
703 case ',':
case ';':
case ':':
case '\\':
case '"':
704 case '/':
case '[':
case ']':
case '?':
case '=':
705 case '{':
case '}':
case ' ':
case '\t':
/// returns true if c is between '0' and '9' (inclusive)
712 inline bool parser::is_digit(
int c)
714 return(c >=
'0' && c <=
'9');
/// returns true if c is '0'-'9', 'a'-'f' or 'A'-'F'
717 inline bool parser::is_hex_digit(
int c)
719 return((c >=
'0' && c <=
'9') || (c >=
'a' && c <=
'f') || (c >=
'A' && c <=
'F'));
722 inline bool parser::is_cookie_attribute(
const std::string& name,
bool set_cookie_header)
724 return (name.empty() || name[0] ==
'$' || (set_cookie_header &&
732 boost::algorithm::iequals(name,
"Comment")
733 || boost::algorithm::iequals(name,
"Domain")
734 || boost::algorithm::iequals(name,
"Max-Age")
735 || boost::algorithm::iequals(name,
"Path")
736 || boost::algorithm::iequals(name,
"Secure")
737 || boost::algorithm::iequals(name,
"Version")
738 || boost::algorithm::iequals(name,
"Expires")
739 || boost::algorithm::iequals(name,
"HttpOnly")
static const boost::uint32_t COOKIE_NAME_MAX
maximum length for a cookie name
static bool parse_url_encoded(ihash_multimap &dict, const std::string &query)
static void create_error_category(void)
creates the unique parser error_category_t
static const std::size_t DEFAULT_CONTENT_MAX
maximum length for HTTP payload content
bool is_parsing_request(void) const
returns true if the parser is being used to parse an HTTP request
static const boost::uint32_t QUERY_NAME_MAX
maximum length for the name of a query string variable
static const boost::uint32_t HEADER_VALUE_MAX
maximum length for an HTTP header value
const char * m_read_end_ptr
points to the end of the read_buffer (last byte + 1)
void set_save_raw_headers(bool b)
sets parameter for saving raw HTTP header content
std::size_t gcount(void) const
returns the number of bytes read during the last parse operation
const std::string & get_raw_headers(void) const
returns the raw HTTP headers saved by the parser
static const boost::uint32_t STATUS_MESSAGE_MAX
maximum length for response status message
static const boost::uint32_t RESOURCE_MAX
maximum length for the resource requested
void load_read_pos(const char *&read_ptr, const char *&read_end_ptr) const
logger m_logger
primary logging interface used by this class
class-specific error category
void set_max_content_length(std::size_t n)
sets the maximum length for HTTP payload content
parser(const bool is_request, std::size_t max_content_length=DEFAULT_CONTENT_MAX)
static void set_error(boost::system::error_code &ec, error_value_t ev)
void concatenate_chunks(void)
error_value_t
class-specific error code values
bool get_save_raw_headers(void) const
returns true if the parser is saving raw HTTP header contents
bool eof(void) const
returns true if there are no more bytes available in the read buffer
static const boost::uint32_t QUERY_STRING_MAX
maximum length for the query string
void reset_max_content_length(void)
resets the maximum length for HTTP payload content to the default value
std::vector< char > chunk_cache_t
used to cache chunked data
void skip_header_parsing(http::message &http_msg)
const bool m_is_request
true if the message is an HTTP request; false if it is an HTTP response
bool get_parse_headers_only(void)
returns true if parsing headers only
static bool parse_cookie_header(ihash_multimap &dict, const std::string &cookie_header, bool set_cookie_header)
virtual void finished_parsing_headers(const boost::system::error_code &ec)
Called after we have finished parsing the HTTP message headers.
std::size_t get_max_content_length(void) const
returns the maximum length for HTTP payload content
static const boost::uint32_t COOKIE_VALUE_MAX
maximum length for the value of a cookie; also used for path and domain
const char * m_read_ptr
points to the next character to be consumed in the read_buffer
static error_category_t & get_error_category(void)
returns an instance of parser::error_category_t
static bool parse_multipart_form_data(ihash_multimap &dict, const std::string &content_type, const std::string &form_data)
logger get_logger(void)
returns the logger currently in use
static const boost::uint32_t HEADER_NAME_MAX
maximum length for an HTTP header name
std::size_t get_total_bytes_read(void) const
returns the total number of bytes read while parsing the HTTP message
std::size_t get_content_bytes_read(void) const
returns the total number of bytes read while parsing the payload content
void set_payload_handler(payload_handler_t &h)
defines a callback function to be used for consuming payload content
static const boost::uint32_t QUERY_VALUE_MAX
maximum length for the value of a query string variable
boost::function2< void, const char *, std::size_t > payload_handler_t
callback type used to consume payload content
void set_logger(logger log_ptr)
sets the logger to be used
virtual ~parser()
default destructor
bool is_parsing_response(void) const
returns true if the parser is being used to parse an HTTP response
bool check_premature_eof(http::message &http_msg)
void parse_headers_only(bool b=true)
std::size_t bytes_available(void) const
returns the number of bytes available in the read buffer
static const boost::uint32_t METHOD_MAX
maximum length for the request method
void reset(void)
resets the parser to its initial state
void set_read_buffer(const char *ptr, size_t len)