26 #include <curl/curl.h>
34 #include <BESContextManager.h>
39 #include "BESSyntaxUserError.h"
40 #include "BESForbiddenError.h"
41 #include "BESNotFoundError.h"
42 #include "BESTimeoutError.h"
43 #include "BESInternalError.h"
46 #include "TheBESKeys.h"
49 #include "BESStopWatch.h"
51 #include "BESSyntaxUserError.h"
52 #include "HttpNames.h"
53 #include "HttpUtils.h"
54 #include "ProxyConfig.h"
55 #include "AllowedHosts.h"
56 #include "CurlUtils.h"
57 #include "EffectiveUrlCache.h"
67 using std::stringstream;
68 using std::ostringstream;
// Prefix for log messages: the text CurlUtils:: followed by the name of the
// enclosing function (__func__) and the separator text () - .
71 #define prolog std::string("CurlUtils::").append(__func__).append("() - ")
// Attempt count at which the retry loops in this file stop and report failure.
75 static const unsigned int retry_limit = 10;
// One second expressed in microseconds (1000 * 1000); the retry loops below
// initialise their retry_time to a quarter of this value.
76 static const useconds_t uone_second = 1000 * 1000;
79 curl_slist *add_auth_headers(
struct curl_slist *request_headers);
// Message tables used by http_status_to_string(). Each table is indexed by
// (status - *_ERR_MIN), so entry 0 corresponds to 400 and 500 respectively.
// NOTE(review): the client table is declared with
// CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1 = 18 slots and the server table with 6,
// but this listing shows fewer initialisers than that. Confirm against the
// full file that every slot has a message, otherwise the trailing slots are
// null pointers and building a string from them is undefined.
84 #define CLIENT_ERR_MIN 400
85 #define CLIENT_ERR_MAX 417
86 const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
88 "Unauthorized: Contact the server administrator.",
90 "Forbidden: Contact the server administrator.",
91 "Not Found: The underlying data source or server could not be found.",
92 "Method Not Allowed.",
94 "Proxy Authentication Required.",
99 "Precondition Failed.",
100 "Request Entity Too Large.",
101 "Request URI Too Large.",
102 "Unsupported Media Type.",
103 "Requested Range Not Satisfiable.",
104 "Expectation Failed."
// Server side range: 500 through 505 inclusive.
107 #define SERVER_ERR_MIN 500
108 #define SERVER_ERR_MAX 505
109 const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
111 "Internal Server Error.",
114 "Service Unavailable.",
116 "HTTP Version Not Supported."
// Translate an HTTP status code into a human readable message.
//
// @param status HTTP status code.
// @return The matching entry of http_client_errors for 400..417, the matching
//         entry of http_server_errors for 500..505; for any other value a
//         message beginning with Unknown HTTP Error: followed by the code is
//         streamed into msg (the declaration and return of msg are not
//         visible in this listing).
127 string http_status_to_string(
int status) {
// Both range checks are inclusive at each end, so the index computed below
// always falls inside the corresponding table.
128 if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
129 return string(http_client_errors[status - CLIENT_ERR_MIN]);
130 else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
131 return string(http_server_errors[status - SERVER_ERR_MIN]);
134 msg <<
"Unknown HTTP Error: " << status;
// Build a space separated list of the CURLAUTH_* names whose bits are set in
// auth_type. Used only for debug output of the proxy authentication type.
//
// @param auth_type Bit mask of libcurl CURLAUTH_* values.
// @return The names that matched, separated by single spaces; empty when
//         nothing matched.
144 static string getCurlAuthTypeName(
const int auth_type) {
146 string authTypeString;
// Each section masks auth_type with one CURLAUTH_* constant; the lines that
// test the result (presumably if (match)) are not visible in this listing.
149 match = auth_type & CURLAUTH_BASIC;
151 authTypeString +=
"CURLAUTH_BASIC";
154 match = auth_type & CURLAUTH_DIGEST;
156 if (!authTypeString.empty())
157 authTypeString +=
" ";
158 authTypeString +=
"CURLAUTH_DIGEST";
161 match = auth_type & CURLAUTH_DIGEST_IE;
163 if (!authTypeString.empty())
164 authTypeString +=
" ";
165 authTypeString +=
"CURLAUTH_DIGEST_IE";
168 match = auth_type & CURLAUTH_GSSNEGOTIATE;
170 if (!authTypeString.empty())
171 authTypeString +=
" ";
172 authTypeString +=
"CURLAUTH_GSSNEGOTIATE";
175 match = auth_type & CURLAUTH_NTLM;
177 if (!authTypeString.empty())
178 authTypeString +=
" ";
179 authTypeString +=
"CURLAUTH_NTLM";
// NOTE(review): the next three sections all mask with CURLAUTH_ANY but append
// three different names (CURLAUTH_ANY, CURLAUTH_ANYSAFE, CURLAUTH_ONLY). If
// this code is compiled, any auth_type with a bit in CURLAUTH_ANY reports all
// three names. The second and third were presumably meant to use
// CURLAUTH_ANYSAFE and CURLAUTH_ONLY, and the composite masks need an equality
// comparison rather than a non-zero test. The gaps in the line numbering
// suggest these sections may sit inside a disabled preprocessor region -
// confirm before changing.
183 match = auth_type & CURLAUTH_ANY;
185 if(!authTypeString.empty())
186 authTypeString +=
" ";
187 authTypeString +=
"CURLAUTH_ANY";
191 match = auth_type & CURLAUTH_ANY;
193 if(!authTypeString.empty())
194 authTypeString +=
" ";
195 authTypeString +=
"CURLAUTH_ANYSAFE";
199 match = auth_type & CURLAUTH_ANY;
201 if(!authTypeString.empty())
202 authTypeString +=
" ";
203 authTypeString +=
"CURLAUTH_ONLY";
207 return authTypeString;
// libcurl write callback that is installed through CURLOPT_WRITEFUNCTION by
// init_effective_url_retriever_handle(). Only nmemb is named; the data
// pointer, the element size and the user pointer are deliberately unnamed.
// The body is not visible in this listing.
213 static size_t writeNothing(
char *,
size_t ,
size_t nmemb,
void * ) {
// libcurl write callback that forwards the received bytes to an already open
// file descriptor.
//
// @param data     Bytes delivered by libcurl.
// @param nmemb    Number of elements delivered (element size is unnamed).
// @param userdata Pointer to an int holding the destination file descriptor.
221 static size_t writeToOpenFileDescriptor(
char *data,
size_t ,
size_t nmemb,
void *userdata) {
// userdata is the address of the descriptor, not the descriptor itself.
223 int *fd = (
int *) userdata;
225 BESDEBUG(MODULE, prolog <<
"Bytes received " << nmemb << endl);
// NOTE(review): the byte count passed to write is nmemb alone, the unnamed
// size parameter is not multiplied in, and the result of write (ssize_t in
// POSIX, -1 on error) is narrowed to int. How the value is returned to libcurl
// is not visible in this listing.
226 int wrote = write(*fd, data, nmemb);
227 BESDEBUG(MODULE, prolog <<
"Bytes written " << wrote << endl);
// libcurl header callback: stores each received response header line in the
// vector of strings passed as resp_hdrs.
//
// @param ptr       Start of the header line delivered by libcurl.
// @param size      Element size.
// @param nmemb     Element count.
// @param resp_hdrs Pointer to a vector of string that receives the lines.
255 static size_t save_http_response_headers(
void *ptr,
size_t size,
size_t nmemb,
void *resp_hdrs) {
256 BESDEBUG(MODULE, prolog <<
"Inside the header parser." << endl);
257 vector<string> *hdrs =
static_cast<vector<string> *
>(resp_hdrs);
// Strip the line terminator: when the second to last element is a carriage
// return the last two elements are dropped, otherwise only the last one.
// NOTE(review): the second assign uses nmemb - 1; if it can run with
// nmemb == 0 the unsigned subtraction wraps. The guarding else line is not
// visible here - confirm.
260 string complete_line;
261 if (nmemb > 1 && *(
static_cast<char *
>(ptr) + size * (nmemb - 2)) ==
'\r')
262 complete_line.assign(
static_cast<char *
>(ptr), size * (nmemb - 2));
264 complete_line.assign(
static_cast<char *
>(ptr), size * (nmemb - 1));
// Keep non-empty lines that do not contain the text HTTP.
// NOTE(review): find searches the whole line, so a header whose value merely
// contains HTTP is discarded along with the status line.
267 if (complete_line !=
"" && complete_line.find(
"HTTP") == string::npos) {
268 BESDEBUG(MODULE, prolog <<
"Header line: " << complete_line << endl);
269 hdrs->push_back(complete_line);
// libcurl debug callback (installed through CURLOPT_DEBUGFUNCTION in init()).
// Copies the size bytes at msg into a string and writes it to the BES debug
// log with a label that depends on the curl_infotype value.
//
// @param info Kind of data libcurl is reporting.
// @param msg  Data buffer; copied using the explicit size.
// @param size Number of bytes at msg.
283 static int curl_debug(CURL *, curl_infotype info,
char *msg,
size_t size,
void *) {
284 string message(msg, size);
// The switch statement, several case labels and the break lines are not
// visible in this listing; only the log statements below are.
288 BESDEBUG(MODULE, prolog <<
"Text: " << message << endl);
290 case CURLINFO_HEADER_IN:
291 BESDEBUG(MODULE, prolog <<
"Header in: " << message << endl);
293 case CURLINFO_HEADER_OUT:
294 BESDEBUG(MODULE, prolog <<
"Header out: " << endl << message << endl);
296 case CURLINFO_DATA_IN:
297 BESDEBUG(MODULE, prolog <<
"Data in: " << message << endl);
299 case CURLINFO_DATA_OUT:
300 BESDEBUG(MODULE, prolog <<
"Data out: " << message << endl);
303 BESDEBUG(MODULE, prolog <<
"End: " << message << endl);
// NOTE(review): curl_infotype values are normally enum constants, not
// preprocessor macros; if so these two ifdef tests never succeed and the SSL
// cases are never compiled. Confirm against the libcurl headers in use.
305 #ifdef CURLINFO_SSL_DATA_IN
306 case CURLINFO_SSL_DATA_IN:
307 BESDEBUG(MODULE, prolog <<
"SSL Data in: " << message << endl );
break;
309 #ifdef CURLINFO_SSL_DATA_OUT
310 case CURLINFO_SSL_DATA_OUT:
311 BESDEBUG(MODULE, prolog <<
"SSL Data out: " << message << endl );
break;
314 BESDEBUG(MODULE, prolog <<
"Curl info: " << message << endl);
// Function object that accumulates header strings into a libcurl curl_slist.
// Each call appends one header with curl_slist_append; get_headers() exposes
// the list (its body is not visible in this listing).
// NOTE(review): std::unary_function was deprecated in C++11 and removed in
// C++17; the base class is not needed for the call operator to work.
// NOTE(review): nothing visible here frees d_cl, so the caller presumably owns
// the list returned by get_headers() - confirm.
326 class BuildHeaders :
public std::unary_function<const string &, void> {
327 struct curl_slist *d_cl;
330 BuildHeaders() : d_cl(0) {}
332 void operator()(
const string &header) {
333 BESDEBUG(MODULE, prolog <<
"Adding '" << header.c_str() <<
"' to the header list." << endl);
// curl_slist_append returns the new list head, which replaces d_cl.
334 d_cl = curl_slist_append(d_cl, header.c_str());
337 struct curl_slist *get_headers() {
// Apply the proxy settings held by http::ProxyConfig to a curl easy handle.
//
// @param ceh        The curl easy handle to configure.
// @param target_url The URL about to be requested; matched against the
//                   configured no-proxy regular expression.
// @return using_proxy, which starts as ProxyConfig::is_configured(); the
//         lines that may clear it on a no-proxy match are not visible here.
362 bool configure_curl_handle_for_proxy(CURL *ceh,
const string &target_url) {
363 BESDEBUG(MODULE, prolog <<
"BEGIN." << endl);
365 bool using_proxy = http::ProxyConfig::theOne()->is_configured();
368 BESDEBUG(MODULE, prolog <<
"Proxy has been configured..." << endl);
// Snapshot of the proxy configuration (the declaration of proxy is not
// visible in this listing).
373 string proxyHost = proxy->host();
374 int proxyPort = proxy->port();
375 string proxyPassword = proxy->proxy_password();
376 string proxyUser = proxy->user();
377 string proxyUserPW = proxy->password();
378 int proxyAuthType = proxy->auth_type();
379 string no_proxy_regex = proxy->no_proxy_regex();
// A URL that matches the no-proxy expression is reported in the log; match()
// returning anything other than -1 counts as a match.
386 if (!no_proxy_regex.empty()) {
387 BESDEBUG(MODULE, prolog <<
"Found NoProxyRegex." << endl);
389 if (r.match(target_url.c_str(), target_url.length()) != -1) {
391 prolog <<
"Found NoProxy match. Regex: " << no_proxy_regex <<
"; Url: " << target_url
// error_buffer is a local array; it is registered with the handle below and
// unregistered again before this function ends.
399 char error_buffer[CURL_ERROR_SIZE];
401 BESDEBUG(MODULE, prolog <<
"Setting up a proxy server." << endl);
402 BESDEBUG(MODULE, prolog <<
"Proxy host: " << proxyHost << endl);
403 BESDEBUG(MODULE, prolog <<
"Proxy port: " << proxyPort << endl);
405 set_error_buffer(ceh, error_buffer);
407 res = curl_easy_setopt(ceh, CURLOPT_PROXY, proxyHost.data());
408 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXY", error_buffer, __FILE__, __LINE__);
// NOTE(review): libcurl documents CURLOPT_PROXYPORT and CURLOPT_PROXYAUTH as
// taking a long; proxyPort and proxyAuthType are int and are passed through a
// variadic call without conversion. Cast to long when this is next touched.
410 res = curl_easy_setopt(ceh, CURLOPT_PROXYPORT, proxyPort);
411 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYPORT", error_buffer, __FILE__, __LINE__);
420 res = curl_easy_setopt(ceh, CURLOPT_PROXYAUTH, proxyAuthType);
421 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYAUTH", error_buffer, __FILE__, __LINE__);
422 BESDEBUG(MODULE, prolog <<
"Using CURLOPT_PROXYAUTH = " << getCurlAuthTypeName(proxyAuthType) << endl);
// Credentials: a separate user name and password take precedence; the
// combined user:password form is used only when no user name is set.
424 if (!proxyUser.empty()) {
425 res = curl_easy_setopt(ceh, CURLOPT_PROXYUSERNAME, proxyUser.data());
426 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYUSERNAME", error_buffer, __FILE__,
428 BESDEBUG(MODULE, prolog <<
"CURLOPT_PROXYUSERNAME : " << proxyUser << endl);
430 if (!proxyPassword.empty()) {
431 res = curl_easy_setopt(ceh, CURLOPT_PROXYPASSWORD, proxyPassword.data());
432 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYPASSWORD", error_buffer, __FILE__,
// NOTE(review): the proxy password, and the combined credential further down,
// are written to the debug log in clear text.
434 BESDEBUG(MODULE, prolog <<
"CURLOPT_PROXYPASSWORD: " << proxyPassword << endl);
436 }
else if (!proxyUserPW.empty()) {
437 res = curl_easy_setopt(ceh, CURLOPT_PROXYUSERPWD, proxyUserPW.data());
438 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYUSERPWD", error_buffer, __FILE__, __LINE__);
439 BESDEBUG(MODULE, prolog <<
"CURLOPT_PROXYUSERPWD : " << proxyUserPW << endl);
// Detach the local error buffer before it goes out of scope.
441 unset_error_buffer(ceh);
444 BESDEBUG(MODULE, prolog <<
"END. using_proxy: " << (using_proxy ?
"true" :
"false") << endl);
// Second definition of configure_curl_handle_for_proxy with the same
// signature as the one above.
// NOTE(review): two definitions with one signature cannot both be compiled;
// presumably a preprocessor conditional that is not visible in this listing
// selects one of them - confirm which one is live.
//
// This variant starts with using_proxy = false and keys the setup on a
// non-empty proxy host instead of ProxyConfig::is_configured().
//
// @param ceh        The curl easy handle to configure.
// @param target_url The URL about to be requested; matched against the
//                   configured no-proxy regular expression.
449 bool configure_curl_handle_for_proxy(CURL *ceh,
const string &target_url) {
450 BESDEBUG(MODULE, prolog <<
"BEGIN." << endl);
452 bool using_proxy =
false;
457 string proxyHost = proxy->host();
458 int proxyPort = proxy->port();
459 string proxyPassword = proxy->proxy_password();
460 string proxyUser = proxy->user();
461 string proxyUserPW = proxy->password();
462 int proxyAuthType = proxy->auth_type();
463 string no_proxy_regex = proxy->no_proxy_regex();
465 if (!proxyHost.empty()) {
476 BESDEBUG(MODULE, prolog <<
"Found proxy configuration." << endl);
// match() returning anything other than -1 counts as a no-proxy match.
482 if (!no_proxy_regex.empty()) {
483 BESDEBUG(MODULE, prolog <<
"Found NoProxyRegex." << endl);
485 if (r.match(target_url.c_str(), target_url.length()) != -1) {
487 prolog <<
"Found NoProxy match. Regex: " << no_proxy_regex <<
"; Url: " << target_url
495 char error_buffer[CURL_ERROR_SIZE];
497 BESDEBUG(MODULE, prolog <<
"Setting up a proxy server." << endl);
498 BESDEBUG(MODULE, prolog <<
"Proxy host: " << proxyHost << endl);
499 BESDEBUG(MODULE, prolog <<
"Proxy port: " << proxyPort << endl);
501 set_error_buffer(ceh, error_buffer);
503 res = curl_easy_setopt(ceh, CURLOPT_PROXY, proxyHost.data());
504 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXY", error_buffer, __FILE__, __LINE__);
// NOTE(review): int arguments are passed where libcurl documents a long for
// CURLOPT_PROXYPORT and CURLOPT_PROXYAUTH.
506 res = curl_easy_setopt(ceh, CURLOPT_PROXYPORT, proxyPort);
507 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYPORT", error_buffer, __FILE__, __LINE__);
516 res = curl_easy_setopt(ceh, CURLOPT_PROXYAUTH, proxyAuthType);
517 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYAUTH", error_buffer, __FILE__, __LINE__);
518 BESDEBUG(MODULE, prolog <<
"Using CURLOPT_PROXYAUTH = " << getCurlAuthTypeName(proxyAuthType) << endl);
520 if (!proxyUser.empty()) {
521 res = curl_easy_setopt(ceh, CURLOPT_PROXYUSERNAME, proxyUser.data());
522 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYUSERNAME", error_buffer, __FILE__, __LINE__);
523 BESDEBUG(MODULE, prolog <<
"CURLOPT_PROXYUSERNAME : " << proxyUser << endl);
525 if (!proxyPassword.empty()) {
526 res = curl_easy_setopt(ceh, CURLOPT_PROXYPASSWORD, proxyPassword.data());
527 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYPASSWORD", error_buffer, __FILE__,
// NOTE(review): credentials are written to the debug log in clear text here
// and in the CURLOPT_PROXYUSERPWD branch.
529 BESDEBUG(MODULE, prolog <<
"CURLOPT_PROXYPASSWORD: " << proxyPassword << endl);
532 else if (!proxyUserPW.empty()) {
533 res = curl_easy_setopt(ceh, CURLOPT_PROXYUSERPWD, proxyUserPW.data());
534 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYUSERPWD", error_buffer, __FILE__, __LINE__);
535 BESDEBUG(MODULE, prolog <<
"CURLOPT_PROXYUSERPWD : " << proxyUserPW << endl);
// Detach the local error buffer before it goes out of scope.
537 unset_error_buffer(ceh);
540 BESDEBUG(MODULE, prolog <<
"END." << endl);
// Convenience overload: creates a new curl easy handle with curl_easy_init()
// and configures it through the four argument init() overload.
//
// @param target_url            URL the handle will request.
// @param http_request_headers  Optional request header list (may be null).
// @param http_response_hdrs    Optional vector that receives response headers.
// @return Whatever the four argument overload returns for the new handle.
// The result of curl_easy_init() is forwarded without a check in this
// function; the four argument overload contains the failure throw.
548 CURL *init(
const string &target_url,
549 const struct curl_slist *http_request_headers,
550 vector<string> *http_response_hdrs) {
551 CURL *swanky_new_curl_easy_handle = curl_easy_init();
552 return init(swanky_new_curl_easy_handle, target_url, http_request_headers, http_response_hdrs);
// Configure an existing curl easy handle with the options this module uses
// for HTTP GET requests: URL, request headers, response header capture,
// encoding, authentication, netrc, cookies, redirects, user agent, verbose
// debugging and finally the proxy settings.
//
// @param ceh                   Handle to configure.
// @param target_url            URL to request.
// @param http_request_headers  Optional request header list (may be null).
// @param http_response_hdrs    Optional vector that receives response headers.
// Throws BESInternalError with the text Could not initialize cURL easy handle.
// (the condition guarding that throw is not visible in this listing).
569 CURL *init(CURL *ceh,
570 const string &target_url,
571 const struct curl_slist *http_request_headers,
572 vector<string> *http_response_hdrs
// Local buffer registered with the handle for the duration of this function
// so that failing setopt calls can report libcurl detail text.
574 char error_buffer[CURL_ERROR_SIZE];
579 throw BESInternalError(
"Could not initialize cURL easy handle.", __FILE__, __LINE__);
582 set_error_buffer(ceh, error_buffer);
585 res = curl_easy_setopt(ceh, CURLOPT_URL, target_url.c_str());
586 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_URL", error_buffer, __FILE__, __LINE__);
// Request headers are only installed when a list was supplied.
596 if (http_request_headers) {
598 res = curl_easy_setopt(ceh, CURLOPT_HTTPHEADER, http_request_headers);
599 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_HTTPHEADER", error_buffer, __FILE__, __LINE__);
// Response headers are captured by save_http_response_headers into the
// caller supplied vector. CURLOPT_WRITEHEADER is the older libcurl name for
// the header callback user pointer.
603 if (http_response_hdrs) {
604 res = curl_easy_setopt(ceh, CURLOPT_HEADERFUNCTION, save_http_response_headers);
605 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_HEADERFUNCTION", error_buffer, __FILE__, __LINE__);
610 res = curl_easy_setopt(ceh, CURLOPT_WRITEHEADER, http_response_hdrs);
611 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEHEADER", error_buffer, __FILE__, __LINE__);
// NOTE(review): libcurl option names are normally enum constants, not
// macros, so this ifndef presumably always selects the CURLOPT_ENCODING
// branch. The other branch refers to curl and check_setopt_result, names that
// are not declared in the visible lines of this function - confirm it is dead
// code.
615 #ifndef CURLOPT_ACCEPT_ENCODING
616 res = curl_easy_setopt(ceh, CURLOPT_ENCODING,
"");
617 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_ENCODING", error_buffer, __FILE__, __LINE__);
619 res = curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING,
"");
620 check_setopt_result(res, prolog,
"CURLOPT_ACCEPT_ENCODING", error_buffer, __FILE__,__LINE__);
623 res = curl_easy_setopt(ceh, CURLOPT_NOPROGRESS, 1L);
624 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_NOPROGRESS", error_buffer, __FILE__, __LINE__);
627 res = curl_easy_setopt(ceh, CURLOPT_NOSIGNAL, 1L);
628 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_NOSIGNAL", error_buffer, __FILE__, __LINE__);
// FAILONERROR is switched off, so HTTP error statuses do not make
// curl_easy_perform fail; eval_http_get_response() inspects the status code.
637 res = curl_easy_setopt(ceh, CURLOPT_FAILONERROR, 0L);
638 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_FAILONERROR", error_buffer, __FILE__, __LINE__);
644 res = curl_easy_setopt(ceh, CURLOPT_HTTPAUTH, (
long) CURLAUTH_ANY);
645 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_HTTPAUTH", error_buffer, __FILE__, __LINE__);
// NOTE(review): CURLOPT_NETRC is documented as taking a long; the enum value
// is passed here without a cast.
651 res = curl_easy_setopt(ceh, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
652 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_NETRC", error_buffer, __FILE__, __LINE__);
// A configured netrc file name overrides the libcurl default location.
655 string netrc_file = get_netrc_filename();
656 if (!netrc_file.empty()) {
657 res = curl_easy_setopt(ceh, CURLOPT_NETRC_FILE, netrc_file.c_str());
658 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_NETRC_FILE", error_buffer, __FILE__, __LINE__);
661 VERBOSE(prolog <<
" is using the netrc file '"
662 << ((!netrc_file.empty()) ? netrc_file :
"~/.netrc") <<
"'" << endl);
// The same per process cookie file is used for reading and for writing.
668 res = curl_easy_setopt(ceh, CURLOPT_COOKIEFILE, curl::get_cookie_filename().c_str());
669 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_COOKIEFILE", error_buffer, __FILE__, __LINE__);
671 res = curl_easy_setopt(ceh, CURLOPT_COOKIEJAR, curl::get_cookie_filename().c_str());
672 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_COOKIEJAR", error_buffer, __FILE__, __LINE__);
677 res = curl_easy_setopt(ceh, CURLOPT_FOLLOWLOCATION, 1L);
678 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_FOLLOWLOCATION", error_buffer, __FILE__, __LINE__);
680 res = curl_easy_setopt(ceh, CURLOPT_MAXREDIRS, max_redirects());
681 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_MAXREDIRS", error_buffer, __FILE__, __LINE__);
684 res = curl_easy_setopt(ceh, CURLOPT_USERAGENT, hyrax_user_agent().c_str());
685 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_USERAGENT", error_buffer, __FILE__, __LINE__);
// NOTE(review): the next section uses d_rcr, curl and check_setopt_result,
// none of which is declared in the visible lines, so it is presumably
// disabled by a preprocessor conditional. If it is ever enabled: the test
// negates the call result before comparing with 0, and the literal 0 is an
// int where libcurl documents a long.
689 if (!d_rcr->get_validate_ssl() == 0) {
690 res = curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
691 check_setopt_result(res, prolog,
"CURLOPT_SSL_VERIFYPEER", error_buffer, __FILE__, __LINE__);
692 res = curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
693 check_setopt_result(res, prolog,
"CURLOPT_SSL_VERIFYHOST", error_buffer, __FILE__, __LINE__);
// Verbose mode and the curl_debug callback; the condition that enables this
// section is not visible in this listing.
698 BESDEBUG(MODULE, prolog <<
"Curl version: " << curl_version() << endl);
699 res = curl_easy_setopt(ceh, CURLOPT_VERBOSE, 1L);
700 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_VERBOSE", error_buffer, __FILE__, __LINE__);
701 BESDEBUG(MODULE, prolog <<
"Curl in verbose mode." << endl);
703 res = curl_easy_setopt(ceh, CURLOPT_DEBUGFUNCTION, curl_debug);
704 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_DEBUGFUNCTION", error_buffer, __FILE__, __LINE__);
705 BESDEBUG(MODULE, prolog <<
"Curl debugging function installed." << endl);
// Detach the local error buffer before proxy setup registers its own.
709 unset_error_buffer(ceh);
711 curl::configure_curl_handle_for_proxy(ceh, target_url);
713 BESDEBUG(MODULE, prolog <<
"curl: " << (
void *) ceh << endl);
// Format a byte range as first-last for use with CURLOPT_RANGE.
//
// @param offset First byte of the range.
// @param size   Number of bytes in the range.
// The text streamed into range is offset, a dash, then offset + size - 1
// (the declaration and return of range are not visible in this listing).
// NOTE(review): the arithmetic is unsigned, so size == 0 with offset == 0
// wraps to the largest unsigned long long value instead of an empty range.
717 string get_range_arg_string(
const unsigned long long &offset,
const unsigned long long &size) {
719 range << offset <<
"-" << offset + size - 1;
720 BESDEBUG(MODULE, prolog <<
" range: " << range.str() << endl);
// Build a curl easy handle suited to discovering the effective URL of
// target_url: the handle from curl::init() is further configured to request
// only the byte range produced by get_range_arg_string(0, 4), to discard the
// body through writeNothing, and to collect response headers into resp_hdrs.
//
// @param target_url  URL to probe.
// @param req_headers Request header list passed on to curl::init().
// @param resp_hdrs   Receives the response header lines.
739 CURL *init_effective_url_retriever_handle(
const string &target_url,
struct curl_slist *req_headers,
740 vector<string> &resp_hdrs) {
741 char error_buffer[CURL_ERROR_SIZE];
747 ceh = curl::init(target_url, req_headers, &resp_hdrs);
749 set_error_buffer(ceh, error_buffer);
// The temporary string returned by get_range_arg_string lives until the end
// of this full expression, which covers the setopt call.
752 res = curl_easy_setopt(ceh, CURLOPT_RANGE, get_range_arg_string(0, 4).c_str());
753 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_RANGE", error_buffer, __FILE__, __LINE__);
755 res = curl_easy_setopt(ceh, CURLOPT_WRITEFUNCTION, writeNothing);
756 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEFUNCTION", error_buffer, __FILE__, __LINE__);
// curl::init() already installs the same header destination when a vector is
// supplied; this repeats it for the retriever handle.
761 res = curl_easy_setopt(ceh, CURLOPT_WRITEHEADER, &resp_hdrs);
762 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEHEADER", error_buffer, __FILE__, __LINE__);
// Detach the local error buffer before it goes out of scope.
764 unset_error_buffer(ceh);
// Fetch target_url with HTTP GET and write the response body to an open file
// descriptor, optionally capturing the response headers.
//
// @param target_url            URL to fetch.
// @param http_response_headers Optional vector that receives response headers.
// A further parameter (the descriptor referred to as fd below) is declared on
// a line that is not visible in this listing.
786 void http_get_and_write_resource(
const string &target_url,
788 vector<string> *http_response_headers) {
790 char error_buffer[CURL_ERROR_SIZE];
793 curl_slist *req_headers = NULL;
// NOTE(review): header_builder is not used in any visible line of this
// function.
794 BuildHeaders header_builder;
796 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
// Message used when the URL is rejected by the allowed hosts check (the
// check itself and the throw are not visible in this listing).
799 string err = (string)
"The specified URL " + target_url
800 +
" does not match any of the accessible services in"
801 +
" the allowed hosts list.";
802 BESDEBUG(MODULE, prolog << err << endl);
807 req_headers = add_auth_headers(req_headers);
811 ceh = init(target_url, req_headers, http_response_headers);
813 set_error_buffer(ceh, error_buffer);
815 res = curl_easy_setopt(ceh, CURLOPT_WRITEFUNCTION, writeToOpenFileDescriptor);
816 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEFUNCTION", error_buffer, __FILE__, __LINE__);
// The callback receives the address of fd, matching the int pointer cast in
// writeToOpenFileDescriptor. Which of the two option names is compiled
// depends on whether CURLOPT_WRITEDATA is a macro in the libcurl headers.
818 #ifdef CURLOPT_WRITEDATA
819 res = curl_easy_setopt(ceh, CURLOPT_WRITEDATA, &fd);
820 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEDATA", error_buffer, __FILE__, __LINE__);
822 res = curl_easy_setopt(ceh, CURLOPT_FILE, &fd);
823 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_FILE", error_buffer, __FILE__, __LINE__);
825 unset_error_buffer(ceh);
827 super_easy_perform(ceh);
// The header list and the handle are released twice in the visible lines;
// presumably one pair is the normal path and the other an exception handler
// (the try and catch lines are not visible) - confirm.
831 curl_slist_free_all(req_headers);
833 curl_easy_cleanup(ceh);
834 BESDEBUG(MODULE, prolog <<
"Called curl_easy_cleanup()." << endl);
838 curl_slist_free_all(req_headers);
840 curl_easy_cleanup(ceh);
843 BESDEBUG(MODULE, prolog <<
"END" << endl);
853 string error_message(
const CURLcode response_code,
char *error_buffer) {
854 std::ostringstream oss;
855 size_t len = strlen(error_buffer);
857 oss <<
"cURL_error_buffer: '" << error_buffer;
859 oss <<
"' cURL_message: '" << curl_easy_strerror(response_code);
860 oss <<
"' (code: " << (
int) response_code <<
")";
// libcurl write callback that copies the received bytes into the caller
// supplied buffer passed as data.
//
// @param buffer Bytes delivered by libcurl.
// @param size   Element size.
// @param nmemb  Element count.
// @param data   Destination buffer (registered through CURLOPT_WRITEDATA).
// NOTE(review): the copy always starts at the beginning of data and is not
// bounded by the destination size. libcurl may invoke a write callback
// several times per response, so later chunks overwrite earlier ones, and a
// chunk larger than the destination overruns it. Callers in this file pass a
// fixed char array. No terminating zero byte is written in the visible lines.
877 size_t c_write_data(
void *buffer,
size_t size,
size_t nmemb,
void *data) {
878 size_t nbytes = size * nmemb;
880 memcpy(data, buffer, nbytes);
// Fetch target_url with http_get() and return the response body as a string.
//
// @param target_url URL to fetch.
// NOTE(review): the response is received into a 1 MiB array with automatic
// storage and converted with the string constructor that stops at the first
// zero byte. Whether the array is zero filled beforehand is not visible in
// this listing; if it is not, the conversion can read past the received data.
890 std::string http_get_as_string(
const std::string &target_url) {
897 char response_buf[1024 * 1024];
899 http_get(target_url, response_buf);
900 string response(response_buf);
919 char response_buf[1024 * 1024];
921 curl::http_get(target_url, response_buf);
923 d.Parse(response_buf);
// Perform an HTTP GET of target_url and place the response body in
// response_buf through the c_write_data callback.
//
// @param target_url   URL to fetch.
// @param response_buf Caller owned destination buffer; its capacity is not
//                     communicated to this function.
// Throws BESInternalError when no curl easy handle could be acquired (the
// guarding condition is not visible in this listing).
932 void http_get(
const std::string &target_url,
char *response_buf) {
934 char errbuf[CURL_ERROR_SIZE];
938 curl_slist *request_headers = NULL;
// Authentication headers are taken from the BES context by add_auth_headers.
940 request_headers = add_auth_headers(request_headers);
944 ceh = curl::init(target_url, request_headers, NULL);
946 throw BESInternalError(
string(
"ERROR! Failed to acquire cURL Easy Handle! "), __FILE__, __LINE__);
949 set_error_buffer(ceh, errbuf);
952 res = curl_easy_setopt(ceh, CURLOPT_WRITEFUNCTION, c_write_data);
953 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEFUNCTION", errbuf, __FILE__, __LINE__);
956 res = curl_easy_setopt(ceh, CURLOPT_WRITEDATA,
reinterpret_cast<void *
>(response_buf));
957 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEDATA", errbuf, __FILE__, __LINE__);
959 unset_error_buffer(ceh);
961 super_easy_perform(ceh);
// Cleanup appears twice in the visible lines; presumably one pair belongs to
// the normal path and one to an exception handler that is not visible here.
964 curl_slist_free_all(request_headers);
966 curl_easy_cleanup(ceh);
970 curl_slist_free_all(request_headers);
972 curl_easy_cleanup(ceh);
// Create and configure a curl easy handle that writes the response body into
// response_buff through c_write_data, with redirects, cookies, HTTP
// authentication and netrc support enabled.
//
// @param target_url      URL to request.
// @param request_headers Request header list passed to curl::init().
// @param response_buff   Destination buffer for the response body.
// NOTE(review): this function uses d_handle, cookies_file and
// check_setopt_result, none of which is declared in the visible lines, while
// the rest of the file uses eval_curl_easy_setopt_result. It is presumably
// legacy code behind a preprocessor conditional - confirm whether it is built.
984 CURL *set_up_easy_handle(
const string &target_url,
struct curl_slist *request_headers,
char *response_buff) {
985 char errbuf[CURL_ERROR_SIZE];
989 d_handle = curl::init(target_url,request_headers,NULL);
991 throw BESInternalError(
string(
"ERROR! Failed to acquire cURL Easy Handle! "), __FILE__, __LINE__);
994 set_error_buffer(d_handle,errbuf);
997 res = curl_easy_setopt(d_handle, CURLOPT_WRITEFUNCTION, c_write_data);
998 check_setopt_result(res, prolog,
"CURLOPT_WRITEFUNCTION", errbuf, __FILE__, __LINE__);
1001 res = curl_easy_setopt(d_handle, CURLOPT_WRITEDATA,
reinterpret_cast<void *
>(response_buff));
1002 check_setopt_result(res, prolog,
"CURLOPT_WRITEDATA", errbuf, __FILE__, __LINE__);
1007 res = curl_easy_setopt(d_handle, CURLOPT_FOLLOWLOCATION, 1L);
1008 check_setopt_result(res, prolog,
"CURLOPT_FOLLOWLOCATION", errbuf, __FILE__, __LINE__);
// The same cookie file is used for reading and for writing.
1011 res = curl_easy_setopt(d_handle, CURLOPT_COOKIEFILE, cookies_file.c_str());
1012 check_setopt_result(res, prolog,
"CURLOPT_COOKIEFILE", errbuf, __FILE__, __LINE__);
1014 res = curl_easy_setopt(d_handle, CURLOPT_COOKIEJAR, cookies_file.c_str());
1015 check_setopt_result(res, prolog,
"CURLOPT_COOKIEJAR", errbuf, __FILE__, __LINE__);
1018 res = curl_easy_setopt(d_handle, CURLOPT_HTTPAUTH, (
long) CURLAUTH_ANY);
1019 check_setopt_result(res, prolog,
"CURLOPT_HTTPAUTH", errbuf, __FILE__, __LINE__);
1022 res = curl_easy_setopt(d_handle, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
1023 check_setopt_result(res, prolog,
"CURLOPT_NETRC", errbuf, __FILE__, __LINE__);
// A configured netrc file name overrides the libcurl default location.
1026 string netrc_file = get_netrc_filename();
1027 if (!netrc_file.empty()) {
1028 res = curl_easy_setopt(d_handle, CURLOPT_NETRC_FILE, netrc_file.c_str());
1029 check_setopt_result(res, prolog,
"CURLOPT_NETRC_FILE", errbuf, __FILE__, __LINE__);
// NOTE(review): the log text names get_easy_handle, not this function.
1032 VERBOSE(__FILE__ <<
"::get_easy_handle() is using the netrc file '"
1033 << ((!netrc_file.empty()) ? netrc_file :
"~/.netrc") <<
"'" << endl);
// Detach the local error buffer before it goes out of scope.
1036 unset_error_buffer(d_handle);
// Run curl_easy_perform on c_handle with retries. Each attempt is evaluated
// first by eval_curl_easy_perform_code (libcurl level) and then by
// eval_http_get_response (HTTP status level). When the attempt counter
// reaches retry_limit a message ending in Giving up. is logged; otherwise a
// retry is logged. The loop construct, the sleep between attempts and the
// throw are not visible in this listing.
//
// @param c_handle A fully configured curl easy handle.
1061 void super_easy_perform(CURL *c_handle) {
1062 unsigned int attempts = 0;
// Initial retry delay: a quarter of a second, in microseconds.
1063 useconds_t retry_time = uone_second / 4;
1066 char curlErrorBuf[CURL_ERROR_SIZE];
// The URL is read back from the handle for use in log messages.
1070 target_url = get_effective_url(c_handle, empty_str);
1072 if (target_url.empty())
1076 set_error_buffer(c_handle, curlErrorBuf);
// Clear any text left from a previous attempt.
1078 curlErrorBuf[0] = 0;
1080 BESDEBUG(MODULE, prolog <<
"Requesting URL: " << target_url <<
" attempt: " << attempts << endl);
1082 curl_code = curl_easy_perform(c_handle);
1083 success = eval_curl_easy_perform_code(c_handle, target_url, curl_code, curlErrorBuf, attempts);
1086 success = eval_http_get_response(c_handle, curlErrorBuf, target_url);
1091 if (attempts == retry_limit) {
1092 string msg = prolog +
"ERROR - Problem with data transfer. Number of re-tries exceeded. Giving up.";
1093 ERROR_LOG(msg << endl);
1097 ERROR_LOG(prolog <<
"ERROR - Problem with data transfer. Will retry (url: " << target_url <<
1098 " attempt: " << attempts <<
")." << endl);
// Detach the local error buffer before it goes out of scope.
1105 unset_error_buffer(c_handle);
// Older retry wrapper around curl_easy_perform for c_handle.
// NOTE(review): the visible lines mix two generations of the retry logic:
// one uses attempts with eval_curl_easy_perform_code and a two argument
// eval_http_get_response, the other uses tries with eval_get_response. The
// three argument eval_http_get_response defined in this file does not match
// the two argument call, so at least part of this function is presumably
// disabled by preprocessor conditionals that are not visible here - confirm
// before relying on it.
//
// @param c_handle A fully configured curl easy handle.
1114 void read_data(CURL *c_handle) {
1116 unsigned int attempts = 0;
1117 useconds_t retry_time = uone_second / 4;
1120 char curlErrorBuf[CURL_ERROR_SIZE];
// urlp points into memory owned by the handle.
// NOTE(review): the result of this getinfo call is not checked in the
// visible lines.
1123 curl_easy_getinfo(c_handle, CURLINFO_EFFECTIVE_URL, &urlp);
1132 set_error_buffer(c_handle, curlErrorBuf);
1137 BESDEBUG(MODULE, prolog <<
"Requesting URL: " << urlp <<
" attempt: " << attempts << endl);
1139 curl_code = curl_easy_perform(c_handle);
1140 success = eval_curl_easy_perform_code(c_handle, urlp, curl_code, curlErrorBuf, attempts);
1143 success = eval_http_get_response(c_handle, urlp);
1148 if (attempts == retry_limit) {
1149 string msg = prolog +
"ERROR - Problem with data transfer. Number of re-tries exceeded. Giving up.";
1154 LOG(prolog <<
"ERROR - Problem with data transfer. Will retry (url: " << urlp <<
1155 " attempt: " << attempts <<
")." << endl);
// Second variant of the loop body starts here.
1165 curlErrorBuf[0] = 0;
1166 curl_code = curl_easy_perform(c_handle);
1169 if (CURLE_OK != curl_code) {
1171 string(
"read_data() - ERROR! Message: ").append(error_message(curl_code, curlErrorBuf)),
1172 __FILE__, __LINE__);
1175 success = eval_get_response(c_handle, urlp);
1178 if (tries == retry_limit) {
1179 string msg = prolog +
"Data transfer error: Number of re-tries exceeded: "+ error_message(curl_code, curlErrorBuf);
1186 ss <<
"HTTP transfer 500 error, will retry (trial " << tries <<
" for: " << urlp <<
").";
1187 BESDEBUG(MODULE, ss.str());
// Detach the local error buffer before it goes out of scope.
1196 unset_error_buffer(c_handle);
// Return the base name of the cookie file. In the visible lines the value is
// HTTP_DEFAULT_COOKIES_FILE; the lines that may read a configured override
// and the condition around the assignment are not visible in this listing.
1200 string get_cookie_file_base() {
1202 string cookie_filename;
1205 cookie_filename = HTTP_DEFAULT_COOKIES_FILE;
1207 return cookie_filename;
// Return the cookie file name for the current process: the base name from
// get_cookie_file_base(), a dash, and the process id from getpid(). Using the
// pid gives each process its own cookie file.
1210 string get_cookie_filename() {
1211 string cookie_file_base = get_cookie_file_base();
1212 stringstream cf_with_pid;
1213 cf_with_pid << cookie_file_base <<
"-" << getpid();
1214 return cf_with_pid.str();
// Delete the cookie file of the current process with unlink(). A failure is
// reported to the error log and the debug log (the test of ret is not visible
// in this listing); nothing visible here throws.
1217 void clear_cookies() {
1218 string cf = get_cookie_filename();
1219 int ret = unlink(cf.c_str());
// NOTE(review): msg already starts with prolog, and the debug statement
// prepends prolog again, so the prefix appears twice in the debug output.
// NOTE(review): unlink also fails when the file never existed; confirm that
// logging this as an error is intended.
1221 string msg = prolog +
"Failed to unlink the cookie file: " + cf;
1222 ERROR_LOG(msg << endl);
1223 BESDEBUG(MODULE, prolog << msg << endl);
// Decide whether a failed request for target_url may be retried. The URL is
// compared with each regular expression in nr_regexs; the loop stops as soon
// as retryable becomes false. A full length match is logged (the assignment
// that clears retryable is not visible in this listing).
//
// @param target_url URL to test. NOTE(review): taken by value; a const
//                   reference would avoid a copy without changing callers.
// @return retryable, which starts as true.
1235 bool is_retryable(std::string target_url) {
1236 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
1237 bool retryable =
true;
// The lines that fill nr_regexs are not visible in this listing; the log
// text below refers to HTTP_NO_RETRY_URL_REGEX_KEY.
1239 vector<string> nr_regexs;
1243 vector<string>::iterator it;
1244 for (it = nr_regexs.begin(); it != nr_regexs.end() && retryable; it++) {
1245 BESRegex no_retry_regex((*it).c_str(), (*it).size());
// NOTE(review): elsewhere in this file BESRegex::match is compared with -1,
// which suggests a signed return; storing it in size_t turns -1 into a huge
// value. The equality test below still only succeeds on a full length match.
1246 size_t match_length;
1247 match_length = no_retry_regex.match(target_url.c_str(), target_url.size(), 0);
1248 if (match_length == target_url.size()) {
1249 BESDEBUG(MODULE, prolog <<
"The url: '" << target_url <<
"' fully matched the "
1250 << HTTP_NO_RETRY_URL_REGEX_KEY <<
": '" << *it <<
"'" << endl);
1255 BESDEBUG(MODULE, prolog <<
"END retryable: " << (retryable ?
"true" :
"false") << endl);
// Inspect the outcome of a completed transfer at the HTTP level.
// Reads the response code from the handle, logs redirect information, and
// for status codes of 400 and above builds and logs an error message. The
// switch cases, the throws and the return statements are not visible in this
// listing, so the mapping from status code to exception or retry is not
// documented here.
//
// @param ceh           The curl easy handle that performed the request.
// @param error_buffer  The libcurl error buffer used for the request.
// @param requested_url The URL originally requested (for messages).
1293 bool eval_http_get_response(CURL *ceh,
char *error_buffer,
const string &requested_url) {
1294 BESDEBUG(MODULE, prolog <<
"Requested URL: " << requested_url << endl);
1296 string last_accessed_url = get_effective_url(ceh, requested_url);
1297 BESDEBUG(MODULE, prolog <<
"Last Accessed URL(CURLINFO_EFFECTIVE_URL): " << last_accessed_url << endl);
// libcurl documents CURLINFO_RESPONSE_CODE as writing a long; the
// declaration of http_code is not visible in this listing.
1301 curl_code = curl_easy_getinfo(ceh, CURLINFO_RESPONSE_CODE, &http_code);
1302 if (curl_code == CURLE_GOT_NOTHING) {
1307 msg << prolog <<
"ERROR - cURL returned CURLE_GOT_NOTHING. Message: '";
1308 msg << error_message(curl_code, error_buffer) <<
"' ";
1309 msg <<
"CURLINFO_EFFECTIVE_URL: " << last_accessed_url <<
" ";
1310 msg <<
"A retry may be possible for: " << requested_url <<
")." << endl;
1311 BESDEBUG(MODULE, msg.str());
1312 ERROR_LOG(msg.str());
1315 else if (curl_code != CURLE_OK) {
1318 string(
"Error acquiring HTTP response code: ").append(curl::error_message(curl_code, error_buffer)),
1319 __FILE__, __LINE__);
// NOTE(review): the results of the next two getinfo calls are not checked.
1324 curl_easy_getinfo(ceh, CURLINFO_REDIRECT_COUNT, &redirects);
1325 BESDEBUG(MODULE, prolog <<
"CURLINFO_REDIRECT_COUNT: " << redirects << endl);
// redirect_url stays null when libcurl reports no redirect URL; whether the
// debug statement below is guarded against that is not visible here.
1327 char *redirect_url = NULL;
1328 curl_easy_getinfo(ceh, CURLINFO_REDIRECT_URL, &redirect_url);
1330 BESDEBUG(MODULE, prolog <<
"CURLINFO_REDIRECT_URL: " << redirect_url << endl);
// Everything from 400 upward is treated as an error.
1334 if (http_code >= 400) {
1335 msg <<
"ERROR - The HTTP GET request for the source URL: " << requested_url <<
" FAILED. ";
1336 msg <<
"CURLINFO_EFFECTIVE_URL: " << last_accessed_url <<
" ";
1337 BESDEBUG(MODULE, prolog << msg.str() << endl);
1339 msg <<
"The response had an HTTP status of " << http_code;
1340 msg <<
" which means '" << http_status_to_string(http_code) <<
"'";
1343 switch (http_code) {
1351 ERROR_LOG(msg.str() << endl);
1357 ERROR_LOG(msg.str() << endl);
1361 ERROR_LOG(msg.str() << endl);
1365 ERROR_LOG(msg.str() << endl);
// URLs matching a no-retry expression are reported as not retryable.
1374 if (!is_retryable(last_accessed_url)) {
1375 msg <<
" The semantics of this particular last accessed URL indicate that it should not be retried.";
1376 ERROR_LOG(msg.str() << endl);
1383 ERROR_LOG(msg.str() << endl);
// Inspect the CURLcode returned by curl_easy_perform.
// CURLE_SSL_CONNECT_ERROR, CURLE_SSL_CACERT_BADFILE and CURLE_GOT_NOTHING are
// logged with a note that a retry may be possible; any other code that is not
// CURLE_OK is logged as a data transfer problem. The assignments to success,
// the throw and the return are not visible in this listing. Several
// parameters (the handle ceh, curl_code and error_buffer, judging by their
// use below) are declared on lines that are not visible here.
//
// @param requested_url The URL originally requested (for messages).
//                      NOTE(review): a const string taken by value; a const
//                      reference would avoid the copy.
// @param attempt       Number of the current attempt (for messages).
1410 bool eval_curl_easy_perform_code(
1412 const string requested_url,
1415 const unsigned int attempt
1417 bool success =
true;
1418 string last_accessed_url = get_effective_url(ceh, requested_url);
1419 if (curl_code == CURLE_SSL_CONNECT_ERROR) {
1421 msg << prolog <<
"ERROR - cURL experienced a CURLE_SSL_CONNECT_ERROR error. Message: '";
1422 msg << error_message(curl_code, error_buffer) <<
"' ";
1423 msg <<
"CURLINFO_EFFECTIVE_URL: " << last_accessed_url <<
" ";
1424 msg <<
"A retry may be possible for: " << requested_url <<
" (attempt: " << attempt <<
")." << endl;
1425 BESDEBUG(MODULE, msg.str());
1426 ERROR_LOG(msg.str());
1429 else if (curl_code == CURLE_SSL_CACERT_BADFILE) {
1431 msg << prolog <<
"ERROR - cURL experienced a CURLE_SSL_CACERT_BADFILE error. Message: '";
1432 msg << error_message(curl_code, error_buffer) <<
"' ";
1433 msg <<
"CURLINFO_EFFECTIVE_URL: " << last_accessed_url <<
" ";
1434 msg <<
"A retry may be possible for: " << requested_url <<
" (attempt: " << attempt <<
")." << endl;
1435 BESDEBUG(MODULE, msg.str());
1436 ERROR_LOG(msg.str());
1439 else if (curl_code == CURLE_GOT_NOTHING) {
// NOTE(review): unlike the two branches above, the text after Message: has no
// opening quote character here while the closing one is still appended, so
// the logged message has an unbalanced quote.
1444 msg << prolog <<
"ERROR - cURL returned CURLE_GOT_NOTHING. Message: ";
1445 msg << error_message(curl_code, error_buffer) <<
"' ";
1446 msg <<
"CURLINFO_EFFECTIVE_URL: " << last_accessed_url <<
" ";
1447 msg <<
"A retry may be possible for: " << requested_url <<
" (attempt: " << attempt <<
")." << endl;
1448 BESDEBUG(MODULE, msg.str());
1449 ERROR_LOG(msg.str());
1452 else if (CURLE_OK != curl_code) {
1454 msg <<
"ERROR - Problem with data transfer. Message: " << error_message(curl_code, error_buffer);
// NOTE(review): last_accessed_url above already holds this value; the second
// lookup is redundant.
1455 string effective_url = get_effective_url(ceh, requested_url);
1456 msg <<
" CURLINFO_EFFECTIVE_URL: " << effective_url;
1457 BESDEBUG(MODULE, prolog << msg.str() << endl);
1458 ERROR_LOG(msg.str() << endl);
// Follow redirects starting at target_url and report the URL that was
// finally accessed.
//
// @param target_url        URL to start from.
// @param last_accessed_url Output: set to the effective URL read from the
//                          handle after the transfer.
// The request is made with a handle from
// init_effective_url_retriever_handle() and executed by super_easy_perform().
1472 void retrieve_effective_url(
const string &target_url,
string &last_accessed_url) {
1473 vector<string> resp_hdrs;
1476 curl_slist *request_headers = NULL;
1478 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
1481 request_headers = add_auth_headers(request_headers);
// Diagnostic output about which logging facilities are active; the macro
// names that wrap some of these expressions are not visible in this listing.
1485 prolog <<
"BESDebug::IsSet(" << MODULE <<
"): " << (
BESDebug::IsSet(MODULE) ?
"true" :
"false")
1487 BESDEBUG(MODULE, prolog <<
"BESDebug::IsSet(" << TIMING_LOG_KEY <<
"): "
1490 prolog <<
"BESLog::TheLog()->is_verbose(): " << (BESLog::TheLog()->is_verbose() ?
"true" :
"false")
1493 ceh = init_effective_url_retriever_handle(target_url, request_headers, resp_hdrs);
// A stopwatch is started only when one of the logging conditions holds (the
// first part of the condition is not visible in this listing).
1498 BESLog::TheLog()->is_verbose()) {
1499 sw.
start(prolog +
" Following Redirects Starting With: " + target_url);
1501 super_easy_perform(ceh);
1505 last_accessed_url = get_effective_url(ceh, target_url);
1506 BESDEBUG(MODULE, prolog <<
"Last Accessed URL(CURLINFO_EFFECTIVE_URL): " << last_accessed_url << endl);
1508 prolog <<
"Source URL: '" << target_url <<
"' CURLINFO_EFFECTIVE_URL: '" << last_accessed_url <<
"'"
// Cleanup appears twice in the visible lines; presumably one pair belongs to
// the normal path and one to an exception handler that is not visible here.
1511 if (request_headers)
1512 curl_slist_free_all(request_headers);
1514 curl_easy_cleanup(ceh);
1517 if (request_headers)
1518 curl_slist_free_all(request_headers);
1520 curl_easy_cleanup(ceh);
1536 vector<string> resp_hdrs;
1539 curl_slist *request_headers = NULL;
1541 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
1544 request_headers = add_auth_headers(request_headers);
1548 prolog <<
"BESDebug::IsSet(" << MODULE <<
"): " << (
BESDebug::IsSet(MODULE) ?
"true" :
"false")
1550 BESDEBUG(MODULE, prolog <<
"BESDebug::IsSet(" << TIMING_LOG_KEY <<
"): "
1552 BESDEBUG(MODULE, prolog <<
"BESLog::TheLog()->is_verbose(): "
1553 << (BESLog::TheLog()->is_verbose() ?
"true" :
"false") << endl);
1555 ceh = init_effective_url_retriever_handle(target_url, request_headers, resp_hdrs);
1560 BESLog::TheLog()->is_verbose()) {
1561 sw.
start(prolog +
" Following Redirects Starting With: " + target_url);
1563 super_easy_perform(ceh);
1567 string effective_url_str = get_effective_url(ceh, target_url);
1568 BESDEBUG(MODULE, prolog <<
"Last Accessed URL(CURLINFO_EFFECTIVE_URL): " << effective_url_str << endl);
1569 INFO_LOG(prolog <<
"Source URL: '" << target_url <<
"' CURLINFO_EFFECTIVE_URL: '" << effective_url_str
1573 auto *eurl =
new EffectiveUrl(effective_url_str, resp_hdrs);
1575 if (request_headers)
1576 curl_slist_free_all(request_headers);
1578 curl_easy_cleanup(ceh);
1583 if (request_headers)
1584 curl_slist_free_all(request_headers);
1586 curl_easy_cleanup(ceh);
1592 unsigned int attempts = 0;
1593 bool success =
true;
1594 useconds_t retry_time = uone_second / 4;
1596 char error_buffer[CURL_ERROR_SIZE];
1597 vector<string> resp_hdrs;
1601 struct curl_slist *request_headers = NULL;
1603 request_headers = get_auth_headers(request_headers);
1606 ceh = init_effective_url_retriever_handle(
url, request_headers, resp_hdrs);
1607 set_error_buffer(ceh, error_buffer);
1610 error_buffer[0] = 0;
1612 BESDEBUG(MODULE, prolog <<
"Requesting URL: " << target_url <<
" attempt: " << attempts << endl);
1614 curl_code = curl_easy_perform(ceh);
1615 success = eval_curl_easy_perform_code(ceh, target_url, curl_code, error_buffer, attempts);
1618 success = eval_http_get_response(ceh, target_url);
1620 if (attempts == retry_limit) {
1621 string msg = prolog +
1622 "ERROR - Problem with data transfer. Number of re-tries exceeded. Giving up.";
1626 LOG(prolog <<
"ERROR - Problem with data transfer. Will retry (url: " << target_url <<
1627 " attempt: " << attempts <<
")." << endl);
1638 char *effective_url = 0;
1639 curl_easy_getinfo(ceh, CURLINFO_EFFECTIVE_URL, &effective_url);
1640 BESDEBUG(MODULE, prolog <<
" CURLINFO_EFFECTIVE_URL: " << effective_url << endl);
1641 last_accessed_url = effective_url;
1643 LOG(prolog <<
"Source URL: '" << target_url <<
"' Last Accessed URL: '" << last_accessed_url <<
"'" << endl);
1645 unset_error_buffer(ceh);
1648 curl_slist_free_all(request_headers);
1649 curl_easy_cleanup(ceh);
1654 if (request_headers)
1655 curl_slist_free_all(request_headers);
1657 curl_easy_cleanup(ceh);
// Return the name of the netrc file to hand to libcurl. An empty result means
// no file is configured; callers in this file then leave CURLOPT_NETRC_FILE
// unset and log the default location. The lookup that fills netrc_filename
// and the condition choosing between the two log lines are not visible in
// this listing.
1675 string get_netrc_filename() {
1676 string netrc_filename;
1680 BESDEBUG(MODULE, prolog <<
"Using netrc file: " << netrc_filename << endl);
1683 BESDEBUG(MODULE, prolog <<
"Using default netrc file. (~/.netrc)" << endl);
1685 return netrc_filename;
// Register error_buffer with the handle through CURLOPT_ERRORBUFFER and
// evaluate the result of that call.
//
// @param ceh          The curl easy handle.
// @param error_buffer Buffer for libcurl error text, or NULL to unregister
//                     (this is how unset_error_buffer() uses it).
// NOTE(review): error_buffer is also forwarded as the message buffer to
// eval_curl_easy_setopt_result; when it is NULL and the setopt call fails,
// that function passes NULL on to error_message().
1693 void set_error_buffer(CURL *ceh,
char *error_buffer) {
1695 res = curl_easy_setopt(ceh, CURLOPT_ERRORBUFFER, error_buffer);
1696 curl::eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_ERRORBUFFER", error_buffer, __FILE__, __LINE__);
/**
 * Clears the error buffer registration of a cURL easy handle by calling
 * set_error_buffer() with NULL, which sets CURLOPT_ERRORBUFFER to NULL.
 *
 * @param ceh The cURL easy handle to update.
 */
1704 void unset_error_buffer(CURL *ceh) {
1705 set_error_buffer(ceh, NULL);
1713 string hyrax_user_agent() {
/**
 * Checks the CURLcode produced by a curl_easy_setopt() call. When the code is
 * not CURLE_OK, an error message is composed from msg_base, the option name
 * and curl::error_message(curl_code, ebuf).
 *
 * NOTE(review): only the last parameter (line) is visible in this excerpt.
 * The call in set_error_buffer() passes, in order: the CURLcode, a message
 * prefix, the option name, the error buffer, __FILE__ and __LINE__.
 * What happens to msg after it is composed is not visible either —
 * presumably it is thrown as a BES error carrying file/line; confirm.
 */
1733 void eval_curl_easy_setopt_result(
1739 unsigned int line) {
// Anything other than CURLE_OK is treated as a failure to set the option.
1740 if (curl_code != CURLE_OK) {
1742 msg << msg_base <<
"ERROR - cURL failed to set " << opt_name <<
" Message: " << curl::error_message(curl_code, ebuf);
1747 unsigned long max_redirects() {
/**
 * Builds the header line "<header_name>: <value>" and appends it to a cURL
 * string list with curl_slist_append().
 *
 * @param slist The list to append to (curl_slist_append() accepts NULL to
 * start a new list).
 * @param header_name The HTTP header name.
 * @param value The header value.
 *
 * NOTE(review): the check on `temp`, the declaration of `msg`, the use of the
 * composed error message and the return statement are not visible in this
 * excerpt. Presumably a NULL result from curl_slist_append() raises an error
 * and the new list head is returned otherwise — confirm.
 */
1762 curl_slist *append_http_header(curl_slist *slist,
const string &header_name,
const string &value)
1765 string full_header = header_name;
1766 full_header.append(
": ").append(value);
// NOTE(review): this writes the complete header, value included, to the debug
// log. add_auth_headers() passes the Authorization token through here, so the
// credential is logged when this debug context is enabled.
1768 BESDEBUG(MODULE, prolog << full_header << endl);
// curl_slist_append() copies the string, so passing c_str() of a local is fine.
1771 struct curl_slist *temp = curl_slist_append(slist, full_header.c_str());
// Error text; it repeats full_header, i.e. the header value as well.
1774 msg << prolog <<
"Encountered cURL Error setting the " << header_name <<
" header. full_header: " << full_header;
/**
 * Appends authentication-related HTTP headers to request_headers, taking the
 * values from the BES context manager:
 *   - context EDL_UID_KEY        -> header "User-Id"
 *   - context EDL_AUTH_TOKEN_KEY -> header "Authorization"
 *   - context EDL_ECHO_TOKEN_KEY -> header "Echo-Token"
 * A header is appended only when its context was found and its value is not
 * empty.
 *
 * @param request_headers The header list to extend; it is passed on to
 * append_http_header() and reassigned from its result.
 * @return The header list after any additions.
 *
 * NOTE(review): the declarations of `s` and `found` and the closing braces
 * are not visible in this excerpt.
 */
1811 curl_slist *add_auth_headers(curl_slist *request_headers) {
1815 s = BESContextManager::TheManager()->
get_context(EDL_UID_KEY, found);
1816 if (found && !s.empty()) {
1817 request_headers = append_http_header(request_headers,
"User-Id",s);
1820 s = BESContextManager::TheManager()->
get_context(EDL_AUTH_TOKEN_KEY, found);
1821 if (found && !s.empty()) {
1822 request_headers = append_http_header(request_headers,
"Authorization",s);
1825 s = BESContextManager::TheManager()->
get_context(EDL_ECHO_TOKEN_KEY, found);
1826 if (found && !s.empty()) {
1827 request_headers = append_http_header(request_headers,
"Echo-Token",s);
1830 return request_headers;
/**
 * Queries a cURL easy handle for CURLINFO_EFFECTIVE_URL and returns it as a
 * string.
 *
 * @param ceh The cURL easy handle to query.
 * @param requested_url The URL that was requested; used only in the error
 * message. NOTE(review): taken by value — a const reference would avoid a copy.
 * @return The effective URL reported by cURL.
 *
 * NOTE(review): the declaration of `msg` and whatever follows the BESDEBUG
 * call in the failure branch are not visible in this excerpt — presumably
 * the message is thrown as a BES error; confirm.
 */
1840 string get_effective_url(CURL *ceh,
string requested_url) {
// cURL owns the returned C string; it is copied into the std::string on return.
1841 char *effectve_url = NULL;
1842 CURLcode curl_code = curl_easy_getinfo(ceh, CURLINFO_EFFECTIVE_URL, &effectve_url);
1843 if (curl_code != CURLE_OK) {
1845 msg << prolog <<
"Unable to determine CURLINFO_EFFECTIVE_URL! Requested URL: " << requested_url;
1846 BESDEBUG(MODULE, msg.str() << endl);
// NOTE(review): if cURL returned CURLE_OK but left the pointer NULL, building
// a std::string from it would be undefined behavior — confirm libcurl always
// supplies a non-NULL string for this info.
1849 return effectve_url;
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
virtual bool start(std::string name)
error thrown if there is a user syntax error in the request or any other user error
error thrown if there is a user syntax error in the request or any other user error
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
static AllowedHosts * theHosts()
Static accessor for the singleton.
GenericDocument< UTF8<> > Document
GenericDocument with UTF8 encoding.
utility class for the HTTP catalog module
size_t load_max_redirects_from_keys()