source: MondoRescue/branches/2.2.9/mindi-busybox/networking/wget.c@ 2725

Last change on this file since 2725 was 2725, checked in by Bruno Cornec, 13 years ago
  • Update mindi-busybox to 1.18.3 to avoid problems with the tar command which is now failing on recent versions with busybox 1.7.3
File size: 22.9 KB
Line 
1/* vi: set sw=4 ts=4: */
2/*
3 * wget - retrieve a file using HTTP or FTP
4 *
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
7 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
10 */
11#include "libbb.h"
12
13struct host_info {
14 // May be used if we ever will want to free() all xstrdup()s...
15 /* char *allocated; */
16 const char *path;
17 const char *user;
18 char *host;
19 int port;
20 smallint is_ftp;
21};
22
23
24/* Globals */
25struct globals {
26 off_t content_len; /* Content-length of the file */
27 off_t beg_range; /* Range at which continue begins */
28#if ENABLE_FEATURE_WGET_STATUSBAR
29 off_t transferred; /* Number of bytes transferred so far */
30 const char *curfile; /* Name of current file being transferred */
31 bb_progress_t pmt;
32#endif
33#if ENABLE_FEATURE_WGET_TIMEOUT
34 unsigned timeout_seconds;
35#endif
36 smallint chunked; /* chunked transfer encoding */
37 smallint got_clen; /* got content-length: from server */
38} FIX_ALIASING;
39#define G (*(struct globals*)&bb_common_bufsiz1)
40struct BUG_G_too_big {
41 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
42};
43#define INIT_G() do { \
44 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
45} while (0)
46
47
48/* Must match option string! */
49enum {
50 WGET_OPT_CONTINUE = (1 << 0),
51 WGET_OPT_SPIDER = (1 << 1),
52 WGET_OPT_QUIET = (1 << 2),
53 WGET_OPT_OUTNAME = (1 << 3),
54 WGET_OPT_PREFIX = (1 << 4),
55 WGET_OPT_PROXY = (1 << 5),
56 WGET_OPT_USER_AGENT = (1 << 6),
57 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
58 WGET_OPT_RETRIES = (1 << 8),
59 WGET_OPT_PASSIVE = (1 << 9),
60 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
61 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
62};
63
/* Values of progress_meter()'s "flag" argument */
enum {
	PROGRESS_START = -1,    /* (re)initialize the meter before drawing */
	PROGRESS_END   = 0,     /* final update: finish the line, reset counter */
	PROGRESS_BUMP  = 1,     /* ordinary periodic update */
};
69#if ENABLE_FEATURE_WGET_STATUSBAR
70static void progress_meter(int flag)
71{
72 if (option_mask32 & WGET_OPT_QUIET)
73 return;
74
75 if (flag == PROGRESS_START)
76 bb_progress_init(&G.pmt);
77
78 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
79 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
80
81 if (flag == PROGRESS_END) {
82 bb_putchar_stderr('\n');
83 G.transferred = 0;
84 }
85}
86#else
87static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
88#endif
89
90
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits HOST in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything which is not of the form "[addr%zone]" or "[addr%zone]:port"
 * is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (scope > cp) {
		/* '%' is not inside the brackets (e.g. "[::1]:%"): there is
		 * no zone id to strip. Copying "]..." forward onto itself
		 * would never reach the terminating NUL. */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]...".
	 * Regions overlap, hence memmove; +1 copies the NUL too. */
	memmove(scope, cp, strlen(cp) + 1);
}
126
/* fread() wrapper which restarts after EINTR.
 * Tries to read NMEMB bytes from STREAM into PTR and returns how many
 * bytes were stored; the count is short on EOF or on any error other
 * than an interrupted call. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *dst = (char*)ptr;
	size_t done = 0;

	for (;;) {
		/* Stale error/EOF flags would confuse the ferror() test below */
		clearerr(stream);
		errno = 0;
		done += fread(dst + done, 1, nmemb - done, stream);

		if (done == nmemb)
			break; /* got everything */
		if (!ferror(stream))
			break; /* EOF */
		if (errno != EINTR)
			break; /* real error */
		/* interrupted by a signal: go read the rest */
	}

	return done;
}
144
/* fgets() wrapper which restarts after EINTR.
 * Stores one line, or at most SIZE-1 bytes, from STREAM into S.
 * Returns S on success, NULL on EOF or on any error other than
 * an interrupted call. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		/* Stale error/EOF flags would confuse the ferror() test below */
		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);

		if (line != NULL)
			return line;
		if (!ferror(stream) || errno != EINTR)
			return NULL;
		/* interrupted by a signal: try again */
	}
}
159
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode STR into BUF and return BUF.
 * BUF is assumed to be char buf[512]; overlong input is silently
 * cut so that the encoded form stays well within it. */
static char *base64enc_512(char buf[512], const char *str)
{
	/* 3 input bytes become 4 output chars; keep some slack (paranoia) */
	enum { MAX_INPUT_LEN = 512/4*3 - 10 };
	unsigned len = strlen(str);

	if (len > MAX_INPUT_LEN)
		len = MAX_INPUT_LEN;
	bb_uuencode(buf, str, len, bb_uuenc_tbl_base64);
	return buf;
}
#endif
171
/* Cut S at its first byte which is below ' ' (control characters,
 * including CR, LF and TAB). Bytes >= 0x80 are kept.
 * Modifies S in place and returns it. */
static char* sanitize_string(char *s)
{
	char *end = s;

	/* Compare as unsigned so that high-bit bytes do not look "negative" */
	while ((unsigned char)*end >= ' ')
		end++;
	*end = '\0';

	return s;
}
180
181static FILE *open_socket(len_and_sockaddr *lsa)
182{
183 FILE *fp;
184
185 /* glibc 2.4 seems to try seeking on it - ??! */
186 /* hopefully it understands what ESPIPE means... */
187 fp = fdopen(xconnect_stream(lsa), "r+");
188 if (fp == NULL)
189 bb_perror_msg_and_die("fdopen");
190
191 return fp;
192}
193
/*
 * Optionally send an FTP command, then read the server's reply.
 *
 * If S1 is not NULL, "S1" "S2" CRLF is sent on FP first (S2 may be NULL).
 * Reply lines are then read into BUF (which must hold at least 510 bytes)
 * until one of the form "NNN text" is seen; every other line is skipped.
 *
 * Returns the numeric reply code. On return BUF holds the final reply
 * line with trailing CRLF removed, so BUF+4 is the reply text.
 * Dies if the connection yields no more lines.
 */
static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
{
	int result;

	if (s1) {
		if (!s2)
			s2 = "";
		fprintf(fp, "%s%s\r\n", s1, s2);
		fflush(fp);
	}

	do {
		char *buf_ptr;

		if (fgets(buf, 510, fp) == NULL) {
			bb_perror_msg_and_die("error getting response");
		}
		buf_ptr = strstr(buf, "\r\n");
		if (buf_ptr) {
			*buf_ptr = '\0';
		}
		/* Demand three digits and a space. Testing them left to right
		 * stops at the NUL of a short line, so buf[3] is looked at only
		 * when it was really filled by this fgets() and never contains
		 * leftovers of a previous, longer line. */
	} while (!isdigit((unsigned char)buf[0])
	      || !isdigit((unsigned char)buf[1])
	      || !isdigit((unsigned char)buf[2])
	      || buf[3] != ' '
	);

	/* Temporarily terminate after the code in order to convert it */
	buf[3] = '\0';
	result = xatoi_positive(buf);
	buf[3] = ' ';
	return result;
}
220
221static void parse_url(char *src_url, struct host_info *h)
222{
223 char *url, *p, *sp;
224
225 /* h->allocated = */ url = xstrdup(src_url);
226
227 if (strncmp(url, "http://", 7) == 0) {
228 h->port = bb_lookup_port("http", "tcp", 80);
229 h->host = url + 7;
230 h->is_ftp = 0;
231 } else if (strncmp(url, "ftp://", 6) == 0) {
232 h->port = bb_lookup_port("ftp", "tcp", 21);
233 h->host = url + 6;
234 h->is_ftp = 1;
235 } else
236 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
237
238 // FYI:
239 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
240 // 'GET /?var=a/b HTTP 1.0'
241 // and saves 'index.html?var=a%2Fb' (we save 'b')
242 // wget 'http://busybox.net?login=john@doe':
243 // request: 'GET /?login=john@doe HTTP/1.0'
244 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
245 // wget 'http://busybox.net#test/test':
246 // request: 'GET / HTTP/1.0'
247 // saves: 'index.html' (we save 'test')
248 //
249 // We also don't add unique .N suffix if file exists...
250 sp = strchr(h->host, '/');
251 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
252 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
253 if (!sp) {
254 h->path = "";
255 } else if (*sp == '/') {
256 *sp = '\0';
257 h->path = sp + 1;
258 } else { // '#' or '?'
259 // http://busybox.net?login=john@doe is a valid URL
260 // memmove converts to:
261 // http:/busybox.nett?login=john@doe...
262 memmove(h->host - 1, h->host, sp - h->host);
263 h->host--;
264 sp[-1] = '\0';
265 h->path = sp;
266 }
267
268 // We used to set h->user to NULL here, but this interferes
269 // with handling of code 302 ("object was moved")
270
271 sp = strrchr(h->host, '@');
272 if (sp != NULL) {
273 h->user = h->host;
274 *sp = '\0';
275 h->host = sp + 1;
276 }
277
278 sp = h->host;
279}
280
/*
 * Read one HTTP header line from FP into BUF (of size BUFSIZ).
 *
 * Returns NULL on EOF or at the empty line which ends the headers.
 * Otherwise BUF holds the NUL-terminated, lowercased header name,
 * and the returned pointer (inside BUF) is the header value with
 * leading whitespace skipped and CR/LF removed. A value which
 * does not fit into BUF is truncated; the rest of the line is discarded.
 * Dies on a line which does not look like "name: value".
 */
static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
{
	char *s, *hdrval;
	int c;

	/* *istrunc = 0; */

	/* retrieve header line */
	if (fgets(buf, bufsiz, fp) == NULL)
		return NULL;

	/* see if we are at the end of the headers */
	for (s = buf; *s == '\r'; ++s)
		continue;
	if (*s == '\n')
		return NULL;

	/* convert the header name to lower case.
	 * '_' is accepted too: such names do occur in the wild and
	 * are no reason to abort the whole download. */
	for (s = buf; isalnum((unsigned char)*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
		/* Only "A-Z" are touched: blindly setting bit 0x20
		 * would turn '_' into DEL */
		if (*s >= 'A' && *s <= 'Z')
			*s |= 0x20;
	}

	/* verify we are at the end of the header name */
	if (*s != ':')
		bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));

	/* locate the start of the header value */
	*s++ = '\0';
	hdrval = skip_whitespace(s);

	/* locate the end of header */
	while (*s && *s != '\r' && *s != '\n')
		++s;

	/* end of header found */
	if (*s) {
		*s = '\0';
		return hdrval;
	}

	/* Rats! The buffer isn't big enough to hold the entire header value */
	while (c = getc(fp), c != EOF && c != '\n')
		continue;
	/* *istrunc = 1; */
	return hdrval;
}
328
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Return a malloced, URL-encoded (see RFC 2396) copy of STR.
 * Letters, digits and the characters !&'()*-.=_~ are copied verbatim,
 * every other byte is written as %XX with uppercase hex digits. */
static char *URL_escape(const char *str)
{
	/* Worst case: each input byte grows into three, plus the NUL */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *out = res;
	unsigned char c;

	do {
		int verbatim;

		c = *str++;
		switch (c) {
		case '\0': /* the terminator is copied by the same code */
		case '!':
		case '&':
		case '\'':
		case '(':
		case ')':
		case '*':
		case '-':
		case '.':
		case '=':
		case '_':
		case '~':
			verbatim = 1;
			break;
		default:
			verbatim = (c >= '0' && c <= '9')
				|| ((c|0x20) >= 'a' && (c|0x20) <= 'z');
			break;
		}

		if (verbatim) {
			*out++ = c;
		} else {
			*out++ = '%';
			*out++ = bb_hexdigits_upcase[c >> 4];
			*out++ = bb_hexdigits_upcase[c & 0xf];
		}
	} while (c != '\0');

	return res;
}
#endif
366
367static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
368{
369 char buf[512];
370 FILE *sfp;
371 char *str;
372 int port;
373
374 if (!target->user)
375 target->user = xstrdup("anonymous:busybox@");
376
377 sfp = open_socket(lsa);
378 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
379 bb_error_msg_and_die("%s", sanitize_string(buf+4));
380
381 /*
382 * Splitting username:password pair,
383 * trying to log in
384 */
385 str = strchr(target->user, ':');
386 if (str)
387 *str++ = '\0';
388 switch (ftpcmd("USER ", target->user, sfp, buf)) {
389 case 230:
390 break;
391 case 331:
392 if (ftpcmd("PASS ", str, sfp, buf) == 230)
393 break;
394 /* fall through (failed login) */
395 default:
396 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
397 }
398
399 ftpcmd("TYPE I", NULL, sfp, buf);
400
401 /*
402 * Querying file size
403 */
404 if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
405 G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
406 if (G.content_len < 0 || errno) {
407 bb_error_msg_and_die("SIZE value is garbage");
408 }
409 G.got_clen = 1;
410 }
411
412 /*
413 * Entering passive mode
414 */
415 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
416 pasv_error:
417 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
418 }
419 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
420 // Server's IP is N1.N2.N3.N4 (we ignore it)
421 // Server's port for data connection is P1*256+P2
422 str = strrchr(buf, ')');
423 if (str) str[0] = '\0';
424 str = strrchr(buf, ',');
425 if (!str) goto pasv_error;
426 port = xatou_range(str+1, 0, 255);
427 *str = '\0';
428 str = strrchr(buf, ',');
429 if (!str) goto pasv_error;
430 port += xatou_range(str+1, 0, 255) * 256;
431 set_nport(lsa, htons(port));
432
433 *dfpp = open_socket(lsa);
434
435 if (G.beg_range) {
436 sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
437 if (ftpcmd(buf, NULL, sfp, buf) == 350)
438 G.content_len -= G.beg_range;
439 }
440
441 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
442 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
443
444 return sfp;
445}
446
447static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
448{
449 char buf[512];
450#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
451# if ENABLE_FEATURE_WGET_TIMEOUT
452 unsigned second_cnt;
453# endif
454 struct pollfd polldata;
455
456 polldata.fd = fileno(dfp);
457 polldata.events = POLLIN | POLLPRI;
458 ndelay_on(polldata.fd);
459#endif
460 progress_meter(PROGRESS_START);
461
462 if (G.chunked)
463 goto get_clen;
464
465 /* Loops only if chunked */
466 while (1) {
467 while (1) {
468 int n;
469 unsigned rdsz;
470
471 rdsz = sizeof(buf);
472 if (G.got_clen) {
473 if (G.content_len < (off_t)sizeof(buf)) {
474 if ((int)G.content_len <= 0)
475 break;
476 rdsz = (unsigned)G.content_len;
477 }
478 }
479#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
480# if ENABLE_FEATURE_WGET_TIMEOUT
481 second_cnt = G.timeout_seconds;
482# endif
483 while (1) {
484 if (safe_poll(&polldata, 1, 1000) != 0)
485 break; /* error, EOF, or data is available */
486# if ENABLE_FEATURE_WGET_TIMEOUT
487 if (second_cnt != 0 && --second_cnt == 0) {
488 progress_meter(PROGRESS_END);
489 bb_perror_msg_and_die("download timed out");
490 }
491# endif
492 /* Needed for "stalled" indicator */
493 progress_meter(PROGRESS_BUMP);
494 }
495#endif
496 n = safe_fread(buf, rdsz, dfp);
497 if (n <= 0) {
498 if (ferror(dfp)) {
499 /* perror will not work: ferror doesn't set errno */
500 bb_error_msg_and_die(bb_msg_read_error);
501 }
502 break;
503 }
504 xwrite(output_fd, buf, n);
505#if ENABLE_FEATURE_WGET_STATUSBAR
506 G.transferred += n;
507 progress_meter(PROGRESS_BUMP);
508#endif
509 if (G.got_clen)
510 G.content_len -= n;
511 }
512
513 if (!G.chunked)
514 break;
515
516 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
517 get_clen:
518 safe_fgets(buf, sizeof(buf), dfp);
519 G.content_len = STRTOOFF(buf, NULL, 16);
520 /* FIXME: error check? */
521 if (G.content_len == 0)
522 break; /* all done! */
523 G.got_clen = 1;
524 }
525
526 progress_meter(PROGRESS_END);
527}
528
529int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
530int wget_main(int argc UNUSED_PARAM, char **argv)
531{
532 char buf[512];
533 struct host_info server, target;
534 len_and_sockaddr *lsa;
535 unsigned opt;
536 int redir_limit;
537 char *proxy = NULL;
538 char *dir_prefix = NULL;
539#if ENABLE_FEATURE_WGET_LONG_OPTIONS
540 char *post_data;
541 char *extra_headers = NULL;
542 llist_t *headers_llist = NULL;
543#endif
544 FILE *sfp; /* socket to web/ftp server */
545 FILE *dfp; /* socket to ftp server (data) */
546 char *fname_out; /* where to direct output (-O) */
547 int output_fd = -1;
548 bool use_proxy; /* Use proxies if env vars are set */
549 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
550 const char *user_agent = "Wget";/* "User-Agent" header field */
551
552 static const char keywords[] ALIGN1 =
553 "content-length\0""transfer-encoding\0""chunked\0""location\0";
554 enum {
555 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
556 };
557#if ENABLE_FEATURE_WGET_LONG_OPTIONS
558 static const char wget_longopts[] ALIGN1 =
559 /* name, has_arg, val */
560 "continue\0" No_argument "c"
561 "spider\0" No_argument "s"
562 "quiet\0" No_argument "q"
563 "output-document\0" Required_argument "O"
564 "directory-prefix\0" Required_argument "P"
565 "proxy\0" Required_argument "Y"
566 "user-agent\0" Required_argument "U"
567#if ENABLE_FEATURE_WGET_TIMEOUT
568 "timeout\0" Required_argument "T"
569#endif
570 /* Ignored: */
571 // "tries\0" Required_argument "t"
572 /* Ignored (we always use PASV): */
573 "passive-ftp\0" No_argument "\xff"
574 "header\0" Required_argument "\xfe"
575 "post-data\0" Required_argument "\xfd"
576 /* Ignored (we don't do ssl) */
577 "no-check-certificate\0" No_argument "\xfc"
578 ;
579#endif
580
581 INIT_G();
582
583#if ENABLE_FEATURE_WGET_LONG_OPTIONS
584 applet_long_options = wget_longopts;
585#endif
586 /* server.allocated = target.allocated = NULL; */
587 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
588 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
589 &fname_out, &dir_prefix,
590 &proxy_flag, &user_agent,
591 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
592 NULL /* -t RETRIES */
593 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
594 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
595 );
596#if ENABLE_FEATURE_WGET_LONG_OPTIONS
597 if (headers_llist) {
598 int size = 1;
599 char *cp;
600 llist_t *ll = headers_llist;
601 while (ll) {
602 size += strlen(ll->data) + 2;
603 ll = ll->link;
604 }
605 extra_headers = cp = xmalloc(size);
606 while (headers_llist) {
607 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
608 }
609 }
610#endif
611
612 /* TODO: compat issue: should handle "wget URL1 URL2..." */
613
614 target.user = NULL;
615 parse_url(argv[optind], &target);
616
617 /* Use the proxy if necessary */
618 use_proxy = (strcmp(proxy_flag, "off") != 0);
619 if (use_proxy) {
620 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
621 if (proxy && proxy[0]) {
622 server.user = NULL;
623 parse_url(proxy, &server);
624 } else {
625 use_proxy = 0;
626 }
627 }
628 if (!use_proxy) {
629 server.port = target.port;
630 if (ENABLE_FEATURE_IPV6) {
631 server.host = xstrdup(target.host);
632 } else {
633 server.host = target.host;
634 }
635 }
636
637 if (ENABLE_FEATURE_IPV6)
638 strip_ipv6_scope_id(target.host);
639
640 /* Guess an output filename, if there was no -O FILE */
641 if (!(opt & WGET_OPT_OUTNAME)) {
642 fname_out = bb_get_last_path_component_nostrip(target.path);
643 /* handle "wget http://kernel.org//" */
644 if (fname_out[0] == '/' || !fname_out[0])
645 fname_out = (char*)"index.html";
646 /* -P DIR is considered only if there was no -O FILE */
647 if (dir_prefix)
648 fname_out = concat_path_file(dir_prefix, fname_out);
649 } else {
650 if (LONE_DASH(fname_out)) {
651 /* -O - */
652 output_fd = 1;
653 opt &= ~WGET_OPT_CONTINUE;
654 }
655 }
656#if ENABLE_FEATURE_WGET_STATUSBAR
657 G.curfile = bb_get_last_path_component_nostrip(fname_out);
658#endif
659
660 /* Impossible?
661 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
662 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
663 */
664
665 /* Determine where to start transfer */
666 if (opt & WGET_OPT_CONTINUE) {
667 output_fd = open(fname_out, O_WRONLY);
668 if (output_fd >= 0) {
669 G.beg_range = xlseek(output_fd, 0, SEEK_END);
670 }
671 /* File doesn't exist. We do not create file here yet.
672 * We are not sure it exists on remove side */
673 }
674
675 redir_limit = 5;
676 resolve_lsa:
677 lsa = xhost2sockaddr(server.host, server.port);
678 if (!(opt & WGET_OPT_QUIET)) {
679 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
680 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
681 free(s);
682 }
683 establish_session:
684 if (use_proxy || !target.is_ftp) {
685 /*
686 * HTTP session
687 */
688 char *str;
689 int status;
690
691 /* Open socket to http server */
692 sfp = open_socket(lsa);
693
694 /* Send HTTP request */
695 if (use_proxy) {
696 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
697 target.is_ftp ? "f" : "ht", target.host,
698 target.path);
699 } else {
700 if (opt & WGET_OPT_POST_DATA)
701 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
702 else
703 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
704 }
705
706 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
707 target.host, user_agent);
708
709#if ENABLE_FEATURE_WGET_AUTHENTICATION
710 if (target.user) {
711 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
712 base64enc_512(buf, target.user));
713 }
714 if (use_proxy && server.user) {
715 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
716 base64enc_512(buf, server.user));
717 }
718#endif
719
720 if (G.beg_range)
721 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
722#if ENABLE_FEATURE_WGET_LONG_OPTIONS
723 if (extra_headers)
724 fputs(extra_headers, sfp);
725
726 if (opt & WGET_OPT_POST_DATA) {
727 char *estr = URL_escape(post_data);
728 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
729 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
730 (int) strlen(estr), estr);
731 /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
732 /*fprintf(sfp, "%s\r\n", estr);*/
733 free(estr);
734 } else
735#endif
736 { /* If "Connection:" is needed, document why */
737 fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
738 }
739
740 fflush(sfp);
741
742 /*
743 * Retrieve HTTP response line and check for "200" status code.
744 */
745 read_response:
746 if (fgets(buf, sizeof(buf), sfp) == NULL)
747 bb_error_msg_and_die("no response from server");
748
749 str = buf;
750 str = skip_non_whitespace(str);
751 str = skip_whitespace(str);
752 // FIXME: no error check
753 // xatou wouldn't work: "200 OK"
754 status = atoi(str);
755 switch (status) {
756 case 0:
757 case 100:
758 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
759 /* eat all remaining headers */;
760 goto read_response;
761 case 200:
762/*
763Response 204 doesn't say "null file", it says "metadata
764has changed but data didn't":
765
766"10.2.5 204 No Content
767The server has fulfilled the request but does not need to return
768an entity-body, and might want to return updated metainformation.
769The response MAY include new or updated metainformation in the form
770of entity-headers, which if present SHOULD be associated with
771the requested variant.
772
773If the client is a user agent, it SHOULD NOT change its document
774view from that which caused the request to be sent. This response
775is primarily intended to allow input for actions to take place
776without causing a change to the user agent's active document view,
777although any new or updated metainformation SHOULD be applied
778to the document currently in the user agent's active view.
779
780The 204 response MUST NOT include a message-body, and thus
781is always terminated by the first empty line after the header fields."
782
783However, in real world it was observed that some web servers
784(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
785*/
786 case 204:
787 break;
788 case 300: /* redirection */
789 case 301:
790 case 302:
791 case 303:
792 break;
793 case 206:
794 if (G.beg_range)
795 break;
796 /* fall through */
797 default:
798 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
799 }
800
801 /*
802 * Retrieve HTTP headers.
803 */
804 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
805 /* gethdr converted "FOO:" string to lowercase */
806 smalluint key;
807 /* strip trailing whitespace */
808 char *s = strchrnul(str, '\0') - 1;
809 while (s >= str && (*s == ' ' || *s == '\t')) {
810 *s = '\0';
811 s--;
812 }
813 key = index_in_strings(keywords, buf) + 1;
814 if (key == KEY_content_length) {
815 G.content_len = BB_STRTOOFF(str, NULL, 10);
816 if (G.content_len < 0 || errno) {
817 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
818 }
819 G.got_clen = 1;
820 continue;
821 }
822 if (key == KEY_transfer_encoding) {
823 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
824 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
825 G.chunked = G.got_clen = 1;
826 }
827 if (key == KEY_location && status >= 300) {
828 if (--redir_limit == 0)
829 bb_error_msg_and_die("too many redirections");
830 fclose(sfp);
831 G.got_clen = 0;
832 G.chunked = 0;
833 if (str[0] == '/')
834 /* free(target.allocated); */
835 target.path = /* target.allocated = */ xstrdup(str+1);
836 /* lsa stays the same: it's on the same server */
837 else {
838 parse_url(str, &target);
839 if (!use_proxy) {
840 server.host = target.host;
841 /* strip_ipv6_scope_id(target.host); - no! */
842 /* we assume remote never gives us IPv6 addr with scope id */
843 server.port = target.port;
844 free(lsa);
845 goto resolve_lsa;
846 } /* else: lsa stays the same: we use proxy */
847 }
848 goto establish_session;
849 }
850 }
851// if (status >= 300)
852// bb_error_msg_and_die("bad redirection (no Location: header from server)");
853
854 /* For HTTP, data is pumped over the same connection */
855 dfp = sfp;
856
857 } else {
858 /*
859 * FTP session
860 */
861 sfp = prepare_ftp_session(&dfp, &target, lsa);
862 }
863
864 if (opt & WGET_OPT_SPIDER) {
865 if (ENABLE_FEATURE_CLEAN_UP)
866 fclose(sfp);
867 return EXIT_SUCCESS;
868 }
869
870 if (output_fd < 0) {
871 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
872 /* compat with wget: -O FILE can overwrite */
873 if (opt & WGET_OPT_OUTNAME)
874 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
875 output_fd = xopen(fname_out, o_flags);
876 }
877
878 retrieve_file_data(dfp, output_fd);
879 xclose(output_fd);
880
881 if (dfp != sfp) {
882 /* It's ftp. Close it properly */
883 fclose(dfp);
884 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
885 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
886 /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */
887 }
888
889 return EXIT_SUCCESS;
890}
Note: See TracBrowser for help on using the repository browser.