source: MondoRescue/branches/2.2.9/mindi-busybox/networking/wget.c@ 2859

Last change on this file since 2859 was 2859, checked in by Bruno Cornec, 13 years ago
  • Update to upstream busybox 1.18.5
File size: 23.9 KB
Line 
1/* vi: set sw=4 ts=4: */
2/*
3 * wget - retrieve a file using HTTP or FTP
4 *
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
7 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
10 */
11#include "libbb.h"
12
/* Components of one parsed URL, filled in by parse_url(). */
13struct host_info {
14 // May be used if we ever will want to free() all xstrdup()s...
15 /* char *allocated; */
16 const char *path; /* what follows the host; a leading '/' is not stored */
17 const char *user; /* text before the last '@' of the host part; untouched when the URL has no '@' */
18 char *host; /* host part of the URL, after any "user@" prefix */
19 int port; /* value returned by bb_lookup_port() for the scheme (fallback 80 or 21) */
20 smallint is_ftp; /* 1 for "ftp://" URLs, 0 for "http://" URLs */
21};
22
23
24/* Globals */
/* Applet state. It is not a separate object: the G macro below accesses it
 * through a cast of bb_common_bufsiz1. */
25struct globals {
26 off_t content_len; /* Content-length of the file */
27 off_t beg_range; /* Range at which continue begins */
28#if ENABLE_FEATURE_WGET_STATUSBAR
29 off_t transferred; /* Number of bytes transferred so far */
30 const char *curfile; /* Name of current file being transferred */
31 bb_progress_t pmt; /* progress state passed to bb_progress_init()/bb_progress_update() */
32#endif
33#if ENABLE_FEATURE_WGET_TIMEOUT
34 unsigned timeout_seconds; /* -T value; 0 means retrieve_file_data() never times out */
35#endif
36 smallint chunked; /* chunked transfer encoding */
37 smallint got_clen; /* got content-length: from server */
38} FIX_ALIASING;
39#define G (*(struct globals*)&bb_common_bufsiz1)
/* Compile-time size check: the array size becomes -1 (a compile error)
 * when struct globals is larger than COMMON_BUFSIZE. */
40struct BUG_G_too_big {
41 char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
42};
/* Sets the only non-zero default: a 900 second timeout (when the timeout
 * feature is enabled). */
43#define INIT_G() do { \
44 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
45} while (0)
46
47
48/* Must match option string! */
/* Bits of the mask returned by getopt32() in wget_main(). Bits 0..8 follow
 * the order of the short options in "csqO:P:Y:U:T:" "t:"; the remaining
 * bits belong to options that exist only in long form. */
49enum {
50 WGET_OPT_CONTINUE = (1 << 0),
51 WGET_OPT_SPIDER = (1 << 1),
52 WGET_OPT_QUIET = (1 << 2),
53 WGET_OPT_OUTNAME = (1 << 3),
54 WGET_OPT_PREFIX = (1 << 4),
55 WGET_OPT_PROXY = (1 << 5),
56 WGET_OPT_USER_AGENT = (1 << 6),
57 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
58 WGET_OPT_RETRIES = (1 << 8),
59 WGET_OPT_PASSIVE = (1 << 9),
 /* The next two evaluate to 0 when long options are disabled, so tests
  * such as (opt & WGET_OPT_POST_DATA) are then always false. */
60 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
61 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
62};
63
/* Values of the "flag" argument of progress_meter(). */
64enum {
65 PROGRESS_START = -1, /* initialize the meter before the first update */
66 PROGRESS_END = 0, /* final update: print a newline and reset G.transferred */
67 PROGRESS_BUMP = 1, /* plain periodic update */
68};
69#if ENABLE_FEATURE_WGET_STATUSBAR
70static void progress_meter(int flag)
71{
72 if (option_mask32 & WGET_OPT_QUIET)
73 return;
74
75 if (flag == PROGRESS_START)
76 bb_progress_init(&G.pmt);
77
78 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
79 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
80
81 if (flag == PROGRESS_END) {
82 bb_putchar_stderr('\n');
83 G.transferred = 0;
84 }
85}
86#else
87static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
88#endif
89
90
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * This function removes the "%zone" part from a bracketed IPv6 host string
 * in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80". Strings that do not
 * start with '[', contain no '%', or are not of the form "[xx]" / "[xx]:nn"
 * are left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* The '%' must be inside the brackets. For input like "[::1]:%" it
	 * follows the ']' and there is no zone to strip; copying "forward"
	 * onto a later address would run past the end of the string. */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]...".
	 * The regions overlap, hence memmove (terminating NUL included). */
	memmove(scope, cp, strlen(cp) + 1);
}
126
/*
 * Read up to NMEMB bytes from STREAM into PTR, restarting the read whenever
 * it was cut short by a signal (stream error with errno == EINTR).
 * Returns the number of bytes stored; the count is short when EOF or any
 * other error was hit first.
 */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *const start = (char*)ptr;
	char *cur = start;

	for (;;) {
		size_t got;

		/* Start each attempt with clean error state so that the
		 * checks below only see what this fread reported */
		clearerr(stream);
		errno = 0;
		got = fread(cur, 1, nmemb, stream);
		cur += got;
		nmemb -= got;

		if (nmemb == 0)
			break; /* request fully satisfied */
		if (!ferror(stream) || errno != EINTR)
			break; /* EOF or a real error */
	}

	return cur - start;
}
144
/*
 * fgets() wrapper that retries when the read was interrupted by a signal.
 * Stores a line, or at most SIZE-1 bytes, from STREAM into S.
 * Returns S on success, NULL on EOF or on an error other than EINTR.
 */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);
		if (line != NULL)
			return line;
		/* Only a signal-interrupted read is worth another attempt */
		if (!ferror(stream) || errno != EINTR)
			return NULL;
	}
}
159
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode the string STR into BUF and return BUF.
 * BUF is assumed to be char buf[512]; input longer than
 * 512/4*3 - 10 bytes is silently cut to that length (paranoia).
 */
static char *base64enc_512(char buf[512], const char *str)
{
	enum { MAX_INPUT = 512/4*3 - 10 };
	unsigned in_len = strlen(str);

	bb_uuencode(buf, str, in_len > MAX_INPUT ? MAX_INPUT : in_len, bb_uuenc_tbl_base64);
	return buf;
}
#endif
171
/*
 * Cut S at the first byte below ' ' (any control character, including
 * CR, LF and TAB) so that it can be printed in a message. Bytes >= 0x80
 * are kept. Modifies S in place and returns it.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cursor;

	/* The terminating NUL is itself below ' ', so the scan always stops */
	for (cursor = (unsigned char *) s; *cursor >= ' '; cursor++)
		continue;
	*cursor = '\0';
	return s;
}
180
181static FILE *open_socket(len_and_sockaddr *lsa)
182{
183 FILE *fp;
184
185 /* glibc 2.4 seems to try seeking on it - ??! */
186 /* hopefully it understands what ESPIPE means... */
187 fp = fdopen(xconnect_stream(lsa), "r+");
188 if (fp == NULL)
189 bb_perror_msg_and_die("fdopen");
190
191 return fp;
192}
193
/*
 * Optionally send an FTP command and then read the server's reply.
 *
 * s1, s2: when s1 is non-NULL, "s1" immediately followed by "s2" (NULL is
 *         treated as "") and CRLF is sent on fp. s1 may point into buf:
 *         the command is written out before buf is overwritten.
 * fp:     control connection stream.
 * buf:    reply buffer; at most 509 bytes of a line are stored per read.
 *         On return it holds the final reply line, i.e. the first line
 *         that starts with a digit and has a space as fourth character,
 *         with any trailing CRLF removed.
 *
 * Returns the numeric reply code. Terminates the program when a read
 * fails or hits EOF.
 */
static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
{
	int result;

	if (s1) {
		if (!s2)
			s2 = "";
		fprintf(fp, "%s%s\r\n", s1, s2);
		fflush(fp);
	}

	do {
		char *buf_ptr;

		/* fgets() does not write buf[3] when the line is shorter than
		 * three bytes. Clear it so that the loop condition below never
		 * looks at a stale or uninitialized byte. */
		buf[3] = '\0';
		if (fgets(buf, 510, fp) == NULL) {
			bb_perror_msg_and_die("error getting response");
		}
		buf_ptr = strstr(buf, "\r\n");
		if (buf_ptr) {
			*buf_ptr = '\0';
		}
	} while (!isdigit(buf[0]) || buf[3] != ' ');

	/* Temporarily cut the line after the three-digit code to parse it */
	buf[3] = '\0';
	result = xatoi_positive(buf);
	buf[3] = ' ';
	return result;
}
220
221static void parse_url(char *src_url, struct host_info *h)
222{
223 char *url, *p, *sp;
224
225 /* h->allocated = */ url = xstrdup(src_url);
226
227 if (strncmp(url, "http://", 7) == 0) {
228 h->port = bb_lookup_port("http", "tcp", 80);
229 h->host = url + 7;
230 h->is_ftp = 0;
231 } else if (strncmp(url, "ftp://", 6) == 0) {
232 h->port = bb_lookup_port("ftp", "tcp", 21);
233 h->host = url + 6;
234 h->is_ftp = 1;
235 } else
236 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
237
238 // FYI:
239 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
240 // 'GET /?var=a/b HTTP 1.0'
241 // and saves 'index.html?var=a%2Fb' (we save 'b')
242 // wget 'http://busybox.net?login=john@doe':
243 // request: 'GET /?login=john@doe HTTP/1.0'
244 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
245 // wget 'http://busybox.net#test/test':
246 // request: 'GET / HTTP/1.0'
247 // saves: 'index.html' (we save 'test')
248 //
249 // We also don't add unique .N suffix if file exists...
250 sp = strchr(h->host, '/');
251 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
252 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
253 if (!sp) {
254 h->path = "";
255 } else if (*sp == '/') {
256 *sp = '\0';
257 h->path = sp + 1;
258 } else { // '#' or '?'
259 // http://busybox.net?login=john@doe is a valid URL
260 // memmove converts to:
261 // http:/busybox.nett?login=john@doe...
262 memmove(h->host - 1, h->host, sp - h->host);
263 h->host--;
264 sp[-1] = '\0';
265 h->path = sp;
266 }
267
268 // We used to set h->user to NULL here, but this interferes
269 // with handling of code 302 ("object was moved")
270
271 sp = strrchr(h->host, '@');
272 if (sp != NULL) {
273 h->user = h->host;
274 *sp = '\0';
275 h->host = sp + 1;
276 }
277
278 sp = h->host;
279}
280
/*
 * Read one header line from fp into buf (capacity bufsiz).
 *
 * On success the header name, lower-cased and NUL-terminated, is left at
 * the start of buf, and the returned pointer addresses the value inside
 * buf with leading whitespace skipped and the line ending removed.
 * Returns NULL on EOF/read error and on the empty line that ends the
 * headers. A value that does not fit in buf is truncated and the rest of
 * the line is discarded. Terminates the program when the name is not
 * followed by ':'.
 */
static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
{
	char *cursor;
	char *value;
	int ch;

	/* *istrunc = 0; */

	/* Fetch the raw line */
	if (!fgets(buf, bufsiz, fp))
		return NULL;

	/* Optional CRs followed by LF: this is the end of the headers */
	cursor = buf;
	while (*cursor == '\r')
		cursor++;
	if (*cursor == '\n')
		return NULL;

	/* Lower-case the header name in place.
	 * OR-ing 0x20 is tolower for "A-Z" and a no-op for "0-9a-z-." */
	cursor = buf;
	while (isalnum(*cursor) || *cursor == '-' || *cursor == '.') {
		*cursor |= 0x20;
		cursor++;
	}

	/* The name must be immediately followed by a colon */
	if (*cursor != ':')
		bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));

	/* Terminate the name; the value starts after optional whitespace */
	*cursor = '\0';
	cursor++;
	value = skip_whitespace(cursor);

	/* Look for the line ending */
	while (*cursor != '\0' && *cursor != '\r' && *cursor != '\n')
		cursor++;

	if (*cursor == '\0') {
		/* Rats! The buffer isn't big enough to hold the entire
		 * header value: swallow the remainder of the line */
		do {
			ch = getc(fp);
		} while (ch != EOF && ch != '\n');
		/* *istrunc = 1; */
		return value;
	}

	/* Line ending found: cut it off */
	*cursor = '\0';
	return value;
}
328
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * URL encode STR, see RFC 2396.
 * ASCII letters, digits and the characters !&'()*-.=_~ are copied as is;
 * every other byte becomes "%XX" with upper-case hex digits.
 * Returns a newly allocated string (xmalloc) which the caller frees.
 */
static char *URL_escape(const char *str)
{
	/* Worst case: every input byte expands to three output bytes */
	char *const escaped = xmalloc(strlen(str) * 3 + 1);
	char *out = escaped;
	const unsigned char *in = (const unsigned char *) str;

	for (; *in != '\0'; in++) {
		unsigned char ch = *in;
		unsigned char folded = ch | 0x20; /* maps A-Z onto a-z */
		int keep = (ch >= '0' && ch <= '9')
			|| (folded >= 'a' && folded <= 'z');

		if (!keep) {
			switch (ch) {
			case '!': case '&': case '\'': case '(':
			case ')': case '*': case '-': case '.':
			case '=': case '_': case '~':
				keep = 1;
				break;
			default:
				break;
			}
		}

		if (keep) {
			*out++ = ch;
		} else {
			*out++ = '%';
			*out++ = bb_hexdigits_upcase[ch >> 4];
			*out++ = bb_hexdigits_upcase[ch & 0xf];
		}
	}
	*out = '\0';

	return escaped;
}
#endif
366
/*
 * Set up an FTP download: connect to lsa, log in, send "TYPE I", query the
 * file size, open a passive-mode data connection and issue RETR.
 *
 * dfpp:   receives the stream of the data connection.
 * target: supplies "user[:password]" and the remote path. target->user is
 *         set to "anonymous:busybox@" when it is NULL, and is split in
 *         place at the first ':'.
 * lsa:    server address; its port is overwritten with the data port taken
 *         from the PASV reply.
 *
 * Side effects on globals: a 213 reply to SIZE sets G.content_len and
 * G.got_clen; a 350 reply to REST subtracts G.beg_range from G.content_len.
 *
 * Returns the control connection stream. A bad greeting, a failed login, a
 * bad PASV reply or a RETR reply code above 150 terminates the program.
 * The replies to TYPE, SIZE and REST are not treated as fatal.
 */
367static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
368{
369 char buf[512];
370 FILE *sfp;
371 char *str;
372 int port;
373
374 if (!target->user)
375 target->user = xstrdup("anonymous:busybox@");
376
377 sfp = open_socket(lsa);
 /* No command is sent here: this only reads the server greeting */
378 if (ftpcmd(NULL, NULL, sfp, buf) != 220)
379 bb_error_msg_and_die("%s", sanitize_string(buf+4));
380
381 /*
382 * Splitting username:password pair,
383 * trying to log in
384 */
385 str = strchr(target->user, ':');
386 if (str)
387 *str++ = '\0';
388 switch (ftpcmd("USER ", target->user, sfp, buf)) {
389 case 230:
390 break;
391 case 331:
 /* str is NULL when no password was given; ftpcmd() then sends "PASS " alone */
392 if (ftpcmd("PASS ", str, sfp, buf) == 230)
393 break;
394 /* fall through (failed login) */
395 default:
396 bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
397 }
398
399 ftpcmd("TYPE I", NULL, sfp, buf);
400
401 /*
402 * Querying file size
403 */
404 if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
 /* buf+4 skips the three-digit reply code and the space after it */
405 G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
 /* NOTE(review): relies on BB_STRTOOFF resetting/setting errno - confirm */
406 if (G.content_len < 0 || errno) {
407 bb_error_msg_and_die("SIZE value is garbage");
408 }
409 G.got_clen = 1;
410 }
411
412 /*
413 * Entering passive mode
414 */
415 if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
 /* Also the target of the gotos below, for replies that cannot be parsed */
416 pasv_error:
417 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
418 }
419 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
420 // Server's IP is N1.N2.N3.N4 (we ignore it)
421 // Server's port for data connection is P1*256+P2
 // The reply is parsed from the right: cut at the last ')', take P2 after
 // the last ',', cut there, then take P1 after the new last ','
422 str = strrchr(buf, ')');
423 if (str) str[0] = '\0';
424 str = strrchr(buf, ',');
425 if (!str) goto pasv_error;
426 port = xatou_range(str+1, 0, 255);
427 *str = '\0';
428 str = strrchr(buf, ',');
429 if (!str) goto pasv_error;
430 port += xatou_range(str+1, 0, 255) * 256;
431 set_nport(lsa, htons(port));
432
433 *dfpp = open_socket(lsa);
434
435 if (G.beg_range) {
 /* buf serves as both the command and the reply buffer: ftpcmd()
  * sends the command before it reads the reply into buf */
436 sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
437 if (ftpcmd(buf, NULL, sfp, buf) == 350)
438 G.content_len -= G.beg_range;
439 }
440
 /* Any reply code up to and including 150 is accepted */
441 if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
442 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
443
444 return sfp;
445}
446
447static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
448{
449 char buf[4*1024]; /* made bigger to speed up local xfers */
450#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
451# if ENABLE_FEATURE_WGET_TIMEOUT
452 unsigned second_cnt;
453# endif
454 struct pollfd polldata;
455
456 polldata.fd = fileno(dfp);
457 polldata.events = POLLIN | POLLPRI;
458#endif
459 progress_meter(PROGRESS_START);
460
461 if (G.chunked)
462 goto get_clen;
463
464 /* Loops only if chunked */
465 while (1) {
466
467#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
468 ndelay_on(polldata.fd);
469#endif
470 while (1) {
471 int n;
472 unsigned rdsz;
473
474 rdsz = sizeof(buf);
475 if (G.got_clen) {
476 if (G.content_len < (off_t)sizeof(buf)) {
477 if ((int)G.content_len <= 0)
478 break;
479 rdsz = (unsigned)G.content_len;
480 }
481 }
482#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
483# if ENABLE_FEATURE_WGET_TIMEOUT
484 second_cnt = G.timeout_seconds;
485# endif
486 while (1) {
487 if (safe_poll(&polldata, 1, 1000) != 0)
488 break; /* error, EOF, or data is available */
489# if ENABLE_FEATURE_WGET_TIMEOUT
490 if (second_cnt != 0 && --second_cnt == 0) {
491 progress_meter(PROGRESS_END);
492 bb_perror_msg_and_die("download timed out");
493 }
494# endif
495 /* Needed for "stalled" indicator */
496 progress_meter(PROGRESS_BUMP);
497 }
498#endif
499 /* fread internally uses read loop, which in our case
500 * is usually exited when we get EAGAIN.
501 * In this case, libc sets error marker on the stream.
502 * Need to clear it before next fread to avoid possible
503 * rare false positive ferror below. Rare because usually
504 * fread gets more than zero bytes, and we don't fall
505 * into if (n <= 0) ...
506 */
507 clearerr(dfp);
508 errno = 0;
509 n = safe_fread(buf, rdsz, dfp);
510 /* man fread:
511 * If error occurs, or EOF is reached, the return value
512 * is a short item count (or zero).
513 * fread does not distinguish between EOF and error.
514 */
515 if (n <= 0) {
516#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
517 if (errno == EAGAIN) /* poll lied, there is no data? */
518 continue; /* yes */
519#endif
520 if (ferror(dfp))
521 bb_perror_msg_and_die(bb_msg_read_error);
522 break; /* EOF, not error */
523 }
524
525 xwrite(output_fd, buf, n);
526#if ENABLE_FEATURE_WGET_STATUSBAR
527 G.transferred += n;
528 progress_meter(PROGRESS_BUMP);
529#endif
530 if (G.got_clen) {
531 G.content_len -= n;
532 if (G.content_len == 0)
533 break;
534 }
535 }
536#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
537 ndelay_off(polldata.fd);
538#endif
539
540 if (!G.chunked)
541 break;
542
543 safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
544 get_clen:
545 safe_fgets(buf, sizeof(buf), dfp);
546 G.content_len = STRTOOFF(buf, NULL, 16);
547 /* FIXME: error check? */
548 if (G.content_len == 0)
549 break; /* all done! */
550 G.got_clen = 1;
551 }
552
553 progress_meter(PROGRESS_END);
554}
555
/*
 * Applet entry point: download the URL in argv[optind] over HTTP (directly
 * or through a proxy) or over FTP.
 *
 * Outline:
 *  1. Parse options; join all --header values into one string.
 *  2. Parse the target URL and, unless the -Y value is "off", the proxy URL
 *     from the ftp_proxy/http_proxy environment variable.
 *  3. Choose the output file (-O, or a name derived from the URL path,
 *     optionally below the -P directory); with -c, find the resume offset.
 *  4. Connect, send the request and read the response headers. A Location:
 *     header on a status >= 300 restarts the session (bounded by
 *     redir_limit). FTP targets without a proxy go through
 *     prepare_ftp_session() instead.
 *  5. Unless --spider was given, copy the body with retrieve_file_data().
 *
 * Returns EXIT_SUCCESS; the failures handled here terminate the program
 * through the bb_*_and_die() helpers.
 */
556int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
557int wget_main(int argc UNUSED_PARAM, char **argv)
558{
559 char buf[512];
560 struct host_info server, target;
561 len_and_sockaddr *lsa;
562 unsigned opt;
563 int redir_limit;
564 char *proxy = NULL;
565 char *dir_prefix = NULL;
566#if ENABLE_FEATURE_WGET_LONG_OPTIONS
567 char *post_data;
568 char *extra_headers = NULL;
569 llist_t *headers_llist = NULL;
570#endif
571 FILE *sfp; /* socket to web/ftp server */
572 FILE *dfp; /* socket to ftp server (data) */
573 char *fname_out; /* where to direct output (-O) */
574 int output_fd = -1;
575 bool use_proxy; /* Use proxies if env vars are set */
576 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
577 const char *user_agent = "Wget";/* "User-Agent" header field */
578
 /* Header names and values recognized below; the KEY_* values are the
  * 1-based positions of the strings in this list */
579 static const char keywords[] ALIGN1 =
580 "content-length\0""transfer-encoding\0""chunked\0""location\0";
581 enum {
582 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
583 };
584#if ENABLE_FEATURE_WGET_LONG_OPTIONS
585 static const char wget_longopts[] ALIGN1 =
586 /* name, has_arg, val */
587 "continue\0" No_argument "c"
588 "spider\0" No_argument "s"
589 "quiet\0" No_argument "q"
590 "output-document\0" Required_argument "O"
591 "directory-prefix\0" Required_argument "P"
592 "proxy\0" Required_argument "Y"
593 "user-agent\0" Required_argument "U"
594#if ENABLE_FEATURE_WGET_TIMEOUT
595 "timeout\0" Required_argument "T"
596#endif
597 /* Ignored: */
598 // "tries\0" Required_argument "t"
599 /* Ignored (we always use PASV): */
600 "passive-ftp\0" No_argument "\xff"
601 "header\0" Required_argument "\xfe"
602 "post-data\0" Required_argument "\xfd"
603 /* Ignored (we don't do ssl) */
604 "no-check-certificate\0" No_argument "\xfc"
605 ;
606#endif
607
608 INIT_G();
609
610#if ENABLE_FEATURE_WGET_LONG_OPTIONS
611 applet_long_options = wget_longopts;
612#endif
613 /* server.allocated = target.allocated = NULL; */
614 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
615 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
616 &fname_out, &dir_prefix,
617 &proxy_flag, &user_agent,
618 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
619 NULL /* -t RETRIES */
620 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
621 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
622 );
623#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 /* Join all --header values into one string, each followed by CRLF.
  * The first pass computes the size: 2 bytes of CRLF per header plus
  * the terminating NUL. */
624 if (headers_llist) {
625 int size = 1;
626 char *cp;
627 llist_t *ll = headers_llist;
628 while (ll) {
629 size += strlen(ll->data) + 2;
630 ll = ll->link;
631 }
632 extra_headers = cp = xmalloc(size);
633 while (headers_llist) {
634 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
635 }
636 }
637#endif
638
639 /* TODO: compat issue: should handle "wget URL1 URL2..." */
640
 /* parse_url() only writes ->user when the URL contains '@' */
641 target.user = NULL;
642 parse_url(argv[optind], &target);
643
644 /* Use the proxy if necessary */
645 use_proxy = (strcmp(proxy_flag, "off") != 0);
646 if (use_proxy) {
647 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
648 if (proxy && proxy[0]) {
649 server.user = NULL;
650 parse_url(proxy, &server);
651 } else {
652 use_proxy = 0;
653 }
654 }
655 if (!use_proxy) {
656 server.port = target.port;
 /* With IPv6 support the server gets its own copy of the host string,
  * because strip_ipv6_scope_id() below edits target.host in place */
657 if (ENABLE_FEATURE_IPV6) {
658 server.host = xstrdup(target.host);
659 } else {
660 server.host = target.host;
661 }
662 }
663
664 if (ENABLE_FEATURE_IPV6)
665 strip_ipv6_scope_id(target.host);
666
667 /* Guess an output filename, if there was no -O FILE */
668 if (!(opt & WGET_OPT_OUTNAME)) {
669 fname_out = bb_get_last_path_component_nostrip(target.path);
670 /* handle "wget http://kernel.org//" */
671 if (fname_out[0] == '/' || !fname_out[0])
672 fname_out = (char*)"index.html";
673 /* -P DIR is considered only if there was no -O FILE */
674 if (dir_prefix)
675 fname_out = concat_path_file(dir_prefix, fname_out);
676 } else {
677 if (LONE_DASH(fname_out)) {
678 /* -O - */
 /* write to stdout; resuming (-c) is switched off in this case */
679 output_fd = 1;
680 opt &= ~WGET_OPT_CONTINUE;
681 }
682 }
683#if ENABLE_FEATURE_WGET_STATUSBAR
684 G.curfile = bb_get_last_path_component_nostrip(fname_out);
685#endif
686
687 /* Impossible?
688 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
689 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
690 */
691
692 /* Determine where to start transfer */
693 if (opt & WGET_OPT_CONTINUE) {
694 output_fd = open(fname_out, O_WRONLY);
695 if (output_fd >= 0) {
 /* resume offset = current size of the existing file */
696 G.beg_range = xlseek(output_fd, 0, SEEK_END);
697 }
698 /* File doesn't exist. We do not create file here yet.
699 * We are not sure it exists on remote side */
700 }
701
702 redir_limit = 5;
 /* Redirects to a different host jump back here... */
703 resolve_lsa:
704 lsa = xhost2sockaddr(server.host, server.port);
705 if (!(opt & WGET_OPT_QUIET)) {
706 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
707 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
708 free(s);
709 }
 /* ...and redirects that keep the same lsa jump back here */
710 establish_session:
711 if (use_proxy || !target.is_ftp) {
712 /*
713 * HTTP session
714 */
715 char *str;
716 int status;
717
718 /* Open socket to http server */
719 sfp = open_socket(lsa);
720
721 /* Send HTTP request */
722 if (use_proxy) {
 /* a proxy is given the full URL, with "ftp://" or "http://" scheme */
723 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
724 target.is_ftp ? "f" : "ht", target.host,
725 target.path);
726 } else {
727 if (opt & WGET_OPT_POST_DATA)
728 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
729 else
730 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
731 }
732
733 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
734 target.host, user_agent);
735
736 /* Ask server to close the connection as soon as we are done
737 * (IOW: we do not intend to send more requests)
738 */
739 fprintf(sfp, "Connection: close\r\n");
740
741#if ENABLE_FEATURE_WGET_AUTHENTICATION
742 if (target.user) {
 /* +6 skips "Proxy-", so this sends "Authorization: Basic ..." */
743 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
744 base64enc_512(buf, target.user));
745 }
746 if (use_proxy && server.user) {
747 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
748 base64enc_512(buf, server.user));
749 }
750#endif
751
752 if (G.beg_range)
753 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
754
755#if ENABLE_FEATURE_WGET_LONG_OPTIONS
756 if (extra_headers)
757 fputs(extra_headers, sfp);
758
759 if (opt & WGET_OPT_POST_DATA) {
 /* the POST body is the URL-escaped --post-data value */
760 char *estr = URL_escape(post_data);
761 fprintf(sfp,
762 "Content-Type: application/x-www-form-urlencoded\r\n"
763 "Content-Length: %u\r\n"
764 "\r\n"
765 "%s",
766 (int) strlen(estr), estr
767 );
768 free(estr);
769 } else
770#endif
771 {
 /* empty line: end of the request headers */
772 fprintf(sfp, "\r\n");
773 }
774
775 fflush(sfp);
776
777 /*
778 * Retrieve HTTP response line and check for "200" status code.
779 */
780 read_response:
781 if (fgets(buf, sizeof(buf), sfp) == NULL)
782 bb_error_msg_and_die("no response from server");
783
 /* skip the first word of the status line; the status code is expected next */
784 str = buf;
785 str = skip_non_whitespace(str);
786 str = skip_whitespace(str);
787 // FIXME: no error check
788 // xatou wouldn't work: "200 OK"
789 status = atoi(str);
790 switch (status) {
 /* 0 (atoi() found no number) and 100: discard this response's
  * headers and read the next status line */
791 case 0:
792 case 100:
793 while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
794 /* eat all remaining headers */;
795 goto read_response;
796 case 200:
797/*
798Response 204 doesn't say "null file", it says "metadata
799has changed but data didn't":
800
801"10.2.5 204 No Content
802The server has fulfilled the request but does not need to return
803an entity-body, and might want to return updated metainformation.
804The response MAY include new or updated metainformation in the form
805of entity-headers, which if present SHOULD be associated with
806the requested variant.
807
808If the client is a user agent, it SHOULD NOT change its document
809view from that which caused the request to be sent. This response
810is primarily intended to allow input for actions to take place
811without causing a change to the user agent's active document view,
812although any new or updated metainformation SHOULD be applied
813to the document currently in the user agent's active view.
814
815The 204 response MUST NOT include a message-body, and thus
816is always terminated by the first empty line after the header fields."
817
818However, in real world it was observed that some web servers
819(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
820*/
821 case 204:
822 break;
823 case 300: /* redirection */
824 case 301:
825 case 302:
826 case 303:
827 break;
 /* 206 is accepted only if a Range: header was sent (G.beg_range != 0) */
828 case 206:
829 if (G.beg_range)
830 break;
831 /* fall through */
832 default:
833 bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
834 }
835
836 /*
837 * Retrieve HTTP headers.
838 */
839 while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
840 /* gethdr converted "FOO:" string to lowercase */
 /* here buf holds the header name and str points at its value */
841 smalluint key;
842 /* strip trailing whitespace */
843 char *s = strchrnul(str, '\0') - 1;
844 while (s >= str && (*s == ' ' || *s == '\t')) {
845 *s = '\0';
846 s--;
847 }
848 key = index_in_strings(keywords, buf) + 1;
849 if (key == KEY_content_length) {
850 G.content_len = BB_STRTOOFF(str, NULL, 10);
851 if (G.content_len < 0 || errno) {
852 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
853 }
854 G.got_clen = 1;
855 continue;
856 }
857 if (key == KEY_transfer_encoding) {
858 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
859 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
860 G.chunked = G.got_clen = 1;
861 }
862 if (key == KEY_location && status >= 300) {
 /* Redirect: drop this connection and the body-length state,
  * then start a new session for the new location */
863 if (--redir_limit == 0)
864 bb_error_msg_and_die("too many redirections");
865 fclose(sfp);
866 G.got_clen = 0;
867 G.chunked = 0;
868 if (str[0] == '/')
869 /* free(target.allocated); */
870 target.path = /* target.allocated = */ xstrdup(str+1);
871 /* lsa stays the same: it's on the same server */
872 else {
873 parse_url(str, &target);
874 if (!use_proxy) {
875 server.host = target.host;
876 /* strip_ipv6_scope_id(target.host); - no! */
877 /* we assume remote never gives us IPv6 addr with scope id */
878 server.port = target.port;
879 free(lsa);
880 goto resolve_lsa;
881 } /* else: lsa stays the same: we use proxy */
882 }
883 goto establish_session;
884 }
885 }
886// if (status >= 300)
887// bb_error_msg_and_die("bad redirection (no Location: header from server)");
888
889 /* For HTTP, data is pumped over the same connection */
890 dfp = sfp;
891
892 } else {
893 /*
894 * FTP session
895 */
896 sfp = prepare_ftp_session(&dfp, &target, lsa);
897 }
898
 /* --spider: the request was made, but nothing is downloaded */
899 if (opt & WGET_OPT_SPIDER) {
900 if (ENABLE_FEATURE_CLEAN_UP)
901 fclose(sfp);
902 return EXIT_SUCCESS;
903 }
904
 /* Still no output fd: neither "-O -" nor an existing file opened for -c */
905 if (output_fd < 0) {
 /* O_EXCL: an existing file is never overwritten unless -O was given */
906 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
907 /* compat with wget: -O FILE can overwrite */
908 if (opt & WGET_OPT_OUTNAME)
909 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
910 output_fd = xopen(fname_out, o_flags);
911 }
912
913 retrieve_file_data(dfp, output_fd);
914 xclose(output_fd);
915
916 if (dfp != sfp) {
917 /* It's ftp. Close it properly */
918 fclose(dfp);
 /* read the reply the server sends on the control connection after the transfer */
919 if (ftpcmd(NULL, NULL, sfp, buf) != 226)
920 bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
921 /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */
922 }
923
924 return EXIT_SUCCESS;
925}
Note: See TracBrowser for help on using the repository browser.