source: MondoRescue/branches/3.2/mindi-busybox/networking/wget.c@ 3232

Last change on this file since 3232 was 3232, checked in by Bruno Cornec, 10 years ago
  • Update mindi-busybox to 1.21.1
File size: 26.9 KB
Line 
1/* vi: set sw=4 ts=4: */
2/*
3 * wget - retrieve a file using HTTP or FTP
4 *
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
7 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
10 */
11
12//usage:#define wget_trivial_usage
13//usage: IF_FEATURE_WGET_LONG_OPTIONS(
14//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
16/* Since we ignore these opts, we don't show them in --help */
17/* //usage: " [--no-check-certificate] [--no-cache]" */
18//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
19//usage: )
20//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23//usage: )
24//usage:#define wget_full_usage "\n\n"
25//usage: "Retrieve files via HTTP or FTP\n"
26//usage: "\n -s Spider mode - only check file existence"
27//usage: "\n -c Continue retrieval of aborted transfer"
28//usage: "\n -q Quiet"
29//usage: "\n -P DIR Save to DIR (default .)"
30//usage: IF_FEATURE_WGET_TIMEOUT(
31//usage: "\n -T SEC Network read timeout is SEC seconds"
32//usage: )
33//usage: "\n -O FILE Save to FILE ('-' for stdout)"
34//usage: "\n -U STR Use STR for User-Agent header"
35//usage: "\n -Y Use proxy ('on' or 'off')"
36
37#include "libbb.h"
38
39#if 0
40# define log_io(...) bb_error_msg(__VA_ARGS__)
41#else
42# define log_io(...) ((void)0)
43#endif
44
45
/* Pieces of one parsed URL; filled in by parse_url(). */
46struct host_info {
47 char *allocated; /* heap copy of the URL text (xstrdup'ed in parse_url) */
48 const char *path; /* part after the host, without leading '/'; "" if the URL has none */
49 const char *user; /* "user:password" taken from before '@'; callers start it as NULL */
50 char *host; /* host part of the URL */
51 int port; /* default port for the scheme, from bb_lookup_port() */
52 smallint is_ftp; /* 1 for "ftp://", 0 for "http://" */
53};
54
55
56/* Globals */
/* All applet state lives in one heap block reached through G. */
57struct globals {
58 off_t content_len; /* Content-length of the file */
59 off_t beg_range; /* Range at which continue begins */
60#if ENABLE_FEATURE_WGET_STATUSBAR
61 off_t transferred; /* Number of bytes transferred so far */
62 const char *curfile; /* Name of current file being transferred */
63 bb_progress_t pmt;
64#endif
65 char *dir_prefix; /* -P DIR argument; prepended to the guessed file name */
66#if ENABLE_FEATURE_WGET_LONG_OPTIONS
67 char *post_data; /* --post-data argument; sent as the POST body */
68 char *extra_headers; /* all --header arguments, each terminated by "\r\n" */
69#endif
70 char *fname_out; /* where to direct output (-O) */
71 const char *proxy_flag; /* Use proxies if env vars are set */
72 const char *user_agent; /* "User-Agent" header field */
73#if ENABLE_FEATURE_WGET_TIMEOUT
74 unsigned timeout_seconds; /* -T SEC; counted down once per idle one-second poll */
75#endif
76 int output_fd; /* descriptor data is written to; -1 while not open */
77 int o_flags; /* open() flags used when the output file is created */
78 smallint chunked; /* chunked transfer encoding */
79 smallint got_clen; /* got content-length: from server */
80 /* Local downloads do benefit from big buffer.
81 * With 512 byte buffer, it was measured to be
82 * an order of magnitude slower than with big one.
83 */
84 uint64_t just_to_align_next_member;
85 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024]; /* shared scratch: response lines, headers and file data */
86} FIX_ALIASING;
/* G names the globals block; INIT_G() allocates it zero-filled and sets the default timeout. */
87#define G (*ptr_to_globals)
88#define INIT_G() do { \
89 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
90 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
91} while (0)
92
93
94/* Must match option string! */
/*
 * Bits tested in option_mask32 after getopt32().
 * The first nine follow the order of the letters in the option string
 * passed to getopt32() in wget_main ("csqO:P:Y:U:T:" "t:").
 * NOTE(review): the remaining bits presumably map to the long-only
 * options in the order getopt32 assigns them - confirm against getopt32 docs.
 * HEADER and POST_DATA evaluate to 0 when long options are compiled out,
 * so tests against them become constant-false.
 */
95enum {
96 WGET_OPT_CONTINUE = (1 << 0),
97 WGET_OPT_SPIDER = (1 << 1),
98 WGET_OPT_QUIET = (1 << 2),
99 WGET_OPT_OUTNAME = (1 << 3),
100 WGET_OPT_PREFIX = (1 << 4),
101 WGET_OPT_PROXY = (1 << 5),
102 WGET_OPT_USER_AGENT = (1 << 6),
103 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
104 WGET_OPT_RETRIES = (1 << 8),
105 WGET_OPT_PASSIVE = (1 << 9),
106 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
107 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
108};
109
/* Values for the 'flag' argument of progress_meter(). */
110enum {
111 PROGRESS_START = -1, /* initialize the bar before the first update */
112 PROGRESS_END = 0, /* final update, then release the bar and end the line */
113 PROGRESS_BUMP = 1, /* periodic update while waiting for data */
114};
115#if ENABLE_FEATURE_WGET_STATUSBAR
116static void progress_meter(int flag)
117{
118 if (option_mask32 & WGET_OPT_QUIET)
119 return;
120
121 if (flag == PROGRESS_START)
122 bb_progress_init(&G.pmt, G.curfile);
123
124 bb_progress_update(&G.pmt,
125 G.beg_range,
126 G.transferred,
127 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
128 );
129
130 if (flag == PROGRESS_END) {
131 bb_progress_free(&G.pmt);
132 bb_putchar_stderr('\n');
133 G.transferred = 0;
134 }
135}
136#else
137static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
138#endif
139
140
141/* IPv6 knows scoped address types i.e. link and site local addresses. Link
142 * local addresses can have a scope identifier to specify the
143 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
144 * identifier is only valid on a single node.
145 *
146 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
147 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
148 * in the Host header as invalid requests, see
149 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
150 */
/*
 * Cut the "%zone" part out of a bracketed IPv6 literal in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not "[xx]" or "[xx]:nn" is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *zone;
	char *closing;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */
	if (host[0] != '[')
		return; /* not IPv6 */

	zone = strchr(host, '%');
	if (zone == NULL)
		return; /* no scope id present */

	closing = strchr(host, ']');
	if (closing == NULL)
		return; /* malformed: no closing bracket */

	/* Only "[xx]:nn" and "[xx]" are accepted as well-formed */
	if (closing[1] == ':' || closing[1] == '\0') {
		/* closing points to "]...", zone points to "%eth0]..." */
		overlapping_strcpy(zone, closing);
	}
}
176
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return G.wget_buf.
 * Over-long input is silently cut so the encoded text fits the buffer.
 * The result is overwritten by the next user of G.wget_buf.
 */
static char *base64enc(const char *str)
{
	/* paranoia: cap the input length */
	const unsigned max_input = sizeof(G.wget_buf)/4*3 - 10;
	unsigned input_len = strlen(str);

	if (input_len > max_input)
		input_len = max_input;
	bb_uuencode(G.wget_buf, str, input_len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
188
/*
 * Make a string safe to print: truncate it in place at the first byte
 * below ' ' (any control character). Bytes are compared as unsigned,
 * so values >= 0x80 are kept. Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cur;

	for (cur = (unsigned char *) s; *cur >= ' '; cur++)
		continue;
	*cur = '\0';
	return s;
}
197
198static FILE *open_socket(len_and_sockaddr *lsa)
199{
200 FILE *fp;
201
202 /* glibc 2.4 seems to try seeking on it - ??! */
203 /* hopefully it understands what ESPIPE means... */
204 fp = fdopen(xconnect_stream(lsa), "r+");
205 if (fp == NULL)
206 bb_perror_msg_and_die(bb_msg_memory_exhausted);
207
208 return fp;
209}
210
211/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
212static char fgets_and_trim(FILE *fp)
213{
214 char c;
215 char *buf_ptr;
216
217 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
218 bb_perror_msg_and_die("error getting response");
219
220 buf_ptr = strchrnul(G.wget_buf, '\n');
221 c = *buf_ptr;
222 *buf_ptr = '\0';
223 buf_ptr = strchrnul(G.wget_buf, '\r');
224 *buf_ptr = '\0';
225
226 log_io("< %s", G.wget_buf);
227
228 return c;
229}
230
231static int ftpcmd(const char *s1, const char *s2, FILE *fp)
232{
233 int result;
234 if (s1) {
235 if (!s2)
236 s2 = "";
237 fprintf(fp, "%s%s\r\n", s1, s2);
238 fflush(fp);
239 log_io("> %s%s", s1, s2);
240 }
241
242 do {
243 fgets_and_trim(fp);
244 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
245
246 G.wget_buf[3] = '\0';
247 result = xatoi_positive(G.wget_buf);
248 G.wget_buf[3] = ' ';
249 return result;
250}
251
252static void parse_url(const char *src_url, struct host_info *h)
253{
254 char *url, *p, *sp;
255
256 free(h->allocated);
257 h->allocated = url = xstrdup(src_url);
258
259 if (strncmp(url, "http://", 7) == 0) {
260 h->port = bb_lookup_port("http", "tcp", 80);
261 h->host = url + 7;
262 h->is_ftp = 0;
263 } else if (strncmp(url, "ftp://", 6) == 0) {
264 h->port = bb_lookup_port("ftp", "tcp", 21);
265 h->host = url + 6;
266 h->is_ftp = 1;
267 } else
268 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
269
270 // FYI:
271 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
272 // 'GET /?var=a/b HTTP 1.0'
273 // and saves 'index.html?var=a%2Fb' (we save 'b')
274 // wget 'http://busybox.net?login=john@doe':
275 // request: 'GET /?login=john@doe HTTP/1.0'
276 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
277 // wget 'http://busybox.net#test/test':
278 // request: 'GET / HTTP/1.0'
279 // saves: 'index.html' (we save 'test')
280 //
281 // We also don't add unique .N suffix if file exists...
282 sp = strchr(h->host, '/');
283 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
284 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
285 if (!sp) {
286 h->path = "";
287 } else if (*sp == '/') {
288 *sp = '\0';
289 h->path = sp + 1;
290 } else { // '#' or '?'
291 // http://busybox.net?login=john@doe is a valid URL
292 // memmove converts to:
293 // http:/busybox.nett?login=john@doe...
294 memmove(h->host - 1, h->host, sp - h->host);
295 h->host--;
296 sp[-1] = '\0';
297 h->path = sp;
298 }
299
300 // We used to set h->user to NULL here, but this interferes
301 // with handling of code 302 ("object was moved")
302
303 sp = strrchr(h->host, '@');
304 if (sp != NULL) {
305 // URL-decode "user:password" string before base64-encoding:
306 // wget http://test:my%20pass@example.com should send
307 // Authorization: Basic dGVzdDpteSBwYXNz
308 // which decodes to "test:my pass".
309 // Standard wget and curl do this too.
310 *sp = '\0';
311 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
312 h->host = sp + 1;
313 }
314
315 sp = h->host;
316}
317
318static char *gethdr(FILE *fp)
319{
320 char *s, *hdrval;
321 int c;
322
323 /* retrieve header line */
324 c = fgets_and_trim(fp);
325
326 /* end of the headers? */
327 if (G.wget_buf[0] == '\0')
328 return NULL;
329
330 /* convert the header name to lower case */
331 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
332 /* tolower for "A-Z", no-op for "0-9a-z-." */
333 *s |= 0x20;
334 }
335
336 /* verify we are at the end of the header name */
337 if (*s != ':')
338 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
339
340 /* locate the start of the header value */
341 *s++ = '\0';
342 hdrval = skip_whitespace(s);
343
344 if (c != '\n') {
345 /* Rats! The buffer isn't big enough to hold the entire header value */
346 while (c = getc(fp), c != EOF && c != '\n')
347 continue;
348 }
349
350 return hdrval;
351}
352
353static void reset_beg_range_to_zero(void)
354{
355 bb_error_msg("restart failed");
356 G.beg_range = 0;
357 xlseek(G.output_fd, 0, SEEK_SET);
358 /* Done at the end instead: */
359 /* ftruncate(G.output_fd, 0); */
360}
361
362static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
363{
364 FILE *sfp;
365 char *str;
366 int port;
367
368 if (!target->user)
369 target->user = xstrdup("anonymous:busybox@");
370
371 sfp = open_socket(lsa);
372 if (ftpcmd(NULL, NULL, sfp) != 220)
373 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
374
375 /*
376 * Splitting username:password pair,
377 * trying to log in
378 */
379 str = strchr(target->user, ':');
380 if (str)
381 *str++ = '\0';
382 switch (ftpcmd("USER ", target->user, sfp)) {
383 case 230:
384 break;
385 case 331:
386 if (ftpcmd("PASS ", str, sfp) == 230)
387 break;
388 /* fall through (failed login) */
389 default:
390 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
391 }
392
393 ftpcmd("TYPE I", NULL, sfp);
394
395 /*
396 * Querying file size
397 */
398 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
399 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
400 if (G.content_len < 0 || errno) {
401 bb_error_msg_and_die("SIZE value is garbage");
402 }
403 G.got_clen = 1;
404 }
405
406 /*
407 * Entering passive mode
408 */
409 if (ftpcmd("PASV", NULL, sfp) != 227) {
410 pasv_error:
411 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
412 }
413 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
414 // Server's IP is N1.N2.N3.N4 (we ignore it)
415 // Server's port for data connection is P1*256+P2
416 str = strrchr(G.wget_buf, ')');
417 if (str) str[0] = '\0';
418 str = strrchr(G.wget_buf, ',');
419 if (!str) goto pasv_error;
420 port = xatou_range(str+1, 0, 255);
421 *str = '\0';
422 str = strrchr(G.wget_buf, ',');
423 if (!str) goto pasv_error;
424 port += xatou_range(str+1, 0, 255) * 256;
425 set_nport(&lsa->u.sa, htons(port));
426
427 *dfpp = open_socket(lsa);
428
429 if (G.beg_range != 0) {
430 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
431 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
432 G.content_len -= G.beg_range;
433 else
434 reset_beg_range_to_zero();
435 }
436
437 if (ftpcmd("RETR ", target->path, sfp) > 150)
438 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
439
440 return sfp;
441}
442
443static void NOINLINE retrieve_file_data(FILE *dfp)
444{
445#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
446# if ENABLE_FEATURE_WGET_TIMEOUT
447 unsigned second_cnt = G.timeout_seconds;
448# endif
449 struct pollfd polldata;
450
451 polldata.fd = fileno(dfp);
452 polldata.events = POLLIN | POLLPRI;
453#endif
454 progress_meter(PROGRESS_START);
455
456 if (G.chunked)
457 goto get_clen;
458
459 /* Loops only if chunked */
460 while (1) {
461
462#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
463 /* Must use nonblocking I/O, otherwise fread will loop
464 * and *block* until it reads full buffer,
465 * which messes up progress bar and/or timeout logic.
466 * Because of nonblocking I/O, we need to dance
467 * very carefully around EAGAIN. See explanation at
468 * clearerr() calls.
469 */
470 ndelay_on(polldata.fd);
471#endif
472 while (1) {
473 int n;
474 unsigned rdsz;
475
476#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
477 /* fread internally uses read loop, which in our case
478 * is usually exited when we get EAGAIN.
479 * In this case, libc sets error marker on the stream.
480 * Need to clear it before next fread to avoid possible
481 * rare false positive ferror below. Rare because usually
482 * fread gets more than zero bytes, and we don't fall
483 * into if (n <= 0) ...
484 */
485 clearerr(dfp);
486#endif
487 errno = 0;
488 rdsz = sizeof(G.wget_buf);
489 if (G.got_clen) {
490 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
491 if ((int)G.content_len <= 0)
492 break;
493 rdsz = (unsigned)G.content_len;
494 }
495 }
496 n = fread(G.wget_buf, 1, rdsz, dfp);
497
498 if (n > 0) {
499 xwrite(G.output_fd, G.wget_buf, n);
500#if ENABLE_FEATURE_WGET_STATUSBAR
501 G.transferred += n;
502#endif
503 if (G.got_clen) {
504 G.content_len -= n;
505 if (G.content_len == 0)
506 break;
507 }
508#if ENABLE_FEATURE_WGET_TIMEOUT
509 second_cnt = G.timeout_seconds;
510#endif
511 continue;
512 }
513
514 /* n <= 0.
515 * man fread:
516 * If error occurs, or EOF is reached, the return value
517 * is a short item count (or zero).
518 * fread does not distinguish between EOF and error.
519 */
520 if (errno != EAGAIN) {
521 if (ferror(dfp)) {
522 progress_meter(PROGRESS_END);
523 bb_perror_msg_and_die(bb_msg_read_error);
524 }
525 break; /* EOF, not error */
526 }
527
528#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
529 /* It was EAGAIN. There is no data. Wait up to one second
530 * then abort if timed out, or update the bar and try reading again.
531 */
532 if (safe_poll(&polldata, 1, 1000) == 0) {
533# if ENABLE_FEATURE_WGET_TIMEOUT
534 if (second_cnt != 0 && --second_cnt == 0) {
535 progress_meter(PROGRESS_END);
536 bb_error_msg_and_die("download timed out");
537 }
538# endif
539 /* We used to loop back to poll here,
540 * but there is no great harm in letting fread
541 * to try reading anyway.
542 */
543 }
544 /* Need to do it _every_ second for "stalled" indicator
545 * to be shown properly.
546 */
547 progress_meter(PROGRESS_BUMP);
548#endif
549 } /* while (reading data) */
550
551#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
552 clearerr(dfp);
553 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
554#endif
555 if (!G.chunked)
556 break;
557
558 fgets_and_trim(dfp); /* Eat empty line */
559 get_clen:
560 fgets_and_trim(dfp);
561 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
562 /* FIXME: error check? */
563 if (G.content_len == 0)
564 break; /* all done! */
565 G.got_clen = 1;
566 /*
567 * Note that fgets may result in some data being buffered in dfp.
568 * We loop back to fread, which will retrieve this data.
569 * Also note that code has to be arranged so that fread
570 * is done _before_ one-second poll wait - poll doesn't know
571 * about stdio buffering and can result in spurious one second waits!
572 */
573 }
574
575 /* If -c failed, we restart from the beginning,
576 * but we do not truncate file then, we do it only now, at the end.
577 * This lets user to ^C if his 99% complete 10 GB file download
578 * failed to restart *without* losing the almost complete file.
579 */
580 {
581 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
582 if (pos != (off_t)-1)
583 ftruncate(G.output_fd, pos);
584 }
585
586 /* Draw full bar and free its resources */
587 G.chunked = 0; /* makes it show 100% even for chunked download */
588 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
589 progress_meter(PROGRESS_END);
590}
591
/*
 * Download (or, in spider mode, just probe) a single URL.
 *
 * Steps, in order:
 *  - parse the URL; unless the proxy flag is "off", also parse the proxy
 *    URL taken from the ftp_proxy / http_proxy environment variable;
 *  - unless -O was given, derive the output file name from the URL path
 *    ("index.html" if it is empty), prefixed with the -P directory;
 *  - with -c, open the existing output file and resume at its size;
 *  - resolve and connect to the server (or proxy);
 *  - talk HTTP (also used for every proxied request) or hand over to
 *    prepare_ftp_session();
 *  - unless in spider mode, copy the body with retrieve_file_data().
 *
 * Errors are fatal (the *_and_die helpers are used throughout).
 */
592static void download_one_url(const char *url)
593{
594 bool use_proxy; /* Use proxies if env vars are set */
595 int redir_limit;
596 len_and_sockaddr *lsa;
597 FILE *sfp; /* socket to web/ftp server */
598 FILE *dfp; /* socket to ftp server (data) */
599 char *proxy = NULL;
600 char *fname_out_alloc;
601 char *redirected_path = NULL;
602 struct host_info server;
603 struct host_info target;
604
 /* parse_url() frees .allocated and may leave .user untouched, so both must start out as NULL */
605 server.allocated = NULL;
606 target.allocated = NULL;
607 server.user = NULL;
608 target.user = NULL;
609
610 parse_url(url, &target);
611
612 /* Use the proxy if necessary */
613 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
614 if (use_proxy) {
615 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
616 use_proxy = (proxy && proxy[0]);
617 if (use_proxy)
618 parse_url(proxy, &server);
619 }
620 if (!use_proxy) {
621 server.port = target.port;
622 if (ENABLE_FEATURE_IPV6) {
623 //free(server.allocated); - can't be non-NULL
624 server.host = server.allocated = xstrdup(target.host);
625 } else {
626 server.host = target.host;
627 }
628 }
629
 /* With IPv6, server.host is a separate copy made above, so only target.host loses its scope id here */
630 if (ENABLE_FEATURE_IPV6)
631 strip_ipv6_scope_id(target.host);
632
633 /* If there was no -O FILE, guess output filename */
634 fname_out_alloc = NULL;
635 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
636 G.fname_out = bb_get_last_path_component_nostrip(target.path);
637 /* handle "wget http://kernel.org//" */
638 if (G.fname_out[0] == '/' || !G.fname_out[0])
639 G.fname_out = (char*)"index.html";
640 /* -P DIR is considered only if there was no -O FILE */
641 if (G.dir_prefix)
642 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
643 else {
644 /* redirects may free target.path later, need to make a copy */
645 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
646 }
647 }
648#if ENABLE_FEATURE_WGET_STATUSBAR
649 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
650#endif
651
652 /* Determine where to start transfer */
653 G.beg_range = 0;
654 if (option_mask32 & WGET_OPT_CONTINUE) {
655 G.output_fd = open(G.fname_out, O_WRONLY);
656 if (G.output_fd >= 0) {
657 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
658 }
659 /* File doesn't exist. We do not create file here yet.
660 * We are not sure it exists on remote side */
661 }
662
 /* Redirect budget: every followed Location: header decrements it, reaching zero is fatal */
663 redir_limit = 5;
 /* Re-entered after a redirect to a different server when no proxy is used */
664 resolve_lsa:
665 lsa = xhost2sockaddr(server.host, server.port);
666 if (!(option_mask32 & WGET_OPT_QUIET)) {
667 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
668 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
669 free(s);
670 }
 /* Re-entered after a redirect that keeps the current lsa (same server, or proxy in use) */
671 establish_session:
672 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
673 G.got_clen = 0;
674 G.chunked = 0;
675 if (use_proxy || !target.is_ftp) {
676 /*
677 * HTTP session
678 */
679 char *str;
680 int status;
681
682
683 /* Open socket to http server */
684 sfp = open_socket(lsa);
685
686 /* Send HTTP request */
 /* Through a proxy the request line carries the full URL, rebuilt from the parsed pieces */
687 if (use_proxy) {
688 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
689 target.is_ftp ? "f" : "ht", target.host,
690 target.path);
691 } else {
692 if (option_mask32 & WGET_OPT_POST_DATA)
693 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
694 else
695 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
696 }
697
698 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
699 target.host, G.user_agent);
700
701 /* Ask server to close the connection as soon as we are done
702 * (IOW: we do not intend to send more requests)
703 */
704 fprintf(sfp, "Connection: close\r\n");
705
706#if ENABLE_FEATURE_WGET_AUTHENTICATION
 /* The "+6" skips the leading "Proxy-" of the literal, so this one sends "Authorization: Basic ..." */
707 if (target.user) {
708 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
709 base64enc(target.user));
710 }
711 if (use_proxy && server.user) {
712 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
713 base64enc(server.user));
714 }
715#endif
716
 /* Resuming: ask only for the part of the file we do not have yet */
717 if (G.beg_range != 0)
718 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
719
720#if ENABLE_FEATURE_WGET_LONG_OPTIONS
721 if (G.extra_headers)
722 fputs(G.extra_headers, sfp);
723
724 if (option_mask32 & WGET_OPT_POST_DATA) {
725 fprintf(sfp,
726 "Content-Type: application/x-www-form-urlencoded\r\n"
727 "Content-Length: %u\r\n"
728 "\r\n"
729 "%s",
730 (int) strlen(G.post_data), G.post_data
731 );
732 } else
733#endif
734 {
735 fprintf(sfp, "\r\n");
736 }
737
738 fflush(sfp);
739
740 /*
741 * Retrieve HTTP response line and check for "200" status code.
742 */
743 read_response:
744 fgets_and_trim(sfp);
745
 /* Skip the first word of the status line (the protocol version); the status code follows */
746 str = G.wget_buf;
747 str = skip_non_whitespace(str);
748 str = skip_whitespace(str);
749 // FIXME: no error check
750 // xatou wouldn't work: "200 OK"
751 status = atoi(str);
752 switch (status) {
 /* 0 (no number where the code should be) and 100: discard this header block and read another status line */
753 case 0:
754 case 100:
755 while (gethdr(sfp) != NULL)
756 /* eat all remaining headers */;
757 goto read_response;
758 case 200:
759/*
760Response 204 doesn't say "null file", it says "metadata
761has changed but data didn't":
762
763"10.2.5 204 No Content
764The server has fulfilled the request but does not need to return
765an entity-body, and might want to return updated metainformation.
766The response MAY include new or updated metainformation in the form
767of entity-headers, which if present SHOULD be associated with
768the requested variant.
769
770If the client is a user agent, it SHOULD NOT change its document
771view from that which caused the request to be sent. This response
772is primarily intended to allow input for actions to take place
773without causing a change to the user agent's active document view,
774although any new or updated metainformation SHOULD be applied
775to the document currently in the user agent's active view.
776
777The 204 response MUST NOT include a message-body, and thus
778is always terminated by the first empty line after the header fields."
779
780However, in real world it was observed that some web servers
781(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
782*/
783 case 204:
784 if (G.beg_range != 0) {
785 /* "Range:..." was not honored by the server.
786 * Restart download from the beginning.
787 */
788 reset_beg_range_to_zero();
789 }
790 break;
 /* Redirects are acted upon below, when the Location: header is seen */
791 case 300: /* redirection */
792 case 301:
793 case 302:
794 case 303:
795 break;
796 case 206: /* Partial Content */
797 if (G.beg_range != 0)
798 /* "Range:..." worked. Good. */
799 break;
800 /* Partial Content even though we did not ask for it??? */
801 /* fall through */
802 default:
803 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
804 }
805
806 /*
807 * Retrieve HTTP headers.
808 */
809 while ((str = gethdr(sfp)) != NULL) {
810 static const char keywords[] ALIGN1 =
811 "content-length\0""transfer-encoding\0""location\0";
812 enum {
813 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
814 };
815 smalluint key;
816
817 /* gethdr converted "FOO:" string to lowercase */
818
819 /* strip trailing whitespace */
820 char *s = strchrnul(str, '\0') - 1;
821 while (s >= str && (*s == ' ' || *s == '\t')) {
822 *s = '\0';
823 s--;
824 }
 /* The header name sits at the start of G.wget_buf; str points at its value */
825 key = index_in_strings(keywords, G.wget_buf) + 1;
826 if (key == KEY_content_length) {
827 G.content_len = BB_STRTOOFF(str, NULL, 10);
828 if (G.content_len < 0 || errno) {
829 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
830 }
831 G.got_clen = 1;
832 continue;
833 }
834 if (key == KEY_transfer_encoding) {
835 if (strcmp(str_tolower(str), "chunked") != 0)
836 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
837 G.chunked = 1;
838 }
839 if (key == KEY_location && status >= 300) {
840 if (--redir_limit == 0)
841 bb_error_msg_and_die("too many redirections");
842 fclose(sfp);
 /* A Location: starting with '/' is a new path on the same server; anything else is parsed as a full URL */
843 if (str[0] == '/') {
844 free(redirected_path);
845 target.path = redirected_path = xstrdup(str+1);
846 /* lsa stays the same: it's on the same server */
847 } else {
848 parse_url(str, &target);
849 if (!use_proxy) {
850 free(server.allocated);
851 server.allocated = NULL;
852 server.host = target.host;
853 /* strip_ipv6_scope_id(target.host); - no! */
854 /* we assume remote never gives us IPv6 addr with scope id */
855 server.port = target.port;
856 free(lsa);
857 goto resolve_lsa;
858 } /* else: lsa stays the same: we use proxy */
859 }
860 goto establish_session;
861 }
862 }
863// if (status >= 300)
864// bb_error_msg_and_die("bad redirection (no Location: header from server)");
865
866 /* For HTTP, data is pumped over the same connection */
867 dfp = sfp;
868
869 } else {
870 /*
871 * FTP session
872 */
873 sfp = prepare_ftp_session(&dfp, &target, lsa);
874 }
875
876 free(lsa);
877
 /* In spider mode nothing is read from the data stream or written to disk */
878 if (!(option_mask32 & WGET_OPT_SPIDER)) {
 /* Still -1 unless -c opened an existing file or -O set it up earlier: create the file only now */
879 if (G.output_fd < 0)
880 G.output_fd = xopen(G.fname_out, G.o_flags);
881 retrieve_file_data(dfp);
 /* With -O the descriptor is kept open (wget_main closes it); otherwise this file is done */
882 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
883 xclose(G.output_fd);
884 G.output_fd = -1;
885 }
886 }
887
888 if (dfp != sfp) {
889 /* It's ftp. Close data connection properly */
890 fclose(dfp);
891 if (ftpcmd(NULL, NULL, sfp) != 226)
892 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
893 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
894 }
895 fclose(sfp);
896
897 free(server.allocated);
898 free(target.allocated);
899 free(fname_out_alloc);
900 free(redirected_path);
901}
902
903int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
904int wget_main(int argc UNUSED_PARAM, char **argv)
905{
906#if ENABLE_FEATURE_WGET_LONG_OPTIONS
907 static const char wget_longopts[] ALIGN1 =
908 /* name, has_arg, val */
909 "continue\0" No_argument "c"
910//FIXME: -s isn't --spider, it's --save-headers!
911 "spider\0" No_argument "s"
912 "quiet\0" No_argument "q"
913 "output-document\0" Required_argument "O"
914 "directory-prefix\0" Required_argument "P"
915 "proxy\0" Required_argument "Y"
916 "user-agent\0" Required_argument "U"
917#if ENABLE_FEATURE_WGET_TIMEOUT
918 "timeout\0" Required_argument "T"
919#endif
920 /* Ignored: */
921 // "tries\0" Required_argument "t"
922 /* Ignored (we always use PASV): */
923 "passive-ftp\0" No_argument "\xff"
924 "header\0" Required_argument "\xfe"
925 "post-data\0" Required_argument "\xfd"
926 /* Ignored (we don't do ssl) */
927 "no-check-certificate\0" No_argument "\xfc"
928 /* Ignored (we don't support caching) */
929 "no-cache\0" No_argument "\xfb"
930 ;
931#endif
932
933#if ENABLE_FEATURE_WGET_LONG_OPTIONS
934 llist_t *headers_llist = NULL;
935#endif
936
937 INIT_G();
938
939 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
940 G.proxy_flag = "on"; /* use proxies if env vars are set */
941 G.user_agent = "Wget"; /* "User-Agent" header field */
942
943#if ENABLE_FEATURE_WGET_LONG_OPTIONS
944 applet_long_options = wget_longopts;
945#endif
946 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
947 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
948 &G.fname_out, &G.dir_prefix,
949 &G.proxy_flag, &G.user_agent,
950 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
951 NULL /* -t RETRIES */
952 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
953 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
954 );
955 argv += optind;
956
957#if ENABLE_FEATURE_WGET_LONG_OPTIONS
958 if (headers_llist) {
959 int size = 1;
960 char *cp;
961 llist_t *ll = headers_llist;
962 while (ll) {
963 size += strlen(ll->data) + 2;
964 ll = ll->link;
965 }
966 G.extra_headers = cp = xmalloc(size);
967 while (headers_llist) {
968 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
969 }
970 }
971#endif
972
973 G.output_fd = -1;
974 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
975 if (G.fname_out) { /* -O FILE ? */
976 if (LONE_DASH(G.fname_out)) { /* -O - ? */
977 G.output_fd = 1;
978 option_mask32 &= ~WGET_OPT_CONTINUE;
979 }
980 /* compat with wget: -O FILE can overwrite */
981 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
982 }
983
984 while (*argv)
985 download_one_url(*argv++);
986
987 if (G.output_fd >= 0)
988 xclose(G.output_fd);
989
990 return EXIT_SUCCESS;
991}
Note: See TracBrowser for help on using the repository browser.