1 | /* vi: set sw=4 ts=4: */
|
---|
2 | /*
|
---|
3 | * wget - retrieve a file using HTTP or FTP
|
---|
4 | *
|
---|
5 | * Chip Rosenthal Covad Communications <chip@laserlink.net>
|
---|
6 | * Licensed under GPLv2, see file LICENSE in this source tree.
|
---|
7 | *
|
---|
8 | * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
|
---|
9 | * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
|
---|
10 | */
|
---|
11 | #include "libbb.h"
|
---|
12 |
|
---|
13 | struct host_info {
|
---|
14 | // May be used if we ever will want to free() all xstrdup()s...
|
---|
15 | /* char *allocated; */
|
---|
16 | const char *path;
|
---|
17 | const char *user;
|
---|
18 | char *host;
|
---|
19 | int port;
|
---|
20 | smallint is_ftp;
|
---|
21 | };
|
---|
22 |
|
---|
23 |
|
---|
24 | /* Globals */
|
---|
25 | struct globals {
|
---|
26 | off_t content_len; /* Content-length of the file */
|
---|
27 | off_t beg_range; /* Range at which continue begins */
|
---|
28 | #if ENABLE_FEATURE_WGET_STATUSBAR
|
---|
29 | off_t transferred; /* Number of bytes transferred so far */
|
---|
30 | const char *curfile; /* Name of current file being transferred */
|
---|
31 | bb_progress_t pmt;
|
---|
32 | #endif
|
---|
33 | #if ENABLE_FEATURE_WGET_TIMEOUT
|
---|
34 | unsigned timeout_seconds;
|
---|
35 | #endif
|
---|
36 | smallint chunked; /* chunked transfer encoding */
|
---|
37 | smallint got_clen; /* got content-length: from server */
|
---|
38 | } FIX_ALIASING;
|
---|
39 | #define G (*(struct globals*)&bb_common_bufsiz1)
|
---|
40 | struct BUG_G_too_big {
|
---|
41 | char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1];
|
---|
42 | };
|
---|
43 | #define INIT_G() do { \
|
---|
44 | IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
|
---|
45 | } while (0)
|
---|
46 |
|
---|
47 |
|
---|
48 | /* Must match option string! */
|
---|
49 | enum {
|
---|
50 | WGET_OPT_CONTINUE = (1 << 0),
|
---|
51 | WGET_OPT_SPIDER = (1 << 1),
|
---|
52 | WGET_OPT_QUIET = (1 << 2),
|
---|
53 | WGET_OPT_OUTNAME = (1 << 3),
|
---|
54 | WGET_OPT_PREFIX = (1 << 4),
|
---|
55 | WGET_OPT_PROXY = (1 << 5),
|
---|
56 | WGET_OPT_USER_AGENT = (1 << 6),
|
---|
57 | WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
|
---|
58 | WGET_OPT_RETRIES = (1 << 8),
|
---|
59 | WGET_OPT_PASSIVE = (1 << 9),
|
---|
60 | WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
|
---|
61 | WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
|
---|
62 | };
|
---|
63 |
|
---|
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1,    /* (re)initialize the meter, then draw it */
	PROGRESS_END   = 0,     /* final draw, then newline and counter reset */
	PROGRESS_BUMP  = 1,     /* plain refresh */
};
|
---|
69 | #if ENABLE_FEATURE_WGET_STATUSBAR
|
---|
70 | static void progress_meter(int flag)
|
---|
71 | {
|
---|
72 | if (option_mask32 & WGET_OPT_QUIET)
|
---|
73 | return;
|
---|
74 |
|
---|
75 | if (flag == PROGRESS_START)
|
---|
76 | bb_progress_init(&G.pmt);
|
---|
77 |
|
---|
78 | bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
|
---|
79 | G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
|
---|
80 |
|
---|
81 | if (flag == PROGRESS_END) {
|
---|
82 | bb_putchar_stderr('\n');
|
---|
83 | G.transferred = 0;
|
---|
84 | }
|
---|
85 | }
|
---|
86 | #else
|
---|
87 | static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
|
---|
88 | #endif
|
---|
89 |
|
---|
90 |
|
---|
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits HOST in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not "[addr%zone]" or "[addr%zone]:port" is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (cp < scope) {
		/* '%' sits after the closing bracket (e.g. "[::1]:8%0"):
		 * there is no zone id inside the brackets. Copying "]..."
		 * forward onto the '%' would run past the end of the string. */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	/* scope < cp here, so this only ever shortens the string */
	memmove(scope, cp, strlen(cp) + 1);
}
|
---|
126 |
|
---|
/* fread() up to NMEMB bytes from STREAM into PTR, restarting the read
 * whenever it was cut short by EINTR. Returns the number of bytes stored;
 * this is less than NMEMB on EOF or on any other error. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *dst = (char*)ptr;
	size_t left = nmemb;

	for (;;) {
		size_t got;

		/* start each attempt with clean error state, so that
		 * ferror/errno below describe this attempt only */
		clearerr(stream);
		errno = 0;
		got = fread(dst, 1, left, stream);
		dst += got;
		left -= got;

		if (left == 0)
			break; /* got everything */
		if (!ferror(stream) || errno != EINTR)
			break; /* EOF or a real error */
	}

	return dst - (char*)ptr;
}
|
---|
144 |
|
---|
/* fgets() a line (at most SIZE-1 bytes) from STREAM into S, retrying
 * when the call fails with EINTR. Returns S, or NULL on EOF or on any
 * other error. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);
		if (line != NULL)
			return line;
		if (!ferror(stream) || errno != EINTR)
			return NULL; /* EOF or a real error */
	}
}
|
---|
159 |
|
---|
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode STR into BUF and return BUF.
 * BUF is assumed to be char buf[512]; input beyond 512/4*3 - 10 bytes
 * is silently dropped (paranoia). */
static char *base64enc_512(char buf[512], const char *str)
{
	enum { MAX_INPUT = 512/4*3 - 10 };
	unsigned len = strlen(str);

	bb_uuencode(buf, str, len > MAX_INPUT ? MAX_INPUT : len, bb_uuenc_tbl_base64);
	return buf;
}
#endif
|
---|
171 |
|
---|
/* Cut S at its first byte below ' ' (any control character, or the
 * terminating NUL itself) and return S. Bytes are compared as unsigned,
 * so 8-bit characters are kept. */
static char* sanitize_string(char *s)
{
	unsigned char *cur;

	for (cur = (void *) s; *cur >= ' '; cur++)
		continue;
	*cur = '\0';
	return s;
}
|
---|
180 |
|
---|
181 | static FILE *open_socket(len_and_sockaddr *lsa)
|
---|
182 | {
|
---|
183 | FILE *fp;
|
---|
184 |
|
---|
185 | /* glibc 2.4 seems to try seeking on it - ??! */
|
---|
186 | /* hopefully it understands what ESPIPE means... */
|
---|
187 | fp = fdopen(xconnect_stream(lsa), "r+");
|
---|
188 | if (fp == NULL)
|
---|
189 | bb_perror_msg_and_die("fdopen");
|
---|
190 |
|
---|
191 | return fp;
|
---|
192 | }
|
---|
193 |
|
---|
/* Optionally send the command S1 followed by S2 (NULL S2 means "") to the
 * FTP control stream FP, then read reply lines into BUF until one starts
 * with a digit and has a space in its 4th position.
 * Returns the numeric reply code; BUF keeps that line with "\r\n" removed.
 * At most 509 bytes per line are read into BUF. Dies if reading fails. */
static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
{
	int code;

	if (s1 != NULL) {
		/* s1 may be buf itself: it is sent before buf gets overwritten */
		fprintf(fp, "%s%s\r\n", s1, s2 ? s2 : "");
		fflush(fp);
	}

	for (;;) {
		char *crlf;

		if (fgets(buf, 510, fp) == NULL) {
			bb_perror_msg_and_die("error getting response");
		}
		crlf = strstr(buf, "\r\n");
		if (crlf != NULL)
			*crlf = '\0';
		/* lines of other shapes are skipped */
		if (isdigit(buf[0]) && buf[3] == ' ')
			break;
	}

	/* terminate after the three code characters just for the conversion */
	buf[3] = '\0';
	code = xatoi_positive(buf);
	buf[3] = ' ';
	return code;
}
|
---|
220 |
|
---|
221 | static void parse_url(char *src_url, struct host_info *h)
|
---|
222 | {
|
---|
223 | char *url, *p, *sp;
|
---|
224 |
|
---|
225 | /* h->allocated = */ url = xstrdup(src_url);
|
---|
226 |
|
---|
227 | if (strncmp(url, "http://", 7) == 0) {
|
---|
228 | h->port = bb_lookup_port("http", "tcp", 80);
|
---|
229 | h->host = url + 7;
|
---|
230 | h->is_ftp = 0;
|
---|
231 | } else if (strncmp(url, "ftp://", 6) == 0) {
|
---|
232 | h->port = bb_lookup_port("ftp", "tcp", 21);
|
---|
233 | h->host = url + 6;
|
---|
234 | h->is_ftp = 1;
|
---|
235 | } else
|
---|
236 | bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
|
---|
237 |
|
---|
238 | // FYI:
|
---|
239 | // "Real" wget 'http://busybox.net?var=a/b' sends this request:
|
---|
240 | // 'GET /?var=a/b HTTP 1.0'
|
---|
241 | // and saves 'index.html?var=a%2Fb' (we save 'b')
|
---|
242 | // wget 'http://busybox.net?login=john@doe':
|
---|
243 | // request: 'GET /?login=john@doe HTTP/1.0'
|
---|
244 | // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
|
---|
245 | // wget 'http://busybox.net#test/test':
|
---|
246 | // request: 'GET / HTTP/1.0'
|
---|
247 | // saves: 'index.html' (we save 'test')
|
---|
248 | //
|
---|
249 | // We also don't add unique .N suffix if file exists...
|
---|
250 | sp = strchr(h->host, '/');
|
---|
251 | p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
|
---|
252 | p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
|
---|
253 | if (!sp) {
|
---|
254 | h->path = "";
|
---|
255 | } else if (*sp == '/') {
|
---|
256 | *sp = '\0';
|
---|
257 | h->path = sp + 1;
|
---|
258 | } else { // '#' or '?'
|
---|
259 | // http://busybox.net?login=john@doe is a valid URL
|
---|
260 | // memmove converts to:
|
---|
261 | // http:/busybox.nett?login=john@doe...
|
---|
262 | memmove(h->host - 1, h->host, sp - h->host);
|
---|
263 | h->host--;
|
---|
264 | sp[-1] = '\0';
|
---|
265 | h->path = sp;
|
---|
266 | }
|
---|
267 |
|
---|
268 | // We used to set h->user to NULL here, but this interferes
|
---|
269 | // with handling of code 302 ("object was moved")
|
---|
270 |
|
---|
271 | sp = strrchr(h->host, '@');
|
---|
272 | if (sp != NULL) {
|
---|
273 | h->user = h->host;
|
---|
274 | *sp = '\0';
|
---|
275 | h->host = sp + 1;
|
---|
276 | }
|
---|
277 |
|
---|
278 | sp = h->host;
|
---|
279 | }
|
---|
280 |
|
---|
/* Read one header line from FP into BUF (BUFSIZ bytes).
 *
 * Returns NULL on EOF/read failure or when the line marks the end of the
 * headers (nothing but CRs before the LF). Otherwise the header name is
 * lowercased and NUL-terminated at the start of BUF, and a pointer to the
 * value (leading whitespace skipped, line ending cut off) is returned.
 * If the line did not fit into BUF, the value is truncated and the rest
 * of the line is read and thrown away.
 * Dies on a line whose name is not followed by ':'.
 */
static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
{
	char *cur, *value;
	int ch;

	/* *istrunc = 0; */

	if (fgets(buf, bufsiz, fp) == NULL)
		return NULL;

	/* end of headers? */
	cur = buf;
	while (*cur == '\r')
		cur++;
	if (*cur == '\n')
		return NULL;

	/* lowercase the name: OR-ing 0x20 maps "A-Z" to "a-z"
	 * and leaves "0-9a-z-." as they are */
	cur = buf;
	while (isalnum(*cur) || *cur == '-' || *cur == '.') {
		*cur |= 0x20;
		cur++;
	}

	/* the name must be followed by a colon */
	if (*cur != ':')
		bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));

	/* split name from value */
	*cur++ = '\0';
	value = skip_whitespace(cur);

	/* find the line ending */
	cur += strcspn(cur, "\r\n");
	if (*cur != '\0') {
		*cur = '\0';
		return value;
	}

	/* Rats! The buffer isn't big enough to hold the entire header value */
	do {
		ch = getc(fp);
	} while (ch != EOF && ch != '\n');
	/* *istrunc = 1; */
	return value;
}
|
---|
328 |
|
---|
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Return a newly xmalloc()ed URL-encoded copy of STR (see RFC 2396).
 * ASCII letters, digits and the characters !&'()*-.=_~ are copied as is,
 * every other byte becomes %XX with uppercase hex digits. */
static char *URL_escape(const char *str)
{
	/* worst case: every input byte expands to three */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *dst = res;

	for (;;) {
		unsigned char c = *str++;
		int verbatim;

		if (c == '\0')
			break;

		switch (c) {
		case '!': case '&': case '\'': case '(': case ')': case '*':
		case '-': case '.': case '=': case '_': case '~':
			verbatim = 1;
			break;
		default:
			verbatim = (c >= '0' && c <= '9')
				|| ((c|0x20) >= 'a' && (c|0x20) <= 'z');
			break;
		}

		if (verbatim) {
			*dst++ = c;
			continue;
		}
		*dst++ = '%';
		*dst++ = bb_hexdigits_upcase[c >> 4];
		*dst++ = bb_hexdigits_upcase[c & 0xf];
	}

	*dst = '\0';
	return res;
}
#endif
|
---|
366 |
|
---|
367 | static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
|
---|
368 | {
|
---|
369 | char buf[512];
|
---|
370 | FILE *sfp;
|
---|
371 | char *str;
|
---|
372 | int port;
|
---|
373 |
|
---|
374 | if (!target->user)
|
---|
375 | target->user = xstrdup("anonymous:busybox@");
|
---|
376 |
|
---|
377 | sfp = open_socket(lsa);
|
---|
378 | if (ftpcmd(NULL, NULL, sfp, buf) != 220)
|
---|
379 | bb_error_msg_and_die("%s", sanitize_string(buf+4));
|
---|
380 |
|
---|
381 | /*
|
---|
382 | * Splitting username:password pair,
|
---|
383 | * trying to log in
|
---|
384 | */
|
---|
385 | str = strchr(target->user, ':');
|
---|
386 | if (str)
|
---|
387 | *str++ = '\0';
|
---|
388 | switch (ftpcmd("USER ", target->user, sfp, buf)) {
|
---|
389 | case 230:
|
---|
390 | break;
|
---|
391 | case 331:
|
---|
392 | if (ftpcmd("PASS ", str, sfp, buf) == 230)
|
---|
393 | break;
|
---|
394 | /* fall through (failed login) */
|
---|
395 | default:
|
---|
396 | bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
|
---|
397 | }
|
---|
398 |
|
---|
399 | ftpcmd("TYPE I", NULL, sfp, buf);
|
---|
400 |
|
---|
401 | /*
|
---|
402 | * Querying file size
|
---|
403 | */
|
---|
404 | if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) {
|
---|
405 | G.content_len = BB_STRTOOFF(buf+4, NULL, 10);
|
---|
406 | if (G.content_len < 0 || errno) {
|
---|
407 | bb_error_msg_and_die("SIZE value is garbage");
|
---|
408 | }
|
---|
409 | G.got_clen = 1;
|
---|
410 | }
|
---|
411 |
|
---|
412 | /*
|
---|
413 | * Entering passive mode
|
---|
414 | */
|
---|
415 | if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
|
---|
416 | pasv_error:
|
---|
417 | bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
|
---|
418 | }
|
---|
419 | // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
|
---|
420 | // Server's IP is N1.N2.N3.N4 (we ignore it)
|
---|
421 | // Server's port for data connection is P1*256+P2
|
---|
422 | str = strrchr(buf, ')');
|
---|
423 | if (str) str[0] = '\0';
|
---|
424 | str = strrchr(buf, ',');
|
---|
425 | if (!str) goto pasv_error;
|
---|
426 | port = xatou_range(str+1, 0, 255);
|
---|
427 | *str = '\0';
|
---|
428 | str = strrchr(buf, ',');
|
---|
429 | if (!str) goto pasv_error;
|
---|
430 | port += xatou_range(str+1, 0, 255) * 256;
|
---|
431 | set_nport(lsa, htons(port));
|
---|
432 |
|
---|
433 | *dfpp = open_socket(lsa);
|
---|
434 |
|
---|
435 | if (G.beg_range) {
|
---|
436 | sprintf(buf, "REST %"OFF_FMT"u", G.beg_range);
|
---|
437 | if (ftpcmd(buf, NULL, sfp, buf) == 350)
|
---|
438 | G.content_len -= G.beg_range;
|
---|
439 | }
|
---|
440 |
|
---|
441 | if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
|
---|
442 | bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
|
---|
443 |
|
---|
444 | return sfp;
|
---|
445 | }
|
---|
446 |
|
---|
447 | static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
|
---|
448 | {
|
---|
449 | char buf[4*1024]; /* made bigger to speed up local xfers */
|
---|
450 | #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
|
---|
451 | # if ENABLE_FEATURE_WGET_TIMEOUT
|
---|
452 | unsigned second_cnt;
|
---|
453 | # endif
|
---|
454 | struct pollfd polldata;
|
---|
455 |
|
---|
456 | polldata.fd = fileno(dfp);
|
---|
457 | polldata.events = POLLIN | POLLPRI;
|
---|
458 | #endif
|
---|
459 | progress_meter(PROGRESS_START);
|
---|
460 |
|
---|
461 | if (G.chunked)
|
---|
462 | goto get_clen;
|
---|
463 |
|
---|
464 | /* Loops only if chunked */
|
---|
465 | while (1) {
|
---|
466 |
|
---|
467 | #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
|
---|
468 | ndelay_on(polldata.fd);
|
---|
469 | #endif
|
---|
470 | while (1) {
|
---|
471 | int n;
|
---|
472 | unsigned rdsz;
|
---|
473 |
|
---|
474 | rdsz = sizeof(buf);
|
---|
475 | if (G.got_clen) {
|
---|
476 | if (G.content_len < (off_t)sizeof(buf)) {
|
---|
477 | if ((int)G.content_len <= 0)
|
---|
478 | break;
|
---|
479 | rdsz = (unsigned)G.content_len;
|
---|
480 | }
|
---|
481 | }
|
---|
482 | #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
|
---|
483 | # if ENABLE_FEATURE_WGET_TIMEOUT
|
---|
484 | second_cnt = G.timeout_seconds;
|
---|
485 | # endif
|
---|
486 | while (1) {
|
---|
487 | if (safe_poll(&polldata, 1, 1000) != 0)
|
---|
488 | break; /* error, EOF, or data is available */
|
---|
489 | # if ENABLE_FEATURE_WGET_TIMEOUT
|
---|
490 | if (second_cnt != 0 && --second_cnt == 0) {
|
---|
491 | progress_meter(PROGRESS_END);
|
---|
492 | bb_perror_msg_and_die("download timed out");
|
---|
493 | }
|
---|
494 | # endif
|
---|
495 | /* Needed for "stalled" indicator */
|
---|
496 | progress_meter(PROGRESS_BUMP);
|
---|
497 | }
|
---|
498 | #endif
|
---|
499 | /* fread internally uses read loop, which in our case
|
---|
500 | * is usually exited when we get EAGAIN.
|
---|
501 | * In this case, libc sets error marker on the stream.
|
---|
502 | * Need to clear it before next fread to avoid possible
|
---|
503 | * rare false positive ferror below. Rare because usually
|
---|
504 | * fread gets more than zero bytes, and we don't fall
|
---|
505 | * into if (n <= 0) ...
|
---|
506 | */
|
---|
507 | clearerr(dfp);
|
---|
508 | errno = 0;
|
---|
509 | n = safe_fread(buf, rdsz, dfp);
|
---|
510 | /* man fread:
|
---|
511 | * If error occurs, or EOF is reached, the return value
|
---|
512 | * is a short item count (or zero).
|
---|
513 | * fread does not distinguish between EOF and error.
|
---|
514 | */
|
---|
515 | if (n <= 0) {
|
---|
516 | #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
|
---|
517 | if (errno == EAGAIN) /* poll lied, there is no data? */
|
---|
518 | continue; /* yes */
|
---|
519 | #endif
|
---|
520 | if (ferror(dfp))
|
---|
521 | bb_perror_msg_and_die(bb_msg_read_error);
|
---|
522 | break; /* EOF, not error */
|
---|
523 | }
|
---|
524 |
|
---|
525 | xwrite(output_fd, buf, n);
|
---|
526 | #if ENABLE_FEATURE_WGET_STATUSBAR
|
---|
527 | G.transferred += n;
|
---|
528 | progress_meter(PROGRESS_BUMP);
|
---|
529 | #endif
|
---|
530 | if (G.got_clen) {
|
---|
531 | G.content_len -= n;
|
---|
532 | if (G.content_len == 0)
|
---|
533 | break;
|
---|
534 | }
|
---|
535 | }
|
---|
536 | #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
|
---|
537 | ndelay_off(polldata.fd);
|
---|
538 | #endif
|
---|
539 |
|
---|
540 | if (!G.chunked)
|
---|
541 | break;
|
---|
542 |
|
---|
543 | safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */
|
---|
544 | get_clen:
|
---|
545 | safe_fgets(buf, sizeof(buf), dfp);
|
---|
546 | G.content_len = STRTOOFF(buf, NULL, 16);
|
---|
547 | /* FIXME: error check? */
|
---|
548 | if (G.content_len == 0)
|
---|
549 | break; /* all done! */
|
---|
550 | G.got_clen = 1;
|
---|
551 | }
|
---|
552 |
|
---|
553 | progress_meter(PROGRESS_END);
|
---|
554 | }
|
---|
555 |
|
---|
556 | int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
|
---|
557 | int wget_main(int argc UNUSED_PARAM, char **argv)
|
---|
558 | {
|
---|
559 | char buf[512];
|
---|
560 | struct host_info server, target;
|
---|
561 | len_and_sockaddr *lsa;
|
---|
562 | unsigned opt;
|
---|
563 | int redir_limit;
|
---|
564 | char *proxy = NULL;
|
---|
565 | char *dir_prefix = NULL;
|
---|
566 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS
|
---|
567 | char *post_data;
|
---|
568 | char *extra_headers = NULL;
|
---|
569 | llist_t *headers_llist = NULL;
|
---|
570 | #endif
|
---|
571 | FILE *sfp; /* socket to web/ftp server */
|
---|
572 | FILE *dfp; /* socket to ftp server (data) */
|
---|
573 | char *fname_out; /* where to direct output (-O) */
|
---|
574 | int output_fd = -1;
|
---|
575 | bool use_proxy; /* Use proxies if env vars are set */
|
---|
576 | const char *proxy_flag = "on"; /* Use proxies if env vars are set */
|
---|
577 | const char *user_agent = "Wget";/* "User-Agent" header field */
|
---|
578 |
|
---|
579 | static const char keywords[] ALIGN1 =
|
---|
580 | "content-length\0""transfer-encoding\0""chunked\0""location\0";
|
---|
581 | enum {
|
---|
582 | KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
|
---|
583 | };
|
---|
584 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS
|
---|
585 | static const char wget_longopts[] ALIGN1 =
|
---|
586 | /* name, has_arg, val */
|
---|
587 | "continue\0" No_argument "c"
|
---|
588 | "spider\0" No_argument "s"
|
---|
589 | "quiet\0" No_argument "q"
|
---|
590 | "output-document\0" Required_argument "O"
|
---|
591 | "directory-prefix\0" Required_argument "P"
|
---|
592 | "proxy\0" Required_argument "Y"
|
---|
593 | "user-agent\0" Required_argument "U"
|
---|
594 | #if ENABLE_FEATURE_WGET_TIMEOUT
|
---|
595 | "timeout\0" Required_argument "T"
|
---|
596 | #endif
|
---|
597 | /* Ignored: */
|
---|
598 | // "tries\0" Required_argument "t"
|
---|
599 | /* Ignored (we always use PASV): */
|
---|
600 | "passive-ftp\0" No_argument "\xff"
|
---|
601 | "header\0" Required_argument "\xfe"
|
---|
602 | "post-data\0" Required_argument "\xfd"
|
---|
603 | /* Ignored (we don't do ssl) */
|
---|
604 | "no-check-certificate\0" No_argument "\xfc"
|
---|
605 | ;
|
---|
606 | #endif
|
---|
607 |
|
---|
608 | INIT_G();
|
---|
609 |
|
---|
610 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS
|
---|
611 | applet_long_options = wget_longopts;
|
---|
612 | #endif
|
---|
613 | /* server.allocated = target.allocated = NULL; */
|
---|
614 | opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
|
---|
615 | opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
|
---|
616 | &fname_out, &dir_prefix,
|
---|
617 | &proxy_flag, &user_agent,
|
---|
618 | IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
|
---|
619 | NULL /* -t RETRIES */
|
---|
620 | IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
|
---|
621 | IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
|
---|
622 | );
|
---|
623 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS
|
---|
624 | if (headers_llist) {
|
---|
625 | int size = 1;
|
---|
626 | char *cp;
|
---|
627 | llist_t *ll = headers_llist;
|
---|
628 | while (ll) {
|
---|
629 | size += strlen(ll->data) + 2;
|
---|
630 | ll = ll->link;
|
---|
631 | }
|
---|
632 | extra_headers = cp = xmalloc(size);
|
---|
633 | while (headers_llist) {
|
---|
634 | cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
|
---|
635 | }
|
---|
636 | }
|
---|
637 | #endif
|
---|
638 |
|
---|
639 | /* TODO: compat issue: should handle "wget URL1 URL2..." */
|
---|
640 |
|
---|
641 | target.user = NULL;
|
---|
642 | parse_url(argv[optind], &target);
|
---|
643 |
|
---|
644 | /* Use the proxy if necessary */
|
---|
645 | use_proxy = (strcmp(proxy_flag, "off") != 0);
|
---|
646 | if (use_proxy) {
|
---|
647 | proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
|
---|
648 | if (proxy && proxy[0]) {
|
---|
649 | server.user = NULL;
|
---|
650 | parse_url(proxy, &server);
|
---|
651 | } else {
|
---|
652 | use_proxy = 0;
|
---|
653 | }
|
---|
654 | }
|
---|
655 | if (!use_proxy) {
|
---|
656 | server.port = target.port;
|
---|
657 | if (ENABLE_FEATURE_IPV6) {
|
---|
658 | server.host = xstrdup(target.host);
|
---|
659 | } else {
|
---|
660 | server.host = target.host;
|
---|
661 | }
|
---|
662 | }
|
---|
663 |
|
---|
664 | if (ENABLE_FEATURE_IPV6)
|
---|
665 | strip_ipv6_scope_id(target.host);
|
---|
666 |
|
---|
667 | /* Guess an output filename, if there was no -O FILE */
|
---|
668 | if (!(opt & WGET_OPT_OUTNAME)) {
|
---|
669 | fname_out = bb_get_last_path_component_nostrip(target.path);
|
---|
670 | /* handle "wget http://kernel.org//" */
|
---|
671 | if (fname_out[0] == '/' || !fname_out[0])
|
---|
672 | fname_out = (char*)"index.html";
|
---|
673 | /* -P DIR is considered only if there was no -O FILE */
|
---|
674 | if (dir_prefix)
|
---|
675 | fname_out = concat_path_file(dir_prefix, fname_out);
|
---|
676 | } else {
|
---|
677 | if (LONE_DASH(fname_out)) {
|
---|
678 | /* -O - */
|
---|
679 | output_fd = 1;
|
---|
680 | opt &= ~WGET_OPT_CONTINUE;
|
---|
681 | }
|
---|
682 | }
|
---|
683 | #if ENABLE_FEATURE_WGET_STATUSBAR
|
---|
684 | G.curfile = bb_get_last_path_component_nostrip(fname_out);
|
---|
685 | #endif
|
---|
686 |
|
---|
687 | /* Impossible?
|
---|
688 | if ((opt & WGET_OPT_CONTINUE) && !fname_out)
|
---|
689 | bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
|
---|
690 | */
|
---|
691 |
|
---|
692 | /* Determine where to start transfer */
|
---|
693 | if (opt & WGET_OPT_CONTINUE) {
|
---|
694 | output_fd = open(fname_out, O_WRONLY);
|
---|
695 | if (output_fd >= 0) {
|
---|
696 | G.beg_range = xlseek(output_fd, 0, SEEK_END);
|
---|
697 | }
|
---|
698 | /* File doesn't exist. We do not create file here yet.
|
---|
699 | * We are not sure it exists on remove side */
|
---|
700 | }
|
---|
701 |
|
---|
702 | redir_limit = 5;
|
---|
703 | resolve_lsa:
|
---|
704 | lsa = xhost2sockaddr(server.host, server.port);
|
---|
705 | if (!(opt & WGET_OPT_QUIET)) {
|
---|
706 | char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
|
---|
707 | fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
|
---|
708 | free(s);
|
---|
709 | }
|
---|
710 | establish_session:
|
---|
711 | if (use_proxy || !target.is_ftp) {
|
---|
712 | /*
|
---|
713 | * HTTP session
|
---|
714 | */
|
---|
715 | char *str;
|
---|
716 | int status;
|
---|
717 |
|
---|
718 | /* Open socket to http server */
|
---|
719 | sfp = open_socket(lsa);
|
---|
720 |
|
---|
721 | /* Send HTTP request */
|
---|
722 | if (use_proxy) {
|
---|
723 | fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
|
---|
724 | target.is_ftp ? "f" : "ht", target.host,
|
---|
725 | target.path);
|
---|
726 | } else {
|
---|
727 | if (opt & WGET_OPT_POST_DATA)
|
---|
728 | fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
|
---|
729 | else
|
---|
730 | fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
|
---|
731 | }
|
---|
732 |
|
---|
733 | fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
|
---|
734 | target.host, user_agent);
|
---|
735 |
|
---|
736 | /* Ask server to close the connection as soon as we are done
|
---|
737 | * (IOW: we do not intend to send more requests)
|
---|
738 | */
|
---|
739 | fprintf(sfp, "Connection: close\r\n");
|
---|
740 |
|
---|
741 | #if ENABLE_FEATURE_WGET_AUTHENTICATION
|
---|
742 | if (target.user) {
|
---|
743 | fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
|
---|
744 | base64enc_512(buf, target.user));
|
---|
745 | }
|
---|
746 | if (use_proxy && server.user) {
|
---|
747 | fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
|
---|
748 | base64enc_512(buf, server.user));
|
---|
749 | }
|
---|
750 | #endif
|
---|
751 |
|
---|
752 | if (G.beg_range)
|
---|
753 | fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
|
---|
754 |
|
---|
755 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS
|
---|
756 | if (extra_headers)
|
---|
757 | fputs(extra_headers, sfp);
|
---|
758 |
|
---|
759 | if (opt & WGET_OPT_POST_DATA) {
|
---|
760 | char *estr = URL_escape(post_data);
|
---|
761 | fprintf(sfp,
|
---|
762 | "Content-Type: application/x-www-form-urlencoded\r\n"
|
---|
763 | "Content-Length: %u\r\n"
|
---|
764 | "\r\n"
|
---|
765 | "%s",
|
---|
766 | (int) strlen(estr), estr
|
---|
767 | );
|
---|
768 | free(estr);
|
---|
769 | } else
|
---|
770 | #endif
|
---|
771 | {
|
---|
772 | fprintf(sfp, "\r\n");
|
---|
773 | }
|
---|
774 |
|
---|
775 | fflush(sfp);
|
---|
776 |
|
---|
777 | /*
|
---|
778 | * Retrieve HTTP response line and check for "200" status code.
|
---|
779 | */
|
---|
780 | read_response:
|
---|
781 | if (fgets(buf, sizeof(buf), sfp) == NULL)
|
---|
782 | bb_error_msg_and_die("no response from server");
|
---|
783 |
|
---|
784 | str = buf;
|
---|
785 | str = skip_non_whitespace(str);
|
---|
786 | str = skip_whitespace(str);
|
---|
787 | // FIXME: no error check
|
---|
788 | // xatou wouldn't work: "200 OK"
|
---|
789 | status = atoi(str);
|
---|
790 | switch (status) {
|
---|
791 | case 0:
|
---|
792 | case 100:
|
---|
793 | while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
|
---|
794 | /* eat all remaining headers */;
|
---|
795 | goto read_response;
|
---|
796 | case 200:
|
---|
797 | /*
|
---|
798 | Response 204 doesn't say "null file", it says "metadata
|
---|
799 | has changed but data didn't":
|
---|
800 |
|
---|
801 | "10.2.5 204 No Content
|
---|
802 | The server has fulfilled the request but does not need to return
|
---|
803 | an entity-body, and might want to return updated metainformation.
|
---|
804 | The response MAY include new or updated metainformation in the form
|
---|
805 | of entity-headers, which if present SHOULD be associated with
|
---|
806 | the requested variant.
|
---|
807 |
|
---|
808 | If the client is a user agent, it SHOULD NOT change its document
|
---|
809 | view from that which caused the request to be sent. This response
|
---|
810 | is primarily intended to allow input for actions to take place
|
---|
811 | without causing a change to the user agent's active document view,
|
---|
812 | although any new or updated metainformation SHOULD be applied
|
---|
813 | to the document currently in the user agent's active view.
|
---|
814 |
|
---|
815 | The 204 response MUST NOT include a message-body, and thus
|
---|
816 | is always terminated by the first empty line after the header fields."
|
---|
817 |
|
---|
818 | However, in real world it was observed that some web servers
|
---|
819 | (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
|
---|
820 | */
|
---|
821 | case 204:
|
---|
822 | break;
|
---|
823 | case 300: /* redirection */
|
---|
824 | case 301:
|
---|
825 | case 302:
|
---|
826 | case 303:
|
---|
827 | break;
|
---|
828 | case 206:
|
---|
829 | if (G.beg_range)
|
---|
830 | break;
|
---|
831 | /* fall through */
|
---|
832 | default:
|
---|
833 | bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
|
---|
834 | }
|
---|
835 |
|
---|
836 | /*
|
---|
837 | * Retrieve HTTP headers.
|
---|
838 | */
|
---|
839 | while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
|
---|
840 | /* gethdr converted "FOO:" string to lowercase */
|
---|
841 | smalluint key;
|
---|
842 | /* strip trailing whitespace */
|
---|
843 | char *s = strchrnul(str, '\0') - 1;
|
---|
844 | while (s >= str && (*s == ' ' || *s == '\t')) {
|
---|
845 | *s = '\0';
|
---|
846 | s--;
|
---|
847 | }
|
---|
848 | key = index_in_strings(keywords, buf) + 1;
|
---|
849 | if (key == KEY_content_length) {
|
---|
850 | G.content_len = BB_STRTOOFF(str, NULL, 10);
|
---|
851 | if (G.content_len < 0 || errno) {
|
---|
852 | bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
|
---|
853 | }
|
---|
854 | G.got_clen = 1;
|
---|
855 | continue;
|
---|
856 | }
|
---|
857 | if (key == KEY_transfer_encoding) {
|
---|
858 | if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
|
---|
859 | bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
|
---|
860 | G.chunked = G.got_clen = 1;
|
---|
861 | }
|
---|
862 | if (key == KEY_location && status >= 300) {
|
---|
863 | if (--redir_limit == 0)
|
---|
864 | bb_error_msg_and_die("too many redirections");
|
---|
865 | fclose(sfp);
|
---|
866 | G.got_clen = 0;
|
---|
867 | G.chunked = 0;
|
---|
868 | if (str[0] == '/')
|
---|
869 | /* free(target.allocated); */
|
---|
870 | target.path = /* target.allocated = */ xstrdup(str+1);
|
---|
871 | /* lsa stays the same: it's on the same server */
|
---|
872 | else {
|
---|
873 | parse_url(str, &target);
|
---|
874 | if (!use_proxy) {
|
---|
875 | server.host = target.host;
|
---|
876 | /* strip_ipv6_scope_id(target.host); - no! */
|
---|
877 | /* we assume remote never gives us IPv6 addr with scope id */
|
---|
878 | server.port = target.port;
|
---|
879 | free(lsa);
|
---|
880 | goto resolve_lsa;
|
---|
881 | } /* else: lsa stays the same: we use proxy */
|
---|
882 | }
|
---|
883 | goto establish_session;
|
---|
884 | }
|
---|
885 | }
|
---|
886 | // if (status >= 300)
|
---|
887 | // bb_error_msg_and_die("bad redirection (no Location: header from server)");
|
---|
888 |
|
---|
889 | /* For HTTP, data is pumped over the same connection */
|
---|
890 | dfp = sfp;
|
---|
891 |
|
---|
892 | } else {
|
---|
893 | /*
|
---|
894 | * FTP session
|
---|
895 | */
|
---|
896 | sfp = prepare_ftp_session(&dfp, &target, lsa);
|
---|
897 | }
|
---|
898 |
|
---|
899 | if (opt & WGET_OPT_SPIDER) {
|
---|
900 | if (ENABLE_FEATURE_CLEAN_UP)
|
---|
901 | fclose(sfp);
|
---|
902 | return EXIT_SUCCESS;
|
---|
903 | }
|
---|
904 |
|
---|
905 | if (output_fd < 0) {
|
---|
906 | int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
|
---|
907 | /* compat with wget: -O FILE can overwrite */
|
---|
908 | if (opt & WGET_OPT_OUTNAME)
|
---|
909 | o_flags = O_WRONLY | O_CREAT | O_TRUNC;
|
---|
910 | output_fd = xopen(fname_out, o_flags);
|
---|
911 | }
|
---|
912 |
|
---|
913 | retrieve_file_data(dfp, output_fd);
|
---|
914 | xclose(output_fd);
|
---|
915 |
|
---|
916 | if (dfp != sfp) {
|
---|
917 | /* It's ftp. Close it properly */
|
---|
918 | fclose(dfp);
|
---|
919 | if (ftpcmd(NULL, NULL, sfp, buf) != 226)
|
---|
920 | bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
|
---|
921 | /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */
|
---|
922 | }
|
---|
923 |
|
---|
924 | return EXIT_SUCCESS;
|
---|
925 | }
|
---|