|
| 1 | +#!/bin/sh |
| 2 | + |
| 3 | +# wcurl - a simple wrapper around curl to easily download files. |
| 4 | +# |
| 5 | +# Requires curl >= 7.46.0 (2015) |
| 6 | +# |
| 7 | +# Copyright (C) Samuel Henrique <[email protected]>, Sergio Durigan |
| 8 | +# Junior <[email protected]> and many contributors, see the AUTHORS |
| 9 | +# file. |
| 10 | +# |
| 11 | +# Permission to use, copy, modify, and distribute this software for any purpose |
| 12 | +# with or without fee is hereby granted, provided that the above copyright |
| 13 | +# notice and this permission notice appear in all copies. |
| 14 | +# |
| 15 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN |
| 18 | +# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
| 19 | +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| 20 | +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE |
| 21 | +# OR OTHER DEALINGS IN THE SOFTWARE. |
| 22 | +# |
| 23 | +# Except as contained in this notice, the name of a copyright holder shall not be |
| 24 | +# used in advertising or otherwise to promote the sale, use or other dealings in |
| 25 | +# this Software without prior written authorization of the copyright holder. |
| 26 | +# |
| 27 | +# SPDX-License-Identifier: curl |
| 28 | + |
# Stop on errors and on usage of unset variables.
set -eu

VERSION="2025.05.26"

# Name this script was invoked as; used in usage/error output so renamed or
# symlinked copies report themselves correctly.
PROGRAM_NAME="$(basename "$0")"
# 'readonly' is intentionally on a separate line: combining it with the
# command substitution above would mask a failure of basename (ShellCheck
# SC2155).
readonly PROGRAM_NAME
# Print the wcurl version string (a single line) to stdout.
print_version()
{
    printf "%s\n" "${VERSION}"
}
| 44 | + |
# Display the program usage.
# The here-doc below is unquoted on purpose so ${PROGRAM_NAME} expands to the
# actual invocation name.
usage()
{
    cat << _EOF_
${PROGRAM_NAME} -- a simple wrapper around curl to easily download files.

Usage: ${PROGRAM_NAME} <URL>...
       ${PROGRAM_NAME} [--curl-options <CURL_OPTIONS>]... [--no-decode-filename] [-o|-O|--output <PATH>] [--dry-run] [--] <URL>...
       ${PROGRAM_NAME} [--curl-options=<CURL_OPTIONS>]... [--no-decode-filename] [--output=<PATH>] [--dry-run] [--] <URL>...
       ${PROGRAM_NAME} -h|--help
       ${PROGRAM_NAME} -V|--version

Options:

  --curl-options <CURL_OPTIONS>: Specify extra options to be passed when invoking curl. May be
                                 specified more than once.

  -o, -O, --output <PATH>: Use the provided output path instead of getting it from the URL. If
                           multiple URLs are provided, resulting files share the same name with a
                           number appended to the end (curl >= 7.83.0). If this option is provided
                           multiple times, only the last value is considered.

  --no-decode-filename: Don't percent-decode the output filename, even if the percent-encoding in
                        the URL was done by wcurl, e.g.: The URL contained whitespaces.

  --dry-run: Don't actually execute curl, just print what would be invoked.

  -V, --version: Print version information.

  -h, --help: Print this usage message.

  <CURL_OPTIONS>: Any option supported by curl can be set here. This is not used by wcurl; it is
                  instead forwarded to the curl invocation.

  <URL>: URL to be downloaded. Anything that is not a parameter is considered
         an URL. Whitespaces are percent-encoded and the URL is passed to curl, which
         then performs the parsing. May be specified more than once.
_EOF_
}
| 84 | + |
# Display an error message on stderr and bail out.
# Arguments: the message to print (all arguments, joined by spaces).
# Never returns: exits the script with status 1.
error()
{
    # Use the portable fd-2 redirection instead of '> /dev/stderr':
    # /dev/stderr is not guaranteed to exist or be writable (e.g. chroots,
    # some containers, or when stderr is a reopened socket), while '>&2'
    # always targets the inherited stderr descriptor.
    printf "%s\n" "$*" >&2
    exit 1
}
| 91 | + |
# Extra curl options provided by the user.
# This is set per-URL for every URL provided.
# Some options are global, but we err on the side of needlessly setting
# them multiple times instead of causing issues with parameters that need
# to be set per-URL.
CURL_OPTIONS=""

# The URLs to be downloaded (space-separated; spaces inside URLs are
# percent-encoded before being appended here).
URLS=""

# Set to the percent-decoded filename parsed from the URL, unless
# --output or --no-decode-filename are used.
OUTPUT_PATH=""
HAS_USER_SET_OUTPUT="false"

# The parameters that are passed per-URL to curl.
# Kept as one string (POSIX sh has no arrays); it is later expanded
# unquoted so the shell word-splits it into individual arguments.
readonly PER_URL_PARAMETERS="\
    --fail \
    --globoff \
    --location \
    --proto-default https \
    --remote-time \
    --retry 5 "

# Whether to invoke curl or not ("true" under --dry-run: only print the
# command that would run).
DRY_RUN="false"
| 118 | + |
# Validate the collected parameters and freeze them for the rest of the run.
# Bails out (via error) when no URL was given.
sanitize()
{
    [ -n "${URLS}" ] || error "You must provide at least one URL to download."

    readonly CURL_OPTIONS URLS DRY_RUN HAS_USER_SET_OUTPUT
}
| 128 | + |
# Indicate via exit code whether the string given in the first parameter
# consists solely of characters from the string given in the second
# parameter (i.e. is $1 a subset of the character set $2?).
# Returns 0 on a subset, 1 for the empty string or any foreign character.
is_subset_of()
{
    case "${1}" in
        ''|*[!${2}]*)
            # Empty input, or at least one character outside the set.
            return 1
            ;;
    esac
    return 0
}
| 139 | + |
# Print the given string percent-decoded.
#
# Reads the global DECODE_FILENAME: when it is not "true", every character
# is passed through unchanged.  Encodings of control characters (%00-%1F)
# are passed through without decoding so a crafted URL cannot inject e.g.
# newlines into the output filename.
percent_decode()
{
    # Iterate on the input character-by-character: 'fold -w1' emits one
    # character per line and the read loop consumes them.
    # NOTE(review): fold works on bytes/columns, so multi-byte characters
    # are carried through piecewise — harmless, since only literal '%XX'
    # triples are ever transformed.
    printf "%s\n" "${1}" | fold -w1 | while IFS= read -r decode_out; do
        # If character is a "%", read the next character as decode_hex1.
        if [ "${decode_out}" = % ] && IFS= read -r decode_hex1; then
            decode_out="${decode_out}${decode_hex1}"
            # If there's one more character, read it as decode_hex2.
            if IFS= read -r decode_hex2; then
                decode_out="${decode_out}${decode_hex2}"
                # Decode only when: both hex digits are valid, the first
                # digit is >= 2 (skipping control characters 00-1F), and
                # DECODE_FILENAME is "true".  Otherwise the literal '%XX'
                # accumulated above is emitted unchanged.
                if is_subset_of "${decode_hex1}" "23456789abcdefABCDEF" && \
                   is_subset_of "${decode_hex2}" "0123456789abcdefABCDEF" && \
                   [ "${DECODE_FILENAME}" = "true" ]; then
                    # Use printf to convert the hex value into octal and
                    # then decode that octal escape into the final character.
                    decode_out="$(printf "%b" "\\$(printf %o "0x${decode_hex1}${decode_hex2}")")"
                fi
            fi
        fi
        # Emit without a trailing newline; the loop body runs in a subshell
        # (pipeline), so output is the only channel back to the caller.
        printf %s "${decode_out}"
    done
}
| 165 | + |
# Print the percent-decoded filename portion of the given URL.
# Prints nothing (empty output) when the URL has no path component.
# NOTE(review): hostname_and_path is not function-local — POSIX sh has no
# 'local' — so it leaks into the caller's scope.
get_url_filename()
{
    # Remove protocol and query string if present.
    hostname_and_path="$(printf %s "${1}" | sed -e 's,^[^/]*//,,' -e 's,?.*$,,')"
    # If what remains contains a slash, there's a path; return it percent-decoded.
    case "${hostname_and_path}" in
        # sed to remove everything preceding the last '/', e.g.: "example/something" becomes "something"
        */*) percent_decode "$(printf %s "${hostname_and_path}" | sed -e 's,^.*/,,')";;
    esac
    # No slash means there was just a hostname and no path; return empty string.
}
| 178 | + |
# Execute curl with the list of URLs provided by the user.
#
# Reads the globals URLS, CURL_OPTIONS, DRY_RUN, HAS_USER_SET_OUTPUT,
# OUTPUT_PATH and PER_URL_PARAMETERS.  Replaces the current process via
# 'exec' unless DRY_RUN is "true", in which case the assembled argument
# list is printed (one argument per line) instead.
exec_curl()
{
    CMD="curl "

    # Store version to check if it supports --no-clobber and --parallel.
    # 'curl --version' prints "curl X.Y.Z ..." on its first line; take the
    # second space-separated field.
    curl_version=$($CMD --version | cut -f2 -d' ' | head -n1)
    curl_version_major=$(echo "$curl_version" | cut -f1 -d.)
    curl_version_minor=$(echo "$curl_version" | cut -f2 -d.)

    CURL_HAS_NO_CLOBBER=""
    CURL_HAS_PARALLEL=""
    # --no-clobber is only supported since 7.83.0.
    # --parallel is only supported since 7.66.0.
    if [ "${curl_version_major}" -ge 8 ]; then
        CURL_HAS_NO_CLOBBER="--no-clobber"
        CURL_HAS_PARALLEL="--parallel"
    elif [ "${curl_version_major}" -eq 7 ];then
        if [ "${curl_version_minor}" -ge 83 ]; then
            CURL_HAS_NO_CLOBBER="--no-clobber"
        fi
        if [ "${curl_version_minor}" -ge 66 ]; then
            CURL_HAS_PARALLEL="--parallel"
        fi
    fi

    # Detecting whether we need --parallel. It's easier to rely on
    # the shell's argument parsing.
    # shellcheck disable=SC2086
    set -- $URLS

    # --parallel only makes sense with more than one URL.
    if [ "$#" -gt 1 ]; then
        CURL_PARALLEL="$CURL_HAS_PARALLEL"
    else
        CURL_PARALLEL=""
    fi

    # Start assembling the command.
    #
    # We use 'set --' here (again) because (a) we don't have arrays on
    # POSIX shell, and (b) we need better control over the way we
    # split arguments.
    #
    # shellcheck disable=SC2086
    set -- ${CMD} ${CURL_PARALLEL}

    # '--next' separates per-URL option groups: empty before the first URL,
    # set for every subsequent one.
    NEXT_PARAMETER=""
    for url in ${URLS}; do
        # If the user did not provide an output path, define one.
        if [ "${HAS_USER_SET_OUTPUT}" = "false" ]; then
            OUTPUT_PATH="$(get_url_filename "${url}")"
            # If we could not get a path from the URL, use the default: index.html.
            [ -z "${OUTPUT_PATH}" ] && OUTPUT_PATH=index.html
        fi
        # The option strings are intentionally unquoted so they word-split
        # into separate arguments; only the output path and URL are quoted.
        # shellcheck disable=SC2086
        set -- "$@" ${NEXT_PARAMETER} ${PER_URL_PARAMETERS} ${CURL_HAS_NO_CLOBBER} ${CURL_OPTIONS} --output "${OUTPUT_PATH}" "${url}"
        NEXT_PARAMETER="--next"
    done

    if [ "${DRY_RUN}" = "false" ]; then
        # Replace this shell with the curl invocation; nothing below runs.
        exec "$@"
    else
        # Dry run: show each assembled argument on its own line.
        printf "%s\n" "$@"
    fi
}
| 244 | + |
# Default to decoding the output filename
DECODE_FILENAME="true"

# Parse the command line, accumulating state in the globals above.
# Use "${1-}" in order to avoid errors because of 'set -u'.
while [ -n "${1-}" ]; do
    case "${1}" in
        --curl-options=*)
            # '--curl-options=VALUE' form: strip the prefix, keep the value.
            opt=$(printf "%s\n" "${1}" | sed 's/^--curl-options=//')
            CURL_OPTIONS="${CURL_OPTIONS} ${opt}"
            ;;

        --curl-options)
            # '--curl-options VALUE' form: value is the next argument.
            # (With no following argument, 'set -u' aborts on ${1}.)
            shift
            CURL_OPTIONS="${CURL_OPTIONS} ${1}"
            ;;

        --dry-run)
            DRY_RUN="true"
            ;;

        --output=*)
            opt=$(printf "%s\n" "${1}" | sed 's/^--output=//')
            HAS_USER_SET_OUTPUT="true"
            OUTPUT_PATH="${opt}"
            ;;

        -o|-O|--output)
            shift
            HAS_USER_SET_OUTPUT="true"
            OUTPUT_PATH="${1}"
            ;;

        -o*|-O*)
            # Attached form (e.g. '-ofile.txt'); must come after the exact
            # '-o|-O' patterns above so the bare flags are matched first.
            opt=$(printf "%s\n" "${1}" | sed 's/^-[oO]//')
            HAS_USER_SET_OUTPUT="true"
            OUTPUT_PATH="${opt}"
            ;;

        --no-decode-filename)
            DECODE_FILENAME="false"
            ;;

        -h|--help)
            usage
            exit 0
            ;;

        -V|--version)
            print_version
            exit 0
            ;;

        --)
            # This is the start of the list of URLs.
            shift
            for url in "$@"; do
                # Encode whitespaces into %20 — wcurl accepts URLs with raw
                # whitespace for convenience (as wget does) and hands curl
                # the encoded form.
                newurl=$(printf "%s\n" "${url}" | sed 's/ /%20/g')
                URLS="${URLS} ${newurl}"
            done
            break
            ;;

        -*)
            error "Unknown option: '$1'."
            ;;

        *)
            # This must be a URL.
            # Encode whitespaces into %20 (see the '--' branch above).
            newurl=$(printf "%s\n" "${1}" | sed 's/ /%20/g')
            URLS="${URLS} ${newurl}"
            ;;
    esac
    shift
done

# Validate/freeze the parsed state, then run (or print) the curl command.
sanitize
exec_curl
0 commit comments