Menu
Zero One Labs
  • Home
  • About
  • Scripts
  • Snippets
Zero One Labs
dlmedia screen shot

Download Media: An OS X/macOS bash script to download files from a few popular websites

Posted on December 18, 2016 / March 25, 2019 by Zan

I made this script so I could download the MP4 versions of Imgur GIFs and Gfycat MP4s because I like to make a lot of funny images for some online communities. I later expanded it to incorporate the YouTube-DL binary, ffmpeg, NHL videos, and Instagram videos and JPEGs. Feel free to copy this script into a file in /usr/local/bin, or save it as a separate executable file and symlink it into /usr/local/bin, so that you can call it directly without having to specify the path.

This script assumes you’re using OS X (10.6+) or macOS because it uses Mac-specific lingo to determine your user home folder.

If you want to know more about the options this script can use, simply invoke the script with the “-h” option (/usr/local/bin/dlmedia -h).

Note: The NHL option only works for NHL video pages. This will not work for embedded NHL videos. Since I use this script for pure personal reasons, I didn’t vet this script to have tons of error checks or input validation. Use this script at your own risk.

#!/bin/bash
# Author: Zan Bassi
#
# Example URLs (these URLs don't exist, btw)
# https://meowygif.com/PettyGaseousDinosaurs
# https://www.youbeetubees.com/watch?v=ABC13ndaiXYZ "A_Funny_YouTube_Song"
# https://www.inthotgram.com/p/ABC13ndaiXYZ/ "Funny cat video"

# Names of the services this script can download from (listed in the usage text).
theServices=( gfycat imgur streamable nhl youtube instagram )

# Language section
# Every user-facing message lives here so the wording can be changed in one place.
lang_success_download="File downloaded successfully."
lang_change_dir="Changing directory to "

# YouTube prompt and its three choices.
lang_prompt_yt_options="What would you like to do with this YouTube ID?"
lang_prompt_yt_option1="Download highest quality video and audio."
lang_prompt_yt_option2="Download audio only in MP3  format."
lang_prompt_yt_option3="Download audio only in wav format."
lang_error_yt_promptfail="You didn't type 1, 2, or 3."
lang_error_insta_trails="Missing a trailing slash in the URL. Fetching the full URL from the 301 redirect page."
lang_error_insta_confused="I couldn't figure out what file type to download (video or image). Exiting."

# Shown when an Imgur URL is not a gallery URL.
lang_error_imgur_gallery="The URL for Imgur must contain \"/gallery/\"."

lang_error_yt_no_youtubedl="You don't have youtube-dl installed. In order for this script to be able to download YouTube videos, you'll need to install \"youtube-dl\", which can be found here: https://rg3.github.io/youtube-dl/download.html"
lang_error_yt_no_ffmpeg="It looks like you do not have ffmpeg installed, and thus you won't be able to convert to mp3 or wav."

# Generic download / argument errors.
lang_error_download_failed="Looks like the file might not have downloaded. Please check the URL and try again. You can also run this script with the \"-v\" option to see where the script might be failing."
lang_error_download_dir_nonexistent="Download directory does not exist: "
lang_error_404_service="Could not find a function to download from "
lang_error_no_http="The script did not detect a URL that begins with \"http[s]\""
lang_error_DLURL_build="Looks like the HTML format, how the video file is referenced, or the variable \"theFileID\" was improperly calculated for this service. This script may need to be updated. Try running the script with the \"-v\" option."

# Exit status of the most recent download command. It starts at a non-zero
# value, so it only reads as success after a command actually returned 0.
_xstat=9
theDomain=""
# Owner of /dev/console, taken as the current user. BSD/macOS stat prints the
# owner name directly, which avoids parsing the column layout of `ls -l`.
curUser="$(stat -f '%Su' /dev/console)"
# Home directory of that user as recorded in the local directory service.
curHome="$(dscl . read "/Users/${curUser}" NFSHomeDirectory | awk -F ": " '{print $2}')"
# Default to the user's 'Downloads' directory.
downloadDir="${curHome}/Downloads"

# # # # # # # # # # # # # # # # #

# Print the full help text (synopsis, options, author note, copyright) to
# stdout and exit the script.
# Globals: theServices (read) - listed as the supported websites.
echoUsage () {
  # Blank line, the synopsis printed verbatim, blank line.
  printf '\n%s\n\n' "usage: $0 [URL] [-d|--dir \"path\"] [-n|--name \"save as file name\"]  [--show] [-v]"
  # %b expands backslash escapes such as \t in each argument; every argument
  # becomes one output line and an empty argument yields an empty line.
  printf '%b\n' \
    "\tURL\t\t - URL to file you want to download." \
    "\t\t\tThis script is non-officially configured to work with the following websites: ${theServices[*]}" \
    "" \
    "\t-d | --dir\t- Download directory. This requires a quoted full or relative file path." \
    "\t\t\te.g. -d \"/Users/MyUsername/Downloads/dlmedia\"" \
    "\t\t\tNote: - the \"-d|--dir\" option does not require a trailing slash or escaped spaces (\"\\ \")" \
    "\t\t\t      - the \"-d|--dir\" option must not be given special characters, such as $, #, !, \\, :, or %, etc." \
    "" \
    "\t-n | --name\t- What to name the file. If the name includes spaces, you must quote the name with double-quotes." \
    "\t\t\te.g. -n \"Beefoven Pupstep\"" \
    "\t\t\tNote: the \"-n|--name\" option must not be given special characters, such as $, #, !, \\, :, or %, etc." \
    "" \
    "\t--show\t\t- Show/Reveal the downloaded file in the Finder." \
    "\t\t\tNote: You may experience errors if the file path contains special characters." \
    "" \
    "\t-v\t\t- Verbose. This prints extra information when running this script." \
    "" \
    "Author note:\tThis script's sole intended use, is to download files which YOU own for backup purposes only." \
    "\t\tIf the usage of this script happens to violate the official \"Terms of Service\" from any of the websites you intend to use it with," \
    "\t\tthen you must cease from using this script immediately." \
    "" \
    "\t\tIf you're having any issues with this script, feel free to contact me on my website zeroonelabs.com" \
    "" \
    "\t\t# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #" \
    "\t\tIf you continue to use this script, you agree to use this at YOUR OWN RISK." \
    "\t\t# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #" \
    "" \
    "Copyright:\tAll trademarks, registered trademarks, service marks, trade names, trade dress, product names and logos referenced in this document are the property of their respective owners. Any rights not expressly granted herein are reserved." \
    ""
  exit
}
# Print an error message and abort the script with exit status 1.
# Arguments: $1 - the message to show.
# Outputs:   the message on stderr, so it is never mistaken for normal output.
echoDie () {
  # printf '%s' prints the message verbatim; echo would treat a message such
  # as "-n" or "-e" as one of its own options and print nothing.
  printf '%s\n' "${1}" >&2
  exit 1
}

# Show the usage text (which also exits) when the first argument is missing,
# empty, or one of the help flags.
case "${1}" in
  ""|-h|--help)
    echoUsage
    ;;
esac

# The first argument is expected to be the URL; everything after it is options.
theMainArg="${1}"
theName=""
http_var=""
isVerbose=false
theFinderShow=false

# Parse the options that follow the URL.
# Arguments: the option words, e.g. -n "Some Name" -d "/some/dir" --show -v
# Globals:   theName, downloadDir, isVerbose, theFinderShow (written)
# Exits (through echoDie) when the directory given to -d|--dir does not exist.
# Unrecognised words are ignored.
parseOptions () {
  local -a theOps=( "${@}" )
  local theOpsPos=0
  while (( theOpsPos < ${#theOps[@]} )); do
    case "${theOps[theOpsPos]}" in
      -n|--name)
        # Step onto the value so it is consumed here and never re-examined as
        # an option itself (a name of "-v" must not switch on verbose mode).
        theOpsPos=$(( theOpsPos + 1 ))
        theName="${theOps[theOpsPos]}"
        ;;
      -d|--dir)
        theOpsPos=$(( theOpsPos + 1 ))
        downloadDir="${theOps[theOpsPos]}"
        if [[ ! -d "${downloadDir}" ]]; then
          echoDie "${lang_error_download_dir_nonexistent} ${downloadDir}"
        fi
        ;;
      -v)
        isVerbose=true
        ;;
      --show)
        theFinderShow=true
        ;;
    esac
    theOpsPos=$(( theOpsPos + 1 ))
  done
}

# "${@:2}" is every argument after the URL; it expands to nothing when the
# URL was the only argument.
parseOptions "${@:2}"

# Print a diagnostic line, but only when verbose mode is switched on.
# Arguments: $1 - the text to print.
# Globals:   isVerbose (read)
echoVerbose () {
  # Not verbose: print nothing and still report success.
  [[ ${isVerbose} = true ]] || return 0
  echo "${1}"
}
# Only URLs that start with http:// or https:// are accepted; anything else
# ends the script with an error.
if [[ ! ${theMainArg} =~ ^https?:// ]];then
  echoDie "${lang_error_no_http}"
fi
# Split on "/": the third field of "proto://host/path" is the host.
theDomain="$(awk -F/ '{print $3}' <<< "${theMainArg}")"
# Split on ":": the first field is the protocol.
theProto="$(awk -F: '{print $1}' <<< "${theMainArg}")"

# Change into the download directory so files are saved there.
# Globals: downloadDir (read)
# Exits (through echoDie) when the directory cannot be entered, instead of
# silently continuing and saving into whatever the current directory is.
cdl () {
  echoVerbose "${lang_change_dir} ${downloadDir}"
  # "--" keeps a directory name that starts with "-" from being read as an option.
  cd -- "${downloadDir}" || echoDie "${lang_error_download_dir_nonexistent} ${downloadDir}"
}

# Derive the file ID from the URL: the text after the last "/".
# Globals: theMainArg (read), theFileID (written)
simple_vars () {
  # Parameter expansion avoids the unquoted echo, which would word-split and
  # glob-expand the URL before the ID was extracted.
  theFileID="${theMainArg##*/}"
  echoVerbose "The file ID = ${theFileID}"
}

# Issue a HEAD request for the URL and remember the HTTP status code.
# Globals: theMainArg (read), http_var (written; second word of the first
#          response line, empty when there is no response)
http_stat () {
  http_var="$(curl -sSI "${theMainArg}" | awk 'NR == 1 { print $2; exit }')"
}

# Turn arbitrary text into a safe file-name stem: every non-alphanumeric
# character becomes "_", runs of "_" collapse to one, and a leading or
# trailing "_" is dropped.
# Arguments: $1 - the text to sanitize.
# Globals:   sanitized (written) - the result.
sanitize_var () {
  local stz="${1}"                 # receive input in first argument
  echoVerbose "Sanitization input: \"${stz}\""
  stz="${stz//[^[:alnum:]]/_}"     # replace all non-alnum characters with _
  echoVerbose "Sanitization step 1: ${stz}"
  # Collapse runs of _ into a single _. A plain loop is used because the
  # +(_) pattern only works with the extglob shell option, which is not enabled.
  while [[ "${stz}" == *__* ]]; do
    stz="${stz//__/_}"
  done
  echoVerbose "Sanitization step 2: ${stz}"
  stz="${stz#_}"                   # remove _ from start
  echoVerbose "Sanitization step 3: ${stz}"
  sanitized="${stz%_}"             # remove _ from end
  echoVerbose "Sanitization step 4: ${sanitized}"
}

# Download theDLURL into the current directory.
# Arguments: $1 - file extension to save with (e.g. "mp4").
# Globals:   theName, theFileID, theDLURL, theService, downloadDir (read);
#            theFileEx, theSaveFile, _xstat (written)
# The file is named after theName when one was given, otherwise after
# theFileID; either is passed through sanitize_var first.
# Exits (through echoDie) when theDLURL is empty or the download fails.
dl_file () {
  theFileEx="${1}"
  sanitize_var "${theName:-${theFileID}}"
  theSaveFile="${sanitized}.${theFileEx}"
  if [[ -z "${theDLURL}" ]]; then
    echoDie "${lang_error_DLURL_build}"
  fi
  echoVerbose "theFileID : ${theFileID}"
  echoVerbose "theSaveFile : ${theSaveFile}"
  echoVerbose "theDLURL : ${theDLURL}"
  echo "Downloading the file ID \"${theFileID}\" from ${theService} to \"${downloadDir}\" as \"${theSaveFile}.\""
  # --fail makes curl return non-zero on an HTTP error (404, 403, ...) instead
  # of saving the error page and reporting success; --location follows
  # redirects to the actual file.
  curl -s --fail --location "${theDLURL}" -o "${theSaveFile}"
  _xstat=$?
  if [[ ${_xstat} = 0 ]]; then
    echo "${lang_success_download}"
  else
    echoDie "${lang_error_download_failed}"
  fi
}

# Download the MP4 behind a Gfycat page URL.
# Globals: theMainArg (read; replaced by the redirect target on a 301),
#          http_var, theFileID (read after http_stat / simple_vars),
#          theFileURID, theDLURL (written)
dl_gfycat () {
  local theRedirect
  http_stat
  simple_vars
  theFileURID="${theFileID}"
  if [[ "${http_var}" = "301" ]]; then
    # Header names are matched case-insensitively: HTTP/2 responses print
    # them in lower case ("location:").
    theRedirect="$(curl -sSI "${theMainArg}" | awk 'tolower($1) == "location:" { print $2; exit }')"
    # Header lines end in CR LF; drop the carriage return left on the value.
    theRedirect="${theRedirect%$'\r'}"
    if [[ -n "${theRedirect}" ]]; then
      theMainArg="${theRedirect}"
    fi
    theFileURID="${theMainArg##*/}"
    echoVerbose "theFileURID : ${theFileURID}"
  fi
  # Take the <source src="..."> entry for this ID from the page, then reduce
  # it to the bare URL. The match is case-insensitive.
  theDLURL="$(curl --silent "https://gfycat.com/${theFileURID}" \
    | grep -oiE "<source src=\"\w+://(\w+.)gfycat.com/${theFileID}.mp4" \
    | grep -oiE "\w+://(\w+.)gfycat.com/${theFileID}.mp4")"
  dl_file "mp4"
}

# Download the MP4 behind an Imgur gallery URL.
# Globals: theMainArg (read), theFileID (read after simple_vars),
#          theDLURL (written)
# Exits (through echoDie) when the URL does not contain "/gallery/".
dl_imgur () {
  simple_vars
  if [[ ! "${theMainArg}" =~ "/gallery/" ]]; then
    echoDie "${lang_error_imgur_gallery}"
  fi
  # The URL is quoted so curl always receives it as a single argument. The
  # og:video meta tag holds the MP4 URL; the second grep keeps just the URL.
  theDLURL="$(curl --silent "https://imgur.com/gallery/${theFileID}" \
    | grep -oiE "og:video\"\s+content=\"\w+://(\w+.)imgur.com/${theFileID}.mp4" \
    | grep -oiE "\w+://(\w+.)imgur.com/${theFileID}.mp4")"
  dl_file "mp4"
}

# Download the MP4 behind a Streamable page URL.
# Globals: theMainArg (read), theDLURL (written)
dl_streamable () {
  # Shape of the tokenised MP4 URL as it appears, HTML-escaped, in the page.
  local theURLPattern='\w+://(\w|-)+\.streamable.com(/|\w)+\.mp4\?token=[0-9a-zA-Z]+&amp;expires=[0-9]+'
  simple_vars
  # 1) find the og:video meta tag, 2) keep only the URL, 3) un-escape "&amp;".
  theDLURL="$(
    curl --silent "${theMainArg}" \
      | grep -oiE "og:video\"\scontent=\"${theURLPattern}" \
      | grep -ioE "${theURLPattern}" \
      | sed 's/&amp\;/\&/g'
  )"
  dl_file "mp4"
}

# Download the MP4 referenced by an NHL.com video page.
# Globals: theMainArg, theName (read);
#          theDLURL, theFileID, theSaveFile, _xstat (written)
# The file is named after the --name value when given, otherwise after the
# page title; either is passed through sanitize_var first.
# Exits (through echoDie) when no MP4 URL is found or the download fails.
dl_nhl () {
  # TODO: Get embedded videos from non-standard NHL pages, i.e. highlight reel pages, etc.
  local thePage vidTitle
  # Fetch the page once and take both the video URL and the title from it.
  thePage="$(curl --silent "${theMainArg}")"
  theDLURL="$(printf '%s\n' "${thePage}" | grep -oE "content.*mp4.*>" | grep -oE "http.*\.mp4" | head -n 1)"
  vidTitle="$(printf '%s\n' "${thePage}" | grep -oE "<title>.*</title>" | sed 's/<[^>]*>//g' | sed 's/ | NHL.com//' | head -n 1)"
  if [[ -z "${theDLURL}" ]]; then
    echoDie "${lang_error_DLURL_build}"
  fi
  sanitize_var "${vidTitle}"
  theFileID="${sanitized}"
  if [[ -n "${theName}" ]]; then
    sanitize_var "${theName}"
  fi
  # Save under the sanitized name: the raw title may contain "/" or other
  # characters that are not safe in a file name. Setting theSaveFile also
  # lets --show locate the file afterwards.
  theSaveFile="${sanitized}.mp4"

  echo "Downloading the video \"${vidTitle}\" from NHL.com"
  # --fail turns HTTP errors into a non-zero exit status.
  curl -s --fail --location "${theDLURL}" -o "${theSaveFile}"
  _xstat=$?
  if [[ ${_xstat} = 0 ]]; then
    echo "${lang_success_download}"
  else
    echoDie "${lang_error_download_failed} [${_xstat}]"
  fi
}

# Download a YouTube video, or its audio as MP3/WAV, with youtube-dl.
# Prompts on stdin for what to download (1 = video, 2 = MP3, 3 = WAV).
# Globals: theMainArg, theName (read); theFileID, theSaveFile, _xstat (written)
# Exits (through echoDie) when youtube-dl is missing, the answer is not
# 1/2/3, ffmpeg is missing for an audio conversion, or youtube-dl fails.
dl_youtube () {
  local theYTops theExt theTarget
  # Look youtube-dl up on PATH, which is how it is invoked below; a fixed
  # /usr/local/bin path would reject installs located anywhere else.
  if ! command -v youtube-dl >/dev/null 2>&1; then
    echoDie "${lang_error_yt_no_youtubedl}"
  fi
  theFileID="$(echo "${theMainArg}" | grep -oE "v=([a-zA-Z0-9_-]{11})" | sed 's/v=//')"
  # Hand youtube-dl the whole URL when no "v=<11 characters>" ID was found.
  theTarget="${theFileID:-${theMainArg}}"

  echo "${lang_prompt_yt_options}"
  echo
  echo -e "\t1) ${lang_prompt_yt_option1}"
  echo -e "\t2) ${lang_prompt_yt_option2}"
  echo -e "\t3) ${lang_prompt_yt_option3}"
  echo
  # -r keeps backslashes in the answer from being treated as escapes.
  read -r -p 'Type 1, 2, or 3: ' theYTops

  case "${theYTops}" in
    1) theExt="mp4" ;;
    2) theExt="mp3" ;;
    3) theExt="wav" ;;
    *) echoDie "${lang_error_yt_promptfail}" ;;
  esac

  # Without --name, let youtube-dl name the file from its output template.
  if [[ -z "${theName}" ]]; then
    theSaveFile="%(title)s.%(ext)s"
  else
    theSaveFile="${theName}.${theExt}"
  fi

  # "--" ends option parsing so an ID that starts with "-" is not read as an option.
  if [[ "${theYTops}" = 1 ]]; then
    youtube-dl -o "${theSaveFile}" -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' -- "${theTarget}"
    _xstat=$?
  else
    # Audio extraction needs ffmpeg for the conversion.
    if ! command -v ffmpeg >/dev/null 2>&1; then
      echoDie "${lang_error_yt_no_ffmpeg}"
    fi
    youtube-dl -o "${theSaveFile}" --extract-audio --audio-format "${theExt}" -- "${theTarget}"
    _xstat=$?
  fi

  if [[ ${_xstat} = 0 ]]; then
    echo "${lang_success_download}"
  else
    echoDie "${lang_error_download_failed}"
  fi
}

# Download the video (MP4) or photo (JPG) behind an Instagram post URL.
# Globals: theMainArg (read; replaced by the redirect target when the URL
#          has no trailing slash), theFileID, theDLType, theDLURL (written)
# Exits (through echoDie) when the page declares neither a video nor a photo.
dl_instagram () {
  local theRedirect thePage fileEx
  # If there isn't a trailing slash, get the 301 redirect from Instagram.
  if [[ ! "${theMainArg}" =~ /$ ]]; then
    echo "${lang_error_insta_trails}"
    # Header names are matched case-insensitively: HTTP/2 responses print
    # them in lower case ("location:").
    theRedirect="$(curl -sSI "${theMainArg}" | awk 'tolower($1) == "location:" { print $2; exit }')"
    # Header lines end in CR LF; drop the carriage return left on the value.
    theRedirect="${theRedirect%$'\r'}"
    # Keep the original URL when no redirect target was found.
    if [[ -n "${theRedirect}" ]]; then
      theMainArg="${theRedirect}"
    fi
  fi
  # The ID is the last path segment, with or without a trailing slash.
  theFileID="${theMainArg%/}"
  theFileID="${theFileID##*/}"

  # Fetch the page once; the type and the media URL are both read from it.
  thePage="$(curl --silent "${theMainArg}")"
  theDLType="$(printf '%s\n' "${thePage}" | grep -iE "og:type\"\scontent=" | grep -ioE "(instapp:photo|video)")"
  case "${theDLType}" in
    video)
      fileEx="mp4"
      theDLURL="$(printf '%s\n' "${thePage}" | grep -oiE "meta property=\"og:video\".*(mp4)\?(_|\w|=|\.|-)+" | grep -oE "http.*")"
      ;;
    instapp:photo)
      fileEx="jpg"
      theDLURL="$(printf '%s\n' "${thePage}" | grep -oiE "meta property=\"og:.*(jpg)\?(_|\w|=|\.|-)+" | grep -oE "http.*")"
      ;;
    *)
      echoVerbose "theDLURL = ${theDLURL}"
      echoDie "${lang_error_insta_confused}"
      ;;
  esac
  dl_file "${fileEx}"
}

# Post-download step: when --show was given, run an AppleScript that asks the
# Finder to select the downloaded file.
# Globals: theFinderShow, downloadDir, theSaveFile (read)
finalArgs () {
  # Nothing to do unless --show was requested.
  [[ ${theFinderShow} = true ]] || return 0

  # The AppleScript, one -e argument per line. The script receives the
  # download directory and the file name as its two arguments.
  local -a theRevealScript=(
    -e 'on run argv'
    -e 'set thePath to (item 1 of argv & "/" & item 2 of argv as POSIX file)'
    -e 'tell application "Finder"'
    -e 'Activate'
    -e 'select thePath'
    -e 'end tell'
    -e 'end run'
  )

  sleep 2
  /usr/bin/osascript "${theRevealScript[@]}" "${downloadDir}" "${theSaveFile}" > /dev/null 2>&1
}
# Main flow: enter the download directory, then hand the URL to the first
# service whose name appears in it.
cdl
for theService in "${theServices[@]}";do
  if [[ ${theMainArg} =~ ${theService} ]];then
    # theService now holds the service that actually matched, so the
    # "Downloading ... from <service>" message names the right site.
    # Each service has a downloader function called dl_<service>.
    "dl_${theService}"
    finalArgs
    exit
  fi
done
# No service name was found in the URL.
echoDie "${lang_error_404_service} ${theDomain}."

  • bash
  • gfycat
  • imgur
  • macos
  • nhl.com
  • osx
  • youtube-dl
  • Leave a Reply Cancel reply

    You must be logged in to post a comment.

    Login with your Social ID

    Categories

    • Articles
    • Bash
    • Mac
    • PackageMaker
    • Python
    • Scripts
    • Terminal
    • Uncategorized
    My LinkedIn
    ©2023 Zero One Labs | Powered by WordPress & Superb Themes