bookmarks/top_words.sh

39 lines
906 B
Bash
Raw Normal View History

2018-09-13 18:58:56 +00:00
#!/usr/bin/env bash
# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
2018-09-13 19:14:23 +00:00
#######################################
# Turn a text stream into a word-frequency table.
# Extracts runs of 3+ ASCII letters, lower-cases them, drops every word listed
# in the stop-word file, and counts the remainder.
# Arguments: $1 - path to the stop-word file (one word per line)
# Inputs:    text on stdin
# Outputs:   "<count> <word>" lines on stdout, sorted by ascending count
#######################################
_count_words() {
  local stopwords_file="$1"

  # The leading backslash suppresses alias expansion so the real grep is used.
  \grep --only-matching --extended-regexp '[a-zA-Z]{3,}' \
    | tr '[:upper:]' '[:lower:]' \
    | \grep --invert-match --word-regexp --fixed-strings \
      --file="$stopwords_file" \
    | sort \
    | uniq -c \
    | sort -n
}

#######################################
# Print the most frequent words of a document, least frequent first.
# Without an argument the README.md next to this script is analysed;
# with an argument the document is downloaded from that URL with curl.
# Globals:   PATH (prepended with Homebrew's GNU grep directory on macOS)
#            HOMEBREW_PREFIX (read; falls back to /usr/local when unset)
# Arguments: $1 - optional URL of the document to analyse
# Outputs:   "<count> <word>" lines on stdout
# Returns:   non-zero if the download or any pipeline stage fails
#######################################
fetch_top_words() {
  local url
  local basedir
  local gnubin_dir
  local stopwords

  url="${1:-}"
  basedir=$(dirname "$0")

  if [[ $(uname -s) == "Darwin" ]]; then
    # The long grep options used below need GNU grep, which Homebrew installs
    # un-prefixed under libexec/gnubin. Install it only when that directory is
    # missing, then put it first on PATH.
    gnubin_dir="${HOMEBREW_PREFIX:-/usr/local}/opt/grep/libexec/gnubin"
    if [[ ! -d "$gnubin_dir" ]]; then
      brew install grep
    fi
    export PATH="$gnubin_dir:$PATH"
  fi

  # Keep honouring a stopwords.txt in the current directory; if there is none,
  # fall back to the copy next to the script (where README.md is looked up).
  stopwords="stopwords.txt"
  if [[ ! -f "$stopwords" && -f "$basedir/stopwords.txt" ]]; then
    stopwords="$basedir/stopwords.txt"
  fi

  if [[ -z "$url" ]]; then
    _count_words "$stopwords" < "$basedir/README.md"
  else
    # --fail: do not count the words of an HTTP error page;
    # --location: follow redirects; --silent/--show-error: no progress meter,
    # but still report failures on stderr.
    curl --fail --silent --show-error --location "$url" \
      | _count_words "$stopwords"
  fi
}
# Entry point: forward the script's command-line arguments unchanged; the
# first one, if present, is used as the URL to analyse.
fetch_top_words "$@"