Never been to CodeSnippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world (or not — you can keep them private!)

1 total

pcregrep - UTF-8 aware grep replacement

# we first have to download, compile & install the PCRE library, cf. http://www.pcre.org/pcre.txt
# requirement: Xcode, http://developer.apple.com/tools/xcode/index.html
#
# The steps are chained with && so that a failed download, unpack or cd stops
# the sequence instead of letting make / sudo make install run in the wrong
# directory. curl -f turns an HTTP error into a failure rather than saving the
# server's error page under the tarball's name.
#
# for Intel Macs, see http://hivelogic.com/articles/2005/12/ruby_rails_lighttpd_mysql_tiger
#./configure --prefix=/usr/local --enable-utf8 --enable-unicode-properties CFLAGS=-O1

cd ~/Desktop &&
  /usr/bin/curl -f -L -O http://downloads.sourceforge.net/pcre/pcre-7.7.tar.gz &&
  /usr/bin/tar -xzf pcre-7.7.tar.gz &&
  cd pcre-7.7 &&
  ./configure --help &&
  ./configure --prefix=/usr/local --enable-utf8 --enable-unicode-properties &&
  /usr/bin/make &&
  /usr/bin/sudo /usr/bin/make install


# Verify the install: list the binary and show its file metadata.
ls -l /usr/local/bin/pcregrep; stat -x /usr/local/bin/pcregrep

# Version and built-in help, then only the help lines that mention
# UTF-8 or multiline matching.
pcregrep --version
pcregrep --help
for topic in 'utf-?8' multiline; do
  pcregrep --help | pcregrep -i "$topic"
done

# Related manual pages, one after another.
for page in pcregrep pcrepattern pcretest perlretut; do
  man "$page"
done

# Same pages again, with less positioned at a search pattern (-p).
for pattern in utf-8 multiline; do
  man pcregrep | less -p "$pattern"
done
man perlretut | less -p 'single line and multi'

# HTML version of the pcregrep documentation installed with PCRE.
open /usr/local/share/doc/pcre/html/pcregrep.html


# Terminal.app has to use UTF-8, which this check recognizes by the
# StringEncoding preference being 4; otherwise print a notice and
# leave the shell with status 1.
[[ "$(/usr/bin/defaults read com.apple.Terminal StringEncoding)" == "4" ]] || {
   echo 'Terminal.app does not use UTF-8 character set encoding!'
   exit 1
}


# "café" as raw UTF-8 bytes: the é is the two-byte sequence \303\251.
utf8str=$'caf\303\251'

# Print every match of '.' on its own line (-o) with each tool.
# The string is passed as data to a fixed '%s' format, so printf never
# interprets any '%' or backslash it may contain.
printf '%s' "$utf8str" | /usr/bin/egrep -o '.'
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -o '.'
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -ou '.'     # UTF-8 aware
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -ou 'f.$'

# Count the matches reported by each tool.
printf '%s' "$utf8str" | /usr/bin/egrep -o '.' | wc -l
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -o '.' | wc -l
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -ou '.' | wc -l     # UTF-8 aware


#---------------------------------------------


# cf. also The Heirloom Project, http://heirloom.sourceforge.net
# download & install from http://homepage.mac.com/stefan.tramm/iWiki/HeirloomNotes.html

# Keep a time-stamped copy of ~/.bash_login before modifying it
# (cp -i asks before overwriting an existing copy).
/bin/cp -ip "${HOME}/.bash_login" "${HOME}/.bash_login.orig-$(/bin/date +%Y-%m-%d-%H.%M.%S)"


# Append the Heirloom setup to ~/.bash_login (the same statements can go into
# ~/.profile instead). The quoted delimiter keeps the text literal, so $PATH
# and $HEIRLOOM are written as-is and only expanded when the file is sourced.

/bin/cat <<-'EOF' >> "${HOME}/.bash_login"

# Heirloom userland
# http://homepage.mac.com/stefan.tramm/iWiki/HeirloomNotes.html

HEIRLOOM=/opt/heirloom
if [[ -d $HEIRLOOM ]]; then
  export HEIRLOOM
  PATH=$PATH:$HEIRLOOM/5bin
else
  unset HEIRLOOM
fi

EOF


# make Heirloom source the ~/.bash_login shell functions
/bin/ls -lo /opt/heirloom/etc/5.rc
/usr/bin/sudo /bin/cp -ip /opt/heirloom/etc/5.rc{,.orig}   # backup

# ~/.bash_login should "source ~/.bashrc" and "bind -f ~/.inputrc"
#
# Append the line as root through tee. With "sudo echo ... >> file" the >>
# redirection is carried out by the calling, unprivileged shell and not by
# sudo, so it only succeeds on a file that has been made world-writable.
# tee -a runs under sudo and does the append itself; its copy of the line on
# stdout is discarded.
printf '%s\n' 'source ~/.bash_login' | /usr/bin/sudo /usr/bin/tee -a /opt/heirloom/etc/5.rc > /dev/null

# make sure the file ends up owned by root:admin and not writable by others
/usr/bin/sudo /usr/sbin/chown root:admin /opt/heirloom/etc/5.rc
/usr/bin/sudo /bin/chmod 644 /opt/heirloom/etc/5.rc
/bin/ls -lo /opt/heirloom/etc/5.rc

# delete the last (added) line again, should the append need to be undone
#/usr/bin/sudo /usr/bin/sed -i '' '$,$d' /opt/heirloom/etc/5.rc

# review the modified file (open -e opens it in TextEdit)
/usr/bin/open -e /opt/heirloom/etc/5.rc


# load the updated ~/.bash_login into the current shell
source ~/.bash_login


# read the README in TextEdit
/usr/bin/open -e /opt/heirloom/README
# Brace expansion builds several arguments from one word; an empty
# alternative yields the bare prefix. The next line expands to
#   /opt/heirloom/ /opt/heirloom/etc/ /opt/heirloom/5bin/
/usr/bin/open /opt/heirloom/{,{etc/,5bin/}}      # open three directories in one go
# expands to: /opt/heirloom/doc/ /opt/heirloom/doc/doctools
/usr/bin/open /opt/heirloom/doc/{,doctools}
# expands to: /opt/heirloom/doc/ /opt/heirloom/doc/doctools/
#             /opt/heirloom/doc/doctools/quickstart.pdf
/usr/bin/open /opt/heirloom/doc/{,doctools/{,quickstart.pdf}}  
/usr/bin/open /opt/heirloom/doc/doctools/quickstart.pdf       # explore Heirloom troff

# Browse Heirloom documentation. Each "5 <command>" line runs <command> through
# the 5 launcher found in $HEIRLOOM/5bin.
# NOTE(review): presumably 5 runs the command with the Heirloom tools taking
# precedence — confirm against /opt/heirloom/README.
5 man intro | less -p 'Multibyte character encodings'
5 man sh | less
5 man tsort | less
5 whodo
# NOTE(review): a bare 5 presumably starts an interactive Heirloom shell, so
# the commands below would run inside it — confirm.
5
man pgrep
pgrep sh
pgrep bash
man bfs | less  # bfs - big file scanner


# Inspect the 5 launcher and two Heirloom tools with stat.
# $HEIRLOOM is quoted so the path stays a single argument whatever it contains.
/usr/bin/stat -x "$HEIRLOOM/5bin/5"
/usr/bin/stat -f '%N:  %HT%SY' "$HEIRLOOM/5bin/5"
# Same custom format, with ANSI escapes embedded via $'...':
# \e[1m = bold, \e[1;31m = bold red, \e[m = reset.
/usr/bin/stat -f $'%N:  \e[1m%HT%SY\e[m' /opt/heirloom/bin/tsort
/usr/bin/stat -f $'%N:  \e[1;31m%HT\e[m%SY' /opt/heirloom/5bin/awk


# Heirloom grep documentation: the online HTML manual page, then the local
# manual page run through the 5 launcher.
/usr/bin/open http://heirloom.sourceforge.net/man/grep.1.html
5 man grep | less


# NOTE(review): a bare 5 presumably starts an interactive Heirloom shell;
# the commands that follow are meant to be typed inside it — confirm.
5

# "café" as raw UTF-8 bytes: the é is the two-byte sequence \303\251.
utf8str=$'caf\303\251'
printf '%s\n' "$utf8str"

# Match "f" followed by exactly one character at end of line with each grep.
# The string is passed as data to a fixed '%s\n' format instead of being
# interpolated into the printf format string.
printf '%s\n' "$utf8str" | /usr/local/bin/pcregrep -u 'f.$'
printf '%s\n' "$utf8str" | /opt/heirloom/5bin/grep 'f.$'
printf '%s\n' "$utf8str" | /opt/heirloom/5bin/posix/grep -E -e 'f.$'
1 total