# pcregrep / Heirloom walkthrough for Mac OS X (bash).
#
# Part 1 downloads, builds and installs PCRE 7.7 into /usr/local and compares
# how egrep and pcregrep split a UTF-8 string into characters.
# Part 2 wires the Heirloom tools (expected under /opt/heirloom) into
# ~/.bash_login and /opt/heirloom/etc/5.rc and tries a few of them.
#
# Many steps are interactive (man pages, less, open, sudo password prompts).

#---------------------------------------------
# Part 1: PCRE / pcregrep
#---------------------------------------------

# we first have to download, compile & install the PCRE library, cf. http://www.pcre.org/pcre.txt
# requirement: Xcode, http://developer.apple.com/tools/xcode/index.html

cd ~/Desktop || exit 1
/usr/bin/curl -L -O http://downloads.sourceforge.net/pcre/pcre-7.7.tar.gz
/usr/bin/tar -xzf pcre-7.7.tar.gz
cd pcre-7.7 || exit 1   # never run configure/make in the wrong directory

./configure --help
./configure --prefix=/usr/local --enable-utf8 --enable-unicode-properties

# for Intel Macs, see http://hivelogic.com/articles/2005/12/ruby_rails_lighttpd_mysql_tiger
#./configure --prefix=/usr/local --enable-utf8 --enable-unicode-properties CFLAGS=-O1

# install only if the build succeeded
/usr/bin/make && /usr/bin/sudo /usr/bin/make install

ls -l /usr/local/bin/pcregrep
stat -x /usr/local/bin/pcregrep

pcregrep --version
pcregrep --help
pcregrep --help | pcregrep -i 'utf-?8'
pcregrep --help | pcregrep -i multiline

man pcregrep
man pcrepattern
man pcretest
man perlretut

man pcregrep | less -p utf-8
man pcregrep | less -p multiline
man perlretut | less -p 'single line and multi'

open /usr/local/share/doc/pcre/html/pcregrep.html

# check if character set encoding of Terminal.app is set to UTF-8
if [[ "$(/usr/bin/defaults read com.apple.Terminal StringEncoding)" != "4" ]]; then
  echo 'Terminal.app does not use UTF-8 character set encoding!' >&2
  exit 1
fi

# "cafe" with an accented e, encoded as UTF-8 (the last character is two bytes)
utf8str=$'caf\303\251'

# The string is passed as an argument to '%s' (never as the printf format),
# and without a trailing newline.
printf '%s' "$utf8str" | /usr/bin/egrep -o '.'
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -o '.'
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -ou '.'     # UTF-8 aware
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -ou 'f.$'

printf '%s' "$utf8str" | /usr/bin/egrep -o '.' | wc -l
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -o '.' | wc -l
printf '%s' "$utf8str" | /usr/local/bin/pcregrep -ou '.' | wc -l     # UTF-8 aware

#---------------------------------------------
# Part 2: Heirloom tools
#---------------------------------------------

# cf. also The Heirloom Project, http://heirloom.sourceforge.net
# download & install from http://homepage.mac.com/stefan.tramm/iWiki/HeirloomNotes.html

# backup ~/.bash_login with time stamp in filename
# (skipped when the file does not exist yet, so cp does not error out)
if [[ -f "${HOME}/.bash_login" ]]; then
  /bin/cp -ip "${HOME}/.bash_login"{,".orig-$(/bin/date +%Y-%m-%d-%H.%M.%S)"}
fi

# To use the Heirloom tools insert the following statements into your ~/.profile or ~/.bash_login:
# (quoted delimiter: the text is appended literally, nothing is expanded here)
/bin/cat >> "${HOME}/.bash_login" <<'EOF'

# Heirloom userland
# http://homepage.mac.com/stefan.tramm/iWiki/HeirloomNotes.html
HEIRLOOM=/opt/heirloom
if [[ -d $HEIRLOOM ]]; then
   export HEIRLOOM
   PATH=$PATH:$HEIRLOOM/5bin
else
   unset HEIRLOOM
fi

EOF

# make Heirloom source the ~/.bash_login shell functions
/bin/ls -lo /opt/heirloom/etc/5.rc
/usr/bin/sudo /bin/cp -ip /opt/heirloom/etc/5.rc{,.orig}     # backup

# ~/.bash_login should "source ~/.bashrc" and "bind -f ~/.inputrc"
# A plain "sudo echo ... >> file" would not work on a root-owned file: the
# redirection is opened by the calling (unprivileged) shell, not by sudo.
# Appending through "sudo tee -a" avoids having to make the file
# world-writable first.
printf '%s\n' 'source ~/.bash_login' |
  /usr/bin/sudo /usr/bin/tee -a /opt/heirloom/etc/5.rc > /dev/null
/usr/bin/sudo /usr/sbin/chown root:admin /opt/heirloom/etc/5.rc
/usr/bin/sudo /bin/chmod 644 /opt/heirloom/etc/5.rc
/bin/ls -lo /opt/heirloom/etc/5.rc

# delete the last (added) line
#/usr/bin/sudo /usr/bin/sed -i '' '$,$d' /opt/heirloom/etc/5.rc

/usr/bin/open -e /opt/heirloom/etc/5.rc

source ~/.bash_login

/usr/bin/open -e /opt/heirloom/README
/usr/bin/open /opt/heirloom/{,{etc/,5bin/}}     # open three directories in one go
/usr/bin/open /opt/heirloom/doc/{,doctools}
/usr/bin/open /opt/heirloom/doc/{,doctools/{,quickstart.pdf}}
/usr/bin/open /opt/heirloom/doc/doctools/quickstart.pdf     # explore Heirloom troff

5 man intro | less -p 'Multibyte character encodings'
5 man sh | less
5 man tsort | less
5 whodo
5 man pgrep
pgrep sh
pgrep bash
man bfs | less    # bfs - big file scanner

/usr/bin/stat -x "$HEIRLOOM/5bin/5"
/usr/bin/stat -f '%N: %HT%SY' "$HEIRLOOM/5bin/5"
/usr/bin/stat -f $'%N: \e[1m%HT%SY\e[m' /opt/heirloom/bin/tsort
/usr/bin/stat -f $'%N: \e[1;31m%HT\e[m%SY' /opt/heirloom/5bin/awk

/usr/bin/open http://heirloom.sourceforge.net/man/grep.1.html
5 man grep | less

# NOTE(review): a bare "5" presumably starts a shell with the Heirloom
# environment, so the lines below are meant to be typed inside it - confirm
# against the Heirloom notes linked above.
5

utf8str=$'caf\303\251'
printf '%s\n' "$utf8str"
printf '%s\n' "$utf8str" | /usr/local/bin/pcregrep -u 'f.$'
printf '%s\n' "$utf8str" | /opt/heirloom/5bin/grep 'f.$'
printf '%s\n' "$utf8str" | /opt/heirloom/5bin/posix/grep -E -e 'f.$'
Never been to CodeSnippets before?
Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world (or not; you can keep them private!).
pcregrep - UTF-8 aware grep replacement (See related posts)
You need to create an account or log in to post comments to this site.
Related Posts
» Login window from the comman... in password shell osx mac bash unix window login utf8 UTF-8 pashua dialog
» Counting lines in awk grep shell osx mac bash line unix count file sed directory lines wc
» opening in a textmate projec... in grep regex shell osx mac unix textmate
» ws - web search from the com... in shell osx mac search bash unix commandline web
» snippet in shell osx mac html bash unix commandline web download convert textutil ed snippet
» Switching firewall settings ... in shell osx mac bash unix ipfw firewall ruleset switch
Snippets (source code soon to be available) developed by Peter Cooper and powered by Ruby on Rails