#!/usr/bin/perl
# Created On      : 2010
# Last Modified On: Fri Apr 16 02:05:56 2010
# Update Count    : 151

=encoding utf8

=head1 NAME

tongyong2hanyu -- Tongyong Pinyin to Hanyu Pinyin converter

=head1 USAGE EXAMPLES

  $ tongyong2hanyu file
  jhuci => zhuqi
  Lioujiao => liujiao

=head1 DESCRIPTION

Input: Tongyong Pinyin multi-syllable strings and dashes (-).
Output: lower case Hanyu Pinyin appended to each line.
Empty lines are passed through unaltered.

=head1 COPYRIGHT

http://www.fsf.org/copyleft/gpl.html

=head1 AUTHOR

積丹尼 Dan Jacobson http://jidanni.org/lang/pinyin/
...whose goal is to help clean up the Taiwan Pinyin mess.

=cut

use strict;
use warnings FATAL => 'all';

# NOTE: there is no "use utf8" and no I/O encoding layer in this script, so
# the "ü" in the rules below is matched and emitted as the raw bytes stored
# in this source file (assumed to be UTF-8, as the POD declares).

# tongyong_to_hanyu($text)
#
# Convert one chomped line of Tongyong Pinyin to lower-case Hanyu Pinyin.
#
#   $text   - the Tongyong Pinyin string; dashes (-) are accepted.
#   returns - a new string; the argument is not modified.
#
# The rules are order-dependent: later rules rely on the output of earlier
# ones (e.g. "syu" -> "sü" -> "xu"), so do not reorder them.
sub tongyong_to_hanyu {
    my ($text) = @_;

    my $hanyu = lc $text;
    for ($hanyu) {
        s/-/'/g;                    # dashes become apostrophes
        s/([wf])ong/$1eng/g;        # wong -> weng, fong -> feng

        # Previously lacked /g, so only the first "wun" on a line was
        # converted ("wunwun" came out as "wenwun").
        s/wun/wen/g;

        s/jh/zh/g;
        s/uei/ui/g;
        s/iou/iu/g;

        # "yu" after l/j/c/s, or after an n that sits at a word boundary,
        # becomes "ü".  Presumably \b keeps a syllable-final n followed by
        # "yu" (as in "...n" + "yu") from being rewritten -- TODO confirm.
        s/(\bn|[ljcs])yu/$1ü/g;
        s/[jz]ü/ju/g;               # ü is then folded back to u after j/q/x,
        s/[qc]ü/qu/g;               # mapping the Tongyong initial on the way
        s/[xs]ü/xu/g;

        s/([zcsjqx])y/$1i/g;        # remaining y after these initials -> i

        # Temporarily split "ihu" as "i'hu" so the "h" is not taken as part
        # of an "ih" final by the next three rules; the apostrophe is
        # removed again afterwards.  Side effect: an "i'hu" that came from
        # the input (e.g. via "i-hu") also loses its apostrophe.
        s/ihu/i'hu/g;
        s/ci(?!h)/qi/g;             # ci -> qi, but leave "cih" alone
        s/si(?!h)/xi/g;             # si -> xi, but leave "sih" alone
        s/ih/i/g;                   # the "ih" final is written "i"
        s/i'hu/ihu/g;
        #P.S.: siha si'ha sih'a: sia xi'ha si'a...
    }
    return $hanyu;
}

# Filter loop: read lines from the files named on the command line (or
# STDIN) and print "original => converted" for each non-empty line.
while (my $line = <>) {
    # Empty lines are passed through unaltered.
    if ($line =~ /^$/) {
        print $line;
        next;
    }

    chomp $line;
    my $converted = tongyong_to_hanyu($line);
    print "$line => $converted\n";
}