今天看到 xuejun 写的一个取汉字拼音首字母(声母)的函数,试用了一下,感觉很好用,不敢独享,拿出来跟大家分享:
$pbexportheader$uf_getfirstletter.srf
$pbexportcomments$ 返回给定汉字串的首字母串, xuejun , 19990821
global type uf_getfirstletter from function_object
end type
forward prototypes
global function string uf_getfirstletter (string as_inputstring)
end prototypes
global function string uf_getfirstletter (string as_inputstring);// function name : uf_getfirstletter
// purpose   : Return the pinyin initial letters of a string of Chinese characters.
// arguments : as_inputstring - string, the text to convert
// returns   : string - one letter per Hanzi; the whole result is passed through
//             Lower(), so ASCII letters present in the input come back lower-cased.
// notes     : 1. The lookup relies on the GB 2312 zone/position ("qu-wei") code of
//                each double-byte character: zone = lead byte - 160,
//                position = trail byte - 160.
//                NOTE(review): this assumes Asc()/Mid()/Len() operate on the bytes of
//                a GB 2312 (DBCS) string - confirm before using it in a Unicode
//                build of PowerBuilder.
//             2. Single-byte characters (code < 128) are copied through.
//             3. Double-byte characters that are neither level-1 nor level-2 Hanzi
//                (graphic symbols, unassigned codes) are copied through unchanged.
//             4. A "[" in the level-2 table is emitted as-is for that character.
// sample    : ls_rtn = uf_getfirstletter("中华人民共和国")   // ls_rtn = "zhrmghg"

char    lc_firstletter[23]      // initial letter for each level-1 pinyin group
integer li_secposvalue[23]      // first zone/position code of each level-1 pinyin group
string  ls_secondsectable       // initial letter of every level-2 Hanzi, in code order
string  ls_ch                   // character currently being processed
string  ls_returnstr            // result accumulator
long    ll_pos, ll_len          // read cursor and input length
integer j
integer li_sectorcode           // zone code
integer li_positioncode         // position code
integer li_secposcode           // zone * 100 + position
integer li_offset               // 0-based index into the level-2 table

// Level-1 Hanzi are grouped by pinyin; each entry is the code where a new initial starts.
li_secposvalue[] = {1601,1637,1833,2078,2274,2302,2433,2594,2787,3106,3212,3472,3635,3722,3730,3858,4027,4086,4390,4558,4684,4925,5249}
lc_firstletter[] = {"a","b","c","d","e","f","g","h","j","k","l","m","n","o","p","q","r","s","t","w","x","y","z"}

// Level-2 table: one letter per code, starting at zone 56 position 1.
// The lookup below expects 3008 entries (32 zones x 94 positions).
// The literal is split in two pieces joined with "+ &" so that no string
// literal spans a physical line break.
ls_secondsectable = &
"cjwgnspgcgne[y[btyyzdxykygt[jnnjqmbsgzscyjsyy[pgkbzgy[ywjkgkljywkpjqhy[w[dzlsgmrypywwcckznkyygttnjjnykkzytcjnmcylqlypyqfqrpzslwbtgkjfyxjwzltbncxjjjjtxdttsqzycdxxhgck[phffss[ybgxlppbyll[hlxs[zm[jhsojnghdzqyklgjhsgqzhxqgkezzwyscscjxyeyxadzpmdssmzjzqjyzc[j[wqjbyzpxgznzcpwhkxhqkmwfbpbydtjzzkqhylygxfptyjyyzpszlfchmqshgmxxsxj[[dcsbbqbefsjyhxwgzkpylqbgldlcctnmayddkssngycsgxlyzaybnptsdkdylhgymylcxpy[jndqjwxqxfyyfjlejpzrxccqwqqsbnkymgplbmjrqcflnymyqmsqyrbcjthztqfrxqhxmjjcjlxqgjmshzkbswyemyltxfsydswlycjqxsjnqbsctyhbftdcyzdjwyghqfrxwckqkxebptlpxjzsrmebwhjlbjslyysmdxlclqkxlhxjrzjmfqhxhwywsbhtrxxglhqhfnm[ykldyxzpylgg[mtcfpajjzyljtyanjgbjplqgdzyqyaxbkysecjsznslyzhsxlzcghpxzhznytdsbcjkdlzayfmydlebbgqyzkxgldndnyskjshdlyxbcghxypkdjmmzngmmclgwzszxzjfznmlzzthcsydbdllscddnlkjykjsycjlkwhqasdknhcsganhdaashtcplcpqybsdmpjlpzjoqlcdhjjysprchn[nnlhlyyqyhwzptczgwwmzffjqqqqyxaclbhkdjxdgmmydjxzllsygxgkjrywzwyclzmssjzldbyd[fcxyhlxchyzjq[[qagmnyxpfrkssbjlyxysyglnscmhzwwmnzjjlxxhchsy[[ttxrycyxbyhcsmxjsznpwgpxxtaybgajcxly[dccwzocwkccsbnhcpdyznfcyytyckxkybsqkkytqqxfcwchcykelzqbsqyjqcclmthsywhmktlkjlycxwheqqhtqh[pq[qscfymndmgbwhwlgsllysdlmlxpthmjhwljzyhzjxhtxjlhxrswlwzjcbxmhzqxsdzpmgfcsglsxymjshxpjxwmyqksmyplrthbxftpmhyxlchlhlzylxgsssstclsldclrpbhzhxyyfhb[gdmycnqqwlqhjj[ywjzyejjdhpblqxtqkwhlchqxagtlxljxmsl[htzkzjecxjcjnmfby[sfywybjzgnysdzsqyrsljpclpwxsdwejbjcbcnaytwgmpapclyqpclzxsbnmsggfnzjjbzsfzyndxhplqkzczwalsbccjx[yzgwkypsgxfzfcdkhjgxdlqfsgdslqwzkxtmhsbgzmjzrglyjbpmlmsxlzjqqhzyjczydjwbmyklddpmjegxyhylxhlqyqhkycwcjmyyxnatjhyccxzpcqlbzwwytwbqcmlpmyrjcccxfpznzzljplxxyztzlgdldcklyrzzgqtgjhhgjljaxfgfjzslcfdqzlclgjdjcsnzlljpjqdcclcjxmyzftsxgcgsbrzxjqqctzhgyqtjqqlzxjylylbcyamcstylpdjbyregklzyzhlyszqlznwczcllwjqjjjkdgjzolbbzppglghtgzxyghzmycnqsycyhbhgxkamtxyxnbskyzzgjzlqjdfcjxdygjqjjpmgwgjjjpkqsbgbmmcjssclpqpdxcdyyky[cjddyygywrhjrtgznyqldkljszzgzqzjgdykshpzmtlcpwnjafyzdjcnmwescyglbtzcgmssllyxqsxsbsjsbbsgghfjlypmzjnlyywdqshzxtyywhmzyhywdbxbtlmsyyyfsxjc[dxxlhjhf[sxzqhfzmzcztqcxzxrttdjhnnyz" + &
"qqmnqdmmg[ydxmjgdhcdyzbffallztdltfxmxqzdngwqdbdczjdxbzgsqqddjcmbkzffxmkdmdsyyszcmljdsynsbrskmkmpcklgdbqtfzswtfgglyplljzhgj[gypzltcsmcnbtjbqfkthbyzgkpbbymtdssxtbnpdkleycjnyddykzddhqhsdzsctarlltkzlgecllkjlqjaqnbdkkghpjtzqksecshalqfmmgjnlyjbbtmlyzxdcjpldlpcqdhzycbzsczbzmsljflkrzjsnfrgjhxpdhyjybzgdlqcsezgxlblgyxtwmabchecmwyjyzlljjyhlg[djlslygkdzpzxjyyzlwcxszfgwyydlyhcljscmbjhblyzlycblydpdqysxqzbytdkyxjy[cnrjmpdjgklcljbctbjddbblblczqrppxjcjlzcshltoljnmdddlngkaqhqhjgykheznmshrp[qqjchgmfprxhjgdychghlyrzqlcyqjnzsqtkqjymszswlcfqqqxyfggyptqwlmcrnfkkfsyylqbmqammmyxctpshcptxxzzsmphpshmclmldqfyqxszyydyjzzhqpdszglstjbckbxyqzjsgpsxqzqzrqtbdkyxzkhhgflbcsmdldgdzdblzyycxnncsybzbfglzzxswmsccmqnjqsbdqsjtxxmbltxzclzshzcxrqjgjylxzfjphymzqqydfqjjlzznzjcdgzygctxmzysctlkphtxhtlbjxjlxscdqxcbbtjfqzfsltjbtkqbxxjjljchczdbzjdczjdcprnpqcjpfczlclzxzdmxmphjsgzgszzqlylwtjpfsyasmcjbtzkycwmytcsjjljcqlwzmalbxyfbpnlsfhtgjwejjxxglljstgshjqlzfkcgnnnszfdeqfhbsaqtgylbxmmygszldydqmjjrgbjtkgdhgkblqkbdmbylxwcxyttybkmrtjzxqjbhlmhmjjzmqasldcyxyqdlqcafywyxqhz"

ls_returnstr = ""
ll_len = len(as_inputstring)
ll_pos = 1

do while ll_pos <= ll_len
	ls_ch = mid(as_inputstring, ll_pos, 1)

	// Single-byte (non-Hanzi) character: copy through.
	if asc(ls_ch) < 128 then
		ls_returnstr = ls_returnstr + ls_ch
		ll_pos = ll_pos + 1
		continue
	end if

	// Double-byte character: take lead and trail byte together.
	ls_ch = mid(as_inputstring, ll_pos, 2)
	ll_pos = ll_pos + 2

	// Dangling lead byte at the end of the input: there is no trail byte to
	// decode, so pass it through instead of computing a bogus code.
	if len(ls_ch) < 2 then
		ls_returnstr = ls_returnstr + ls_ch
		continue
	end if

	li_sectorcode   = asc(left(ls_ch, 1)) - 160
	li_positioncode = asc(right(ls_ch, 1)) - 160
	li_secposcode   = li_sectorcode * 100 + li_positioncode

	if li_secposcode > 1600 and li_secposcode < 5590 then
		// Level-1 Hanzi: pick the last group whose starting code is <= this code.
		for j = 23 to 1 step -1
			if li_secposcode >= li_secposvalue[j] then
				ls_returnstr = ls_returnstr + lc_firstletter[j]
				exit
			end if
		next
	else
		// Level-2 Hanzi: 0-based offset from zone 56, position 1.
		li_offset = (li_sectorcode - 56) * 94 + li_positioncode - 1
		if li_offset >= 0 and li_offset <= 3007 then
			// Mid() is 1-based, so the table entry for offset n is character n + 1.
			ls_returnstr = ls_returnstr + mid(ls_secondsectable, li_offset + 1, 1)
		else
			// Not a Hanzi covered by either table: keep the character unchanged.
			ls_returnstr = ls_returnstr + ls_ch
		end if
	end if
loop

return lower(ls_returnstr)
end function