def is_chinese(uchar): """判断一个unicode是否是汉字""" if uchar >= u'/u4e00' and uchar <= u'/u9fa5': return True else: return Falsedef is_number(uchar): """判断一个unicode是否是数字""" if uchar >= u'/u0030' and uchar <= u'/u0039': return True else: return Falsedef is_alphabet(uchar): """判断一个unicode是否是英文字母""" if (uchar >= u'/u0041' and uchar <= u'/u005a') or (uchar >= u'/u0061' and uchar <= u'/u007a'): return True else: return False
def format_str(content): content = unicode(content,'utf-8') content_str = '' for i in content: if is_chinese(i): content_str = content_str+i return content_str