/// <summary>
/// Recursively walks the directory tree rooted at <paramref name="path"/>. Sub-directories are
/// visited first, then the files directly inside the directory. Every file whose lower-cased
/// extension is accepted by <c>checkfiletype</c> is tested with <c>isutf8</c>; a file for which
/// that test is false is read in full twice, once decoded as UTF-8 and once decoded with the
/// system default encoding (honouring a byte-order mark if one is present).
/// </summary>
/// <param name="path">Directory to scan.</param>
private void findnoutffile(string path)
{
    DirectoryInfo folder = new DirectoryInfo(path);

    // Depth-first: handle every sub-directory before this directory's own files.
    foreach (DirectoryInfo subfolder in folder.GetDirectories())
    {
        findnoutffile(subfolder.FullName);
    }

    foreach (FileInfo file in folder.GetFiles())
    {
        if (!checkfiletype(file.Extension.ToLower()))
        {
            continue;
        }

        // `using` releases the file handle even when the scan throws part-way through.
        bool butf8;
        using (FileStream fs = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
        {
            butf8 = isutf8(fs);
        }

        if (butf8)
        {
            continue;
        }

        // Decode the same bytes under both candidate encodings.
        string decodedAsUtf8;
        using (System.IO.StreamReader reader =
            new System.IO.StreamReader(file.FullName, System.Text.Encoding.UTF8))
        {
            decodedAsUtf8 = reader.ReadToEnd();
        }

        string decodedAsDefault;
        using (System.IO.StreamReader reader =
            new System.IO.StreamReader(file.FullName, System.Text.Encoding.Default, true))
        {
            decodedAsDefault = reader.ReadToEnd();
        }

        // NOTE(review): neither decoded text is consumed anywhere in this method, so the two
        // reads currently have no lasting effect. Presumably a step that rewrites the file
        // as UTF-8 is missing here; confirm the intended behaviour before adding it.
    }
}
// UTF-8 octet layouts recognised by isutf8:
//   U+0000  .. U+007F    0xxxxxxx                             (ASCII, 1 octet)
//   U+0080  .. U+07FF    110xxxxx 10xxxxxx                    (2 octets)
//   U+0800  .. U+FFFF    1110xxxx 10xxxxxx 10xxxxxx           (3 octets)
//   U+10000 .. U+10FFFF  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx  (4 octets)

/// <summary>
/// Scans the stream from its current position to the end and reports whether the bytes form
/// structurally valid UTF-8 that contains at least one multi-octet character.
/// </summary>
/// <param name="sbinputstream">
/// Open, readable stream. It is consumed by the scan and is not closed by this method.
/// </param>
/// <returns>
/// <c>true</c> when every multi-octet sequence is well formed and at least one non-ASCII byte
/// was seen. <c>false</c> for malformed or truncated sequences, and also for empty or purely
/// ASCII input, which carries no evidence of being UTF-8 specifically.
/// </returns>
private static bool isutf8(FileStream sbinputstream)
{
    int pendingContinuations = 0; // continuation octets still owed to the current character
    bool allAscii = true;
    int next;

    // Read until ReadByte signals end of stream (-1). This avoids an int counter that would
    // overflow on files larger than 2 GiB, and never mistakes the -1 marker for a 0xFF byte.
    while ((next = sbinputstream.ReadByte()) != -1)
    {
        if ((next & 0x80) != 0)
        {
            allAscii = false;
        }

        if (pendingContinuations > 0)
        {
            // Inside a multi-octet character, only 10xxxxxx is acceptable.
            if ((next & 0xC0) != 0x80)
            {
                return false;
            }
            pendingContinuations--;
            continue;
        }

        if (next < 0x80)
        {
            continue; // plain ASCII octet
        }

        // Lead octet: the number of leading 1 bits is the total length of the sequence.
        int leadingOnes = 0;
        for (int mask = 0x80; mask != 0 && (next & mask) != 0; mask >>= 1)
        {
            leadingOnes++;
        }

        // One leading 1 bit is a stray continuation octet. More than four (0xF8..0xFF) would
        // start a 5- to 8-octet sequence, which UTF-8 does not define.
        if (leadingOnes < 2 || leadingOnes > 4)
        {
            return false;
        }
        pendingContinuations = leadingOnes - 1;
    }

    // The input ended in the middle of a multi-octet character.
    if (pendingContinuations > 0)
    {
        return false;
    }

    return !allAscii;
}
}
}
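// Web-page navigation residue captured with the listing ("Hot news", "Troubleshooting"); not part of the program.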