这篇文章主要讲解了如何用C#解析HTTP报文。要解析HTTP报文,需要哪些操作呢?下面小编给大家整理了相关资料,需要的朋友可以参考下。
下面的内容既有文字说明,也有代码分析,并附有效果图,供大家学习。
要解析HTTP报文,需要实现以下操作:
读取HTTP报头提供的各种属性
分析属性值,从中获取内容编码和字符集编码
将报头数据和内容进行分离
判断内容是文本还是二进制,如果是二进制则不进行处理
如果内容是文本,按报头中提供的内容编码和字符集编码进行解压缩和解码
目前没有找到.Net框架内置的解析方法,理论上HttpClient等类在内部应该已经实现了解析,但不知为何没有公开这些处理方法。(亦或是我没找到)
那么只能自己来解析这些数据了。
我们先来看看这个经过gzip压缩的文本内容的HTTP报文:
这里提供一个老外写的简陋的解析类(已经过修改,原代码中存在一些严重BUG):
/// <summary>
/// HTTP header fields recognised by the parser.
/// </summary>
/// <remarks>
/// Each member's numeric value is used as an index into a string array of
/// parsed header values, so the values must stay stable and contiguous.
/// The on-the-wire header name is the member name with every '_' replaced
/// by '-' (for example <c>Content_Type</c> corresponds to "Content-Type").
/// </remarks>
public enum HTTPHeaderField
{
    // Values 0 - 29
    Accept              = 0,
    Accept_Charset      = 1,
    Accept_Encoding     = 2,
    Accept_Language     = 3,
    Accept_Ranges       = 4,
    Authorization       = 5,
    Cache_Control       = 6,
    Connection          = 7,
    Cookie              = 8,
    Content_Length      = 9,
    Content_Type        = 10,
    Date                = 11,
    Expect              = 12,
    From                = 13,
    Host                = 14,
    If_Match            = 15,
    If_Modified_Since   = 16,
    If_None_Match       = 17,
    If_Range            = 18,
    If_Unmodified_Since = 19,
    Max_Forwards        = 20,
    Pragma              = 21,
    Proxy_Authorization = 22,
    Range               = 23,
    Referer             = 24,
    TE                  = 25,
    Upgrade             = 26,
    User_Agent          = 27,
    Via                 = 28,
    Warn                = 29,

    // Values 30 - 51
    Age                 = 30,
    Allow               = 31,
    Content_Encoding    = 32,
    Content_Language    = 33,
    Content_Location    = 34,
    Content_Disposition = 35,
    Content_MD5         = 36,
    Content_Range       = 37,
    ETag                = 38,
    Expires             = 39,
    Last_Modified       = 40,
    Location            = 41,
    Proxy_Authenticate  = 42,
    Refresh             = 43,
    Retry_After         = 44,
    Server              = 45,
    Set_Cookie          = 46,
    Trailer             = 47,
    Transfer_Encoding   = 48,
    Vary                = 49,
    Warning             = 50,
    WWW_Authenticate    = 51
}
/// <summary>
/// Splits a raw HTTP message into its header fields and its body bytes.
/// </summary>
/// <remarks>
/// Recognised header values are stored in <see cref="HTTPField"/>, indexed by the
/// numeric value of the matching <see cref="HTTPHeaderField"/> member. Everything
/// after the blank line that ends the header section is exposed, undecoded, through
/// <see cref="Data"/>. No decompression or charset decoding is performed here.
/// </remarks>
class HTTPHeader
{
    #region PROPERTIES
    // CRLF CRLF marks the end of the header section of an HTTP/1.x message.
    private const string HeaderTerminator = "\r\n\r\n";

    // One slot per HTTPHeaderField member (values 0..51); unseen fields stay null.
    private string[] m_StrHTTPField = new string[52];

    // Body bytes; empty until a body has been found.
    private byte[] m_byteData = new byte[0];

    /// <summary>Parsed header values, indexed by <c>(int)HTTPHeaderField</c>; null when absent.</summary>
    public string[] HTTPField
    {
        get { return m_StrHTTPField; }
        set { m_StrHTTPField = value; }
    }

    /// <summary>Raw message body (bytes following the header terminator); empty when there is none.</summary>
    public byte[] Data
    {
        get { return m_byteData; }
        set { m_byteData = value; }
    }
    #endregion

    // ASCII maps every input byte to exactly one char, so a character index in the
    // decoded text is also a byte offset into the original buffer.
    private readonly System.Text.ASCIIEncoding encoding = new System.Text.ASCIIEncoding();

    #region CONSTRUCTORS
    /// <summary>
    /// Default constructor - not used.
    /// </summary>
    private HTTPHeader()
    { }

    /// <summary>
    /// Parses a complete HTTP message held in memory.
    /// </summary>
    /// <param name="ByteHTTPRequest">Raw bytes of the message (start line, headers and optional body).</param>
    /// <exception cref="ArgumentNullException"><paramref name="ByteHTTPRequest"/> is null.</exception>
    public HTTPHeader(byte[] ByteHTTPRequest)
    {
        if (ByteHTTPRequest == null)
        {
            throw new ArgumentNullException(nameof(ByteHTTPRequest));
        }

        string HTTPRequest = encoding.GetString(ByteHTTPRequest);

        // Ordinal search: culture-sensitive IndexOf is unreliable for CR/LF sequences.
        int IndexHeaderEnd = HTTPRequest.IndexOf(HeaderTerminator, StringComparison.Ordinal);

        string Header;
        if (IndexHeaderEnd < 0)
        {
            // No blank line found: treat the whole input as header text with no body.
            Header = HTTPRequest;
            Data = new byte[0];
        }
        else
        {
            Header = HTTPRequest.Substring(0, IndexHeaderEnd);

            int bodyStart = IndexHeaderEnd + HeaderTerminator.Length;
            byte[] body = new byte[ByteHTTPRequest.Length - bodyStart];
            Array.Copy(ByteHTTPRequest, bodyStart, body, 0, body.Length);
            Data = body;
        }

        HTTPHeaderParse(Header);
    }
    #endregion

    #region METHODS
    /// <summary>
    /// Walks the header section line by line and stores the value of every field
    /// that corresponds to an <see cref="HTTPHeaderField"/> member.
    /// </summary>
    /// <param name="Header">Header section text, including the start line, without the terminating blank line.</param>
    private void HTTPHeaderParse(string Header)
    {
        // Build the wire names once: enum member name with '_' replaced by '-'.
        HTTPHeaderField[] fields = (HTTPHeaderField[])Enum.GetValues(typeof(HTTPHeaderField));
        string[] wireNames = new string[fields.Length];
        for (int i = 0; i < fields.Length; i++)
        {
            wireNames[i] = fields[i].ToString().Replace('_', '-');
        }

        string[] lines = Header.Split(new[] { "\r\n", "\n" }, StringSplitOptions.None);

        // Line 0 is the request/status line, never a header field.
        for (int lineIndex = 1; lineIndex < lines.Length; lineIndex++)
        {
            string line = lines[lineIndex];
            int colon = line.IndexOf(':');

            // Skip lines without a field name and folded continuation lines.
            if (colon <= 0 || char.IsWhiteSpace(line[0]))
            {
                continue;
            }

            string name = line.Substring(0, colon).TrimEnd();
            for (int i = 0; i < wireNames.Length; i++)
            {
                // Header field names are case-insensitive.
                if (!string.Equals(wireNames[i], name, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                int slot = (int)fields[i];
                // Keep the first occurrence when a field is repeated.
                if (m_StrHTTPField[slot] == null)
                {
                    m_StrHTTPField[slot] = line.Substring(colon + 1).Trim();
                }
                break;
            }
        }
    }
    #endregion
}
编写以下代码以实现解析文件:
/// <summary>
/// Console driver: repeatedly asks for the path of a file containing a raw HTTP
/// message, parses it with <see cref="HTTPHeader"/> and prints the recognised
/// header fields followed by the body decoded as UTF-8.
/// </summary>
class Program
{
    /// <summary>
    /// Prompt/parse/print loop. Ends when standard input is closed.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        while (true)
        {
            Console.WriteLine("输入待解析的HTTP报文数据文件完整路径:");
            var filename = Console.ReadLine();

            // ReadLine returns null at end of input; stop instead of looping forever.
            if (filename == null)
            {
                break;
            }

            try
            {
                // ReadAllBytes opens the file read-only and always releases the handle,
                // even when reading fails.
                var data = File.ReadAllBytes(filename);
                var header = new HTTPHeader(data);

                for (var x = 0; x < header.HTTPField.Length; x++)
                {
                    var f = header.HTTPField[x];
                    if (!String.IsNullOrEmpty(f))
                    {
                        Console.WriteLine($"[{x:00}] - {(HTTPHeaderField) x} : {f}");
                    }
                }

                Console.WriteLine($"总数据尺寸{data.Length}字节,实际数据尺寸{header.Data.Length}字节");
                // The body is printed as-is: no gzip decompression or charset detection is done here.
                Console.WriteLine(Encoding.UTF8.GetString(header.Data));
                Console.WriteLine();
            }
            catch (Exception e)
            {
                // Report the failure and go back to the prompt.
                Console.WriteLine(e);
            }
        }
    }
}
这里还未实现gzip解压缩和字符解码,直接用UTF8解码输出的。(需要时再写吧,都是体力活儿~)
效果图展示:
下面的图是没有经过gzip压缩过的数据。
以上就是用C#解析HTTP报文的全部内容,哪位大侠还有更好的方法,欢迎提出宝贵意见,希望大家喜欢以上内容。
新闻热点
疑难解答