Source code for aptdetector.network.parser.httpparser

"""http_parser implementation

couple of functions that was required to parse and reassemble
http requests and responses
"""
from collections import defaultdict

from pcapparser import utils
from pcapparser.constant import HttpType, Compress
from pcapparser.reader import DataReader


class HttpRequestHeader(object):
    """mimic the header of a request packet"""

    def __init__(self):
        """initialize :class:`HttpRequestHeader`"""
        self.content_len = 0
        self.method = b''
        self.host = b''
        self.uri = b''
        self.transfer_encoding = b''
        self.content_encoding = b''
        self.content_type = b''
        self.compress = Compress.IDENTITY
        self.chunked = False
        self.expect = b''
        self.protocol = b''
        self.raw_data = None


class HttpResponseHeader(object):
    """mimic the header of a response packet"""

    def __init__(self):
        """initialize :class:`HttpResponseHeader`"""
        self.content_len = 0
        self.status_line = None
        self.status_code = None
        self.transfer_encoding = b''
        self.content_encoding = b''
        self.content_type = b''
        self.compress = Compress.IDENTITY
        self.chunked = False
        self.connection_close = False
        self.raw_data = None


class RequestMessage(object):
    """used to pass data between requests"""

    def __init__(self):
        """initialize :class:`RequestMessage`"""
        self.expect_header = None
        self.filtered = False


class HttpParser(object):
    """parse http req & resp"""

    def __init__(self):
        """initialize :class:`HttpParser`"""
        self.cur_type = None
        self.inited = False
        self.is_http = False
        self.worker = None
        self.cur_data = None
        self.message = RequestMessage()

    def send(self, http_type, data):
        """append an http request or response

        if the connection is not initiated yet, start it; if the packet
        still belongs to the current request/response, append it.

        Args:
            http_type (HttpType): type of packet
            data (str): data of unparsed packet

        Returns:
            the full URL of a request that has just been completed,
            `None` otherwise
        """
        pkt = None
        if not self.inited:
            self._init(http_type, data)
            self.inited = True
        if not self.is_http:
            return None

        # still current http request/response
        if self.cur_type == http_type:
            self.cur_data.append(data)
            return None

        if self.cur_data is not None:
            reader = DataReader(self.cur_data)
            if self.cur_type == HttpType.REQUEST:
                pkt = self.read_request(reader, self.message)
            elif self.cur_type == HttpType.RESPONSE:
                self.read_response(reader, self.message)

        # new http request/response
        self.cur_type = http_type
        self.cur_data = [data]
        return pkt

    def _init(self, http_type, data):
        """check whether the first packet is an http request

        Args:
            http_type (HttpType): :class:`HttpType.REQUEST` or :class:`HttpType.RESPONSE`
            data (str): data of unparsed packet
        """
        if not utils.is_request(data) or http_type != HttpType.REQUEST:
            # not an http request
            self.is_http = False
        else:
            self.is_http = True

    def finish(self):
        """process any data that is still unprocessed"""
        if self.cur_data:
            reader = DataReader(self.cur_data)
            if self.cur_type == HttpType.REQUEST:
                pkt = self.read_request(reader, self.message)
                if pkt is not None:
                    return pkt
            elif self.cur_type == HttpType.RESPONSE:
                self.read_response(reader, self.message)
        return None

    def read_headers(self, reader, lines):
        """read headers

        Args:
            reader (DataReader): reader to consume lines from
            lines (list): collects the raw lines of the header section

        Returns:
            `dict` of parsed headers
        """
        header_dict = defaultdict(str)
        while True:
            line = reader.read_line()
            if line is None:
                break
            line = line.strip()
            if not line:
                break
            lines.append(line)

            key, value = utils.parse_http_header(line)
            if key is None:
                # incorrect header.
                continue
            header_dict[key.lower()] = value
        return header_dict

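    # Illustrative example, not part of the original source: assuming
    # utils.parse_http_header splits a raw header line on the first colon
    # (read_headers lower-cases the key itself), feeding the lines
    #     b"Host: example.com" and b"Content-Length: 0"
    # through read_headers would yield roughly
    #     {b'host': b'example.com', b'content-length': b'0'}
    # with any missing key defaulting to '' via defaultdict(str).
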
    def read_http_req_header(self, reader):
        """read & parse request http headers

        Args:
            reader (DataReader): reader to consume lines from

        Returns:
            `None` if this is not a request or any error happened,
            the parsed :class:`HttpRequestHeader` otherwise
        """
        line = reader.read_line()
        if line is None:
            return None
        line = line.strip()
        if not utils.is_request(line):
            return None

        req_header = HttpRequestHeader()
        items = line.split(b' ')
        if len(items) == 3:
            req_header.method = items[0]
            req_header.uri = items[1]
            req_header.protocol = items[2]

        lines = [line]
        header_dict = self.read_headers(reader, lines)
        if b"content-length" in header_dict:
            req_header.content_len = int(header_dict[b"content-length"])
        if b"transfer-encoding" in header_dict and \
                b'chunked' in header_dict[b"transfer-encoding"]:
            req_header.chunked = True
        req_header.content_type = header_dict[b'content-type']
        req_header.compress = utils.get_compress_type(header_dict[b"content-encoding"])
        req_header.host = header_dict[b"host"]
        if b'expect' in header_dict:
            req_header.expect = header_dict[b'expect']

        req_header.raw_data = b'\n'.join(lines)
        return req_header

    def read_http_resp_header(self, reader):
        """read & parse response http headers

        Args:
            reader (DataReader): reader to consume lines from

        Returns:
            `None` if this is not a response or any error happened,
            the parsed :class:`HttpResponseHeader` otherwise
        """
        line = reader.read_line()
        if line is None:
            return line
        line = line.strip()
        if not utils.is_response(line):
            return None

        resp_header = HttpResponseHeader()
        resp_header.status_line = line
        try:
            resp_header.status_code = int(line.split(b' ')[1])
        except Exception:
            pass

        lines = [line]
        header_dict = self.read_headers(reader, lines)
        if b"content-length" in header_dict:
            resp_header.content_len = int(header_dict[b"content-length"])
        if b"transfer-encoding" in header_dict and \
                b'chunked' in header_dict[b"transfer-encoding"]:
            resp_header.chunked = True
        resp_header.content_type = header_dict[b'content-type']
        resp_header.compress = utils.get_compress_type(header_dict[b"content-encoding"])
        resp_header.connection_close = (header_dict[b'connection'] == b'close')

        resp_header.raw_data = b'\n'.join(lines)
        return resp_header

    def read_chunked_body(self, reader, skip=False):
        """read a chunked transfer-encoded body

        Args:
            reader (DataReader): reader to consume lines from
            skip (bool): if True, discard the data instead of returning it

        Returns:
            the reassembled body (possibly truncated on error) if skip is
            False, `None` otherwise
        """
        result = []
        # read a chunk per loop
        while True:
            # read chunk size line
            cline = reader.read_line()
            if cline is None:
                # error occurred.
                if not skip:
                    return b''.join(result)
                else:
                    return
            chunk_size_end = cline.find(b';')
            if chunk_size_end < 0:
                chunk_size_end = len(cline)
            # skip chunk extension
            chunk_size_str = cline[0:chunk_size_end]
            # the last chunk
            if chunk_size_str[:1] == b'0':
                # chunk footer header
                # TODO(handle additional http headers.)
                while True:
                    cline = reader.read_line()
                    if cline is None or len(cline.strip()) == 0:
                        break
                if not skip:
                    return b''.join(result)
                else:
                    return
            # chunk size
            chunk_size_str = chunk_size_str.strip()
            try:
                chunk_len = int(chunk_size_str, 16)
            except Exception:
                return b''.join(result)

            data = reader.read(chunk_len)
            if data is None:
                # error occurred, skip the rest.
                if not skip:
                    return b''.join(result)
                else:
                    return
            if not skip:
                result.append(data)

            # a CR-LF ends this chunk
            reader.read_line()

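    # Illustrative note, not from the original source: read_chunked_body above
    # walks the standard chunked wire format, "<hex-size>[;extension]\r\n<data>\r\n"
    # repeated until a zero-size chunk, e.g.
    #     b"4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n"  ->  b"Wikipedia"
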
    def read_request(self, reader, message):
        """read and output one http request.

        Args:
            reader (DataReader): reader to consume lines from
            message (RequestMessage): state shared between consecutive requests

        Returns:
            `None` if an error happened, the request's full URL otherwise
        """
        if message.expect_header and not utils.is_request(reader.fetch_line()):
            req_header = message.expect_header
            message.expect_header = None
        else:
            req_header = self.read_http_req_header(reader)
            if req_header is None:
                # read header error, we skip all data.
                reader.skip_all()
                return None
            if req_header.expect:
                # it is an Expect: 100-continue post request
                message.expect_header = req_header

        # deal with body
        # if not req_header.chunked:
        #     content = reader.read(req_header.content_len)
        # else:
        #     content = self.read_chunked_body(reader)

        full_url = "http://" + req_header.host.decode("utf-8") + req_header.uri.decode("utf-8")
        return full_url

    def read_response(self, reader, message):
        """read and output one http response

        Args:
            reader (DataReader): reader to consume lines from
            message (RequestMessage): state shared with the preceding request

        Returns:
            None
        """
        resp_header = self.read_http_resp_header(reader)
        if resp_header is None:
            reader.skip_all()
            return

        if message.expect_header:
            if resp_header.status_code == 100:
                # expected 100, we do not read the body
                reader.skip_all()
                return

        # read body
        # if not resp_header.chunked:
        #     if resp_header.content_len == 0:
        #         if resp_header.connection_close:
        #             # we can't get the content length, so assume it runs to the end of the data.
        #             resp_header.content_len = 10000000
        #         else:
        #             # we can't get the content length and it is not a chunked body,
        #             # so just read all remaining data.
        #             resp_header.content_len = 10000000
        #     content = reader.read(resp_header.content_len)
        # else:
        #     content = self.read_chunked_body(reader)
        #
        # if not message.filtered:
        #     self.processor.on_http_resp(resp_header, content)
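

# A minimal usage sketch, not part of the original module.  It assumes that
# HttpType.REQUEST / HttpType.RESPONSE mark the direction of each chunk of TCP
# payload and that DataReader accepts the list of raw byte strings collected by
# HttpParser.send(); both come from the pcapparser package imported above.
if __name__ == "__main__":
    parser = HttpParser()
    raw_request = (b"GET /index.html HTTP/1.1\r\n"
                   b"Host: example.com\r\n"
                   b"\r\n")
    raw_response = (b"HTTP/1.1 200 OK\r\n"
                    b"Content-Length: 2\r\n"
                    b"\r\n"
                    b"OK")
    # client -> server data is buffered until the direction changes
    parser.send(HttpType.REQUEST, raw_request)
    # server -> client data flushes the pending request and yields its full URL
    url = parser.send(HttpType.RESPONSE, raw_response)
    print(url)  # expected: http://example.com/index.html
    # flush whatever is left (here: the buffered response)
    parser.finish()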