I’m starting from the Wireshark description of the Libpcap file format.
/* Global header of a pcap capture file, as given in the Wireshark
 * description of the libpcap file format. thiszone is the only signed
 * field; every other field is an unsigned 16- or 32-bit integer. */
typedef struct pcap_hdr_s {
guint32 magic_number; /* magic number */
guint16 version_major; /* major version number */
guint16 version_minor; /* minor version number */
gint32 thiszone; /* GMT to local correction */
guint32 sigfigs; /* accuracy of timestamps */
guint32 snaplen; /* max length of captured packets, in octets */
guint32 network; /* data link type */
} pcap_hdr_t;
A quick sanity check with xxd confirms the magic number and major and minor versions.
We also see that the pcap file was created on a little-endian machine, because the magic number 0xa1b2c3d4 was written with its least significant byte at the lowest memory address.
➜ pcap-the-flag git:(main) ✗ xxd -l 8 net.cap
00000000: d4c3 b2a1 0200 0400 ........
;; In-memory form of the pcap global (file) header. The slots mirror the
;; fields of the C `pcap_hdr_s` struct one-for-one, with the same widths
;; and signedness. Every slot defaults to 0.
(defstruct pcap-hdr
;; Stored exactly as decoded by the reader, so it also records the byte
;; order the file was written in.
(magic-number 0 :type (unsigned-byte 32))
(version-major 0 :type (unsigned-byte 16))
(version-minor 0 :type (unsigned-byte 16))
;; The only signed slot: the GMT-to-local correction may be negative.
(thiszone 0 :type (signed-byte 32))
(sigfigs 0 :type (unsigned-byte 32))
;; Maximum length of a captured packet, in octets.
(snaplen 0 :type (unsigned-byte 32))
;; Data link type of the capture.
(network 0 :type (unsigned-byte 32)))
Used by 1
Now might be a good time to introduce the overall structure of our parsing program.
@{global parameters}
@{pcap structs}
@{helpers for reading binary data from a stream}
@{functions for reading parts of the pcap file}
Let’s read the file and see what this looks like.
This next bit I’m taking from Peter Seibel’s “Practical Common Lisp”.
(defun read-pcap-hdr (in)
  "Read the 24-octet pcap global header from the binary input stream IN.

The magic number is always decoded as big-endian. The value obtained that
way reveals the byte order the file was written in, and *BYTE-ORDER* is set
accordingly as a side effect so that later READ-U16 / READ-U32 calls (which
default to *BYTE-ORDER*) decode the rest of the file correctly.

Both the microsecond magic (#xa1b2c3d4) and the nanosecond magic
(#xa1b23c4d) are accepted, in either byte order. This function does not
record which of the two was seen, other than through the MAGIC-NUMBER slot.

Signals an ERROR when the magic number is not one of the known values,
instead of silently treating unknown input as a little-endian capture.
Returns a freshly allocated PCAP-HDR."
  (let ((hdr (make-pcap-hdr))
        (magic-number (read-u32 in :big-endian)))
    (setf *byte-order*
          (case magic-number
            ;; Decoded big-endian and it reads naturally: big-endian file.
            ((#xa1b2c3d4 #xa1b23c4d) :big-endian)
            ;; Same magics with their octets swapped: little-endian file.
            ((#xd4c3b2a1 #x4d3cb2a1) :little-endian)
            (t (error "Not a pcap file: unrecognised magic number #x~8,'0X"
                      magic-number))))
    ;; Keep the value exactly as decoded (big-endian), as before.
    (setf (pcap-hdr-magic-number hdr) magic-number)
    (setf (pcap-hdr-version-major hdr) (read-u16 in))
    (setf (pcap-hdr-version-minor hdr) (read-u16 in))
    ;; thiszone is a *signed* 32-bit field, so reinterpret the raw unsigned
    ;; value as two's complement. Storing the raw value would violate the
    ;; (signed-byte 32) slot type for any negative offset.
    (setf (pcap-hdr-thiszone hdr)
          (let ((raw (read-u32 in)))
            (if (logbitp 31 raw)
                (- raw (expt 2 32))
                raw)))
    (setf (pcap-hdr-sigfigs hdr) (read-u32 in))
    (setf (pcap-hdr-snaplen hdr) (read-u32 in))
    (setf (pcap-hdr-network hdr) (read-u32 in))
    hdr))
Used by 1
Reading the file header relies on a couple of helper functions for reading unsigned 16- and 32-bit integers.
(defun read-u32 (in &optional (byte-order *byte-order*))
  "Read four octets from the binary stream IN and return them combined into
one unsigned 32-bit integer.

BYTE-ORDER defaults to the current value of *BYTE-ORDER* at call time.
BYTE-OFFSETS turns it into the bit position each successive octet is
deposited at, so the octets are consumed strictly in stream order and only
their placement inside the result varies."
  (let ((value 0))
    ;; One bit position per octet, in the order the octets arrive.
    (dolist (position (byte-offsets byte-order 8 4) value)
      (setf value (dpb (read-byte in) (byte 8 position) value)))))
(defun read-u16 (in &optional (byte-order *byte-order*))
  "Read two octets from the binary stream IN and return them combined into
one unsigned 16-bit integer.

BYTE-ORDER defaults to the current value of *BYTE-ORDER* at call time.
BYTE-OFFSETS supplies the bit position for each octet, in the order the
octets are read from the stream."
  (let ((u16 0))
    (destructuring-bind (b1 b2) (byte-offsets byte-order 8 2)
      ;; A leftover debugging (format nil ...) call used to sit here. It
      ;; built a string on every read and discarded it, so it was removed.
      (setf (ldb (byte 8 b1) u16) (read-byte in))
      (setf (ldb (byte 8 b2) u16) (read-byte in)))
    u16))
Used by 1
Those helpers use a byte-offsets function that returns a sequence of offsets that will map to the sequence of (setf (ldb ...)) calls so that the bytes will be set in the proper bits.
By that, I mean that if the byte sequence #x1A2B3C4D is read as a 4-byte integer in big-endian, then it will be #x1A2B3C4D. But if it is read as a 4-byte integer in little-endian, then it will be #x4D3C2B1A.
(defun byte-offsets (byte-order size count)
  "Return a fresh list of bit positions 0, SIZE, 2*SIZE, ... that are
strictly below COUNT*SIZE.

For :BIG-ENDIAN the list is in descending order, so the first value read
lands in the most significant position. For any other BYTE-ORDER the list
is in ascending order."
  (let ((ascending (loop for position = 0 then (+ position size)
                         while (< position (* count size))
                         collect position)))
    (if (eq byte-order :big-endian)
        (reverse ascending)
        ascending)))
We’ll use *byte-order* as a global default and initialize it to :big-endian.
But depending on the magic number that we read from the pcap file header, we may change that parameter.
And this is enough to parse the header.
;; Open the capture as a stream of octets (the readers use READ-BYTE, so
;; the element type must be (unsigned-byte 8)) and parse only the global
;; header. The resulting struct is the value of the whole form.
(with-open-file (stream "net.cap" :element-type '(unsigned-byte 8))
(read-pcap-hdr stream))
; => #S(PCAP-HDR
; :MAGIC-NUMBER 3569595041
; :VERSION-MAJOR 2
; :VERSION-MINOR 4
; :THISZONE 0
; :SIGFIGS 0
; :SNAPLEN 1514
; :NETWORK 1)
From the same Wireshark.org description of the global header, we have the record header.
/* Per-packet record header. incl_len octets of packet data follow each
 * record header in the file. All four fields are unsigned 32-bit. */
typedef struct pcaprec_hdr_s {
guint32 ts_sec; /* timestamp seconds */
guint32 ts_usec; /* timestamp microseconds */
guint32 incl_len; /* number of octets of packet saved in file */
guint32 orig_len; /* actual length of packet */
} pcaprec_hdr_t;
Let’s add another struct.
;; In-memory form of the per-packet record header. All four fields are
;; unsigned 32-bit integers in the file format (guint32) and are filled in
;; from READ-U32, so every slot is declared (unsigned-byte 32). ORIG-LEN
;; was previously declared signed, which disagreed with both the format
;; and the reader and would reject lengths of 2^31 or more.
(defstruct pcaprec-hdr
  ;; Timestamp, whole seconds.
  (ts-sec 0 :type (unsigned-byte 32))
  ;; Timestamp, sub-second part.
  (ts-usec 0 :type (unsigned-byte 32))
  ;; Number of octets of this packet actually saved in the file.
  (incl-len 0 :type (unsigned-byte 32))
  ;; Length of the packet as it appeared on the wire.
  (orig-len 0 :type (unsigned-byte 32)))
And a function to read from the pcap file into the struct.
(defun read-pcaprec-hdr (in)
  "Read one packet record header from the binary stream IN and return it
as a fresh PCAPREC-HDR.

Four 32-bit values are consumed in file order: ts-sec, ts-usec, incl-len,
orig-len. Each is decoded by READ-U32 with its default byte order."
  ;; Constructor arguments are evaluated left to right, so the reads happen
  ;; in the same order as the fields appear in the file.
  (make-pcaprec-hdr :ts-sec (read-u32 in)
                    :ts-usec (read-u32 in)
                    :incl-len (read-u32 in)
                    :orig-len (read-u32 in)))
The packet data is just incl_len bytes following the packet header.
Maybe we don’t need a struct for this. But I kind of like the idea of having parity: struct for pcap, record, data. And maybe it will be useful to have a struct to throw some metadata on later.
Oh.
Or even better, a packet struct that encompasses both the header and the data.
;; One captured packet: its record header together with the captured
;; octets that follow that header in the file.
(defstruct pcap-packet
;; Defaults to a freshly constructed, all-zero record header.
(header (make-pcaprec-hdr) :type pcaprec-hdr)
;; Defaults to an empty, adjustable octet vector with a fill pointer.
(data (make-array 0 :adjustable t :fill-pointer t :element-type '(unsigned-byte 8)) :type (vector (unsigned-byte 8) *)))
Which will be read by a new read-pcap-packet function.
(defun read-pcap-packet (in)
  "Read one complete packet record from the binary stream IN.

The record header is read first. Its INCL-LEN field then determines how
many octets of packet data are read immediately after it. Returns a fresh
PCAP-PACKET holding both."
  (let ((record-header (read-pcaprec-hdr in)))
    (make-pcap-packet
     :header record-header
     :data (read-bytes in (pcaprec-hdr-incl-len record-header)))))
Which will use a new read-bytes helper to read n bytes from a stream (the “data” of a packet).
(defun read-bytes (in n)
  "Read up to N octets from the binary stream IN.

Returns a fresh adjustable octet vector with a fill pointer. Its length is
the number of octets actually read. That is N unless the stream ended
early, in which case the vector is shorter rather than padded with octets
that were never in the file."
  (let* ((bytes (make-array n :adjustable t :fill-pointer t
                              :element-type '(unsigned-byte 8)))
         ;; READ-SEQUENCE returns the index of the first element it did not
         ;; fill, which is the count of octets actually read.
         (filled (read-sequence bytes in)))
    (setf (fill-pointer bytes) filled)
    bytes))
And with this, we are starting to get somewhere.
;; Parse the global header and then the first packet record from the same
;; stream. The init forms of LET are evaluated in order, so the header is
;; consumed (and the byte order established) before the packet is read.
(with-open-file (stream "net.cap" :element-type '(unsigned-byte 8))
(let ((pcap-header (read-pcap-hdr stream))
(first-packet (read-pcap-packet stream)))
(list pcap-header first-packet)))
; => (#S(PCAP-HDR
; :MAGIC-NUMBER 3569595041
; :VERSION-MAJOR 2
; :VERSION-MINOR 4
; :THISZONE 0
; :SIGFIGS 0
; :SNAPLEN 1514
; :NETWORK 1)
; #S(PCAP-PACKET
; :HEADER #S(PCAPREC-HDR
; :TS-SEC 1473288256
; :TS-USEC 204554
; :INCL-LEN 78
; :ORIG-LEN 78)
; :DATA #(196 233 132 135 96 40 164 94 96 223 46 27 8 0 69 0 0 64 208 3 0 0
; 64 6 44 238 192 168 0 101 192 30 252 154 231 159 0 80 94 171 34 101
; 0 0 0 0 176 2 255 255 88 35 0 0 2 4 5 180 1 3 3 5 1 1 8 10 58 77
; 189 197 0 0 0 0 4 2 0 0)))
https://en.wikipedia.org/wiki/Ethernet_frame

Wikipedia shows a ~preamble~ and a ~start frame delimiter~ in the diagram. Those are physical layer bytes that aren’t included in the data link layer.
Our parsing will start at the data link layer with the destination MAC address.
;; An Ethernet frame as seen at the data link layer, starting at the
;; destination MAC address. Every slot is an octet vector.
;;
;; Each default is now a zero-filled vector of the declared length. The
;; former default of 0 is not a vector, so (make-ethernet-packet) violated
;; its own slot types.
;;
;; NOTE(review): the slot types require OCT-VEC to be usable as a type
;; specifier, and to be defined before this form is compiled.
(defstruct ethernet-packet
  ;; Destination MAC address.
  (dst (make-array 6 :element-type '(unsigned-byte 8) :initial-element 0)
   :type (oct-vec 6))
  ;; Source MAC address.
  (src (make-array 6 :element-type '(unsigned-byte 8) :initial-element 0)
   :type (oct-vec 6))
  ;; Length / EtherType field.
  (len-typ (make-array 2 :element-type '(unsigned-byte 8) :initial-element 0)
   :type (oct-vec 2))
  ;; Payload; variable length, empty by default.
  (data (make-array 0 :element-type '(unsigned-byte 8))
   :type (oct-vec *))
  ;; Frame check sequence.
  (chk (make-array 4 :element-type '(unsigned-byte 8) :initial-element 0)
   :type (oct-vec 4)))
I was tired of typing :type (vector (unsigned-byte 8) <x>) so I made an oct-vec macro.
;; Shorthand type specifier for a vector of octets.
;;   (oct-vec 6) == (vector (unsigned-byte 8) 6)
;;   (oct-vec *) or plain oct-vec == an octet vector of any length.
;;
;; This has to be DEFTYPE, not DEFMACRO. The :type option of a struct slot
;; takes a type specifier, and macros are not expanded in that position, so
;; a macro named OCT-VEC leaves (oct-vec 6) an unknown type.
(deftype oct-vec (&optional (len '*))
  `(vector (unsigned-byte 8) ,len))
https://www.rfc-editor.org/rfc/rfc791#page-11
3.1. Internet Header Format
A summary of the contents of the internet header follows:
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version| IHL |Type of Service| Total Length |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Identification |Flags| Fragment Offset |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Time to Live | Protocol | Header Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Source Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Destination Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Options | Padding |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Some of the fields in this struct are 3 and 4 bits in length.
Common Lisp lets you specify the size of a byte.
A type of (unsigned-byte 1) is the same thing as a bit.
So maybe I have a choice here. Which type do I go with?
(unsigned-byte <size>) or (vector bit <size>)?
I don’t know of a good reason to choose one over the other.
I’m tempted to go with (unsigned-byte <size>) since my other structs are using (unsigned-byte <size>).
The others feel different because they are typical sizes of 8/16/32.
So I really want to use bit-vectors for the 3 and 4 bit fields.
But I don’t want to mix bit-vectors and unsigned-bytes in this one.
Hm.
Well… there’s no builtin way to convert bit-vectors to ints and that will be a nice-to-have when we get to working with IP source and destination addresses.
I think I’ll use (unsigned-byte <size>) for everything.
;; IPv4 header, one slot per field of the RFC 791 header diagram. Every
;; numeric field is an unsigned integer of exactly the field's bit width,
;; including the sub-octet fields (4-bit version and IHL, 3-bit flags,
;; 13-bit fragment offset).
(defstruct ip4-hdr
(version 0 :type (unsigned-byte 4))
(ihl 0 :type (unsigned-byte 4))
(type-of-service 0 :type (unsigned-byte 8))
(total-length 0 :type (unsigned-byte 16))
(identification 0 :type (unsigned-byte 16))
(flags 0 :type (unsigned-byte 3))
(fragment-offset 0 :type (unsigned-byte 13))
(time-to-live 0 :type (unsigned-byte 8))
(protocol 0 :type (unsigned-byte 8))
(header-checksum 0 :type (unsigned-byte 16))
;; Addresses are kept as plain 32-bit integers.
(source-address 0 :type (unsigned-byte 32))
(dest-address 0 :type (unsigned-byte 32))
;; Holds a single IP4-OPTION; defaults to a freshly constructed empty one.
(options (make-ip4-option) :type ip4-option)
;; Defaults to an empty, adjustable octet vector with a fill pointer.
(padding (make-array 0 :element-type '(unsigned-byte 8) :adjustable t :fill-pointer t)
:type (vector (unsigned-byte 8) *)))
Options is an odd field because it’s variable length. From the RFC:
Case 1: A single octet of option-type.
Case 2: An option-type octet, an option-length octet, and the
actual option-data octets.
;; One IPv4 header option: an option-type octet, optionally followed by an
;; option-length octet and the option-data octets.
;; Note that the accessor for the TYPE slot is named IP4-OPTION-TYPE, the
;; same symbol that names the IP4-OPTION-TYPE structure type. Functions and
;; types live in separate namespaces, so the two do not clash.
(defstruct ip4-option
;; Decoded option-type octet; defaults to an all-zero IP4-OPTION-TYPE.
(type (make-ip4-option-type) :type ip4-option-type)
;; Option-length octet.
(length 0 :type (unsigned-byte 8))
;; Option data; defaults to an empty, adjustable octet vector.
(data (make-array 0 :element-type '(unsigned-byte 8) :adjustable t :fill-pointer t) :type (vector (unsigned-byte 8) *)))
;; The three bit-fields packed into an option-type octet. Their widths,
;; 1 + 2 + 5, add up to the 8 bits of the octet.
(defstruct ip4-option-type
;; 1-bit copied flag.
(copied 0 :type (unsigned-byte 1))
;; 2-bit option class.
(class 0 :type (unsigned-byte 2))
;; 5-bit option number.
(number 0 :type (unsigned-byte 5)))
And the parsers for options and option types.
(defun parse-ip4-option-type (byte)
  "Split the option-type octet BYTE into its three bit-fields and return
them as a fresh IP4-OPTION-TYPE.

Bit 7 is the copied flag, bits 6-5 the option class and bits 4-0 the
option number."
  (let ((copied-flag (logand (ash byte -7) #b1))
        (option-class (logand (ash byte -5) #b11))
        (option-number (logand byte #b11111)))
    (make-ip4-option-type :copied copied-flag
                          :class option-class
                          :number option-number)))
(defun parse-ip4 (bytes)
  "Parse the IPv4 header at the start of the octet vector BYTES.

Returns a two-element list (HDR PAYLOAD). HDR is a freshly allocated
IP4-HDR. PAYLOAD is a fresh vector holding every octet after the header.

Multi-octet fields are assembled most significant octet first (network
byte order). The header length comes from the IHL field, which counts
32-bit words, so the payload starts at octet IHL*4. Options, if present,
are skipped rather than decoded, and the OPTIONS and PADDING slots of HDR
keep their defaults.

An IHL below the legal minimum of 5 is treated as 5. A header length that
runs past the end of BYTES yields an empty payload. BYTES must hold at
least the 20 fixed header octets, otherwise AREF signals an error."
  (flet ((be-uint (start width)
           ;; Combine WIDTH consecutive octets beginning at index START
           ;; into one unsigned integer, most significant octet first.
           (let ((value 0))
             (dotimes (i width value)
               (setf value (logior (ash value 8)
                                   (aref bytes (+ start i))))))))
    (let ((hdr (make-ip4-hdr))
          ;; Flags and fragment offset share the 16 bits at octets 6-7:
          ;; the top 3 bits are the flags, the low 13 bits the offset.
          (flags+offset (be-uint 6 2)))
      (setf (ip4-hdr-version hdr) (ldb (byte 4 4) (aref bytes 0)))
      (setf (ip4-hdr-ihl hdr) (ldb (byte 4 0) (aref bytes 0)))
      (setf (ip4-hdr-type-of-service hdr) (aref bytes 1))
      (setf (ip4-hdr-total-length hdr) (be-uint 2 2))
      (setf (ip4-hdr-identification hdr) (be-uint 4 2))
      (setf (ip4-hdr-flags hdr) (ldb (byte 3 13) flags+offset))
      (setf (ip4-hdr-fragment-offset hdr) (ldb (byte 13 0) flags+offset))
      (setf (ip4-hdr-time-to-live hdr) (aref bytes 8))
      (setf (ip4-hdr-protocol hdr) (aref bytes 9))
      (setf (ip4-hdr-header-checksum hdr) (be-uint 10 2))
      (setf (ip4-hdr-source-address hdr) (be-uint 12 4))
      (setf (ip4-hdr-dest-address hdr) (be-uint 16 4))
      ;; Options are still not decoded; IHL tells us how far to skip.
      (let ((start-of-data (min (length bytes)
                                (max 20 (* 4 (ip4-hdr-ihl hdr))))))
        (list hdr (subseq bytes start-of-data))))))