4 files changed, 589 insertions, 0 deletions
diff --git a/sploit/payload/__init__.py b/sploit/payload/__init__.py
new file mode 100644
index 0000000..78769b4
--- /dev/null
+++ b/sploit/payload/__init__.py
@@ -0,0 +1,3 @@
+from .gadhint import *
+from .payload import *
+from .rop import *
diff --git a/sploit/payload/gadhint.py b/sploit/payload/gadhint.py
new file mode 100644
index 0000000..9b077fe
--- /dev/null
+++ b/sploit/payload/gadhint.py
@@ -0,0 +1,109 @@
+from dataclasses import dataclass, field
+from sploit.rev.gadget import Gadget
+
+@dataclass
+class GadHint:
+    """
+    User-annotated gadget description object
+
+    gadget (Gadget|int): The gadget being annotated.  May be a Gadget object or
+    an offset as an int.
+
+    pops (list[str]): The registers popped by this gadget, in order of
+    occurrence.
+
+    movs (dict{str:str}): The register-to-register moves made by this gadget.
+    Keys are destination register names, values are source register names.  The
+    order given is insignificant.
+
+    imms (dict{str:int}): The immediate-to-register loads made by this gadget.
+    Keys are destination register names, values are immediate values.  The order
+    given is insignificant.
+
+    writes (dict{str:str}): The register-to-memory moves (stores) made by this
+    gadget.  Keys are destination register names (expected to hold memory
+    locations), values are source register names (expected to hold direct
+    values).  The order given is insignificant.
+
+    requirements (dict{str:int}): The register state that is required before
+    this gadget should be executed.  Keys are register names, values are the
+    required register values.
+
+    stack (list[int]): A list of words to append to the stack following this
+    gadget.  The first element given is nearest to the top of the stack and the
+    rest follow in order.
+
+    align (bool): If True, this gadget expects the stack to be aligned prior
+    to entry.
+
+    syscall (bool): If True, this gadget contains a syscall instruction.
+
+    spm (int): "Stack pointer move" - The amount the stack pointer is adjusted
+    by this gadget.  The effect of executing a terminating "return" instruction
+    should not be accounted for.  A value of zero is taken as "unspecified".
+    """
+
+    gadget: int = 0
+    pops: list = field(default_factory=list)
+    movs: dict = field(default_factory=dict)
+    imms: dict = field(default_factory=dict)
+    writes: dict = field(default_factory=dict)
+    requirements: dict = field(default_factory=dict)
+    stack: list = field(default_factory=list)
+    align: bool = False
+    syscall: bool = False
+    spm: int = 0
+
+    @property
+    def offset(self):
+        """Return gadget offset as an integer."""
+        return int(self.gadget)
+
+    def __index__(self):
+        """Convert object to integer using offset value."""
+        return self.offset
+
+    def __add__(self, x):
+        """Return new object with adjusted offset."""
+        return GadHint(self.gadget + x, self.pops, self.movs, self.imms,
+                       self.writes, self.requirements, self.stack, self.align,
+                       self.syscall, self.spm)
+
+    def __sub__(self, x):
+        """Return new object with adjusted offset."""
+        return self + (-x)
+
+    def with_requirements(self, reqs):
+        """Return new object with additional requirements."""
+        for k, v in reqs.items():
+            if self.requirements.get(k, v) != v:
+                raise ValueError(
+                    f"GadHint: Conflicting gadget requirements: "
+                    f"{self.requirements}, {reqs}")
+
+        return GadHint(self.gadget, self.pops, self.movs, self.imms,
+                       self.writes, self.requirements | reqs, self.stack,
+                       self.align, self.syscall, self.spm)
+
+    def __repr__(self):
+        """Return human-readable GadHint."""
+        def fmt(name, prop):
+            if len(prop) > 0:
+                return f", {name}={prop}"
+            return ""
+
+        s = hex(self.gadget)
+        s = f"Gadget({s})" if type(self.gadget) is Gadget else s
+        s += fmt("pops", self.pops)
+        s += fmt("movs", self.movs)
+        s += fmt("imms", self.imms)
+        s += fmt("writes", self.writes)
+        s += fmt("requirements", self.requirements)
+        s += fmt("stack", self.stack)
+        if self.align:
+            s += ", align"
+        if self.syscall:
+            s += ", syscall"
+        if self.spm > 0:
+            s += f", spm={self.spm}"
+        return f"GadHint({s})"
diff --git a/sploit/payload/payload.py b/sploit/payload/payload.py
new file mode 100644
index 0000000..cf105c6
--- /dev/null
+++ b/sploit/payload/payload.py
@@ -0,0 +1,94 @@
+from sploit.arch import arch, itob
+from sploit.symtbl import Symtbl
+
+class Payload:
+    MAGIC = b'\xef'
+
+    def __init__(self, **kwargs):
+        self.payload = b''
+        self.sym = Symtbl(**kwargs)
+        self.ctrs = {}
+
+    def __len__(self):
+        return len(self.payload)
+
+    def __call__(self, badbytes=b''):
+        found = [ hex(x) for x in set(self.payload).intersection(badbytes) ]
+        if len(found) > 0:
+            raise Exception(f'Payload: bad bytes in content: {found}')
+        return self.payload
+
+    def _name(self, kind, sym):
+        if sym is not None: return sym
+        try: ctr = self.ctrs[kind]
+        except: ctr = 0
+        self.ctrs[kind] = ctr + 1
+        return f'{kind}_{ctr}'
+
+    def _append(self, value, sym):
+        (self.sym @ 0)[sym] = len(self)
+        self.payload += value
+        return self
+
+    def _prepend(self, value, sym):
+        self.sym >>= len(value)
+        (self.sym @ 0)[sym] = 0
+        self.payload = value + self.payload
+        return self
+
+    def end(self):
+        return self.sym.base + len(self)
+
+    def bin(self, *values, sym=None):
+        return self._append(b''.join(values), sym=self._name('bin', sym))
+
+    def str(self, *values, sym=None):
+        values = [ v.encode() + b'\x00' for v in values ]
+        return self.bin(*values, sym=self._name('str', sym))
+
+    def int(self, *values, sym=None):
+        values = [ itob(v) for v in values ]
+        return self.bin(*values, sym=self._name('int', sym))
+
+    def int8(self, *values, sym=None):
+        values = [ itob(v, 1) for v in values ]
+        return self.bin(*values, sym=self._name('int', sym))
+
+    def int16(self, *values, sym=None):
+        values = [ itob(v, 2) for v in values ]
+        return self.bin(*values, sym=self._name('int', sym))
+
+    def int32(self, *values, sym=None):
+        values = [ itob(v, 4) for v in values ]
+        return self.bin(*values, sym=self._name('int', sym))
+
+    def int64(self, *values, sym=None):
+        values = [ itob(v, 8) for v in values ]
+        return self.bin(*values, sym=self._name('int', sym))
+
+    def ret(self, *values, sym=None):
+        return self.int(*values, sym=self._name('ret', sym))
+
+    def sbp(self, *values, sym=None):
+        if len(values) == 0:
+            return self.rep(self.MAGIC, arch.wordsize, sym=self._name('sbp', sym))
+        return self.int(*values, sym=self._name('sbp', sym))
+
+    def rep(self, value, size, sym=None):
+        return self.bin(self._rep_helper(value, size), sym=self._name('rep', sym))
+
+    def pad(self, size, value=None, sym=None):
+        return self.bin(self._pad_helper(size, value), sym=self._name('pad', sym))
+
+    def pad_front(self, size, value=None, sym=None):
+        return self._prepend(self._pad_helper(size, value), sym=self._name('pad', sym))
+
+    def _rep_helper(self, value, size, *, explain=''):
+        if size < 0:
+            raise Exception(f'Payload: {explain}rep: available space is negative')
+        if (size := size / len(value)) != int(size):
+            raise Exception(f'Payload: {explain}rep: element does not divide the space evenly')
+        return value * int(size)
+
+    def _pad_helper(self, size, value):
+        return self._rep_helper(value or arch.nopcode, size - len(self), explain='pad: ')
diff --git a/sploit/payload/rop.py b/sploit/payload/rop.py
new file mode 100644
index 0000000..54226b4
--- /dev/null
+++ b/sploit/payload/rop.py
@@ -0,0 +1,383 @@
+"""
+ROP chain generation utilities
+
+This module contains tools for automating basic return-oriented-programming
+workloads, such as loading register values and calling into arbitrary functions
+or syscalls.  The tools are currently designed to work on x86 (32 or 64 bit)
+and ARM (32 bit only).
+
+The main appeal of the ROP class is the ability to abstract away the manual
+construction of ROP chain data, and instead make declarative statements like
+"call this function with these arguments."  The ROP class will also utilize its
+supplied binary objects to automatically find and use trivial gadgets.
+
+The user is able to provide annotations for more complicated gadgets, which help
+instruct the class how to incorporate them into a ROP chain.  This is done with
+the GadHint dataclass.  GadHint objects are provided to a ROP instance by
+including them in the Symtbl of one of the binary objects it is constructed with.
+If applicable, a user-supplied gadget will take precedence over automatic gadget
+searching.
+
+See the GadHint class to learn more about the descriptive attributes that are
+supported.
+"""
+
+from graphlib import TopologicalSorter
+
+from sploit.arch import arch, btoi, itob
+from sploit.payload.gadhint import GadHint
+from sploit.payload.payload import Payload
+
+class ROP(Payload):
+    """
+    ROP-enabled payload builder
+
+    POP_MAGIC (int): Magic value used for pop instructions where no specific
+    value is required by the user.
+
+    SPM_MAGIC (bytes): Magic value to fill the stack with when the best
+    available cleaning gadget is larger than is necessary.
+
+    objects (list[ELF]): The binary objects this ROP instance will consider
+    for gadget searching.
+
+    safe_syscalls (bool): If True, require that automatically found syscall
+    instructions are immediately followed by a return instruction.
+
+    align_calls (bool): If True, ensure that the stack return address into
+    function calls is aligned according to the architecture alignment property.
+    Knowledge of alignment is taken from the instance Symtbl's base value.
+
+    clean_stack (bool): If True, attempt to locate a cleaning gadget to "pop"
+    stack data that is leftover from a function call.  Required if attempting
+    to make multiple calls that involve stack-based arguments.
+    """
+
+    POP_MAGIC = 0xdead
+    SPM_MAGIC = b'\x69'
+
+    def __init__(self, *objects, safe_syscalls=True, align_calls=True,
+                 clean_stack=True, **symbols):
+        """Initialize new ROP builder instance."""
+        super().__init__(**symbols)
+        self.objects = objects
+        self.safe_syscalls = safe_syscalls
+        self.align_calls = align_calls
+        self.clean_stack = clean_stack
+
+    def gadgets(self, *regexes, cont=False):
+        """Return a list of matching gadgets, considering all objects."""
+        results = []
+        for obj in self.objects:
+            results += obj.gadgets(*regexes, cont=cont)
+        return results
+
+    def gadget(self, *regexes):
+        """Return the first matching gadget, considering all objects."""
+        for obj in self.objects:
+            try:
+                return obj.gadget(*regexes)
+            except:
+                pass
+        raise LookupError(
+            f"ROP: Need to define gadget symbol for {'; '.join(regexes)}")
+
+    def assign(self, *, sym=None, **sets):
+        """
+        Insert a ROP chain to control given registers.
+
+        **sets (str:int): Keyword arguments specify register assignments to
+        perform with this ROP chain.  Argument names correspond to register
+        names.
+
+        sym (str): If given, sym is the symbol name used to refer to the
+        inserted data.
+        """
+        gadget = GadHint(0, requirements=sets)
+        return self._start_chain(gadget, sym=self._name("assign", sym))
+
+    def call(self, func, *params, sym=None):
+        """
+        Insert a ROP chain to call function.
+
+        func (int): Entry address of function to call.
+        *params (int): Remaining positional args are passed to func.
+
+        sym (str): If given, sym is the symbol name used to refer to the
+        inserted data.
+        """
+        register_params = dict(zip(arch.funcargs, params))
+        stack_params = params[len(register_params):]
+        gadget = GadHint(func, requirements=register_params, stack=stack_params,
+                         align=self.align_calls)
+        return self._start_chain(gadget, sym=self._name("call", sym))
+
+    def syscall(self, *params, sym=None):
+        """
+        Insert a ROP chain to call kernel.
+
+        *params (int): The first argument is the syscall number.  Remaining
+        positional arguments are passed to the syscall.
+
+        sym (str): If given, sym is the symbol name used to refer to the
+        inserted data.
+        """
+        if len(params) > len(arch.kernargs):
+            raise TypeError("ROP: Too many arguments passed to syscall. "
+                "Target architecture supports up to {len(arch.kernargs)-1}.")
+
+        register_params = dict(zip(arch.kernargs, params))
+        gadget = self._get_gadget("syscall", {}).with_requirements(register_params)
+        return self._start_chain(gadget, sym=self._name("syscall", sym))
+
+    def memcpy(self, dst, src, *, sym=None):
+        """
+        Insert a ROP chain to write data into memory.
+
+        dst (int): The destination memory address.
+        src (bytes): The content to write.
+
+        sym (str): If given, sym is the symbol name used to refer to the
+        inserted data.
+        """
+        gadgets = []
+        for idx in range(0, len(src), arch.wordsize):
+            g = self._get_write(dst + idx, btoi(src[idx:idx+arch.wordsize]))
+            gadgets.append(g)
+        return self._start_chain(*gadgets, sym=self._name("memcpy", sym))
+
+    def _get_hints(self):
+        """Return all user-supplied gadget hints."""
+        return [h for obj in self.objects for _,h in obj.sym if type(h) is GadHint]
+
+    def _discover_requirements(self, seen, graph, current):
+        """
+        Populate gadget dependency graph.
+
+        This function recursively looks up gadgets to ensure all necessary
+        required gadgets can be found, and stores this information into the
+        given graph object.  Established dependencies encode the order that the
+        chain builder should attempt to satisfy register requirements.
+        Dependency loops are detected by the TopologicalSorter.
+
+        seen (set): Set of (register,value) tuples we have already discovered.
+        graph (TopologicalSorter): Dependency graph model object.
+        current (GadHint): Current gadget we are processing.
+        """
+        for r, v in current.requirements.items():
+            # We key on register name _and_ value because some gadgets may
+            # only be capable of storing specific values in a target register.
+            # Requiring a register to store different values may require the
+            # use of multiple gadgets.
+            if (r, v) not in seen:
+                gadget = self._get_gadget(r, current.requirements)
+
+                # Add gadget's requirements to the dependency graph.
+                # We say that each requirement is a 'successor' to this
+                # current gadget 'r', so that the chain builder will satisfy
+                # 'r' first.  This prevents the fulfillment of 'r' from
+                # colbbering targets it requires, as the builder will satisfy
+                # them afterward.
+                for x in gadget.requirements:
+                    graph.add(x, r)
+
+                # Treat gadget's load immediates as pseudo-requirements for
+                # the sake of target ordering, following the same logic
+                # as above.
+                for x in gadget.imms:
+                    graph.add(x, r)
+
+                # Mark node as visited
+                seen.add((r, v))
+                self._discover_requirements(seen, graph, gadget)
+
+    def _get_gadget(self, target, sets):
+        """
+        Get context-specific gadget.
+
+        target (str): Either "ret", "syscall", or the name of a register we
+        would like to modify.
+
+        sets (dict{str:int}): The set of other register requirements we are
+        trying to fulfill in parallel.  Values may affect the gadget we decide
+        to use.
+        """
+        # First, consider user-provided hints before automatically locating
+        # gadgets.
+        for hint in self._get_hints():
+            # Setup additional requirements based on hint's register moves.
+            # If a mov target is in sets, require to set the src to the 'sets'
+            # value.
+            addl_reqs = { src:sets[dst] for dst, src in hint.movs.items() if dst in sets }
+            hint = hint.with_requirements(addl_reqs)
+
+            # Pops will be accounted for by the chain builder.
+            # Immediates will be handled by gadget ordering in chain builder.
+            # Writes are a non-issue here.
+
+            if hint.syscall:
+                # Only consider syscalls if the target is syscall.
+                if target == "syscall":
+                    return hint
+            elif target in hint.imms:
+                if hint.imms[target] == sets[target]:
+                    return hint
+            elif target in hint.pops:
+                return hint
+            elif target in hint.movs:
+                return hint
+
+        # Automatically locate simple gadgets
+        if target == "ret":
+            return GadHint(self.gadget(arch.ret))
+
+        if target == "syscall":
+            insns = [arch.syscall, arch.ret] if self.safe_syscalls else [arch.syscall]
+            return GadHint(self.gadget(*insns), syscall=True)
+
+        # target == register
+        insns = [ i.format(target) for i in arch.popgad ]
+        return GadHint(self.gadget(*insns), pops=[target], spm=arch.wordsize)
+
+    def _get_clean(self, size):
+        """
+        Get a stack cleaning gadget that moves sp by _at least_ size.
+
+        size (int): Minimum stack pointer move.
+        """
+        # spm values of zero (the default) can't be trusted, as in this case
+        # the user likely hasn't annotated the GadHint properly.  Returning a
+        # larger move than requested is fine, since the chain builder can insert
+        # junk to be popped.
+        for hint in self._get_hints():
+            if hint.spm >= size and hint.spm > 0:
+                return hint
+
+        results = self.gadgets(*arch.cleangad)
+        table = { int(g.asm[0].group(1), 0): g for g in results }
+        sizes = sorted([ x for x in table.keys() if x >= size ])
+
+        if len(sizes) > 0:
+            return GadHint(table[sizes[0]], spm=sizes[0])
+
+        raise LookupError(
+            f"ROP: Need to define a stack move gadget of at least {size}")
+
+    def _get_write(self, dst, src):
+        """
+        Get a memory write gadget, injected with requirements for user data.
+
+        dst (int): The intended memory write location.
+        src (int): The intended value to write.
+        """
+        # If any exist, take the first write provided by user hints, assuming
+        # the user's intent to specifically use _this_ write.  Follow-on gadgets
+        # to setup the dst and src registers must be findable.
+        for hint in self._get_hints():
+            if hint.writes:
+                d, s = list(hint.writes.items())[0]
+                return hint.with_requirements({d:dst, s:src})
+
+        # Only take an automatic write gadget if we can prove up front that its
+        # requirements can be met, otherwise move on.  A later search result may
+        # pass the test.
+        results = self.gadgets(*arch.writegad)
+
+        for gad in results:
+            d = gad.asm[0].group("dst")
+            s = gad.asm[0].group("src")
+
+            try:
+                # Assert requirements are met.
+                gadget = GadHint(gad, writes={d: s}, requirements={d:dst, s:src})
+                self._discover_requirements(set(), TopologicalSorter(), gadget)
+                return gadget
+            except:
+                pass
+
+        raise LookupError("ROP: Need to define gadgets for memory write / deps")
+
+    def _start_chain(self, *gadgets, sym=None):
+        """
+        Insert a generic ROP chain.
+
+        *gadgets (GadHint): Annotated gadgets to prepare a chain from.
+
+        sym (str): If given, sym is the symbol name used to refer to the
+        inserted data.
+        """
+        stack = Payload(base=self.end())
+        for g in gadgets:
+            self._build_chain(stack, g, {})
+        return self.bin(stack(), sym=self._name("gadget", sym))
+
+    def _build_chain(self, stack, gadget, sets):
+        """
+        Generate chain data for a given ROP gadget.
+
+        This function recursively builds a ROP chain for the given gadget and
+        its requirements, storing data in the 'stack' object.
+
+        stack (Payload): Stack data being constructed.
+        gadget (GadHint): Current gadget we are processing.
+
+        sets (dict{str:int}): The set of other register requirements we are
+        trying to fulfill in parallel.
+        """
+        # Form a to-do-list of registers from our immediate requirements,
+        # attempting to order them such that we avoid overwriting/conflicting
+        # values (this may not be possible).
+        reqs = gadget.requirements
+        graph = TopologicalSorter({ r:set() for r in reqs })
+        self._discover_requirements(set(), graph, gadget)
+        to_do_list = [ x for x in graph.static_order() if x in reqs ]
+
+        # Start chain by satisfying to-do-list requirements.
+        while len(to_do_list) > 0:
+            g = self._get_gadget(to_do_list[0], reqs)
+            self._build_chain(stack, g, reqs)
+
+            # This gadget may satisfy multiple items in the to-do-list.
+            # Specifically, all of its pop and mov targets, and any load
+            # immediates that match our requirements.  Non-matching
+            # immediates will be handled by a later gadget.
+            imms = g.imms.keys() & reqs.keys()
+            imms = [ x for x in imms if g.imms[x] == reqs[x] ]
+            done = g.pops + list(g.movs) + imms
+            to_do_list = [ x for x in to_do_list if x not in done ]
+
+        # Append chain data to execute this gadget, but respect offset == 0
+        # as a way to disable this gadget (perform a NULL gadget).
+        if gadget.offset != 0:
+            # Stack alignment if required.
+            if gadget.align:
+                align = -stack.end() % arch.alignment
+                stack.rep(itob(self._get_gadget("ret", {})), align)
+
+            # "Return address" entry into this gadget.
+            stack.ret(gadget.offset)
+
+            # The gadget's "inner stack data" will be values to be popped
+            # and additional junk data to be deallocated by the gadget itself.
+            sp_dest = len(stack) + gadget.spm
+            stack.int(*[ sets.get(p, self.POP_MAGIC) for p in gadget.pops ])
+            if gadget.spm > 0:
+                stack.pad(sp_dest, self.SPM_MAGIC)
+
+            # The gadget's "outer stack data" will be the additional values
+            # explicitly specified by the gadget.  Append a separate gadget
+            # to clean up these values.
+            if len(gadget.stack) > 0:
+                size = len(gadget.stack) * arch.wordsize
+
+                if self.clean_stack:
+                    clean = self._get_clean(size)
+                    stack.ret(clean)
+                    sp_dest = len(stack) + clean.spm
+                else:
+                    ret = self._get_gadget("ret", {})
+                    stack.ret(ret)
+                    sp_dest = len(stack) + size
+
+                stack.int(*gadget.stack)
+                stack.pad(sp_dest, self.SPM_MAGIC)