API reference

ghidralib

This library is an attempt to provide a Pythonic standard library for Ghidra.

The main goal is to make writing quick&dirty scripts actually quick, and not that dirty.

There is no equivalent of FlatProgramAPI from Ghidra. You are expected to start by getting an object of interest by calling instance methods, for example

>>> Function("main")
main

to get a function called "main". When you want to do something this library doesn't support (yet), you can always escape back to Ghidra's wrapped Java types by using the .raw property, for example:

>>> Function("main").raw.UNKNOWN_STACK_DEPTH_CHANGE
2147483647

For more details, see the documentation at https://msm-code.github.io/ghidralib/.

Addr = GenericAddress | int | str module-attribute

DataT = GhidraWrapper | JavaObject | str module-attribute

HIGHLIGHT_COLOR = SearchConstants.SEARCH_HIGHLIGHT_COLOR module-attribute

Reg = GhRegister | str module-attribute

Str = (str, bytes, unicode) module-attribute

T = TypeVar('T') module-attribute

__version__ = '0.2.0' module-attribute

bytes = str module-attribute

interpreter = get_current_interpreter() module-attribute

long = int module-attribute

AddressRange

Bases: GhidraWrapper

Wraps a Ghidra AddressRange object.

Source code in ghidralib.py
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
class AddressRange(GhidraWrapper):
    """Wraps a Ghidra AddressRange object.

    Addresses are exposed as plain integers (the offset of the underlying
    Ghidra address), and arguments are converted with `resolve`."""

    @property
    def addresses(self):  # type: () -> list[int]
        """Return the addresses in this range.

        The whole range is materialized as a list of integer offsets."""
        # NOTE(review): Ghidra's AddressRange javadoc lists iterator(boolean)
        # rather than getAddresses(boolean) - confirm this call on a real range.
        return [a.getOffset() for a in self.raw.getAddresses(True)]

    def __iter__(self):  # type: () -> Iterator[int]
        """Iterate over the addresses in this range."""
        return iter(self.addresses)

    @property
    def start(self):  # type: () -> int
        """Get the first address in this range."""
        return self.raw.getMinAddress().getOffset()

    @property
    def end(self):  # type: () -> int
        """Get the last address in this range."""
        return self.raw.getMaxAddress().getOffset()

    @property
    def length(self):  # type: () -> int
        """Get the length of this range."""
        return self.raw.getLength()

    def __len__(self):  # type: () -> int
        """Get the length of this range."""
        return self.length

    def contains(self, addr):  # type: (Addr) -> bool
        """Return True if the given address is in this range.

        :param addr: address to check"""
        return self.raw.contains(resolve(addr))

    def __contains__(self, addr):  # type: (Addr) -> bool
        """Return True if the given address is in this range.

        :param addr: address to check"""
        return self.contains(addr)

    @property
    def is_empty(self):  # type: () -> bool
        """Return True if this range is empty."""
        # NOTE(review): confirm that the wrapped AddressRange exposes isEmpty().
        return self.raw.isEmpty()

    def __nonzero__(self):  # type: () -> bool
        """Return True if this range is not empty."""
        return not self.is_empty

    # Python 3 looks up __bool__ and ignores __nonzero__; alias it so the
    # truth value is computed the same way on both interpreters.
    __bool__ = __nonzero__

    def __and__(self, other):  # type: (AddressRange) -> AddressRange
        """Return the intersection of this range and the given range.

        :param other: the range to intersect with"""
        return AddressRange(self.raw.intersect(other.raw))

addresses property

Return the addresses in this range.

end property

Get the last address in this range.

is_empty property

Return True if this range is empty.

length property

Get the length of this range.

start property

Get the first address in this range.

__and__(other)

Return the intersection of this range and the given range.

Source code in ghidralib.py
1884
1885
1886
def __and__(self, other):  # type: (AddressRange) -> AddressRange
    """Intersect this range with another one and wrap the result.

    :param other: the range to intersect with"""
    overlap = self.raw.intersect(other.raw)
    return AddressRange(overlap)

__contains__(addr)

Return True if the given address is in this range.

Parameters:
  • addr

    address to check

Source code in ghidralib.py
1870
1871
1872
1873
def __contains__(self, addr):  # type: (Addr) -> bool
    """Support `addr in range` by delegating to `contains`.

    :param addr: address to check"""
    found = self.contains(addr)
    return found

__iter__()

Iterate over the addresses in this range.

Source code in ghidralib.py
1841
1842
1843
def __iter__(self):  # type: () -> Iterator[int]
    """Return an iterator over the `addresses` list of this range."""
    return iter(self.addresses)

__len__()

Get the length of this range.

Source code in ghidralib.py
1860
1861
1862
def __len__(self):  # type: () -> int
    """Support `len(range)`; the value is taken from the `length` property."""
    size = self.length
    return size

__nonzero__()

Return True if this range is not empty.

Source code in ghidralib.py
1880
1881
1882
def __nonzero__(self):  # type: () -> bool
    """Truth value of the range: False when `is_empty` is set, True otherwise."""
    if self.is_empty:
        return False
    return True

contains(addr)

Return True if the given address is in this range.

Parameters:
  • addr

    address to check

Source code in ghidralib.py
1864
1865
1866
1867
1868
def contains(self, addr):  # type: (Addr) -> bool
    """Check whether an address belongs to this range.

    :param addr: address to check; it is passed through `resolve` first
    :return: result of the wrapped object's `contains` call"""
    resolved = resolve(addr)
    return self.raw.contains(resolved)

AddressSet

Bases: GhidraWrapper

Wraps a Ghidra AddressSetView object.

Source code in ghidralib.py
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
class AddressSet(GhidraWrapper):
    """Wraps a Ghidra AddressSetView object.

    Addresses are exposed as plain integers (the offset of the underlying
    Ghidra address), and arguments are converted with `resolve`."""

    @staticmethod
    def empty():  # type: () -> AddressSet
        """Create a new empty address set"""
        return AddressSet(GhAddressSet())

    @staticmethod
    def create(start, length):  # type: (Addr, int) -> AddressSet
        """Create a new AddressSet with given address and length.

        :param start: first address of the set
        :param length: number of addresses; the last address in the set
          is start + length - 1"""
        addr = resolve(start)
        return AddressSet(GhAddressSet(addr, addr.add(length - 1)))

    @property
    def addresses(self):  # type: () -> list[int]
        """Return the addresses in this set."""
        return [a.getOffset() for a in self.raw.getAddresses(True)]

    @property
    def ranges(self):  # type: () -> list[AddressRange]
        """Return the address ranges that make up this set."""
        return [AddressRange(r) for r in self.raw.iterator(True)]

    def __iter__(self):  # type: () -> Iterator[int]
        """Iterate over the addresses in this set."""
        return iter(self.addresses)

    def contains(self, addr):  # type: (Addr) -> bool
        """Return True if the given address is in this set.

        :param addr: address to check"""
        return self.raw.contains(resolve(addr))

    def __contains__(self, addr):  # type: (Addr) -> bool
        """Return True if the given address is in this set.

        :param addr: address to check"""
        return self.contains(addr)

    @property
    def is_empty(self):  # type: () -> bool
        """Return True if this set is empty."""
        return self.raw.isEmpty()

    def __nonzero__(self):  # type: () -> bool
        """Return True if this set is not empty."""
        return not self.is_empty

    # Python 3 looks up __bool__ and ignores __nonzero__. This class has no
    # __len__ either, so without the alias every set would be truthy there.
    __bool__ = __nonzero__

    def __and__(self, other):  # type: (AddressSet) -> AddressSet
        """Return the intersection of this set and the given set."""
        return AddressSet(self.raw.intersect(other.raw))

    def __sub__(self, other):  # type: (AddressSet) -> AddressSet
        """Subtract the given set from this set."""
        return AddressSet(self.raw.subtract(other.raw))

    def __xor__(self, other):  # type: (AddressSet) -> AddressSet
        """Computes the symmetric difference of this set and the given set."""
        return AddressSet(self.raw.xor(other.raw))

    def __or__(self, other):  # type: (AddressSet) -> AddressSet
        """Computes the union of this set and the given set."""
        return AddressSet(self.raw.union(other.raw))

    def __get_highlighter(self):  # type: () -> Any
        """Get the ColorizingService of the current tool.

        :raises RuntimeError: if the tool does not provide the service"""
        tool = getState().getTool()
        service = tool.getService(ColorizingService)
        if service is None:
            raise RuntimeError("Cannot highlight without the ColorizingService")
        return service

    def highlight(self, color=HIGHLIGHT_COLOR):  # type: (Color) -> None
        """Set the background color of the addresses in this set.

        :param color: the color to use, HIGHLIGHT_COLOR by default"""
        service = self.__get_highlighter()
        service.setBackgroundColor(self.raw, color)

    def unhighlight(self):  # type: () -> None
        """Clear the background color of the addresses in this set."""
        service = self.__get_highlighter()
        service.clearBackgroundColor(self.raw)

addresses property

Return the addresses in this set.

is_empty property

Return True if this set is empty.

ranges property

__and__(other)

Return the intersection of this set and the given set.

Source code in ghidralib.py
1932
1933
1934
def __and__(self, other):  # type: (AddressSet) -> AddressSet
    """Intersect this set with another one and wrap the result.

    :param other: the set to intersect with"""
    common = self.raw.intersect(other.raw)
    return AddressSet(common)

__contains__(addr)

Return True if the given address is in this set.

Source code in ghidralib.py
1919
1920
1921
def __contains__(self, addr):  # type: (Addr) -> bool
    """Support `addr in address_set` by delegating to `contains`.

    :param addr: address to check"""
    found = self.contains(addr)
    return found

__get_highlighter()

Source code in ghidralib.py
1948
1949
1950
1951
1952
1953
def __get_highlighter(self):  # type: () -> Any
    """Look up the ColorizingService of the current tool.

    :return: the service object
    :raises RuntimeError: if the tool does not provide the service"""
    service = getState().getTool().getService(ColorizingService)
    if service is not None:
        return service
    raise RuntimeError("Cannot highlight without the ColorizingService")

__iter__()

Source code in ghidralib.py
1912
1913
def __iter__(self):  # type: () -> Iterator[int]
    """Return an iterator over the `addresses` list of this set."""
    return iter(self.addresses)

__nonzero__()

Return True if this set is not empty.

Source code in ghidralib.py
1928
1929
1930
def __nonzero__(self):  # type: () -> bool
    """Truth value of the set: False when `is_empty` is set, True otherwise."""
    if self.is_empty:
        return False
    return True

__or__(other)

Computes the union of this set and the given set.

Source code in ghidralib.py
1944
1945
1946
def __or__(self, other):  # type: (AddressSet) -> AddressSet
    """Build the union of this set and another one and wrap the result.

    :param other: the set to merge with"""
    merged = self.raw.union(other.raw)
    return AddressSet(merged)

__sub__(other)

Subtract the given set from this set.

Source code in ghidralib.py
1936
1937
1938
def __sub__(self, other):  # type: (AddressSet) -> AddressSet
    """Remove the addresses of another set from this one and wrap the result.

    :param other: the set to subtract"""
    remaining = self.raw.subtract(other.raw)
    return AddressSet(remaining)

__xor__(other)

Computes the symmetric difference of this set and the given set.

Source code in ghidralib.py
1940
1941
1942
def __xor__(self, other):  # type: (AddressSet) -> AddressSet
    """Build the symmetric difference of two sets and wrap the result.

    :param other: the set to compare with"""
    difference = self.raw.xor(other.raw)
    return AddressSet(difference)

contains(addr)

Return True if the given address is in this set.

Source code in ghidralib.py
1915
1916
1917
def contains(self, addr):  # type: (Addr) -> bool
    """Check whether an address belongs to this set.

    :param addr: address to check; it is passed through `resolve` first
    :return: result of the wrapped object's `contains` call"""
    resolved = resolve(addr)
    return self.raw.contains(resolved)

create(start, length) staticmethod

Create a new AddressSet with given address and length.

Source code in ghidralib.py
1897
1898
1899
1900
1901
@staticmethod
def create(start, length):  # type: (Addr, int) -> AddressSet
    """Build an AddressSet covering `length` addresses beginning at `start`.

    :param start: first address of the set (passed through `resolve`)
    :param length: number of addresses; the last one is start + length - 1"""
    first = resolve(start)
    last = first.add(length - 1)
    return AddressSet(GhAddressSet(first, last))

empty() staticmethod

Create a new empty address set

Source code in ghidralib.py
1892
1893
1894
1895
@staticmethod
def empty():  # type: () -> AddressSet
    """Build an AddressSet that wraps a freshly constructed GhAddressSet."""
    raw_set = GhAddressSet()
    return AddressSet(raw_set)

highlight(color=HIGHLIGHT_COLOR)

Source code in ghidralib.py
1955
1956
1957
def highlight(self, color=HIGHLIGHT_COLOR):  # type: (Color) -> None
    """Set the background color of the addresses in this set.

    :param color: the color to use, HIGHLIGHT_COLOR by default"""
    self.__get_highlighter().setBackgroundColor(self.raw, color)

unhighlight()

Source code in ghidralib.py
1959
1960
1961
def unhighlight(self):  # type: () -> None
    """Clear the background color of the addresses in this set."""
    self.__get_highlighter().clearBackgroundColor(self.raw)

BasicBlock

Bases: AddressSet, BodyTrait

Wraps a Ghidra CodeBlock object

Source code in ghidralib.py
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
class BasicBlock(AddressSet, BodyTrait):
    """Wraps a Ghidra CodeBlock object"""

    @staticmethod
    def _model(model):  # type: (str) -> Callable[[JavaObject], JavaObject]
        """Map a model name to the Ghidra block model class.

        :param model: either "basic" or "simple"
        :raises ValueError: for any other model name"""
        if model == "basic":
            return BasicBlockModel
        elif model == "simple":
            return SimpleBlockModel
        else:
            raise ValueError("Unsupported model type: %s" % model)

    @staticmethod
    def get(
        raw_or_address, model="basic"
    ):  # type: (JavaObject|str|Addr, str) -> BasicBlock|None
        """Get a BasicBlock object containing the given address, or return None.

        This function is tolerant and will accept different types of arguments:

        * address as int
        * Address object
        * symbol as string (will be resolved)
        * BasicBlock object (wrapped or unwrapped)

        :param raw_or_address: find basicblock that contains the given address.
        :param model: Ghidra supports different types of basic block "models".
          Supported options are "basic" and "simple"."""

        if raw_or_address is None:
            return None
        if can_resolve(raw_or_address):
            block_model = BasicBlock._model(model)(Program.current())
            addr = try_resolve(raw_or_address)
            if addr is None:
                return None
            raw = block_model.getFirstCodeBlockContaining(addr, TaskMonitor.DUMMY)
            if raw is None:
                return None
        else:
            # Not something address-like: treat it as a block object.
            raw = raw_or_address
        return BasicBlock(raw)

    @staticmethod
    def all(model="basic"):  # type: (str) -> list[BasicBlock]
        """Get a list of all basic blocks in the program.

        :param model: block model to use, "basic" or "simple"."""
        block_model = BasicBlock._model(model)(Program.current())
        return [BasicBlock(b) for b in block_model.getCodeBlocks(TaskMonitor.DUMMY)]

    @property
    def name(self):  # type: () -> str
        """Get the name of this basic block.

        Return the symbol at the start of this basic block, if any. Otherwise,
        return the address of the first instruction as string."""
        return self.raw.getName()

    @property
    def address(self):  # type: () -> int
        """Get the address of the first instruction in this basic block."""
        return self.start_address

    @property
    def start_address(self):  # type: () -> int
        """Get the address of the first instruction in this basic block."""
        return self.raw.getMinAddress().getOffset()

    @property
    def end_address(self):  # type: () -> int
        """Get the address of the last byte in this basic block.

        Note: this is not the address of the last instruction.
        Note: end_address - start_address is equal to length - 1. For example,
        for one-byte basic block, start_address == end_address."""
        return self.raw.getMaxAddress().getOffset()

    @property
    def length(self):  # type: () -> int
        """Get the length of this basic block in bytes."""
        return self.end_address - self.start_address + 1

    @property
    def bytes(self):  # type: () -> bytes
        """Get the bytes of this basic block.

        :return: bytes of this basic block."""
        return read_bytes(self.start_address, self.length)

    @property
    def instructions(self):  # type: () -> list[Instruction]
        """Get a list of instructions in this basic block."""
        result = []
        instruction = getInstructionAt(resolve(self.start_address))
        # Walk forward until there is no next instruction or it starts
        # past the last byte of this block.
        while instruction and instruction.getAddress().getOffset() <= self.end_address:
            result.append(Instruction(instruction))
            instruction = instruction.getNext()
        return result

    @property
    def pcode(self):  # type: () -> list[PcodeOp]
        """Get a list of Pcode operations that this basic block was parsed to"""
        result = []
        for instruction in self.instructions:
            result.extend(instruction.pcode)
        return result

    @property
    def destinations(self):  # type: () -> list[BasicBlock]
        """Get a list of basic blocks that this basic block jumps to"""
        raw_refs = collect_iterator(self.raw.getDestinations(TaskMonitor.DUMMY))
        return [BasicBlock(raw.getDestinationBlock()) for raw in raw_refs]

    @property
    def sources(self):  # type: () -> list[BasicBlock]
        """Get a list of basic blocks that jump to this basic block"""
        raw_refs = collect_iterator(self.raw.getSources(TaskMonitor.DUMMY))
        return [BasicBlock(raw.getSourceBlock()) for raw in raw_refs]

    @property
    def body(self):  # type: () -> AddressSet
        """Get the address set of this basic block

        Technically BasicBlock (CodeBlock) is already an AddressSet,
        but I think this is a useful distinction to keep."""
        return AddressSet(self.raw)

    @property
    def flow_type(self):  # type: () -> FlowType
        """Get the flow type of this basic block.

        In other words, if any weird things with control flow are happening
        in this node."""
        return FlowType(self.raw.getFlowType())

    def __eq__(self, other):  # type: (object) -> bool
        """Compare two basic blocks for equality.

        Apparently Ghidra doesn't know how to do this"""
        if not isinstance(other, BasicBlock):
            return False
        # This is not fully correct, but more correct than the default.
        return self.address == other.address

    def __ne__(self, other):  # type: (object) -> bool
        """Inverse of __eq__.

        Python 2 does not derive != from ==, so it has to be spelled out."""
        return not self.__eq__(other)

    def __hash__(self):  # type: () -> int
        """Hash by start address, consistent with __eq__.

        Python 3 makes a class that only defines __eq__ unhashable, which
        would prevent using blocks in sets and as dictionary keys."""
        return hash(self.address)

address property

Get the address of the first instruction in this basic block.

body property

Get the address set of this basic block

Technically BasicBlock (CodeBlock) is already an AddressSet, but I think this is a useful distinction to keep.

bytes property

Get the bytes of this basic block.

Returns:
  • bytes of this basic block.

destinations property

Get a list of basic blocks that this basic block jumps to

end_address property

Get the address of the last byte in this basic block.

Note: this is not the address of the last instruction. Note: end_address - start_address is equal to length - 1. For example, for one-byte basic block, start_address == end_address.

flow_type property

Get the flow type of this basic block.

In other words, if any weird things with control flow are happening in this node.

instructions property

Get a list of instructions in this basic block.

length property

Get the length of this basic block in bytes.

name property

Get the name of this basic block.

Return the symbol at the start of this basic block, if any. Otherwise, return the address of the first instruction as string.

pcode property

Get a list of Pcode operations that this basic block was parsed to

sources property

Get a list of basic blocks that jump to this basic block

start_address property

Get the address of the first instruction in this basic block.

__eq__(other)

Compare two basic blocks for equality.

Apparently Ghidra doesn't know how to do this

Source code in ghidralib.py
2097
2098
2099
2100
2101
2102
2103
2104
def __eq__(self, other):  # type: (object) -> bool
    """Tell whether `other` is a BasicBlock with the same address.

    Anything that is not a BasicBlock compares unequal.
    Apparently Ghidra doesn't know how to do this"""
    # Comparing by address is not fully correct, but more correct
    # than the default.
    return isinstance(other, BasicBlock) and self.address == other.address

all(model='basic') staticmethod

Get a list of all basic blocks in the program.

Source code in ghidralib.py
2006
2007
2008
2009
2010
@staticmethod
def all(model="basic"):  # type: (str) -> list[BasicBlock]
    """Collect every code block of the current program as a BasicBlock.

    :param model: name of the block model, passed to `BasicBlock._model`."""
    model_class = BasicBlock._model(model)
    block_model = model_class(Program.current())
    wrapped = []
    for raw_block in block_model.getCodeBlocks(TaskMonitor.DUMMY):
        wrapped.append(BasicBlock(raw_block))
    return wrapped

get(raw_or_address, model='basic') staticmethod

Get a BasicBlock object containing the given address, or return None.

This function is tolerant and will accept different types of arguments:

  • address as int
  • Address object
  • symbol as string (will be resolved)
  • BasicBlock object (wrapped or unwrapped)

Parameters:
  • raw_or_address

    find basicblock that contains the given address.

  • model

    Ghidra supports different types of basic block "models". Supported options are "basic" and "simple".

Source code in ghidralib.py
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
@staticmethod
def get(
    raw_or_address, model="basic"
):  # type: (JavaObject|str|Addr, str) -> BasicBlock|None
    """Find the BasicBlock containing an address, or wrap a block object.

    Accepted arguments:

    * address as int
    * Address object
    * symbol as string (will be resolved)
    * BasicBlock object (wrapped or unwrapped)

    :param raw_or_address: address to look up, or a block object to wrap.
    :param model: name of the Ghidra basic block "model" to use.
      Supported options are "basic" and "simple".
    :return: the block, or None when the argument is None, the address
      cannot be resolved, or no block contains it."""
    if raw_or_address is None:
        return None

    # Anything that is not address-like is treated as a block object.
    if not can_resolve(raw_or_address):
        return BasicBlock(raw_or_address)

    # The model is instantiated before resolving, so an unsupported
    # model name is reported even for an address that does not resolve.
    block_model = BasicBlock._model(model)(Program.current())
    target = try_resolve(raw_or_address)
    if target is None:
        return None

    found = block_model.getFirstCodeBlockContaining(target, TaskMonitor.DUMMY)
    return None if found is None else BasicBlock(found)

BlockGraph

Bases: PcodeBlock

Source code in ghidralib.py
1221
1222
1223
1224
class BlockGraph(PcodeBlock):
    """A PcodeBlock whose raw object holds an indexed collection of blocks."""

    @property
    def blocks(self):  # type: () -> list[PcodeBlock]
        """Return the contained blocks, each converted with `_pcode_node`."""
        children = []
        for index in range(self.raw.getSize()):
            children.append(_pcode_node(self.raw.getBlock(index)))
        return children

blocks property

BodyTrait

A trait for objects that have a body.

It provides generic methods that work with anything that has a body (an assigned set of addresses in the program), such as highlighting.

Source code in ghidralib.py
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
class BodyTrait:
    """Mixin for objects that have a body.

    A body is the set of program addresses assigned to the object. The
    mixin builds generic operations, such as highlighting, on top of it."""

    @property
    @abstractmethod
    def body(self):  # type: () -> AddressSet
        """The body of this object"""

    def highlight(self, color=HIGHLIGHT_COLOR):  # type: (Color) -> None
        """Highlight the body of this object.

        :param color: the color to use, HIGHLIGHT_COLOR by default"""
        address_set = self.body
        address_set.highlight(color)

    def unhighlight(self):  # type: () -> None
        """Clear the highlight from the body of this object."""
        address_set = self.body
        address_set.unhighlight()

body abstractmethod property

The body of this object

highlight(color=HIGHLIGHT_COLOR)

Highlight the body of this object in the listing.

Source code in ghidralib.py
732
733
734
def highlight(self, color=HIGHLIGHT_COLOR):  # type: (Color) -> None
    """Highlight the body of this object.

    :param color: the color to use, HIGHLIGHT_COLOR by default"""
    address_set = self.body
    address_set.highlight(color)

unhighlight()

Clear the highlight from the body of this object.

Source code in ghidralib.py
736
737
738
def unhighlight(self):  # type: () -> None
    """Clear the highlight from the body of this object."""
    address_set = self.body
    address_set.unhighlight()

ClangTokenGroup

Bases: GhidraWrapper

Represents a group of clang tokens from a decompiler.

Warning: Currently this class is experimental, and should not be relied upon, except to get the Java object (with .raw) or maybe dump (.dump()).

Source code in ghidralib.py
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
class ClangTokenGroup(GhidraWrapper):
    """Represents a group of clang tokens from a decompiler.

    Warning: Currently this class is experimental, and should not be relied upon,
    except to get the Java object (with .raw) or maybe dump (.dump())."""

    def _cleanup(self, token):  # type: (JavaObject) -> JavaObject
        """Build a copy of a raw token group without comments and whitespace.

        Comment tokens, break tokens and syntax tokens whose text is empty
        or whitespace-only are dropped. Nested groups are cleaned recursively.

        :param token: the raw token group to copy
        :return: a new raw token group with the remaining children"""
        new = GhClangTokenGroup(token.Parent())
        # Iterate over a snapshot of the children.
        for child in list(token.iterator()):
            if isinstance(child, (ClangCommentToken, ClangBreak)):
                continue
            if isinstance(child, ClangSyntaxToken):
                text = child.getText()
                if not text or text.isspace():
                    continue
            if isinstance(child, GhClangTokenGroup):
                child = self._cleanup(child)
            new.AddTokenGroup(child)
        return new

    @property
    def cleaned(self):  # type: () -> ClangTokenGroup
        """Remove all whitespace and comments from this token group, recursively."""
        return ClangTokenGroup(self._cleanup(self.raw))

    def _dump(self, token, indent=0):  # type: (JavaObject, int) -> None
        """Print a token tree, one node per line, indented by nesting depth.

        Groups are printed as "[group]" followed by their children. Other
        tokens are printed with their Java class name in parentheses.

        :param token: the raw token or token group to print
        :param indent: current nesting depth (two spaces per level)"""
        prefix = indent * "  "
        if isinstance(token, GhClangTokenGroup):
            print("{}[group]".format(prefix))
            for child in token.iterator():
                self._dump(child, indent + 1)
        else:
            print("{}{} ({})".format(prefix, token, token.__class__.__name__))

    def dump(self):  # type: () -> None
        """Print the token tree of this group to standard output."""
        self._dump(self.raw)

cleaned property

Remove all whitespace and comments from this token group, recursively.

dump()

Source code in ghidralib.py
2410
2411
def dump(self):  # type: () -> None
    """Print the token tree of this group by passing `raw` to `_dump`."""
    root = self.raw
    self._dump(root)

DataType

Bases: GhidraWrapper

Source code in ghidralib.py
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
class DataType(GhidraWrapper):
    """Wraps a Ghidra data type object."""

    @staticmethod
    def get(name_or_raw):  # type: (DataT) -> DataType|None
        """Gets a data type by name, or returns None if not found.

        Warning: this method is relatively slow, since it scans
        all data types in all data type managers.

            >>> DataType.get("int")
            int

        :param name_or_raw: the name of the data type. Anything that is
          not a string is wrapped directly instead of being looked up.
        :return: the data type, or None if not found"""
        if not isinstance(name_or_raw, Str):
            return DataType(name_or_raw)

        for datatype in DataType.all():
            if datatype.name == name_or_raw:
                # DataType.all() already returns wrappers, no need to re-wrap.
                return datatype
        return None

    @staticmethod
    def all(only_local=False):  # type: (bool) -> list[DataType]
        """Get all data types

        :param only_local: if True, return only local data types. Otherwise,
          will scan all data types in all data type managers."""
        datatypes = list(Program.current().getDataTypeManager().getAllDataTypes())
        if not only_local:
            managers = (
                getState()
                .getTool()
                .getService(DataTypeManagerService)
                .getDataTypeManagers()
            )
            for manager in managers:
                for datatype in manager.getAllDataTypes():
                    datatypes.append(datatype)
        return [DataType(raw) for raw in datatypes]

    @property
    def name(self):  # type: () -> str
        """Get a name of this data type

            >>> DataType('int').name
            'int'
        .
        """
        return self.raw.getName()

    def get_name(self, value):  # type: (int) -> str
        """If this data type is an enum, get the name of the value.

        :param value: the value to get the name of"""
        return self.raw.getName(value)

    def length(self):  # type: () -> int
        """Get the length of this data type in bytes

            >>> DataType('int').length()
            4
        .
        """
        return self.raw.getLength()

    __len__ = length

    @staticmethod
    def from_c(c_code, insert=True):  # type: (str, bool) -> DataType
        """Parse C structure definition and return the parsed DataType.

        If insert (true by default), add it to current program.
        Example of a valid c_code is `typedef void* HINTERNET;`

            >>> DataType.from_c('typedef void* HINTERNET;')
            HINTERNET
            >>> DataType.from_c("struct test { short a; short b; short c;};")
            pack()
            Structure test {
            0   short   2   a   ""
            2   short   2   b   ""
            4   short   2   c   ""
            }
            Length: 6 Alignment: 2

        :param c_code: the C structure definition
        :param insert: if True, add the data type to the current program
        :return: the object produced by the parser, returned as is
        """
        dtm = Program.current().getDataTypeManager()
        parser = CParser(dtm)

        new_dt = parser.parse(c_code)

        if insert:
            transaction = dtm.startTransaction("Adding new data")
            committed = False
            try:
                dtm.addDataType(new_dt, None)
                committed = True
            finally:
                # Always close the transaction, and only commit it when
                # the data type was actually added.
                dtm.endTransaction(transaction, committed)

        return new_dt

__len__ = length class-attribute instance-attribute

name property

Get a name of this data type

>>> DataType('int').name
'int'

.

all(only_local=False) staticmethod

Get all data types

Parameters:
  • only_local

    if True, return only local data types. Otherwise, will scan all data types in all data type managers.

Source code in ghidralib.py
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
@staticmethod
def all(only_local=False):  # type: (bool) -> list[DataType]
    """List data types as DataType wrappers.

    The data types of the current program's data type manager come first.

    :param only_local: if True, stop there. Otherwise also append the data
      types of every manager known to the tool's DataTypeManagerService."""
    program_manager = Program.current().getDataTypeManager()
    raw_types = list(program_manager.getAllDataTypes())
    if not only_local:
        service = getState().getTool().getService(DataTypeManagerService)
        for manager in service.getDataTypeManagers():
            raw_types += list(manager.getAllDataTypes())
    return [DataType(raw) for raw in raw_types]

from_c(c_code, insert=True) staticmethod

Parse C structure definition and return the parsed DataType.

If insert (true by default), add it to current program. Example of a valid c_code is typedef void* HINTERNET;

>>> DataType.from_c('typedef void* HINTERNET;')
HINTERNET
>>> DataType.from_c("struct test { short a; short b; short c;};")
pack()
Structure test {
0   short   2   a   ""
2   short   2   b   ""
4   short   2   c   ""
}
Length: 6 Alignment: 2
Parameters:
  • c_code

    the C structure definition

  • insert

    if True, add the data type to the current program

Source code in ghidralib.py
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
@staticmethod
def from_c(c_code, insert=True):  # type: (str, bool) -> DataType
    """Parse C structure definition and return the parsed DataType.

    If insert (true by default), add it to current program.
    Example of a valid c_code is `typedef void* HINTERNET;`

        >>> DataType.from_c('typedef void* HINTERNET;')
        HINTERNET
        >>> DataType.from_c("struct test { short a; short b; short c;};")
        pack()
        Structure test {
        0   short   2   a   ""
        2   short   2   b   ""
        4   short   2   c   ""
        }
        Length: 6 Alignment: 2

    :param c_code: the C structure definition
    :param insert: if True, add the data type to the current program
    :return: the object produced by the parser, returned as is
    """
    dtm = Program.current().getDataTypeManager()
    parser = CParser(dtm)

    new_dt = parser.parse(c_code)

    if insert:
        transaction = dtm.startTransaction("Adding new data")
        committed = False
        try:
            dtm.addDataType(new_dt, None)
            committed = True
        finally:
            # Always close the transaction, and only commit it when
            # the data type was actually added.
            dtm.endTransaction(transaction, committed)

    return new_dt

get(name_or_raw) staticmethod

Gets a data type by name, or returns None if not found.

Warning: this method is relatively slow, since it scans all data types in all data type managers.

>>> DataType.get("int")
int
Parameters:
  • name_or_raw

    the name of the data type

Returns:
  • the data type, or None if not found

Source code in ghidralib.py
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
@staticmethod
def get(name_or_raw):  # type: (DataT) -> DataType|None
    """Look up a data type by name, or wrap an already-known data type.

    Anything that is not a string is passed straight to the DataType
    constructor. For a string, the first data type whose name equals it
    is returned, or None when there is no such data type.

    Warning: the lookup by name is relatively slow, since it scans
    all data types in all data type managers.

        >>> DataType.get("int")
        int

    :param name_or_raw: the name of the data type
    :return: the data type, or None if not found"""
    if isinstance(name_or_raw, Str):
        candidates = (dt for dt in DataType.all() if dt.name == name_or_raw)
        match = next(candidates, None)
        return None if match is None else DataType(match)

    return DataType(name_or_raw)

get_name(value)

If this data type is an enum, get the name of the value.

Parameters:
  • value

    the value to get the name of

Source code in ghidralib.py
3117
3118
3119
3120
3121
def get_name(self, value):  # type: (int) -> str
    """If this data type is an enum, get the name of the value.

    The lookup is delegated to the wrapped object's `getName(value)`.

    :param value: the value to get the name of
    :return: whatever the wrapped object's `getName` returns for `value`
      (presumably the enum member name - confirm against the Ghidra docs)"""
    return self.raw.getName(value)

length()

Get the length of this data type in bytes.

>>> DataType('int').length()
4

Source code in ghidralib.py
3123
3124
3125
3126
3127
3128
3129
3130
def length(self):  # type: () -> int
    """Get the length of this data type in bytes.

        >>> DataType('int').length()
        4

    :return: the value reported by the wrapped object's `getLength()`.
    """
    return self.raw.getLength()

Emulator

Bases: GhidraWrapper

Wraps a Ghidra EmulatorHelper object.

Source code in ghidralib.py
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
class Emulator(GhidraWrapper):
    """Wraps a Ghidra EmulatorHelper object."""

    def __init__(self):  # type: () -> None
        """Create a new Emulator object.

        The emulator is created for the current program, and the stack
        pointer register is initialised to the middle of the address space
        minus 0x8000."""
        raw = EmulatorHelper(Program.current())
        GhidraWrapper.__init__(self, raw)

        # Use max_addr/2-0x8000 as stack pointer - this is 0x7fff8000 on 32-bit CPU.
        max_pointer = toAddr(0).getAddressSpace().getMaxAddress().getOffset()
        max_pointer = max_pointer % 2**64  # Java signed ints everywhere strike again.
        stack_off = ((max_pointer + 1) >> 1) - 0x8000
        self.raw.writeRegister(self.raw.getStackPointerRegister(), stack_off)

        # TODO: add a simple allocation manager
        # Maps an address offset to the hook callable registered for it.
        self._hooks = {}  # type: dict[int, Callable[[Emulator], str|None]]

    def add_hook(
        self, address, hook
    ):  # type: (Addr, Callable[[Emulator], str|None]) -> None
        """Add a hook at a specified address.

        Hook is a function that gets emulator as parameter. It can return one of:

        * 'continue' or None, to continue execution normally
        * 'break' to stop execution
        * 'skip' to skip the next instruction

        Note: multiple hooks at the same address are not currently supported.

        :param address: the address to hook
        :param hook: the function to call when the address is reached
        :raises ValueError: if a hook is already registered at this address"""
        addr = resolve(address).getOffset()
        if addr in self._hooks:
            raise ValueError("Multiple hooks at the same address are not supported")
        self._hooks[addr] = hook

    def has_hook_at(self, address):  # type: (Addr) -> bool
        """Check if a hook is registered at the given address.

        :param address: the address to check
        :return: True if a hook was added at this address."""
        addr = resolve(address).getOffset()
        return addr in self._hooks

    def delete_hook_at(self, address):  # type: (Addr) -> None
        """Remove the hook registered at the given address.

        :param address: the address to remove the hook from
        :raises KeyError: if there is no hook at this address"""
        addr = resolve(address).getOffset()
        del self._hooks[addr]

    @property
    def pc(self):  # type: () -> int
        """Get the program counter of the emulated program."""
        return self.raw.getExecutionAddress().getOffset()

    @pc.setter
    def pc(self, address):  # type: (Addr) -> None
        """Set the program counter of the emulated program."""
        self.set_pc(address)

    def set_pc(self, address):  # type: (Addr) -> None
        """Set the program counter of the emulated program.

        :param address: the new program counter value."""
        pc = self.raw.getPCRegister()
        self.raw.writeRegister(pc, resolve(address).getOffset())

    @property
    def sp_register(self):  # type: () -> str
        """Get the stack pointer register name for the emulated architecture."""
        return self.raw.getStackPointerRegister().getName()

    @property
    def sp(self):  # type: () -> int
        """Get the current stack pointer register value."""
        return self.read_register(self.sp_register)

    @sp.setter
    def sp(self, value):  # type: (Addr) -> None
        """Set the current stack pointer register value.

        :param value: new stack pointer value."""
        self.set_sp(value)

    def set_sp(self, value):  # type: (Addr) -> None
        """Set the current stack pointer register value.

        :param value: new stack pointer value."""
        self.write_register(self.sp_register, resolve(value).getOffset())

    def __getitem__(self, reg):  # type: (Reg) -> int
        """Read the register of the emulated program.

            >>> emulator.write_register("eax", 1337)
            >>> emulator["eax"]
            1337

        :param reg: the register to read from"""
        return self.read_register(reg)

    def __setitem__(self, reg, value):  # type: (Reg, int) -> None
        """Write to the register of the emulated program.

            >>> emulator["eax"] = 1234
            >>> emulator.read_register("eax")
            1234

        :param reg: the register to write to
        :param value: the value to write"""
        self.write_register(reg, value)

    def read_register(self, reg):  # type: (Reg) -> int
        """Read from the register of the emulated program.

            >>> emulator.write_register("eax", 1337)
            >>> emulator.read_register("eax")
            1337

        :param reg: the register to read from."""
        return _python_int(self.raw.readRegister(reg))

    def read_bytes(self, address, length):  # type: (Addr, int) -> bytes
        """Read `length` bytes at `address` from the emulated program.

            >>> emulator.write_bytes(0x1000, "1")
            >>> emulator.read_bytes(0x1000, 1)
            '1'

        :param address: the address to read from
        :param length: the length to read"""
        bytelist = self.raw.readMemory(resolve(address), length)
        return _bytes_from_bytelist(bytelist)

    def read_u8(self, address):  # type: (Addr) -> int
        """Read a byte from the emulated program.

            >>> emulator.write_u8(0x1000, 13)
            >>> emulator.read_u8(0x1000)
            13

        :param address: the address to read from"""
        return from_bytes(self.read_bytes(address, 1))

    def read_u16(self, address):  # type: (Addr) -> int
        """Read a 16bit unsigned integer from the emulated program.

            >>> emulator.write_u16(0x1000, 123)
            >>> emulator.read_u16(0x1000)
            123

        :param address: the address to read from"""
        return from_bytes(self.read_bytes(address, 2))

    def read_u32(self, address):  # type: (Addr) -> int
        """Read a 32bit unsigned integer from the emulated program.

            >>> emulator.write_u32(0x1000, 123)
            >>> emulator.read_u32(0x1000)
            123

        :param address: the address to read from"""
        return from_bytes(self.read_bytes(address, 4))

    def read_u64(self, address):  # type: (Addr) -> int
        """Read a 64bit unsigned integer from the emulated program.

            >>> emulator.write_u64(0x1000, 123)
            >>> emulator.read_u64(0x1000)
            123

        :param address: the address to read from"""
        return from_bytes(self.read_bytes(address, 8))

    def read_cstring(self, address):  # type: (Addr) -> str
        """Read a null-terminated string from the emulated program.

        This function reads bytes until a nullbyte is encountered.

            >>> emu.read_cstring(0x1000)
            'Hello, world!'

        :param address: address from which to start reading."""
        addr = resolve(address)
        chars = []
        while True:
            c = self.read_u8(addr)
            if c == 0:
                break
            chars.append(chr(c))
            addr = addr.add(1)
        # Join once at the end instead of growing a string per character.
        return "".join(chars)

    def read_unicode(self, address):  # type: (Addr) -> str
        """Read a null-terminated utf-16 string from the emulated program.

        This function reads 16bit units until a null character is encountered.
        Every unit is converted to a character on its own.

            >>> emu.read_unicode(0x1000)
            'Hello, world!'

        :param address: address from which to start reading."""
        addr = resolve(address)
        chars = []
        while True:
            c = self.read_u16(addr)
            if c == 0:
                break
            chars.append(chr(c))
            addr = addr.add(2)
        # Join once at the end instead of growing a string per character.
        return "".join(chars)

    def read_varnode(self, varnode):  # type: (Varnode) -> int
        """Read from the varnode from the emulated program.

        This method can't read hash varnodes.

            >>> fnc = Function("AddNumbers")
            >>> emu = Emulator()
            >>> emu.write_varnode(fnc.parameters[0].varnode, 2)
            >>> emu.write_varnode(fnc.parameters[1].varnode, 2)
            >>> emu.emulate(fnc.entrypoint, stop_when=lambda emu: emu.pc not in fnc.body)
            >>> emu.read_varnode(fnc.return_variable.varnode)
            4

        :param varnode: the varnode to read from.
        :raises RuntimeError: if the varnode type is not supported."""
        varnode = Varnode(varnode)
        if varnode.is_constant:
            return varnode.offset
        elif varnode.is_address:
            rawnum = self.read_bytes(varnode.offset, varnode.size)
            return from_bytes(rawnum)
        elif varnode.is_unique:
            space = Program.current().getAddressFactory().getUniqueSpace()
            offset = space.getAddress(varnode.offset)
            rawnum = self.read_bytes(offset, varnode.size)
            return from_bytes(rawnum)
        elif varnode.is_stack:
            # Convert the same way read_register does, so every branch of
            # this method hands back a plain Python integer.
            return _python_int(
                self.raw.readStackValue(varnode.offset, varnode.size, False)
            )
        elif varnode.is_register:
            language = Program.current().getLanguage()
            reg = language.getRegister(varnode.raw.getAddress(), varnode.size)
            return self.read_register(reg)
        raise RuntimeError("Unsupported varnode type")

    def write_register(self, reg, value):  # type: (Reg, int) -> None
        """Write to the register of the emulated program.

            >>> emulator.write_register("eax", 1)
            >>> emulator.read_register("eax")
            1

        :param reg: the register to write to
        :param value: the value to write"""
        self.raw.writeRegister(reg, value)

    def write_bytes(self, address, value):  # type: (Addr, bytes) -> None
        """Write to the memory of the emulated program.

            >>> emulator.write_bytes(0x1000, "1")
            >>> emulator.read_bytes(0x1000, 1)
            '1'

        :param address: the address to write to
        :param value: the value to write"""
        self.raw.writeMemory(resolve(address), value)

    def write_u8(self, address, value):  # type: (Addr, int) -> None
        """Write a byte to the emulated program.

            >>> emulator.write_u8(0x1000, 13)
            >>> emulator.read_u8(0x1000)
            13

        :param address: the address to write to
        :param value: the value to write, in range 0 <= value < 2**8"""
        assert 0 <= value < 2**8, "value out of range"
        self.write_bytes(address, to_bytes(value, 1))

    def write_u16(self, address, value):  # type: (Addr, int) -> None
        """Write a 16bit unsigned integer to the emulated program.

            >>> emulator.write_u16(0x1000, 13)
            >>> emulator.read_u16(0x1000)
            13

        :param address: the address to write to
        :param value: the value to write, in range 0 <= value < 2**16"""
        assert 0 <= value < 2**16, "value out of range"
        self.write_bytes(address, to_bytes(value, 2))

    def write_u32(self, address, value):  # type: (Addr, int) -> None
        """Write a 32bit unsigned integer to the emulated program.

            >>> emulator.write_u32(0x1000, 13)
            >>> emulator.read_u32(0x1000)
            13

        :param address: the address to write to
        :param value: the value to write, in range 0 <= value < 2**32"""
        assert 0 <= value < 2**32, "value out of range"
        self.write_bytes(address, to_bytes(value, 4))

    def write_u64(self, address, value):  # type: (Addr, int) -> None
        """Write a 64bit unsigned integer to the emulated program.

            >>> emulator.write_u64(0x1000, 13)
            >>> emulator.read_u64(0x1000)
            13

        :param address: the address to write to
        :param value: the value to write, in range 0 <= value < 2**64"""
        assert 0 <= value < 2**64, "value out of range"
        self.write_bytes(address, to_bytes(value, 8))

    def write_varnode(self, varnode, value):  # type: (Varnode, int) -> None
        """Set a varnode value in the emulated context.

        This method can't set hash and constant varnodes.

            >>> fnc = Function("AddNumbers")
            >>> emu = Emulator()
            >>> emu.write_varnode(fnc.parameters[0].varnode, 2)
            >>> emu.write_varnode(fnc.parameters[1].varnode, 2)
            >>> emu.emulate(fnc.entrypoint, stop_when=lambda emu: emu.pc not in fnc.body)
            >>> emu.read_varnode(fnc.return_variable.varnode)
            4

        :param varnode: the varnode to write to.
        :param value: the value to write.
        :raises ValueError: if the varnode is a constant.
        :raises RuntimeError: if the varnode type is not supported."""
        varnode = Varnode(varnode)
        if varnode.is_constant:
            raise ValueError("Can't set value of a constant varnodes")
        elif varnode.is_address:
            self.write_bytes(varnode.offset, to_bytes(value, varnode.size))
        elif varnode.is_unique:
            space = Program.current().getAddressFactory().getUniqueSpace()
            offset = space.getAddress(varnode.offset)
            self.write_bytes(offset, to_bytes(value, varnode.size))
        elif varnode.is_stack:
            self.raw.writeStackValue(varnode.offset, varnode.size, value)
        elif varnode.is_register:
            language = Program.current().getLanguage()
            reg = language.getRegister(varnode.raw.getAddress(), varnode.size)
            self.raw.writeRegister(reg, value)
        else:
            raise RuntimeError("Unsupported varnode type")

    def __run_with_hooks(self):  # type: () -> bool
        """Run the Ghidra emulator, and transparently handle all hooks.

        :return: True if emulator stopped at a breakpoint, or
          hook asked emulator to stop (by returning break)."""

        while not getMonitor().isCancelled():
            is_breakpoint = self.raw.run(getMonitor())
            # NOTE(review): a hook is only consulted when run() hands control
            # back with pc at a hooked address - confirm what makes the
            # underlying emulator stop there.
            if self.pc not in self._hooks:
                return is_breakpoint

            result = self._hooks[self.pc](self)
            if self.__handle_hook_result(result):
                return True

        return False

    def add_breakpoint(self, address):  # type: (Addr) -> None
        """Add a breakpoint at the given address.

        :param address: the address to break on"""
        self.raw.setBreakpoint(resolve(address))

    def clear_breakpoint(self, address):  # type: (Addr) -> None
        """Clear a breakpoint at the given address.

        :param address: the address to clear breakpoint from"""
        self.raw.clearBreakpoint(resolve(address))

    def emulate_fast(self, start, ends):  # type: (Addr, Addr|list[Addr]) -> None
        """Emulate from start to end address, using Ghidra for fast emulation.

        The main loop of this function is in Java, which makes it faster, but makes
        some features (like callbacks) impossible. This function stops on error,
        when PC reaches one of the ends, and will also call hooks.

        This method will set a breakpoint at the end address, and clear it after
        the emulation is done (also when the emulation is aborted by an
        exception, for example one raised by a hook).

            >>> emulator.write_bytes(0x2000, "1")
            >>> emulator.emulate_fast(0x1000, 0x1005)
            >>> emulator.read_bytes(0x2000, 1)
            '0'

        :param start: the start address to emulate
        :param ends: one or many end address
        :raises RuntimeError: if the emulation stopped for another reason
          than a breakpoint or a hook asking to break."""
        self.set_pc(start)

        if not isinstance(ends, (list, tuple)):
            ends = [ends]

        for end in ends:
            self.add_breakpoint(end)

        try:
            is_breakpoint = self.__run_with_hooks()
        finally:
            # Never leave the temporary breakpoints behind, even when a hook
            # raised or returned an invalid value.
            for end in ends:
                self.clear_breakpoint(end)

        if not is_breakpoint:
            err = self.raw.getLastError()
            raise RuntimeError("Error when running: {}".format(err))

    def __handle_hook_result(self, result):  # type: (str|None) -> bool
        """Handle a hook return value and return True if emulation should stop.

        :param result: the value returned by a hook or a callback
        :raises RuntimeError: if the value is not a supported command."""
        if result is None or result == "continue":
            return False
        elif result == "skip":
            # Move pc past the instruction currently at pc without running it.
            self.pc = Instruction(self.pc).next.address
            return False
        elif result == "break":
            return True
        else:
            raise RuntimeError("Invalid hook return value: {}".format(result))

    def single_step(self):  # type: () -> bool
        """Do a single emulation step. This will step into calls.

        Note: This method *will* call hooks.

        :return: True if the emulation should be stopped, False otherwise.
        :raises RuntimeError: if the emulator reports a failed step."""
        success = self.raw.step(getMonitor())
        if not success:
            err = self.raw.getLastError()
            raise RuntimeError("Error at {}: {}".format(self.pc, err))

        if self.pc in self._hooks:
            result = self._hooks[self.pc](self)
            return self.__handle_hook_result(result)
        if self.is_at_breakpoint:
            return True
        return False

    @staticmethod
    def new(
        start,
        ends=[],
        callback=lambda emu: None,
        stop_when=lambda emu: False,
        maxsteps=2**48,
    ):  # type: (Addr, Addr|list[Addr], Callable[[Emulator], str|None], Callable[[Emulator], bool], int) -> Emulator
        """Emulate from start to end address, with callback for each executed address.

            >>> Emulator.new("main", maxsteps=100)["EAX"]
            128

        This function is a convenience wrapper around emulate and can be always
        replaced by three lines of code. The above is equivalent to:

            >>> emu = Emulator()
            >>> emu.emulate("main", maxsteps=100)
            >>> emu["EAX"]
            128

        This function may be used for quickly doing one-off emulations.

        See `emulate` documentation for info about this method parameters.

        :return: the emulator, in the state the emulation left it in."""
        emu = Emulator()
        emu.emulate(start, ends, callback, stop_when, maxsteps)
        return emu

    def emulate(
        self,
        start,
        ends=[],
        callback=lambda emu: None,
        stop_when=lambda emu: False,
        maxsteps=2**48,
    ):  # type: (Addr, Addr|list[Addr], Callable[[Emulator], str|None], Callable[[Emulator], bool], int) -> None
        """Emulate from start to end address, with callback for each executed address.

            >>> emu = Emulator()
            >>> def callback(emu):
            ...     print("executing {:x}".format(emu.pc))
            >>> emu.emulate(Function("main").entrypoint, callback=callback, maxsteps=3)
            SUB ESP,0x2d4
            PUSH EBX
            PUSH EBP

        Callback should return one of:

        * 'continue' or None, to continue execution normally
        * 'break' to stop execution
        * 'skip' to skip the next instruction
        * 'retry' like continue, but call the callback again (useful after pc change)
        * 'continue_then_break' to execute one last instruction before stopping

        Returning another value will cause an exception. Callback is executed
        before stop_when condition is checked.

        This method is very flexible, but because of that it may be slower than
        pure Ghidra implementation. Consider .emulate_fast() when this method is too
        slow for you.

        :param start: the start address to emulate
        :param ends: one or many end addresses - emulation stops when pc
          reaches any of them
        :param callback: the callback to call before each executed instruction.
          Return one of the predefined constants here (see the docs for more info).
        :param stop_when: the callback to call before each executed instruction.
          Return True here to stop emulation.
        :param maxsteps: the maximum number of steps to execute"""
        self.set_pc(start)

        if not isinstance(ends, (list, tuple)):
            ends = [ends]
        # Checked on every step, so keep the end offsets in a set.
        end_offsets = set(resolve(e).getOffset() for e in ends)

        while maxsteps > 0:
            maxsteps -= 1
            if self.pc in end_offsets:
                break

            command = callback(self)
            if command == "retry":
                continue
            elif command == "continue_then_break":
                # Let the current iteration finish, then fall out of the loop.
                maxsteps = 0
            elif self.__handle_hook_result(command):
                return

            if stop_when(self):
                return

            if self.single_step():
                return

    @property
    def is_at_breakpoint(self):  # type: () -> bool
        """Check if the emulator is at a breakpoint"""
        return self.raw.getEmulator().isAtBreakpoint()

    # Basic unicorn compatibility, because why not
    # You may prefer these aliases if you already know Unicorn API.
    reg_write = write_register
    reg_read = read_register
    mem_write = write_bytes
    mem_read = read_bytes
    mem_map = (
        lambda _1, _2, _3: None
    )  # This is a noop - all memory is already available.
    emu_start = lambda self, begin, until: self.emulate(begin, until)

emu_start = lambda self, begin, until: self.emulate(begin, until) class-attribute instance-attribute

is_at_breakpoint property

Check if the emulator is at a breakpoint

mem_map = lambda _1, _2, _3: None class-attribute instance-attribute

mem_read = read_bytes class-attribute instance-attribute

mem_write = write_bytes class-attribute instance-attribute

pc property writable

Get the program counter of the emulated program.

reg_read = read_register class-attribute instance-attribute

reg_write = write_register class-attribute instance-attribute

sp property writable

Get the current stack pointer register value.

sp_register property

Get the stack pointer register name for the emulated architecture.

__getitem__(reg)

Read the register of the emulated program.

>>> emulator.write_register("eax", 1337)
>>> emulator["eax"]
1337
Parameters:
  • reg

    the register to read from

Source code in ghidralib.py
3248
3249
3250
3251
3252
3253
3254
3255
3256
def __getitem__(self, reg):  # type: (Reg) -> int
    """Read the register of the emulated program.

    This is the indexing shortcut for `read_register`.

        >>> emulator.write_register("eax", 1337)
        >>> emulator["eax"]
        1337

    :param reg: the register to read from
    :return: whatever `read_register` returns for `reg`"""
    return self.read_register(reg)

__handle_hook_result(result)

Handle a hook return value and return True if emulation should stop.

Source code in ghidralib.py
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
def __handle_hook_result(self, result):  # type: (str|None) -> bool
    """Interpret the value returned by a hook.

    * 'break' asks the caller to stop the emulation
    * 'skip' moves pc to the instruction following the one at pc
    * None and 'continue' change nothing

    :param result: the value returned by the hook
    :return: True if emulation should stop, False otherwise.
    :raises RuntimeError: for any other value."""
    if result == "break":
        return True

    if result == "skip":
        following = Instruction(self.pc).next
        self.pc = following.address
        return False

    if result is None or result == "continue":
        return False

    raise RuntimeError("Invalid hook return value: {}".format(result))

__init__()

Create a new Emulator object.

Source code in ghidralib.py
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
def __init__(self):  # type: () -> None
    """Create a new Emulator object.

    Wraps a fresh EmulatorHelper for the current program, points the stack
    pointer register at a default location and starts with no hooks."""
    raw = EmulatorHelper(Program.current())
    GhidraWrapper.__init__(self, raw)

    # Use max_addr/2-0x8000 as stack pointer - this is 0x7fff8000 on 32-bit CPU.
    max_pointer = toAddr(0).getAddressSpace().getMaxAddress().getOffset()
    # The offset may come back negative; the modulo maps it to the unsigned value.
    max_pointer = max_pointer % 2**64  # Java signed ints everywhere strike again.
    stack_off = ((max_pointer + 1) >> 1) - 0x8000
    self.raw.writeRegister(self.raw.getStackPointerRegister(), stack_off)

    # TODO: add a simple allocation manager
    # Maps an address offset to the hook callable registered for it.
    self._hooks = {}  # type: dict[int, Callable[[Emulator], str|None]]

__run_with_hooks()

Run the Ghidra emulator, and transparently handle all hooks.

Returns:
  • True if emulator stopped at a breakpoint, or hook asked emulator to stop (by returning break).

Source code in ghidralib.py
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
def __run_with_hooks(self):  # type: () -> bool
    """Run the Ghidra emulator, and transparently handle all hooks.

    Execution is resumed in a loop for as long as it keeps stopping at
    hooked addresses and the hooks do not ask to stop.

    :return: True if emulator stopped at a breakpoint, or
      hook asked emulator to stop (by returning break). False if the
      monitor was cancelled, or the run ended elsewhere without a breakpoint."""

    while not getMonitor().isCancelled():
        is_breakpoint = self.raw.run(getMonitor())
        # Stopped somewhere without a hook: report the run() result as is.
        if self.pc not in self._hooks:
            return is_breakpoint

        # Stopped at a hooked address: let the hook decide what happens next.
        result = self._hooks[self.pc](self)
        if self.__handle_hook_result(result):
            return True

    return False

__setitem__(reg, value)

Write to the register of the emulated program.

>>> emulator["eax"] = 1234
>>> emulator.read_register("eax")
1234
Parameters:
  • reg

    the register to write to

  • value

    the value to write

Source code in ghidralib.py
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
def __setitem__(self, reg, value):  # type: (Reg, int) -> None
    """Write to the register of the emulated program.

    This is the indexing shortcut for `write_register`.

        >>> emulator["eax"] = 1234
        >>> emulator.read_register("eax")
        1234

    :param reg: the register to write to
    :param value: the value to write"""
    self.write_register(reg, value)

add_breakpoint(address)

Add a breakpoint at the given address.

Parameters:
  • address

    the address to break on

Source code in ghidralib.py
3517
3518
3519
3520
3521
def add_breakpoint(self, address):  # type: (Addr) -> None
    """Add a breakpoint at the given address.

    The address is passed through `resolve` before it is handed to the
    wrapped emulator's `setBreakpoint`.

    :param address: the address to break on"""
    self.raw.setBreakpoint(resolve(address))

add_hook(address, hook)

Add a hook at a specified address.

Hook is a function that gets emulator as parameter. It can return one of:

  • 'continue' or None, to continue execution normally
  • 'break' to stop execution
  • 'skip' to skip the next instruction

Note: multiple hooks at the same address are not currently supported.

Source code in ghidralib.py
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
def add_hook(
    self, address, hook
):  # type: (Addr, Callable[[Emulator], str|None]) -> None
    """Register a hook for the given address.

    The hook is called with the emulator as its only argument. It can
    return one of:

    * 'continue' or None, to continue execution normally
    * 'break' to stop execution
    * 'skip' to skip the next instruction

    Note: multiple hooks at the same address are not currently supported.

    :param address: the address to hook
    :param hook: the function to call
    :raises ValueError: if this address already has a hook"""
    offset = resolve(address).getOffset()
    if offset not in self._hooks:
        self._hooks[offset] = hook
        return
    raise ValueError("Multiple hooks at the same address are not supported")

clear_breakpoint(address)

Clear a breakpoint at the given address.

Parameters:
  • address

    the address to clear breakpoint from

Source code in ghidralib.py
3523
3524
3525
3526
3527
def clear_breakpoint(self, address):  # type: (Addr) -> None
    """Clear a breakpoint at the given address.

    The address is passed through `resolve` before it is handed to the
    wrapped emulator's `clearBreakpoint`.

    :param address: the address to clear breakpoint from"""
    self.raw.clearBreakpoint(resolve(address))

delete_hook_at(address)

Source code in ghidralib.py
3206
3207
3208
def delete_hook_at(self, address):  # type: (Addr) -> None
    """Remove the hook registered at ``address``.

    The entry is deleted unconditionally, so the deletion fails when no hook
    is registered there (a ``KeyError`` if ``_hooks`` is a plain dict).

    :param address: the address whose hook should be removed"""
    offset = resolve(address).getOffset()
    hooks = self._hooks
    del hooks[offset]

emulate(start, ends=[], callback=lambda emu: None, stop_when=lambda emu: False, maxsteps=2 ** 48)

Emulate from start to end address, with callback for each executed address.

>>> emu = Emulator()
>>> def callback(emu):
>>>     print("executing {:x}".format(emu.pc))
>>> emu.emulate(Function("main").entrypoint, callback=callback, maxsteps=3)
SUB ESP,0x2d4
PUSH EBX
PUSH EBP

Callback should return one of:

  • 'continue' or None, to continue execution normally
  • 'break' to stop execution
  • 'skip' to skip the next instruction
  • 'retry' like continue, but call the callback again (useful after pc change)
  • 'continue_then_break' to execute one last instruction before stopping

Returning another value will cause an exception. The callback is executed before the stop_when condition is checked.

This method is very flexible, but because of that it may be slower than pure Ghidra implementation. Consider .emulate_fast() when this method is too slow for you.

Parameters:
  • start

    the start address to emulate

  • ends

    the end address to emulate

  • callback

    the callback to call before each executed instruction. Return one of the predefined constants here (see the docs for more info).

  • stop_when

    the callback to call before each executed instruction. Return True here to stop emulation.

  • maxsteps

    the maximum number of steps to execute

Source code in ghidralib.py
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
def emulate(
    self,
    start,
    ends=[],
    callback=lambda emu: None,
    stop_when=lambda emu: False,
    maxsteps=2**48,
):  # type: (Addr, Addr|list[Addr], Callable[[Emulator], str|None], Callable[[Emulator], bool], int) -> None
    """Emulate from start to end address, with callback for each executed address.

        >>> emu = Emulator()
        >>> def callback(emu):
        >>>     print("executing {:x}".format(emu.pc))
        >>> emu.emulate(Function("main").entrypoint, callback=callback, maxsteps=3)
        SUB ESP,0x2d4
        PUSH EBX
        PUSH EBP

    Callback should return one of:

    * 'continue' or None, to continue execution normally
    * 'break' to stop execution
    * 'skip' to skip the next instruction
    * 'retry' like continue, but call the callback again (useful after pc change)
    * 'continue_then_break' to execute one last instruction before stopping

    Returning another value will cause an exception. Callback is executed
    before stop_when condition is checked.

    Every loop iteration consumes one step from ``maxsteps``, including
    iterations that end with 'retry'.

    This method is very flexible, but because of that it may be slower than
    pure Ghidra implementation. Consider .emulate_fast() when this method is too
    slow for you.

    :param start: the start address to emulate
    :param ends: a single end address, or a list/tuple of end addresses;
      emulation stops before executing the instruction at any of them
    :param callback: the callback to call before each executed instruction.
      Return one of the predefined constants here (see the docs for more info).
    :param stop_when: the callback to call before each executed instruction.
      Return True here to stop emulation.
    :param maxsteps: the maximum number of steps to execute"""
    self.set_pc(start)

    # Accept a single address as well as a collection, then compare by offset.
    if not isinstance(ends, (list, tuple)):
        ends = [ends]
    ends = [resolve(e).getOffset() for e in ends]

    while maxsteps > 0:
        # The step budget is charged up front, before the end-address check.
        maxsteps -= 1
        if self.pc in ends:
            break

        command = callback(self)
        if command == "retry":
            # Re-run the callback without stepping (e.g. after it changed pc).
            continue
        elif command == "continue_then_break":
            # Exhaust the budget so the loop ends after this iteration's step.
            maxsteps = 0
        elif self.__handle_hook_result(command):
            # A truthy result from the shared hook-result handler means stop.
            return

        if stop_when(self):
            return

        # single_step() returns True when emulation should stop.
        if self.single_step():
            return

emulate_fast(start, ends)

Emulate from start to end address, using Ghidra for fast emulation.

The main loop of this function is in Java, which makes it faster, but makes some features (like callbacks) impossible. This function stops on error, when PC reaches one of the ends, and will also call hooks.

This method will set a breakpoint at the end address, and clear it after the emulation is done.

>>> emulator.write_bytes(0x2000, "1")
>>> emulator.emulate_fast(0x1000, 0x1005)
>>> emulator.read_bytes(0x2000, 1)
'0'
Parameters:
  • start

    the start address to emulate

  • ends

    one or more end addresses

Source code in ghidralib.py
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
def emulate_fast(self, start, ends):  # type: (Addr, Addr|list[Addr]) -> None
    """Emulate from start to end address, using Ghidra for fast emulation.

    The main loop of this function is in Java, which makes it faster, but makes
    some features (like callbacks) impossible. This function stops on error,
    when PC reaches one of the ends, and will also call hooks.

    This method will set a breakpoint at every end address, and clear it after
    the emulation is done. The breakpoints are cleared even when the run (or
    a hook called during it) raises an exception.

        >>> emulator.write_bytes(0x2000, "1")
        >>> emulator.emulate_fast(0x1000, 0x1005)
        >>> emulator.read_bytes(0x2000, 1)
        '0'

    :param start: the start address to emulate
    :param ends: one or more end addresses (a single address, or a list/tuple)
    :raises RuntimeError: if the run finished without reporting a breakpoint"""
    self.set_pc(start)

    if not isinstance(ends, (list, tuple)):
        ends = [ends]

    # Track which breakpoints were actually set, so that cleanup only touches
    # those and a failure while setting one does not leave earlier ones behind.
    armed = []
    try:
        for end in ends:
            self.add_breakpoint(end)
            armed.append(end)

        is_breakpoint = self.__run_with_hooks()
    finally:
        for end in armed:
            self.clear_breakpoint(end)

    if not is_breakpoint:
        err = self.raw.getLastError()
        raise RuntimeError("Error when running: {}".format(err))

has_hook_at(address)

Source code in ghidralib.py
3202
3203
3204
def has_hook_at(self, address):  # type: (Addr) -> bool
    """Tell whether a hook is registered at ``address``.

    :param address: the address to look up
    :return: True if a hook is registered at that address, False otherwise"""
    return resolve(address).getOffset() in self._hooks

new(start, ends=[], callback=lambda emu: None, stop_when=lambda emu: False, maxsteps=2 ** 48) staticmethod

Emulate from start to end address, with callback for each executed address.

>>> Emulator.new("main", maxsteps=100)["EAX"]
128

This function is a convenience wrapper around emulate and can be always replaced by three lines of code. The above is equivalent to:

>>> emu = Emulator()
>>> emu.emulate("main", maxsteps=100)
>>> emu["EAX"]
128

This function may be used for quickly doing one-off emulations.

See the emulate documentation for info about this method's parameters.

Source code in ghidralib.py
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
@staticmethod
def new(
    start,
    ends=[],
    callback=lambda emu: None,
    stop_when=lambda emu: False,
    maxsteps=2**48,
):  # type: (Addr, Addr|list[Addr], Callable[[Emulator], str|None], Callable[[Emulator], bool], int) -> Emulator
    """Create a fresh emulator, run ``emulate`` on it and return it.

        >>> Emulator.new("main", maxsteps=100)["EAX"]
        128

    This is only a shortcut for one-off emulations. The call above does the
    same as these three lines:

        >>> emu = Emulator()
        >>> emu.emulate("main", maxsteps=100)
        >>> emu["EAX"]
        128

    All parameters are forwarded unchanged to `emulate`; see its
    documentation for their meaning.

    :return: the emulator, in the state left by the emulation"""
    emulator = Emulator()
    emulator.emulate(
        start,
        ends,
        callback,
        stop_when,
        maxsteps,
    )
    return emulator

read_bytes(address, length)

Read length bytes at address from the emulated program.

>>> emulator.write_bytes(0x1000, "1")
>>> emulator.read_bytes(0x1000, 1)
'1'
Parameters:
  • address

    the address to read from

  • length

    the length to read

Source code in ghidralib.py
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
def read_bytes(self, address, length):  # type: (Addr, int) -> bytes
    """Return `length` bytes of emulated memory starting at `address`.

        >>> emulator.write_bytes(0x1000, "1")
        >>> emulator.read_bytes(0x1000, 1)
        '1'

    :param address: the address to start reading at
    :param length: how many bytes to read"""
    location = resolve(address)
    return _bytes_from_bytelist(self.raw.readMemory(location, length))

read_cstring(address)

Read a null-terminated string from the emulated program.

This function reads bytes until a nullbyte is encountered.

>>> emu.read_cstring(0x1000)
'Hello, world!'
Parameters:
  • address

    address from which to start reading.

Source code in ghidralib.py
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
def read_cstring(self, address):  # type: (Addr) -> str
    """Read a null-terminated string from the emulated program.

    This function reads one byte at a time until a null byte is encountered;
    the terminator is not included in the result.

        >>> emu.read_cstring(0x1000)
        'Hello, world!'

    :param address: address from which to start reading.
    :return: the characters read before the terminating null byte"""
    addr = resolve(address)
    # Collect the pieces and join once: repeated ``+=`` on a string copies the
    # accumulated prefix every time on interpreters without in-place concat.
    chars = []
    while True:
        c = self.read_u8(addr)
        if c == 0:
            break
        chars.append(chr(c))
        addr = addr.add(1)
    return "".join(chars)

read_register(reg)

Read from the register of the emulated program.

>>> emulator.write_register("eax", 1337)
>>> emulator.read_register("eax")
1337
Parameters:
  • reg

    the register to read from.

Source code in ghidralib.py
3269
3270
3271
3272
3273
3274
3275
3276
3277
def read_register(self, reg):  # type: (Reg) -> int
    """Return the current value of a register of the emulated program.

        >>> emulator.write_register("eax", 1337)
        >>> emulator.read_register("eax")
        1337

    :param reg: the register to read
    :return: the register value, converted with ``_python_int``"""
    raw_value = self.raw.readRegister(reg)
    return _python_int(raw_value)

read_u16(address)

Read a 16bit unsigned integer from the emulated program.

>>> emulator.write_u16(0x1000, 123)
>>> emulator.read_u16(0x1000)
123
Parameters:
  • address

    the address to read from

Source code in ghidralib.py
3301
3302
3303
3304
3305
3306
3307
3308
3309
def read_u16(self, address):  # type: (Addr) -> int
    """Return the 16bit unsigned integer stored at ``address``.

        >>> emulator.write_u16(0x1000, 123)
        >>> emulator.read_u16(0x1000)
        123

    :param address: the address of the 2 bytes to decode"""
    raw_bytes = self.read_bytes(address, 2)
    return from_bytes(raw_bytes)

read_u32(address)

Read a 32bit unsigned integer from the emulated program.

>>> emulator.write_u32(0x1000, 123)
>>> emulator.read_u32(0x1000)
123
Parameters:
  • address

    the address to read from

Source code in ghidralib.py
3311
3312
3313
3314
3315
3316
3317
3318
3319
def read_u32(self, address):  # type: (Addr) -> int
    """Return the 32bit unsigned integer stored at ``address``.

        >>> emulator.write_u32(0x1000, 123)
        >>> emulator.read_u32(0x1000)
        123

    :param address: the address of the 4 bytes to decode"""
    raw_bytes = self.read_bytes(address, 4)
    return from_bytes(raw_bytes)

read_u64(address)

Read a 64bit unsigned integer from the emulated program.

>>> emulator.write_u64(0x1000, 123)
>>> emulator.read_u64(0x1000)
123
Parameters:
  • address

    the address to read from

Source code in ghidralib.py
3321
3322
3323
3324
3325
3326
3327
3328
3329
def read_u64(self, address):  # type: (Addr) -> int
    """Return the 64bit unsigned integer stored at ``address``.

        >>> emulator.write_u64(0x1000, 123)
        >>> emulator.read_u64(0x1000)
        123

    :param address: the address of the 8 bytes to decode"""
    raw_bytes = self.read_bytes(address, 8)
    return from_bytes(raw_bytes)

read_u8(address)

Read a byte from the emulated program.

>>> emulator.write_u8(0x1000, 13)
>>> emulator.read_u8(0x1000)
13
Parameters:
  • address

    the address to read from

Source code in ghidralib.py
3291
3292
3293
3294
3295
3296
3297
3298
3299
def read_u8(self, address):  # type: (Addr) -> int
    """Return the single byte stored at ``address`` as an integer.

        >>> emulator.write_u8(0x1000, 13)
        >>> emulator.read_u8(0x1000)
        13

    :param address: the address of the byte to read"""
    raw_bytes = self.read_bytes(address, 1)
    return from_bytes(raw_bytes)

read_unicode(address)

Read a null-terminated utf-16 string from the emulated program.

This function reads bytes until a null character is encountered.

>>> emu.read_unicode(0x1000)
'Hello, world!'
Parameters:
  • address

    address from which to start reading.

Source code in ghidralib.py
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
def read_unicode(self, address):  # type: (Addr) -> str
    """Read a null-terminated utf-16 string from the emulated program.

    This function reads 16bit units until a null unit is encountered; the
    terminator is not included in the result. Every unit is converted to a
    character on its own (surrogate pairs are not combined here).

        >>> emu.read_unicode(0x1000)
        'Hello, world!'

    :param address: address from which to start reading.
    :return: the characters read before the terminating null unit"""
    try:
        # Python 2 / Jython: chr() rejects values >= 256, unichr() is needed.
        wide_chr = unichr
    except NameError:
        # Python 3: chr() already covers the whole range.
        wide_chr = chr

    addr = resolve(address)
    # Collect the pieces and join once instead of repeated string ``+=``.
    chars = []
    while True:
        c = self.read_u16(addr)
        if c == 0:
            break
        # Keep plain chr() for the low range so that pure 8bit text yields the
        # same string type as before on Python 2.
        chars.append(chr(c) if c < 0x100 else wide_chr(c))
        addr = addr.add(2)
    return "".join(chars)

read_varnode(varnode)

Read from the varnode from the emulated program.

This method can't read hash varnodes.

>>> fnc = Function("AddNumbers")
>>> emu = Emulator()
>>> emu.write_varnode(fnc.parameters[0].varnode, 2)
>>> emu.write_varnode(fnc.parameters[1].varnode, 2)
>>> emu.emulate(fnc.entrypoint, stop_when=lambda emu: emu.pc not in fnc.body)
>>> emu.read_varnode(fnc.return_variable.varnode)
4
Parameters:
  • varnode

    the varnode to read from.

Source code in ghidralib.py
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
def read_varnode(self, varnode):  # type: (Varnode) -> int
    """Read from the varnode from the emulated program.

    Constant, address, unique, stack and register varnodes are supported.
    This method can't read hash varnodes.

        >>> fnc = Function("AddNumbers")
        >>> emu = Emulator()
        >>> emu.write_varnode(fnc.parameters[0].varnode, 2)
        >>> emu.write_varnode(fnc.parameters[1].varnode, 2)
        >>> emu.emulate(fnc.entrypoint, stop_when=lambda emu: emu.pc not in fnc.body)
        >>> emu.read_varnode(fnc.return_variable.varnode)
        4

    :param varnode: the varnode to read from.
    :return: the value of the varnode as an integer
    :raises RuntimeError: if the varnode is of an unsupported kind"""
    varnode = Varnode(varnode)
    if varnode.is_constant:
        # A constant varnode carries its value in the offset field.
        return varnode.offset
    elif varnode.is_address:
        rawnum = self.read_bytes(varnode.offset, varnode.size)
        return from_bytes(rawnum)
    elif varnode.is_unique:
        # Unique varnodes are read from the program's unique address space.
        space = Program.current().getAddressFactory().getUniqueSpace()
        offset = space.getAddress(varnode.offset)
        rawnum = self.read_bytes(offset, varnode.size)
        return from_bytes(rawnum)
    elif varnode.is_stack:
        # Convert the Java result the same way read_register does, so that
        # every branch hands back a Python integer.
        stack_value = self.raw.readStackValue(varnode.offset, varnode.size, False)
        return _python_int(stack_value)
    elif varnode.is_register:
        language = Program.current().getLanguage()
        reg = language.getRegister(varnode.raw.getAddress(), varnode.size)
        return self.read_register(reg)
    raise RuntimeError("Unsupported varnode type")

set_pc(address)

Set the program counter of the emulated program.

Source code in ghidralib.py
3220
3221
3222
3223
def set_pc(self, address):  # type: (Addr) -> None
    """Point the program counter of the emulated program at ``address``.

    :param address: the new program counter value"""
    pc_register = self.raw.getPCRegister()
    target_offset = resolve(address).getOffset()
    self.raw.writeRegister(pc_register, target_offset)

set_sp(value)

Set the current stack pointer register value.

Parameters:
  • value

    new stack pointer value.

Source code in ghidralib.py
3242
3243
3244
3245
3246
def set_sp(self, value):  # type: (Addr) -> None
    """Write ``value`` to the stack pointer register.

    :param value: the new stack pointer value; it is resolved to an address
      and its offset is what gets written."""
    stack_register = self.sp_register
    new_offset = resolve(value).getOffset()
    self.write_register(stack_register, new_offset)

single_step()

Do a single emulation step. This will step into calls.

Note: This method will call hooks.

Returns:
  • True if the emulation should be stopped, False otherwise.

Source code in ghidralib.py
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
def single_step(self):  # type: () -> bool
    """Execute exactly one emulation step. This will step into calls.

    Note: hooks *are* called by this method. A hook registered at the
    address reached after the step decides the return value.

    :return: True if the emulation should be stopped, False otherwise.
    :raises RuntimeError: if the underlying emulator reports a failed step"""
    if not self.raw.step(getMonitor()):
        failure = self.raw.getLastError()
        raise RuntimeError("Error at {}: {}".format(self.pc, failure))

    if self.pc not in self._hooks:
        # No hook here: only a breakpoint can request a stop.
        return True if self.is_at_breakpoint else False

    hook_result = self._hooks[self.pc](self)
    return self.__handle_hook_result(hook_result)

write_bytes(address, value)

Write to the memory of the emulated program.

>>> emulator.write_bytes(0x1000, "1")
>>> emulator.read_bytes(0x1000, 1)
'1'
Parameters:
  • address

    the address to write to

  • value

    the value to write

Source code in ghidralib.py
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
def write_bytes(self, address, value):  # type: (Addr, bytes) -> None
    """Store ``value`` in the memory of the emulated program.

        >>> emulator.write_bytes(0x1000, "1")
        >>> emulator.read_bytes(0x1000, 1)
        '1'

    :param address: the address to start writing at
    :param value: the bytes to write"""
    destination = resolve(address)
    self.raw.writeMemory(destination, value)

write_register(reg, value)

Write to the register of the emulated program.

>>> emulator.write_register("eax", 1)
>>> emulator.read_register("eax")
1
Parameters:
  • reg

    the register to write to

  • value

    the value to write

Source code in ghidralib.py
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
def write_register(self, reg, value):  # type: (Reg, int) -> None
    """Store ``value`` in a register of the emulated program.

        >>> emulator.write_register("eax", 1)
        >>> emulator.read_register("eax")
        1

    :param reg: the register to write to
    :param value: the value to store in it"""
    emulator = self.raw
    emulator.writeRegister(reg, value)

write_u16(address, value)

Write a 16bit unsigned integer to the emulated program.

>>> emulator.write_u16(0x1000, 13)
>>> emulator.read_u16(0x1000)
13
Parameters:
  • address

    the address to write to

Source code in ghidralib.py
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
def write_u16(self, address, value):  # type: (Addr, int) -> None
    """Store ``value`` as a 16bit unsigned integer in the emulated program.

        >>> emulator.write_u16(0x1000, 13)
        >>> emulator.read_u16(0x1000)
        13

    :param address: the address to write to
    :param value: the integer to write; must satisfy 0 <= value < 2**16"""
    upper_bound = 2**16
    assert 0 <= value < upper_bound, "value out of range"
    encoded = to_bytes(value, 2)
    self.write_bytes(address, encoded)

write_u32(address, value)

Write a 32bit unsigned integer to the emulated program.

>>> emulator.write_u32(0x1000, 13)
>>> emulator.read_u32(0x1000)
13
Parameters:
  • address

    the address to write to

Source code in ghidralib.py
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
def write_u32(self, address, value):  # type: (Addr, int) -> None
    """Store ``value`` as a 32bit unsigned integer in the emulated program.

        >>> emulator.write_u32(0x1000, 13)
        >>> emulator.read_u32(0x1000)
        13

    :param address: the address to write to
    :param value: the integer to write; must satisfy 0 <= value < 2**32"""
    upper_bound = 2**32
    assert 0 <= value < upper_bound, "value out of range"
    encoded = to_bytes(value, 4)
    self.write_bytes(address, encoded)

write_u64(address, value)

Write a 64bit unsigned integer to the emulated program.

>>> emulator.write_u64(0x1000, 13)
>>> emulator.read_u64(0x1000)
13
Parameters:
  • address

    the address to write to

Source code in ghidralib.py
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
def write_u64(self, address, value):  # type: (Addr, int) -> None
    """Store ``value`` as a 64bit unsigned integer in the emulated program.

        >>> emulator.write_u64(0x1000, 13)
        >>> emulator.read_u64(0x1000)
        13

    :param address: the address to write to
    :param value: the integer to write; must satisfy 0 <= value < 2**64"""
    upper_bound = 2**64
    assert 0 <= value < upper_bound, "value out of range"
    encoded = to_bytes(value, 8)
    self.write_bytes(address, encoded)

write_u8(address, value)

Write a byte to the emulated program.

>>> emulator.write_u8(0x1000, 13)
>>> emulator.read_u8(0x1000)
13
Parameters:
  • address

    the address to write to

Source code in ghidralib.py
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
def write_u8(self, address, value):  # type: (Addr, int) -> None
    """Store ``value`` as a single byte in the emulated program.

        >>> emulator.write_u8(0x1000, 13)
        >>> emulator.read_u8(0x1000)
        13

    :param address: the address to write to
    :param value: the integer to write; must satisfy 0 <= value < 2**8"""
    upper_bound = 2**8
    assert 0 <= value < upper_bound, "value out of range"
    encoded = to_bytes(value, 1)
    self.write_bytes(address, encoded)

write_varnode(varnode, value)

Set a varnode value in the emulated context.

This method can't set hash and constant varnodes.

>>> fnc = Function("AddNumbers")
>>> emu = Emulator()
>>> emu.write_varnode(fnc.parameters[0].varnode, 2)
>>> emu.write_varnode(fnc.parameters[1].varnode, 2)
>>> emu.emulate(fnc.entrypoint, stop_when=lambda emu: emu.pc not in fnc.body)
>>> emu.read_varnode(fnc.return_variable.varnode)
4
Parameters:
  • varnode

    the varnode to write to.

Source code in ghidralib.py
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
def write_varnode(self, varnode, value):  # type: (Varnode, int) -> None
    """Store ``value`` in a varnode of the emulated context.

    Address, unique, stack and register varnodes can be written. Hash and
    constant varnodes can't be set by this method.

        >>> fnc = Function("AddNumbers")
        >>> emu = Emulator()
        >>> emu.write_varnode(fnc.parameters[0].varnode, 2)
        >>> emu.write_varnode(fnc.parameters[1].varnode, 2)
        >>> emu.emulate(fnc.entrypoint, stop_when=lambda emu: emu.pc not in fnc.body)
        >>> emu.read_varnode(fnc.return_variable.varnode)
        4

    :param varnode: the varnode to write to.
    :param value: the integer value to store.
    :raises ValueError: if the varnode is a constant
    :raises RuntimeError: if the varnode is of an unsupported kind"""
    node = Varnode(varnode)

    if node.is_constant:
        raise ValueError("Can't set value of a constant varnodes")

    if node.is_address:
        self.write_bytes(node.offset, to_bytes(value, node.size))
        return

    if node.is_unique:
        # Unique varnodes are written through the program's unique space.
        unique_space = Program.current().getAddressFactory().getUniqueSpace()
        location = unique_space.getAddress(node.offset)
        self.write_bytes(location, to_bytes(value, node.size))
        return

    if node.is_stack:
        self.raw.writeStackValue(node.offset, node.size, value)
        return

    if node.is_register:
        lang = Program.current().getLanguage()
        register = lang.getRegister(node.raw.getAddress(), node.size)
        self.raw.writeRegister(register, value)
        return

    raise RuntimeError("Unsupported varnode type")

FlowType

Bases: GhidraWrapper

Wraps a Ghidra FlowType object

Source code in ghidralib.py
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
class FlowType(GhidraWrapper):
    """Thin wrapper around a Ghidra FlowType object.

    Every property below forwards to the wrapped object available as
    ``self.raw``."""

    # TODO is this class necessary? This is just a subclass of RefType.

    @property
    def has_fallthrough(self):  # type: () -> bool
        """Whether the wrapped flow reports a fallthrough."""
        return self.raw.hasFallthrough()

    @property
    def is_call(self):  # type: () -> bool
        """Whether the wrapped flow reports being a call."""
        return self.raw.isCall()

    @property
    def is_computed(self):  # type: () -> bool
        """Whether the wrapped flow reports being computed."""
        return self.raw.isComputed()

    @property
    def is_conditional(self):  # type: () -> bool
        """Whether the wrapped flow reports being conditional."""
        return self.raw.isConditional()

    @property
    def is_jump(self):  # type: () -> bool
        """Whether the wrapped flow reports being a jump."""
        return self.raw.isJump()

    @property
    def is_override(self):  # type: () -> bool
        """Whether the wrapped flow reports being an override."""
        return self.raw.isOverride()

    @property
    def is_terminal(self):  # type: () -> bool
        """Whether the wrapped flow reports being a terminator."""
        return self.raw.isTerminal()

    @property
    def is_unconditional(self):  # type: () -> bool
        """Whether the flow is unconditional, i.e. not ``is_conditional``."""
        return not self.is_conditional

has_fallthrough property

Return True if this flow has a fallthrough.

is_call property

Return True if this flow is a call.

is_computed property

Return True if this flow is a computed jump.

is_conditional property

Return True if this flow is a conditional jump.

is_jump property

Return True if this flow is a jump.

is_override property

Return True if this flow is an override.

is_terminal property

Return True if this flow is a terminator.

is_unconditional property

Return True if this flow is an unconditional jump.

Function

Bases: GhidraWrapper, BodyTrait

Wraps a Ghidra Function object.

Source code in ghidralib.py
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
class Function(GhidraWrapper, BodyTrait):
    """Wraps a Ghidra Function object.

    Besides thin accessors over the raw Ghidra function (available as `.raw`),
    this class offers helpers for decompilation, Pcode access, cross-references
    and emulation. Members that need the decompiler say so in their docstring,
    because every such access runs a fresh decompilation."""

    UNDERLYING_CLASS = GhFunction

    @staticmethod
    def get(addr):  # type: (JavaObject|str|Addr) -> Function|None
        """Return the function containing the given address, or None if no
        function exists there.

        :param addr: a raw Ghidra function, a Function wrapper, or anything
          that try_resolve can turn into an address.
        :return: the wrapped function, or None when the address cannot be
          resolved or no function contains it."""
        if isinstance(addr, GhFunction):
            return Function(addr)
        if isinstance(addr, Function):
            return Function(addr.raw)
        addr = try_resolve(addr)
        if addr is None:
            return None
        raw = Program.current().getListing().getFunctionContaining(addr)
        if raw is None:
            return None
        return Function(raw)  # type: ignore

    @staticmethod
    def all():  # type: () -> list[Function]
        """Return all functions in the current program."""
        raw_functions = Program.current().getFunctionManager().getFunctions(True)
        return [Function(f) for f in raw_functions]

    @staticmethod
    def create(address, name):  # type: (Addr, str) -> Function
        """Create a new function at the given address with the given name."""
        func = createFunction(resolve(address), name)
        return Function(func)

    @property
    def return_type(self):  # type: () -> DataType
        """Get the return type of this function."""
        return DataType(self.raw.getReturnType())

    @property
    def return_variable(self):  # type: () -> Parameter
        """Get the variable representing a return value of this function."""
        return Parameter(self.raw.getReturn())

    @property
    def entrypoint(self):  # type: () -> int
        """Get the entrypoint of this function."""
        return self.raw.getEntryPoint().getOffset()

    @property
    def address(self):  # type: () -> int
        """Get the address of this function (same as the entrypoint)."""
        return self.entrypoint

    @property
    def exitpoints(self):  # type: () -> list[int]
        """Get a list of exit points for the function.

        This will return a list of addresses of function terminators. For example,
        if a function has two RETs, this function will return their addresses."""
        return [i.address for i in self.instructions if i.flow_type.is_terminal]

    @property
    def name(self):  # type: () -> str
        """Get the name of this function."""
        return self.raw.getName()

    @property
    def comment(self):  # type: () -> str|None
        """Get the comment of this function, if any."""
        return self.raw.getComment()

    def set_comment(self, comment):  # type: (str|None) -> None
        """Set the comment of this function."""
        self.raw.setComment(comment)

    @property
    def is_thunk(self):  # type: () -> bool
        """Return True if this function is a thunk."""
        return self.raw.isThunk()

    @property
    def is_external(self):  # type: () -> bool
        """Return True if this function is external."""
        return self.raw.isExternal()

    @property
    def repeatable_comment(self):  # type: () -> str|None
        """Get the repeatable comment of this function, if any."""
        return self.raw.getRepeatableComment()

    def set_repeatable_comment(self, comment):  # type: (str|None) -> None
        """Set the repeatable comment of this function."""
        self.raw.setRepeatableComment(comment)

    @property
    def parameters(self):  # type: () -> list[Parameter]
        """Get the parameters of this function."""
        return [Parameter(raw) for raw in self.raw.getParameters()]

    def add_named_parameter(self, datatype, name):  # type: (DataT, str) -> None
        """Add a parameter with a specified name to this function.

        Warning: adding a register parameter will switch the function into
        custom storage mode. Adding named parameters in custom storage is not
        implemented.

        :param datatype: the type of the new parameter.
        :param name: the name of the new parameter.
        :raises ValueError: if the function uses custom variable storage."""
        if self.raw.hasCustomVariableStorage():
            raise ValueError(
                "Sorry, adding named parameters is not implemented "
                "for functions with custom storage"
            )
        data = DataType(datatype)
        param = ParameterImpl(name, data.raw, 0, Program.current())
        self.raw.addParameter(param, SourceType.USER_DEFINED)

    def add_register_parameter(
        self, datatype, register, name
    ):  # type: (DataT, Reg, str) -> None
        """Add a parameter stored in a specified register to this function.

        Warning: adding a register parameter will switch the function into
        custom storage mode. Adding named parameters in custom storage will
        not work anymore.

        :param datatype: the type of the new parameter.
        :param register: the register that holds the parameter.
        :param name: the name of the new parameter."""
        if not self.raw.hasCustomVariableStorage():
            self.raw.setCustomVariableStorage(True)
        reg = Register(register)
        data = DataType(datatype)
        param = ParameterImpl(name, data.raw, reg.raw, Program.current())
        self.raw.addParameter(param, SourceType.USER_DEFINED)

    def fixup_body(self):  # type: () -> bool
        """Fixup the function body: follow control flow and add thunks."""
        return CreateFunctionCmd.fixupFunctionBody(
            Program.current(), self.raw, getMonitor()
        )

    @property
    def local_variables(self):  # type: () -> list[Variable]
        """Get the local variables of this function."""
        return [Variable(raw) for raw in self.raw.getLocalVariables()]

    @property
    def variables(self):  # type: () -> list[Variable]
        """Get all variables defined in this function."""
        return [Variable(raw) for raw in self.raw.getAllVariables()]

    @property
    def varnodes(self):  # type: () -> list[Varnode]
        """Get all varnodes associated with a variable in this function."""
        varnodes = []
        for var in self.variables:
            varnodes.extend(var.varnodes)
        return varnodes

    @property
    def high_variables(self):  # type: () -> list[HighVariable]
        """Get the variables of the decompiled (high-level) function.

        Warning: this method needs to decompile the function, and is therefore slow."""
        return self.high_function.variables

    @property
    def stack(self):  # type: () -> list[Variable]
        """Get the defined stack variables (both parameters and locals)."""
        raw_vars = self.raw.getStackFrame().getStackVariables()
        return [Variable(raw) for raw in raw_vars]

    def rename(self, name):  # type: (str) -> None
        """Change the name of this function."""
        self.raw.setName(name, SourceType.USER_DEFINED)

    @property
    def instructions(self):  # type: () -> list[Instruction]
        """Get the assembler instructions for this function."""
        listing = Program.current().getListing()
        raw_instructions = listing.getInstructions(self.raw.getBody(), True)
        return [Instruction(raw) for raw in raw_instructions]

    @property
    def xrefs(self):  # type: () -> list[Reference]
        """Get the references to this function."""
        raw_refs = getReferencesTo(resolve(self.entrypoint))
        return [Reference(raw) for raw in raw_refs]

    xrefs_to = xrefs

    @property
    def xref_addrs(self):  # type: () -> list[int]
        """Get the source addresses of references to this function."""
        return [xref.from_address for xref in self.xrefs]

    @property
    def callers(self):  # type: () -> list[Function]
        """Get all functions that call this function."""
        return [
            Function(raw) for raw in self.raw.getCallingFunctions(TaskMonitor.DUMMY)
        ]

    @property
    def called(self):  # type: () -> list[Function]
        """Get all functions that are called by this function."""
        return [Function(raw) for raw in self.raw.getCalledFunctions(TaskMonitor.DUMMY)]

    @property
    def fixup(self):  # type: () -> str|None
        """Get the fixup of this function."""
        return self.raw.getCallFixup()

    @fixup.setter
    def fixup(self, fixup):  # type: (str|None) -> None
        """Set the fixup of this function.

        :param fixup: The new fixup to set."""
        self.raw.setCallFixup(fixup)

    @property
    def calls(self):  # type: () -> list[FunctionCall]
        """Get all function calls to this function.

        One FunctionCall is returned for every reference to this function
        that is a call; other kinds of references are skipped."""
        calls = []
        for ref in self.xrefs:
            if ref.is_call:
                calls.append(FunctionCall(self, ref.from_address))
        return calls

    @property
    def basicblocks(self):  # type: () -> list[BasicBlock]
        """Get the basic blocks of this function."""
        block_model = BasicBlockModel(Program.current())
        blocks = block_model.getCodeBlocksContaining(
            self.raw.getBody(), TaskMonitor.DUMMY
        )
        return [BasicBlock(block) for block in blocks]

    def _decompile(self, simplify="decompile"):  # type: (str) -> JavaObject
        """Decompile this function (internal helper).

        :param simplify: the simplification style handed to the decompiler.
        :return: the raw decompilation result object.
        :raises RuntimeError: if the decompiler returned no result."""
        decompiler = DecompInterface()
        try:
            decompiler.openProgram(Program.current())
            decompiler.setSimplificationStyle(simplify)
            # The second argument is the decompilation timeout, in seconds.
            decompiled = decompiler.decompileFunction(
                self.raw, 5, TaskMonitor.DUMMY
            )
        finally:
            # Always release the decompiler, even when decompilation raised,
            # so a failure doesn't leak the decompiler resources.
            decompiler.closeProgram()
            decompiler.dispose()
        if decompiled is None:
            raise RuntimeError("Failed to decompile function {}".format(self.name))
        return decompiled

    def decompile(self):  # type: () -> str
        """Get decompiled C code for the function as string."""
        decompiled = self._decompile()
        return decompiled.getDecompiledFunction().getC()

    @property
    def clang_tokens(self):  # type: () -> ClangTokenGroup
        """Get clang tokens for the decompiled function.

        This returns a ClangTokenGroup object. TODO: wrap the return value."""
        decompiled = self._decompile()
        return ClangTokenGroup(decompiled.getCCodeMarkup())

    @property
    def high_function(self):  # type: () -> HighFunction
        """Decompile this function, and return a high-level function.

        Warning: this method needs to decompile the function, and is therefore slow."""
        return self.get_high_function()

    def get_high_function(self, simplify="decompile"):  # type: (str) -> HighFunction
        """Decompile this function, and return a high-level function.

        Warning: this method needs to decompile the function, and is therefore slow.

        :param simplify: the simplification style to use.
          See DecompInterface.setSimplificationStyle."""
        decompiled = self._decompile(simplify)
        return HighFunction(decompiled.getHighFunction())

    def get_high_pcode(self, simplify="decompile"):  # type: (str) -> list[PcodeOp]
        """Decompile this function, and return its high-level Pcode.

        Warning: this method needs to decompile the function, and is therefore slow.

        :param simplify: the simplification style to use.
          See DecompInterface.setSimplificationStyle."""
        return self.get_high_function(simplify).pcode

    @property
    def pcode_tree(self):  # type: () -> BlockGraph
        """Get an AST-like representation of the function's Pcode.

        Warning: this method needs to decompile the function, and is therefore slow."""
        return self.get_high_function().pcode_tree

    @property
    def pcode(self):  # type: () -> list[PcodeOp]
        """Get the (low-level) Pcode for this function."""
        result = []
        for block in self.basicblocks:
            result.extend(block.pcode)
        return result

    @property
    def high_pcode(self):  # type: () -> list[PcodeOp]
        """Get the (high-level) Pcode for this function.

        Warning: this method needs to decompile the function, and is therefore slow."""
        return self.get_high_pcode()

    @property
    def high_basicblocks(self):  # type: () -> list[PcodeBlock]
        """Get the (high-level) Pcode basic blocks for this function.

        Warning: this method needs to decompile the function, and is therefore slow."""
        return self.high_function.basicblocks

    def get_high_pcode_at(self, address):  # type: (Addr) -> list[PcodeOp]
        """Get the high-level Pcode at the given address.

        Do not use this function in a loop! Better decompile the whole function first.

        Warning: this method needs to decompile the function, and is therefore slow.

        :param address: the address to get the Pcode for."""
        return self.get_high_function().get_pcode_at(address)

    @property
    def high_symbols(self):  # type: () -> list[HighSymbol]
        """Get the high-level symbols for this function.

        Warning: this method needs to decompile the function, and is therefore slow."""
        return self.get_high_function().symbols

    @property
    def primary_symbols(self):  # type: () -> list[Symbol]
        """Get the primary symbols for this function."""
        symtable = Program.current().getSymbolTable()
        syms = symtable.getPrimarySymbolIterator(self.raw.getBody(), True)
        return [Symbol(s) for s in syms]

    @property
    def symbols(self):  # type: () -> list[Symbol]
        """Get the symbols for this function.

        Unfortunately, the implementation of this function has to iterate over
        all function addresses (because SymbolTable doesn't export the right method),
        so it may be quite slow when called frequently. Consider using primary_symbols
        if adequate."""
        body = self.raw.getBody()
        symbols = []
        symtable = Program.current().getSymbolTable()
        for rng in body:
            for addr in rng:
                symbols.extend(symtable.getSymbols(addr))
        return [Symbol(raw) for raw in symbols]

    @property
    def body(self):  # type: () -> AddressSet
        """Get the set of addresses of this function."""
        return AddressSet(self.raw.getBody())

    @property
    def control_flow(self):  # type: () -> Graph[BasicBlock]
        """Get the control flow graph of this function.

        In other words, get a graph that represents how the control flow
        can move between basic blocks in this function."""
        return Graph.construct(self.basicblocks, lambda v: v.destinations)

    def emulate(self, *args, **kwargs):  # type: (int, Emulator) -> Emulator
        """Emulate the function call with given args, and return final emulation state.

        The arguments are passed using a calling convention defined in Ghidra. If
        you want to use a different calling convention, or do additional setup,
        you have to use the Emulator class directly.

        You can pass your own emulator using the `emulator` kwarg. You can use this
        to do a pre-call setup (for example, write string parameters to memory). But
        don't use this to change call parameters, as they are always overwritten.

            >>> fnc = Function("ResolveName")
            >>> emu = fnc.emulate(1379010213)
            >>> emu.read_unicode(emu["eax"])
            "HKEY_CLASSES_ROOT"

        :param args: The arguments to pass to the function.
        :param kwargs: pass `emulator` kwarg to use the provided emulator
          (default: create a new one).
        :return: the emulator, in its state after the emulation stopped.
        :raises ValueError: if the number of arguments differs from the number
          of parameters of this function."""
        if "emulator" in kwargs:
            # Jython doesn't support keyword arguments after args, apparently
            emulator = kwargs["emulator"]
        else:
            emulator = Emulator()

        # Fetch the parameters once instead of asking Ghidra for them again
        # for the length check, the error message and the write loop.
        params = self.parameters
        if len(args) != len(params):
            raise ValueError(
                "Wrong number of arguments for {} - got {} expected {}".format(
                    self.name, len(args), len(params)
                )
            )

        for param, value in zip(params, args):
            emulator.write_varnode(param.varnode, value)

        # Build the address set once; the stop condition is evaluated
        # repeatedly during emulation and `self.body` creates a new wrapper
        # on every access.
        body = self.body
        emulator.emulate(self.entrypoint, stop_when=lambda emu: emu.pc not in body)
        return emulator

    def emulate_simple(self, *args, **kwargs):  # type: (int, Emulator) -> int
        """Emulate the function call with given args, and return the return value.

        The arguments are passed using a calling convention defined in Ghidra. If
        you want to use a different calling convention, or do additional setup,
        you have to use the Emulator class directly.

        You can pass your own emulator using the `emulator` kwarg. You can use this
        to do a pre-call setup (for example, write string parameters to memory). But
        don't use this to change call parameters, as they are always overwritten.

        Note: the name is not great, but I can't think of a better name that is
        not also very long.

            >>> fnc = Function("CustomHash")
            >>> fnc.emulate_simple("HKEY_CLASSES_ROOT")
            1379010213

        :param args: The arguments to pass to the function.
        :param kwargs: pass `emulator` kwarg to use the provided emulator
          (default: create a new one).
        :return: the value read from the return variable after emulation."""
        context = self.emulate(*args, **kwargs)
        return context.read_varnode(self.return_variable.varnode)

    def symbolic_context(self):  # type: () -> SymbolicPropogator
        """Returns a SymbolicPropogator instance for this function.

        This can be used to get known values of registers at various addresses.

            >>> fnc = Function(0x004061EC)
            >>> ctx = fnc.symbolic_context()
            >>> print(ctx.register(0x004061fb, "eax"))

        TODO: This method should implement a hack described in
        https://github.com/NationalSecurityAgency/ghidra/issues/3581
        because built-in Ghidra symbolic propagator doesn't support memory accesses.

        :return: a SymbolicPropogator instance with this function context."""
        propagator = SymbolicPropogator.create()
        evaluator = ConstantPropagationContextEvaluator(getMonitor())
        propagator.flow_constants(self.entrypoint, self.body, evaluator)
        return propagator

UNDERLYING_CLASS = GhFunction class-attribute instance-attribute

address property

Get the address of this function.

basicblocks property

Get the basic blocks of this function.

body property

Get the set of addresses of this function.

called property

Get all functions that are called by this function.

callers property

Get all functions that call this function.

calls property

Get all function calls to this function.

clang_tokens property

Get clang tokens for the decompiled function.

This returns a ClangTokenGroup object. TODO: wrap the return value.

comment property

Get the comment of this function, if any.

control_flow property

Get the control flow graph of this function.

In other words, get a graph that represents how the control flow can move between basic blocks in this function.

entrypoint property

Get the entrypoint of this function.

exitpoints property

Get a list of exit points for the function.

This will return a list of addresses of function terminators. For example, if a function has two RETs, this function will return their addresses.

fixup property writable

Get the fixup of this function.

high_basicblocks property

Get the (high-level) Pcode basic blocks for this function.

Warning: this method needs to decompile the function, and is therefore slow.

high_function property

Decompile this function, and return a high-level function.

Warning: this method needs to decompile the function, and is therefore slow.

high_pcode property

Get the (high-level) Pcode for this function.

Warning: this method needs to decompile the function, and is therefore slow.

high_symbols property

Get the high-level symbols for this function.

Warning: this method needs to decompile the function, and is therefore slow.

high_variables property

Get all variables defined in this function.

Warning: this method needs to decompile the function, and is therefore slow.

instructions property

Get the assembler instructions for this function.

is_external property

Return True if this function is external.

is_thunk property

Return True if this function is a thunk.

local_variables property

Get the local variables of this function.

name property

Get the name of this function.

parameters property

Get the parameters of this function.

pcode property

Get the (low-level) Pcode for this function.

pcode_tree property

Get an AST-like representation of the function's Pcode.

Warning: this method needs to decompile the function, and is therefore slow.

primary_symbols property

Get the primary symbols for this function.

repeatable_comment property

Get the repeatable comment of this function, if any.

return_type property

Get the return type of this function.

return_variable property

Get the variable representing a return value of this function.

stack property

Get the defined stack variables (both parameters and locals).

symbols property

Get the symbols for this function.

Unfortunately, the implementation of this function has to iterate over all function addresses (because SymbolTable doesn't export the right method), so it may be quite slow when called frequently. Consider using primary_symbols if adequate.

variables property

Get all variables defined in this function.

varnodes property

Get all varnodes associated with a variable in this function.

xref_addrs property

Get the source addresses of references to this function.

xrefs property

Get the references to this function.

xrefs_to = xrefs class-attribute instance-attribute

add_named_parameter(datatype, name)

Add a parameter with a specified name to this function.

Warning: adding a register parameter will switch the function into custom storage mode. Adding named parameters in custom storage is not implemented

Source code in ghidralib.py
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
def add_named_parameter(self, datatype, name):  # type: (DataT, str) -> None
    """Append a new, named parameter of the given type to this function.

    Warning: this only works for functions that do not use custom variable
    storage. Adding a register parameter switches a function into custom
    storage mode, after which this method is not implemented.

    :param datatype: the type of the new parameter.
    :param name: the name of the new parameter.
    :raises ValueError: if the function uses custom variable storage."""
    uses_custom_storage = self.raw.hasCustomVariableStorage()
    if uses_custom_storage:
        message = (
            "Sorry, adding named parameters is not implemented "
            "for functions with custom storage"
        )
        raise ValueError(message)
    param_type = DataType(datatype)
    new_param = ParameterImpl(name, param_type.raw, 0, Program.current())
    self.raw.addParameter(new_param, SourceType.USER_DEFINED)

add_register_parameter(datatype, register, name)

Add a parameter stored in a specified register to this function.

Warning: adding a register parameter will switch the function into custom storage mode. Adding named parameters in custom storage will not work anymore

Source code in ghidralib.py
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
def add_register_parameter(
    self, datatype, register, name
):  # type: (DataT, Reg, str) -> None
    """Append a new parameter that lives in the given register.

    Warning: the function is switched into custom storage mode if it is not
    in that mode already. Adding named parameters will not work afterwards.

    :param datatype: the type of the new parameter.
    :param register: the register that holds the parameter.
    :param name: the name of the new parameter."""
    raw_function = self.raw
    if not raw_function.hasCustomVariableStorage():
        raw_function.setCustomVariableStorage(True)
    storage_register = Register(register)
    param_type = DataType(datatype)
    new_param = ParameterImpl(
        name, param_type.raw, storage_register.raw, Program.current()
    )
    raw_function.addParameter(new_param, SourceType.USER_DEFINED)

all() staticmethod

Return all functions in the current program.

Source code in ghidralib.py
2473
2474
2475
2476
2477
@staticmethod
def all():  # type: () -> list[Function]
    """Collect every function of the current program into a list.

    :return: one Function wrapper per function reported by the program's
      function manager."""
    function_manager = Program.current().getFunctionManager()
    return [Function(raw) for raw in function_manager.getFunctions(True)]

create(address, name) staticmethod

Create a new function at the given address with the given name.

Source code in ghidralib.py
2479
2480
2481
2482
2483
@staticmethod
def create(address, name):  # type: (Addr, str) -> Function
    """Define a new function with the given name at the given address.

    :param address: where the new function starts.
    :param name: the name of the new function.
    :return: a wrapper around the newly created function."""
    entry = resolve(address)
    return Function(createFunction(entry, name))

decompile()

Get decompiled C code for the function as string.

Source code in ghidralib.py
2696
2697
2698
2699
def decompile(self):  # type: () -> str
    """Decompile the function and return the resulting C code as a string."""
    return self._decompile().getDecompiledFunction().getC()

emulate(*args, **kwargs)

Emulate the function call with given args, and return final emulation state.

The arguments are passed using a calling convention defined in Ghidra. If you want to use a different calling convention, or do additional setup, you have to use the Emulator class directly.

You can pass your own emulator using the emulator kwarg. You can use this to do a pre-call setup (for example, write string parameters to memory). But don't use this to change call parameters, as they are always overwritten.

>>> fnc = Function("ResolveName")
>>> emu = fnc.emulate(1379010213)
>>> emu.read_unicode(emu["eax"])
"HKEY_CLASSES_ROOT"
Parameters:
  • args

    The arguments to pass to the function.

  • kwargs

    pass emulator kwarg to use the provided emulator (default: create a new one).

Source code in ghidralib.py
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
def emulate(self, *args, **kwargs):  # type: (int, Emulator) -> Emulator
    """Emulate the function call with given args, and return final emulation state.

    The arguments are passed using a calling convention defined in Ghidra. If
    you want to use a different calling convention, or do additional setup,
    you have to use the Emulator class directly.

    You can pass your own emulator using the `emulator` kwarg. You can use this
    to do a pre-call setup (for example, write string parameters to memory). But
    don't use this to change call parameters, as they are always overwritten.

        >>> fnc = Function("ResolveName")
        >>> emu = fnc.emulate(1379010213)
        >>> emu.read_unicode(emu["eax"])
        "HKEY_CLASSES_ROOT"

    :param args: The arguments to pass to the function.
    :param kwargs: pass `emulator` kwarg to use the provided emulator
      (default: create a new one).
    :return: the emulator, in its state after the emulation stopped.
    :raises ValueError: if the number of arguments differs from the number
      of parameters of this function."""
    if "emulator" in kwargs:
        # Jython doesn't support keyword arguments after args, apparently
        emulator = kwargs["emulator"]
    else:
        emulator = Emulator()

    # Fetch the parameters once instead of asking Ghidra for them again
    # for the length check, the error message and the write loop.
    params = self.parameters
    if len(args) != len(params):
        raise ValueError(
            "Wrong number of arguments for {} - got {} expected {}".format(
                self.name, len(args), len(params)
            )
        )

    for param, value in zip(params, args):
        emulator.write_varnode(param.varnode, value)

    # Read `self.body` once; the stop condition is evaluated repeatedly
    # during emulation and should not rebuild the address set every time.
    body = self.body
    emulator.emulate(self.entrypoint, stop_when=lambda emu: emu.pc not in body)
    return emulator

emulate_simple(*args, **kwargs)

Emulate the function call with given args, and return the return value.

The arguments are passed using a calling convention defined in Ghidra. If you want to use a different calling convention, or do additional setup, you have to use the Emulator class directly.

You can pass your own emulator using the emulator kwarg. You can use this to do a pre-call setup (for example, write string parameters to memory). But don't use this to change call parameters, as they are always overwritten.

Note: the name is not great, but I can't think of a better name that is not also very long.

>>> fnc = Function("CustomHash")
>>> fnc.emulate_simple("HKEY_CLASSES_ROOT")
1379010213
Parameters:
  • args

    The arguments to pass to the function.

  • kwargs

    pass emulator kwarg to use the provided emulator (default: create a new one).

Source code in ghidralib.py
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
def emulate_simple(self, *args, **kwargs):  # type: (int, Emulator) -> int
    """Emulate a call to this function and return just its return value.

    This is a shortcut for calling `emulate` and then reading the function's
    return variable from the resulting emulator state. The arguments are
    passed using a calling convention defined in Ghidra. If you want to use a
    different calling convention, or do additional setup, you have to use the
    Emulator class directly.

    You can pass your own emulator using the `emulator` kwarg. You can use this
    to do a pre-call setup (for example, write string parameters to memory). But
    don't use this to change call parameters, as they are always overwritten.

    Note: the name is not great, but I can't think of a better name that is
    not also very long.

        >>> fnc = Function("CustomHash")
        >>> fnc.emulate_simple("HKEY_CLASSES_ROOT")
        1379010213

    :param args: The arguments to pass to the function.
    :param kwargs: pass `emulator` kwarg to use the provided emulator
      (default: create a new one).
    :return: the value read from the return variable after emulation."""
    final_state = self.emulate(*args, **kwargs)
    return_varnode = self.return_variable.varnode
    return final_state.read_varnode(return_varnode)

fixup_body()

Fixup the function body: follow control flow and add thunks.

Source code in ghidralib.py
2581
2582
2583
2584
2585
def fixup_body(self):  # type: () -> bool
    """Fix up the body of this function: follow control flow and add thunks.

    :return: the result reported by CreateFunctionCmd.fixupFunctionBody."""
    program = Program.current()
    raw_function = self.raw
    monitor = getMonitor()
    return CreateFunctionCmd.fixupFunctionBody(program, raw_function, monitor)

get(addr) staticmethod

Return a function at the given address, or None if no function exists there.

Source code in ghidralib.py
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
@staticmethod
def get(addr):  # type: (JavaObject|str|Addr) -> Function|None
    """Look up the function containing the given address.

    Raw Ghidra functions and Function wrappers are accepted too, and are
    simply (re)wrapped.

    :param addr: a raw Ghidra function, a Function wrapper, or anything
      that try_resolve can turn into an address.
    :return: the wrapped function, or None when the address cannot be
      resolved or no function contains it."""
    if isinstance(addr, GhFunction):
        return Function(addr)
    if isinstance(addr, Function):
        return Function(addr.raw)

    resolved = try_resolve(addr)
    if resolved is None:
        return None

    listing = Program.current().getListing()
    containing = listing.getFunctionContaining(resolved)
    return Function(containing) if containing is not None else None  # type: ignore

get_high_function(simplify='decompile')

Decompile this function, and return a high-level function.

Warning: this method needs to decompile the function, and is therefore slow.

:simplify: the simplification style to use. See DecompilerInterface.setSimplificationStyle.

Source code in ghidralib.py
2716
2717
2718
2719
2720
2721
2722
2723
2724
def get_high_function(self, simplify="decompile"):  # type: (str) -> HighFunction
    """Run the decompiler on this function and wrap the resulting high function.

    Warning: this method needs to decompile the function, and is therefore slow.

    :param simplify: the simplification style to use; it is passed on to
      the internal `_decompile` helper."""
    raw_high_function = self._decompile(simplify).getHighFunction()
    return HighFunction(raw_high_function)

get_high_pcode(simplify='decompile')

Decompile this function, and return its high-level Pcode.

Warning: this method needs to decompile the function, and is therefore slow.

:simplify: the simplification style to use. See DecompilerInterface.setSimplificationStyle.

Source code in ghidralib.py
2726
2727
2728
2729
2730
2731
2732
2733
def get_high_pcode(self, simplify="decompile"):  # type: (str) -> list[PcodeOp]
    """Return the high-level Pcode of this function, decompiling it first.

    Warning: this method needs to decompile the function, and is therefore slow.

    :param simplify: the simplification style to use; it is passed on to
      `get_high_function`."""
    high_function = self.get_high_function(simplify)
    return high_function.pcode

get_high_pcode_at(address)

Get the high-level Pcode at the given address.

Do not use this function in a loop! Better decompile the whole function first.

Warning: this method needs to decompile the function, and is therefore slow.

Parameters:
  • address

    the address to get the Pcode for.

Source code in ghidralib.py
2764
2765
2766
2767
2768
2769
2770
2771
2772
def get_high_pcode_at(self, address):  # type: (Addr) -> list[PcodeOp]
    """Return the high-level Pcode found at one address of this function.

    Every call goes through get_high_function() again, so do not use this
    function in a loop! Better decompile the whole function first.

    Warning: this method needs to decompile the function, and is therefore slow.

    :param address: the address to get the Pcode for.
    :return: whatever the high function's get_pcode_at() yields for the address."""
    high_function = self.get_high_function()
    return high_function.get_pcode_at(address)

rename(name)

Change the name of this function.

Source code in ghidralib.py
2618
2619
2620
def rename(self, name):  # type: (str) -> None
    """Give this function a new name.

    The name is recorded with the USER_DEFINED source type.

    :param name: the new name of the function."""
    source = SourceType.USER_DEFINED
    self.raw.setName(name, source)

set_comment(comment)

Set the comment of this function.

Source code in ghidralib.py
2523
2524
2525
def set_comment(self, comment):  # type: (str|None) -> None
    """Replace the comment of this function.

    :param comment: the new comment, forwarded unchanged to the wrapped
      object's setComment."""
    wrapped = self.raw
    wrapped.setComment(comment)

set_repeatable_comment(comment)

Set the repeatable comment of this function.

Source code in ghidralib.py
2542
2543
2544
def set_repeatable_comment(self, comment):  # type: (str|None) -> None
    """Replace the repeatable comment of this function.

    :param comment: the new repeatable comment, forwarded unchanged to the
      wrapped object's setRepeatableComment."""
    wrapped = self.raw
    wrapped.setRepeatableComment(comment)

symbolic_context()

Returns a SymbolicPropogator instance for this function.

This can be used to get the known values of registers at various addresses.

>>> fnc = Function(0x004061EC)
>>> ctx = fnc.symbolic_context()
>>> print(ctx.register(0x004061fb, "eax"))

TODO: This method should implement a hack described in https://github.com/NationalSecurityAgency/ghidra/issues/3581 because built-in Ghidra symbolic propagator doesn't support memory accesses.

Returns:
  • a SymbolicPropogator instance with this function context.

Source code in ghidralib.py
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
def symbolic_context(self):  # type: () -> SymbolicPropogator
    """Build a SymbolicPropogator and flow constants through this function.

    The result can be used to get the known values of registers at various
    addresses.

        >>> fnc = Function(0x004061EC)
        >>> ctx = fnc.symbolic_context()
        >>> print(ctx.register(0x004061fb, "eax"))

    TODO: This method should implement a hack described in
    https://github.com/NationalSecurityAgency/ghidra/issues/3581
    because built-in Ghidra symbolic propagator doesn't support memory accesses.

    :return: a SymbolicPropogator instance with this function context."""
    monitor = getMonitor()
    evaluator = ConstantPropagationContextEvaluator(monitor)
    propagator = SymbolicPropogator.create()
    # Constants are propagated from the entry point over the function's body.
    propagator.flow_constants(self.entrypoint, self.body, evaluator)
    return propagator

FunctionCall

Bases: BodyTrait

Represents a function call at a given location in the program.

Can be used to get the function being called and the parameters passed to it.

Source code in ghidralib.py
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
class FunctionCall(BodyTrait):
    """Represents a function call at a given location in the program.

    Can be used to get the function being called and the parameters passed to it."""

    def __init__(self, function, address):  # type: (Function, Addr) -> None
        """Create a new function call object.

        :param function: the function being called.
        :param address: the location of the call; it is passed through resolve()
          before being stored."""
        self.called_function = function
        self._address = resolve(address)

    @property
    def address(self):  # type: () -> int
        """Get the offset of the address where this call takes place."""
        return self._address.getOffset()

    @property
    def caller(self):  # type: () -> Function|None
        """Get the function where this function call takes place."""
        return Function.get(self._address)

    # Alternative name for the "caller" property.
    calling_function = caller

    @property
    def instruction(self):  # type: () -> Instruction
        """Get the instruction located at the address of this call."""
        return Instruction(self._address)

    @property
    def callee(self):  # type: () -> Function
        """Get the function being called."""
        return self.called_function

    def infer_context(self):  # type: () -> Emulator
        """Emulate the code before this function call, and return the state.

        The goal of this function is to recover the state of the CPU
        before the function call, as well as possible. This will work well when
        parameters are constants written just before the call, for example:

            mov eax, 30
            mov ebx, DAT_encrypted_string
            call decrypt_string

        Then recovering eax is as simple as call.infer_context()["eax"]."""
        # Only the basic block that contains the call is emulated, from its
        # start address up to the call address.
        basicblock = BasicBlock(self._address)
        return Emulator.new(basicblock.start_address, self._address)

    @property
    def high_pcodeop(self):  # type: () -> PcodeOp|None
        """Get the high-level PcodeOp for this function call.

        High-level Pcode `call` ops have the parameters resolved, so we
        can use them to read them when analysing Pcode.

        Warning: this works on decompiled functions only, so it will not work
          if the call is done from a region not recognised as function.
        Warning: this method needs to decompile the function, and is therefore slow.

        :raises RuntimeError: if none of the high Pcode ops at the call
          address is a CALL."""
        for pcode_op in PcodeOp.get_high_pcode_at(self._address):
            if pcode_op.opcode != pcode_op.CALL:
                continue
            return pcode_op

        raise RuntimeError("No CALL at {}".format(self.address))

    @property
    def high_varnodes(self):  # type: () -> list[Varnode]
        """Get a list of the arguments passed to this function call, as high varnodes.

        In other words, decompile the function, and return the varnodes associated with
        the function parameters, as seen by Ghidra decompiler.

        Warning: this works on decompiled functions only, so it will not work
          if the call is done from a region not recognised as function.
        Warning: this method needs to decompile the function, and is therefore slow."""
        op = self.high_pcodeop
        if not op:
            return []
        return op.inputs[1:]  # skip function addr

    def infer_args(self):  # type: () -> list[int|None]
        """Get a list of the arguments passed to this function call, as integers.

        This method tries to get arguments of this function, as seen by Ghidra
        decompiler. A limited symbolic execution is performed to resolve the pointers.
        If it's not possible to get an argument, None is stored in its place.

        Warning: this works on decompiled functions only, so it will not work
          if the call is done from a region not recognised as function.
        Warning: this method needs to decompile the function, and is therefore slow.
        """
        # An unresolved varnode already reports None as its value, so the
        # values can be collected directly without a special case.
        return [varnode.value for varnode in self.high_varnodes]

    @property
    def body(self):
        """Get the body of the call instruction."""
        return self.instruction.body

address property

body property

called_function = function instance-attribute

callee property

Get the function being called.

caller property

Get the function where this function call takes place.

calling_function = caller class-attribute instance-attribute

high_pcodeop property

Get the high-level PcodeOp for this function call.

High-level Pcode call ops have the parameters resolved, so we can use them to read them when analysing Pcode.

Warning: this works on decompiled functions only, so it will not work if the call is done from a region not recognised as function. Warning: this method needs to decompile the function, and is therefore slow.

high_varnodes property

Get a list of the arguments passed to this function call, as high varnodes.

In other words, decompile the function, and return the varnodes associated with the function parameters, as seen by Ghidra decompiler.

Warning: this works on decompiled functions only, so it will not work if the call is done from a region not recognised as function. Warning: this method needs to decompile the function, and is therefore slow.

instruction property

__init__(function, address)

Source code in ghidralib.py
2283
2284
2285
def __init__(self, function, address):  # type: (Function, Addr) -> None
    """Store the called function and the resolved location of the call.

    :param function: the function being called.
    :param address: the location of the call; it is passed through resolve()
      before being stored."""
    resolved_address = resolve(address)
    self.called_function = function
    self._address = resolved_address

infer_args()

Get a list of the arguments passed to this function call, as integers.

This method tries to get arguments of this function, as seen by Ghidra decompiler. A limited symbolic execution is performed to resolve the pointers. If it's not possible to get an argument, None is stored in its place.

Warning: this works on decompiled functions only, so it will not work if the call is done from a region not recognised as function. Warning: this method needs to decompile the function, and is therefore slow.

Source code in ghidralib.py
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
def infer_args(self):  # type: () -> list[int|None]
    """Get a list of the arguments passed to this function call, as integers.

    This method tries to get arguments of this function, as seen by Ghidra
    decompiler. A limited symbolic execution is performed to resolve the pointers.
    If it's not possible to get an argument, None is stored in its place.

    Warning: this works on decompiled functions only, so it will not work
      if the call is done from a region not recognised as function.
    Warning: this method needs to decompile the function, and is therefore slow.
    """
    # An unresolved varnode already reports None as its value, so the
    # values can be collected directly without a special case.
    return [varnode.value for varnode in self.high_varnodes]

infer_context()

Emulate the code before this function call, and return the state.

The goal of this function is to recover the state of the CPU before the function call, as well as possible. This will work well when parameters are constants written just before the call, for example:

mov eax, 30
mov ebx, DAT_encrypted_string
call decrypt_string

Then recovering eax is as simple as call.infer_context()["eax"].

Source code in ghidralib.py
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
def infer_context(self):  # type: () -> Emulator
    """Emulate the code leading up to this call, and return the emulator.

    The aim is to recover the state of the CPU just before the function
    call, as well as possible. This works well when the parameters are
    constants written just before the call, for example:

        mov eax, 30
        mov ebx, DAT_encrypted_string
        call decrypt_string

    Then recovering eax is as simple as call.infer_context()["eax"]."""
    call_address = self._address
    # Only the basic block that contains the call is emulated.
    block_start = BasicBlock(call_address).start_address
    return Emulator.new(block_start, call_address)

GenericT

Source code in ghidralib.py
435
436
class GenericT:
    """Empty placeholder class with no behavior of its own.

    NOTE(review): presumably a runtime stand-in for a typing.Generic base, so
    that classes deriving from it can be written as Foo[T] in type comments -
    confirm against the surrounding definitions."""

    pass

GhidraWrapper

Bases: object

The base class for all Ghidra wrappers.

This class tries to be as transparent as possible - for example, it will not raise an error on double-wrapping, or when passed in place of a Java type.

>>> instr = getInstructionAt(getAddr(0x1234))
>>> GhidraWrapper(instr)
<Instruction 0x1234>
>>> GhidraWrapper(GhidraWrapper(instr))
<Instruction 0x1234>
>>> getInstructionBefore(Instruction(instr))
<Instruction 0x1233>

Similarly, equality is based on the underlying Java object.

Source code in ghidralib.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
class GhidraWrapper(object):
    """The base class for all Ghidra wrappers.

    This class tries to be as transparent as possible - for example, it will
    not raise an error on double-wrapping, or when passed in place of a
    Java type.

        >>> instr = getInstructionAt(getAddr(0x1234))
        >>> GhidraWrapper(instr)
        <Instruction 0x1234>
        >>> GhidraWrapper(GhidraWrapper(instr))
        <Instruction 0x1234>
        >>> getInstructionBefore(Instruction(instr))
        <Instruction 0x1233>

    Similarly, equality is based on the underlying Java object."""

    def __init__(self, raw):  # type: (JavaObject|int|str|GhidraWrapper) -> None
        """Initialize the wrapper.

        The given object is resolved to a Ghidra object in these steps:

        * If "raw" is a primitive type (int, long, str, unicode, Address),
          resolve it with the "get" method of the subclass. A missing "get"
          method or a None result raises RuntimeError.
        * If "raw" is a GhidraWrapper, unwrap it (so GhidraWrapper(GhidraWrapper(x))
          is always the same as GhidraWrapper(x)).
        * If "raw" is None at this point, raise RuntimeError.
        * If the subclass has attribute UNDERLYING_CLASS, raise RuntimeError
          unless the wrapped object is an instance of it.
        * Save the final "raw" value."""

        primitive_types = (int, long, str, unicode, GenericAddress)
        if isinstance(raw, primitive_types):
            # Primitives can only be wrapped by classes that know how to look
            # the real object up.
            if not hasattr(self, "get"):
                raise RuntimeError("Unable to wrap a primitive: " + str(raw))
            resolved = self.get(raw)  # type: ignore
            if resolved is None:
                # The original value goes into the message, for readability.
                raise RuntimeError("Unable to wrap " + str(raw))
            raw = resolved

        # Peel off any number of wrapper layers, e.g. Function(Function("main")).
        while isinstance(raw, GhidraWrapper):
            raw = raw.raw

        if raw is None:
            raise RuntimeError("Object doesn't exist (refusing to wrap None)")

        # TODO - remove the conditional checks and implement this everywhere
        if hasattr(self, "UNDERLYING_CLASS") and not isinstance(
            raw, getattr(self, "UNDERLYING_CLASS")
        ):
            message = "You are trying to wrap {} as {}".format(
                raw.__class__.__name__, self.__class__.__name__
            )
            raise RuntimeError(message)

        def _java_cast(obj):  # type: (Any) -> JavaObject
            """Identity function, present only to satisfy the type-checker"""
            return obj

        self.raw = _java_cast(raw)

    def __str__(self):  # type: () -> str
        """Return the string form of the wrapped object.

        The call is simply forwarded to the underlying object."""
        wrapped = self.raw
        return wrapped.__str__()

    def __repr__(self):  # type: () -> str
        """Return the repr of the wrapped object.

        The call is simply forwarded to the underlying object."""
        wrapped = self.raw
        return wrapped.__repr__()

    def __tojava__(self, klass):
        """Make it possible to pass this object to Java methods.

        This only works in Jython, I didn't find a way to do this in JPype yet.

        :param klass: ignored; the wrapped object is returned regardless."""
        return self.raw

    def __hash__(self):  # type: () -> int
        """Return the hash of the wrapped object.

        The call is forwarded to the underlying object's hashCode()."""
        wrapped = self.raw
        return wrapped.hashCode()

    def __eq__(self, other):  # type: (object) -> bool
        """Check if this object is equal to another.

        Another wrapper is unwrapped first; the comparison itself is done by
        the underlying object's equals()."""
        other_raw = other.raw if isinstance(other, GhidraWrapper) else other
        return self.raw.equals(other_raw)

raw = _java_cast(raw) instance-attribute

__eq__(other)

Check if this object is equal to another.

This just forwards the call to the underlying object.

Source code in ghidralib.py
321
322
323
324
325
326
327
def __eq__(self, other):  # type: (object) -> bool
    """Check if this object is equal to another.

    Another wrapper is unwrapped first; the comparison itself is done by
    the underlying object's equals()."""
    other_raw = other.raw if isinstance(other, GhidraWrapper) else other
    return self.raw.equals(other_raw)

__hash__()

Return the hash of this object.

This just forwards the call to the underlying object.

Source code in ghidralib.py
315
316
317
318
319
def __hash__(self):  # type: () -> int
    """Return the hash of the wrapped object.

    The call is forwarded to the underlying object's hashCode()."""
    wrapped = self.raw
    return wrapped.hashCode()

__init__(raw)

Initialize the wrapper.

This function will try to resolve the given object to a Ghidra object. The algorithm is as follows:

  • If "raw" is a primitive type (int, long, str, unicode, Address), try to resolve it with a static "get" method of the subclass.
  • If "raw" is a GhidraWrapper, unwrap it (so GhidraWrapper(GhidraWrapper(x)) is always the same as GhidraWrapper(x)).
  • If "raw" is None at this point, raise an exception.
  • If the subclass has attribute UNDERLYING_CLASS, assert that the wrapped type is of the expected type.
  • Save the final "raw" value.
Source code in ghidralib.py
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
def __init__(self, raw):  # type: (JavaObject|int|str|GhidraWrapper) -> None
    """Initialize the wrapper.

    The given object is resolved to a Ghidra object in these steps:

    * If "raw" is a primitive type (int, long, str, unicode, Address),
      resolve it with the "get" method of the subclass. A missing "get"
      method or a None result raises RuntimeError.
    * If "raw" is a GhidraWrapper, unwrap it (so GhidraWrapper(GhidraWrapper(x))
      is always the same as GhidraWrapper(x)).
    * If "raw" is None at this point, raise RuntimeError.
    * If the subclass has attribute UNDERLYING_CLASS, raise RuntimeError
      unless the wrapped object is an instance of it.
    * Save the final "raw" value."""

    primitive_types = (int, long, str, unicode, GenericAddress)
    if isinstance(raw, primitive_types):
        # Primitives can only be wrapped by classes that know how to look
        # the real object up.
        if not hasattr(self, "get"):
            raise RuntimeError("Unable to wrap a primitive: " + str(raw))
        resolved = self.get(raw)  # type: ignore
        if resolved is None:
            # The original value goes into the message, for readability.
            raise RuntimeError("Unable to wrap " + str(raw))
        raw = resolved

    # Peel off any number of wrapper layers, e.g. Function(Function("main")).
    while isinstance(raw, GhidraWrapper):
        raw = raw.raw

    if raw is None:
        raise RuntimeError("Object doesn't exist (refusing to wrap None)")

    # TODO - remove the conditional checks and implement this everywhere
    if hasattr(self, "UNDERLYING_CLASS") and not isinstance(
        raw, getattr(self, "UNDERLYING_CLASS")
    ):
        message = "You are trying to wrap {} as {}".format(
            raw.__class__.__name__, self.__class__.__name__
        )
        raise RuntimeError(message)

    def _java_cast(obj):  # type: (Any) -> JavaObject
        """Identity function, present only to satisfy the type-checker"""
        return obj

    self.raw = _java_cast(raw)

__repr__()

Return a string representation of this object.

This just forwards the call to the underlying object.

Source code in ghidralib.py
303
304
305
306
307
def __repr__(self):  # type: () -> str
    """Return the repr of the wrapped object.

    The call is simply forwarded to the underlying object."""
    wrapped = self.raw
    return wrapped.__repr__()

__str__()

Return a string representation of this object.

This just forwards the call to the underlying object.

Source code in ghidralib.py
297
298
299
300
301
def __str__(self):  # type: () -> str
    """Return the string form of the wrapped object.

    The call is simply forwarded to the underlying object."""
    wrapped = self.raw
    return wrapped.__str__()

__tojava__(klass)

Make it possible to pass this object to Java methods.

This only works in Jython, I didn't find a way to do this in JPype yet.

Source code in ghidralib.py
309
310
311
312
313
def __tojava__(self, klass):
    """Make it possible to pass this object to Java methods.

    This only works in Jython, I didn't find a way to do this in JPype yet.

    :param klass: ignored; the wrapped object is returned regardless."""
    wrapped = self.raw
    return wrapped

Graph

Bases: GenericT, GhidraWrapper

Wraps a Ghidra AttributedGraph object.

We'd like to store arbitrary objects in the graph, but it only supports strings for keys (and names). We have a way to convert objects we are interested in to strings - see _get_unique_string() method.

Source code in ghidralib.py
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
class Graph(GenericT, GhidraWrapper):
    """Wraps a Ghidra AttributedGraph object.

    We'd like to store arbitrary objects in the graph, but it only supports
    strings for keys (and names). We have a way to convert objects we are
    interested in to strings - see _get_unique_string() method."""

    # TODO: maybe this should be a GDirectedGraph, so we get some algorithms
    # for free, and we can just convert it for display.

    def __init__(self, raw):  # type: (AttributedGraph) -> None
        """Create a new Graph wrapper.

        We have to keep track of additional data, since AttributedGraph is a bit
        clunky and can only store string IDs and string values.

        :param raw: The AttributedGraph object to wrap."""
        GhidraWrapper.__init__(self, raw)
        # Maps the string ID of a vertex to the object it was created from.
        self.data = {}

    @staticmethod
    def create(name=None, description=None):  # type: (str|None, str|None) -> Graph[Any]
        """Create a new Graph.

        :param name: The name of the graph. If None, a default name will be used.
        :param description: The description of the graph. If
        None, a default description will be used.
        :returns: a new Graph object.
        """
        name = name or "Graph"
        description = description or "Graph"
        graphtype = GraphType(name, description, ArrayList([]), ArrayList([]))
        return Graph(AttributedGraph(name, graphtype, description))

    @staticmethod
    def construct(
        vertexlist, getedges
    ):  # type: (list[T], Callable[[T], list[T]]) -> Graph[T]
        """Create a new Graph from a list of vertices and a function to get edges.

        Edges leading to objects that are not in vertexlist are skipped.

        :param vertexlist: The list of vertices.
        :param getedges: A function that gets a list of destinations from a vertex."""
        g = Graph.create()
        for v in vertexlist:
            g.vertex(v)
        for v in vertexlist:
            for dest in getedges(v):
                if dest in g:
                    g.edge(v, dest)
        return g

    def __contains__(self, vtx):  # type: (T) -> bool
        """Check if a given vertex exists in this graph.

        :param vtx: The ID of the vertex to check."""
        vid = _get_unique_string(vtx)
        vobj = self.raw.getVertex(vid)
        return self.raw.containsVertex(vobj)

    def has_vertex(self, vtx):  # type: (T) -> bool
        """Check if a given vertex exists in this graph.

        :param vtx: The ID of the vertex to check."""
        return vtx in self

    def vertex(self, vtx, name=None):  # type: (T, str|None) -> T
        """Get or create a vertex in this graph.

        :param vtx: The ID of the new vertex, or any "Vertexable" object
        that can be used to identify the vertex.
        :param name: The name of the vertex. If not provided,
        the ID will be used as the name.
        :returns: vtx parameter is returned"""
        vid = _get_unique_string(vtx)
        name = name or str(vtx)
        self.raw.addVertex(vid, name)
        self.data[vid] = vtx
        return vtx

    def edge(self, src, dst):  # type: (T, T) -> None
        """Create an edge between two vertices in this graph.

        :param src: The source vertex ID.
        :param dst: The destination vertex ID."""
        srcid = _get_unique_string(src)
        dstid = _get_unique_string(dst)
        srcobj = self.raw.getVertex(srcid)
        dstobj = self.raw.getVertex(dstid)
        self.raw.addEdge(srcobj, dstobj)

    @property
    def vertices(self):  # type: () -> list[T]
        """Get all vertices in this graph.

        Warning: this constructs the list every time, so it's not a light operation.
        Use vertex_count for counting."""
        return [self.__resolve(vid.getId()) for vid in self.raw.vertexSet()]

    @property
    def vertex_count(self):  # type: () -> int
        """Return the number of vertices in this graph."""
        return self.raw.vertexSet().size()

    def __len__(self):  # type: () -> int
        """Return the number of vertices in this graph.

        To get the number of edges, use edge_count."""
        return self.vertex_count

    @property
    def edges(self):  # type: () -> list[tuple[T, T]]
        """Get all edges in this graph, as (source, target) pairs.

        Both ends of each edge are resolved the same way as in "vertices":
        the object the vertex was created from is returned when it is known,
        and the string ID of the vertex otherwise.

        Warning: this constructs the list every time, so it's not a light operation.
        Use edge_count for counting."""
        result = []
        for e in self.raw.edgeSet():
            # self.data is keyed by vertex IDs, so the lookup has to go
            # through getId() rather than through the vertex object itself.
            frm = self.raw.getEdgeSource(e).getId()
            to = self.raw.getEdgeTarget(e).getId()
            result.append((self.__resolve(frm), self.__resolve(to)))
        return result

    @property
    def edge_count(self):  # type: () -> int
        """Return the number of edges in this graph."""
        return self.raw.edgeSet().size()

    @property
    def name(self):  # type: () -> str
        """Return the name of this graph."""
        return self.raw.getName()

    @property
    def description(self):  # type: () -> str
        """Return the description of this graph."""
        return self.raw.getDescription()

    def to_dot(self):  # type: () -> str
        """Return a DOT representation of this graph."""
        result = []
        result.append("digraph {} {{".format(self.name))
        for v in self.raw.vertexSet():
            result.append('    "{}" [label="{}"];'.format(v.getId(), v.getName()))
        for e in self.raw.edgeSet():
            frm = self.raw.getEdgeSource(e)
            to = self.raw.getEdgeTarget(e)
            result.append('    "{}" -> "{}";'.format(frm.getId(), to.getId()))
        result.append("}")
        return "\n".join(result)

    def show(self):  # type: () -> None
        """Display this graph in the Ghidra GUI."""
        graphtype = self.raw.getGraphType()
        description = graphtype.getDescription()
        options = GraphDisplayOptions(graphtype)

        broker = getState().tool.getService(GraphDisplayBroker)
        display = broker.getDefaultGraphDisplay(False, getMonitor())
        display.setGraph(self.raw, options, description, False, getMonitor())

    def __resolve(self, vid):  # type: (str) -> T
        """Resolve a vertex ID to a vertex object.

        :param vid: The ID of the vertex to resolve.
        :returns: the object stored for this ID, or the ID itself when
        nothing is stored for it."""
        if vid in self.data:
            return self.data[vid]
        else:
            return vid  # type: ignore graph created outside of ghidralib?

    def dfs(
        self, origin, callback=lambda _: None
    ):  # type: (T, Callable[[T], None]) -> dict[T, T|None]
        """Perform a depth-first search on this graph, starting from the given vertex.

        The callback will be called for each vertex visited when first visited, and
        the returned value is a dictionary of parent vertices for each visited vertex.

            >>> g = Graph.create()
            >>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
            >>> g.edge(a, b)
            >>> g.edge(b, c)
            >>> g.dfs(a)
            {'a': None, 'b': 'a', 'c': 'b'}

        Warning: This won't reach every node in the graph, if it's not connected.

        :param origin: The ID of the vertex to start the search from.
        :param callback: A callback function to call for each vertex visited.
        :returns: A dictionary of parent vertices for each visited vertex.
        """
        # The list is used as a stack of (parent object, vertex ID) pairs.
        tovisit = [(None, _get_unique_string(origin))]
        visited = set()
        parents = {origin: None}  # type: dict[T, T|None]
        while tovisit:
            parent, vid = tovisit.pop()
            if vid in visited:
                continue
            visited.add(vid)
            vobj = self.__resolve(vid)
            parents[vobj] = parent
            callback(vobj)
            for edge in self.raw.edgesOf(self.raw.getVertex(vid)):
                tovisit.append((vobj, self.raw.getEdgeTarget(edge).getId()))
        return parents

    def toposort(self, origin):  # type: (T) -> list[T]
        """Perform a topological sort on this graph, starting from the given vertex.

        Vertices are listed in depth-first post-order: in an acyclic graph, if
        there is an edge from A to B, then B will come before A in the list.
        This means that if every vertex is reachable from "origin", then "origin"
        will be the last element in the list. Vertices that are not reachable
        from "origin" are sorted the same way and placed after it.

        On a practical example, for a call graph, this means that if A calls B, then
        B will be before A in the list - so if you want to process from the bottom up,
        you should use the entry point of the program as the origin. In the example
        below, the entry point is "a", "a" calls "b", and "b" calls "c":

            >>> g = Graph.create()
            >>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
            >>> g.edge(a, b)
            >>> g.edge(b, c)
            >>> g.toposort(a)
            ['c', 'b', 'a']

        :param origin: The ID of the origin vertex to start the sort from.
        :returns: a list of vertex IDs in topological order."""
        visited = set()
        result = []

        def dfs(vid):
            # A vertex is emitted only after everything it points to.
            visited.add(vid)
            for edge in self.raw.edgesOf(self.raw.getVertex(vid)):
                target = self.raw.getEdgeTarget(edge)
                if target.getId() not in visited:
                    dfs(target.getId())
            result.append(self.__resolve(vid))

        dfs(_get_unique_string(origin))
        # Pick up whatever was not reachable from the origin.
        for vid in self.raw.vertexSet():
            if vid.getId() not in visited:
                dfs(vid.getId())
        return result

    def bfs(
        self, origin, callback=lambda _: None
    ):  # type: (T, Callable[[T], None]) -> dict[T, T|None]
        """Perform a breadth-first search on this graph, starting from the given vertex.

        The callback will be called for each vertex visited when first visited, and
        the returned value is a dictionary of parent vertices for each visited vertex.

            >>> g = Graph.create()
            >>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
            >>> g.edge(a, b)
            >>> g.edge(b, c)
            >>> g.bfs(a)
            {'a': None, 'b': 'a', 'c': 'b'}

        Warning: This won't reach every node in the graph, if it's not connected.

        :param origin: The ID of the vertex to start the search from.
        :param callback: A callback function to call for each vertex visited.
        :returns: A dictionary of parent vertices for each visited vertex.
        """
        from collections import deque

        # A deque gives constant-time removal from the front of the queue.
        tovisit = deque([(None, _get_unique_string(origin))])
        visited = set()
        parents = {origin: None}  # type: dict[T, T|None]
        while tovisit:
            parent, vid = tovisit.popleft()
            if vid in visited:
                continue
            visited.add(vid)
            vobj = self.__resolve(vid)
            parents[vobj] = parent
            callback(vobj)
            for edge in self.raw.edgesOf(self.raw.getVertex(vid)):
                tovisit.append((vobj, self.raw.getEdgeTarget(edge).getId()))
        return parents

data = {} instance-attribute

description property

Return the description of this graph.

edge_count property

Return the number of edges in this graph.

edges property

Get all edges in this graph.

Warning: this constructs the list every time, so it's not a light operation. Use edge_count for counting.

name property

Return the name of this graph.

vertex_count property

Return the number of vertices in this graph.

vertices property

Get all vertices in this graph.

Warning: this constructs the list every time, so it's not a light operation. Use vertex_count for counting.

__contains__(vtx)

Check if a given vertex exists in this graph.

Parameters:
  • vtx

    The ID of the vertex to check.

Source code in ghidralib.py
490
491
492
493
494
495
496
def __contains__(self, vtx):  # type: (T) -> bool
    """Tell whether this graph already has a vertex for the given object.

    :param vtx: The ID of the vertex to check.
    :returns: Whatever the wrapped graph's containsVertex reports for it."""
    graph = self.raw
    # The wrapped graph is queried by string ID, so translate the object first.
    return graph.containsVertex(graph.getVertex(_get_unique_string(vtx)))

__init__(raw)

Create a new Graph wrapper.

We have to keep track of additional data, since AttributedGraph is a bit clunky and can only store string IDs and string values.

Parameters:
  • raw

    The AttributedGraph object to wrap.

Source code in ghidralib.py
449
450
451
452
453
454
455
456
457
def __init__(self, raw):  # type: (AttributedGraph) -> None
    """Wrap an existing AttributedGraph.

    AttributedGraph is a bit clunky and can only store string IDs and string
    values, so the wrapper keeps its own side table for additional data.

    :param raw: The AttributedGraph object to wrap."""
    GhidraWrapper.__init__(self, raw)
    # Side table filled in by vertex(): string vertex ID -> original object.
    self.data = dict()

__len__()

Return the number of vertices in this graph.

To get the number of edges, use edge_count.

Source code in ghidralib.py
542
543
544
545
546
def __len__(self):  # type: () -> int
    """Return how many vertices this graph has.

    Edges are not counted here; use edge_count for that."""
    vertex_total = self.vertex_count
    return vertex_total

__resolve(vid)

Resolve a vertex ID to a vertex object.

Parameters:
  • vid

    The ID of the vertex to resolve.

Source code in ghidralib.py
601
602
603
604
605
606
607
608
def __resolve(self, vid):  # type: (str) -> T
    """Map a vertex ID back to the object registered for it.

    :param vid: The ID of the vertex to resolve.
    :returns: The object stored under that ID, or the ID itself when nothing
    is stored (e.g. the graph was created outside of ghidralib)."""
    # Falling back to the ID keeps unknown vertices usable.
    return self.data.get(vid, vid)  # type: ignore

bfs(origin, callback=lambda _: None)

Perform a breadth-first search on this graph, starting from the given vertex.

The callback will be called for each vertex visited when first visited, and the returned value is a dictionary of parent vertices for each visited vertex.

>>> g = Graph.create()
>>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
>>> g.edge(a, b)
>>> g.edge(b, c)
>>> g.bfs(a)
{'a': None, 'b': 'a', 'c': 'b'}

Warning: This won't reach every node in the graph, if it's not connected.

Parameters:
  • origin

    The ID of the vertex to start the search from.

  • callback

    A callback function to call for each vertex visited.

Source code in ghidralib.py
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
def bfs(
    self, origin, callback=lambda _: None
):  # type: (T, Callable[[T], None]) -> dict[T, T|None]
    """Perform a breadth-first search on this graph, starting from the given vertex.

    The callback will be called for each vertex visited when first visited, and
    the returned value is a dictionary of parent vertices for each visited vertex.

        >>> g = Graph.create()
        >>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
        >>> g.edge(a, b)
        >>> g.edge(b, c)
        >>> g.bfs(a)
        {'a': None, 'b': 'a', 'c': 'b'}

    Warning: This won't reach every node in the graph, if it's not connected.

    :param origin: The ID of the vertex to start the search from.
    :param callback: A callback function to call for each vertex visited.
    :returns: A dictionary of parent vertices for each visited vertex.
    """
    # Local import so the method does not depend on the file's import block.
    from collections import deque

    # A deque pops from the left in O(1); list.pop(0) shifts the whole list.
    tovisit = deque([(None, _get_unique_string(origin))])
    visited = set()
    parents = {origin: None}  # type: dict[T, T|None]
    while tovisit:
        parent, vid = tovisit.popleft()
        if vid in visited:
            # Queued more than once before its first visit; first parent wins.
            continue
        visited.add(vid)
        vobj = self.__resolve(vid)
        parents[vobj] = parent
        callback(vobj)
        for edge in self.raw.edgesOf(self.raw.getVertex(vid)):
            target_id = self.raw.getEdgeTarget(edge).getId()
            # Already-visited targets would only be skipped later anyway.
            if target_id not in visited:
                tovisit.append((vobj, target_id))
    return parents

construct(vertexlist, getedges) staticmethod

Create a new Graph from a list of vertices and a function to get edges.

Parameters:
  • vertexlist

    The list of vertices.

  • getedges

    A function that gets a list of destinations from a vertex.

Source code in ghidralib.py
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
@staticmethod
def construct(
    vertexlist, getedges
):  # type: (list[T], Callable[[T], list[T]]) -> Graph[T]
    """Build a new Graph out of a vertex list and an edge-producing function.

    Destinations returned by getedges that are not vertices of the new graph
    are ignored.

    :param vertexlist: The list of vertices.
    :param getedges: A function that gets a list of destinations from a vertex.
    :returns: The newly created graph."""
    graph = Graph.create()
    # Pass 1: register every vertex, so pass 2 can filter against the full set.
    for node in vertexlist:
        graph.vertex(node)
    # Pass 2: connect each vertex to those destinations that are in the graph.
    for source in vertexlist:
        for target in getedges(source):
            if target not in graph:
                continue
            graph.edge(source, target)
    return graph

create(name=None, description=None) staticmethod

Create a new Graph.

Parameters:
  • name

    The name of the graph. If None, a default name will be used.

  • description

    The description of the graph. If None, a default description will be used.

Returns:
  • a new Graph object.

Source code in ghidralib.py
459
460
461
462
463
464
465
466
467
468
469
470
471
@staticmethod
def create(name=None, description=None):  # type: (str|None, str|None) -> Graph[Any]
    """Create a new, empty Graph.

    :param name: The name of the graph. If None (or empty), "Graph" is used.
    :param description: The description of the graph. If None (or empty),
    "Graph" is used.
    :returns: a new Graph object.
    """
    if not name:
        name = "Graph"
    if not description:
        description = "Graph"
    # The graph type is created with two empty lists as its last arguments.
    graphtype = GraphType(name, description, ArrayList([]), ArrayList([]))
    raw_graph = AttributedGraph(name, graphtype, description)
    return Graph(raw_graph)

dfs(origin, callback=lambda _: None)

Perform a depth-first search on this graph, starting from the given vertex.

The callback will be called for each vertex visited when first visited, and the returned value is a dictionary of parent vertices for each visited vertex.

>>> g = Graph.create()
>>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
>>> g.edge(a, b)
>>> g.edge(b, c)
>>> g.dfs(a)
{'a': None, 'b': 'a', 'c': 'b'}

Warning: This won't reach every node in the graph, if it's not connected.

Parameters:
  • origin

    The ID of the vertex to start the search from.

  • callback

    A callback function to call for each vertex visited.

Returns:
  • A dictionary of parent vertices for each visited vertex.

Source code in ghidralib.py
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
def dfs(
    self, origin, callback=lambda _: None
):  # type: (T, Callable[[T], None]) -> dict[T, T|None]
    """Walk this graph depth-first, beginning at the given vertex.

    Every vertex is passed to the callback once, at the moment it is first
    visited. The result maps each visited vertex to the vertex it was reached
    from (None for the origin).

        >>> g = Graph.create()
        >>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
        >>> g.edge(a, b)
        >>> g.edge(b, c)
        >>> g.dfs(a)
        {'a': None, 'b': 'a', 'c': 'b'}

    Warning: This won't reach every node in the graph, if it's not connected.

    :param origin: The ID of the vertex to start the search from.
    :param callback: A callback function to call for each vertex visited.
    :returns: A dictionary of parent vertices for each visited vertex.
    """
    stack = [(None, _get_unique_string(origin))]
    seen = set()
    parent_of = {origin: None}  # type: dict[T, T|None]
    while stack:
        came_from, vertex_id = stack.pop()
        if vertex_id not in seen:
            seen.add(vertex_id)
            vertex = self.__resolve(vertex_id)
            parent_of[vertex] = came_from
            callback(vertex)
            # Push every edge target; duplicates are dropped when popped.
            stack.extend(
                (vertex, self.raw.getEdgeTarget(edge).getId())
                for edge in self.raw.edgesOf(self.raw.getVertex(vertex_id))
            )
    return parent_of

edge(src, dst)

Create an edge between two vertices in this graph.

Parameters:
  • src

    The source vertex ID.

  • dst

    The destination vertex ID.

Source code in ghidralib.py
518
519
520
521
522
523
524
525
526
527
def edge(self, src, dst):  # type: (T, T) -> None
    """Add an edge leading from one vertex of this graph to another.

    :param src: The source vertex ID.
    :param dst: The destination vertex ID."""
    graph = self.raw
    source_id, target_id = _get_unique_string(src), _get_unique_string(dst)
    # Edges are added between the raw vertex objects looked up by ID.
    source_vertex = graph.getVertex(source_id)
    target_vertex = graph.getVertex(target_id)
    graph.addEdge(source_vertex, target_vertex)

has_vertex(vtx)

Check if a given vertex exists in this graph.

Parameters:
  • vtx

    The ID of the vertex to check.

Source code in ghidralib.py
498
499
500
501
502
def has_vertex(self, vtx):  # type: (T) -> bool
    """Named alias for the ``in`` operator on this graph.

    :param vtx: The ID of the vertex to check.
    :returns: The result of ``vtx in self``."""
    is_member = vtx in self
    return is_member

show()

Display this graph in the Ghidra GUI.

Source code in ghidralib.py
591
592
593
594
595
596
597
598
599
def show(self):  # type: () -> None
    """Display this graph in the Ghidra GUI."""
    raw_graph = self.raw
    graph_type = raw_graph.getGraphType()
    title = graph_type.getDescription()
    display_options = GraphDisplayOptions(graph_type)

    # The display broker service is looked up on the current tool.
    display_broker = getState().tool.getService(GraphDisplayBroker)
    graph_display = display_broker.getDefaultGraphDisplay(False, getMonitor())
    graph_display.setGraph(raw_graph, display_options, title, False, getMonitor())

to_dot()

Return a DOT representation of this graph.

Source code in ghidralib.py
578
579
580
581
582
583
584
585
586
587
588
589
def to_dot(self):  # type: () -> str
    """Return a DOT representation of this graph.

    Vertex IDs and labels are written as double-quoted DOT strings. Any double
    quote embedded in them is backslash-escaped, so the output stays parseable.
    The graph name is written as-is after the ``digraph`` keyword.

    :returns: The DOT source, one statement per line."""

    def quoted(value):
        # The double quote is the only character that needs escaping inside a
        # DOT quoted string; everything else (backslashes included) stays as-is.
        return '"{}"'.format("{}".format(value).replace('"', '\\"'))

    result = []
    result.append("digraph {} {{".format(self.name))
    for v in self.raw.vertexSet():
        result.append(
            "    {} [label={}];".format(quoted(v.getId()), quoted(v.getName()))
        )
    for e in self.raw.edgeSet():
        frm = self.raw.getEdgeSource(e)
        to = self.raw.getEdgeTarget(e)
        result.append(
            "    {} -> {};".format(quoted(frm.getId()), quoted(to.getId()))
        )
    result.append("}")
    return "\n".join(result)

toposort(origin)

Perform a topological sort on this graph, starting from the given vertex.

Parameters:
  • origin

    The ID of the vertex to start the sort from. The order is such that if there is an edge from A to B, then B will come before A in the list. This means that if the graph is connected and acyclic then "origin" will be the last element in the list. On a practical example, for a call graph, this means that if A calls B, then B will be before A in the list - so if you want to process from the bottom up, you should use the entry point of the program as the origin. In the example below, the entry point is "a", "a" calls "b", and "b" calls "c":

        >>> g = Graph.create()
        >>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
        >>> g.edge(a, b)
        >>> g.edge(b, c)
        >>> g.toposort(a)
        ['c', 'b', 'a']

Returns:
  • a list of vertex IDs in topological order.

Source code in ghidralib.py
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
def toposort(self, origin):  # type: (T) -> list[T]
    """Perform a topological sort on this graph, starting from the given vertex.

    The order is such that if there is an edge from A to B, then B will come
    before A in the list: a vertex is emitted only after everything it points
    to. This means that if the graph is acyclic and every vertex is reachable
    from "origin", then "origin" will be the last element in the list. Vertices
    that are not reachable from "origin" are sorted the same way and appended
    afterwards.

    On a practical example, for a call graph, this means that if A calls B, then
    B will be before A in the list - so if you want to process from the bottom up,
    you should use the entry point of the program as the origin. In the example
    below, the entry point is "a", "a" calls "b", and "b" calls "c":

        >>> g = Graph.create()
        >>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
        >>> g.edge(a, b)
        >>> g.edge(b, c)
        >>> g.toposort(a)
        ['c', 'b', 'a']

    :param origin: The ID of the origin vertex to start the sort from.
    :returns: a list of vertex IDs in topological order."""
    visited = set()
    result = []

    def edges_of(vid):
        # Fresh iterator over the edges of the vertex with the given ID.
        return iter(self.raw.edgesOf(self.raw.getVertex(vid)))

    def visit(start):
        # Post-order DFS with an explicit stack instead of recursion, so a
        # long chain of vertices cannot exhaust the interpreter's call stack.
        visited.add(start)
        stack = [(start, edges_of(start))]
        while stack:
            vid, edges = stack[-1]
            for edge in edges:
                target_id = self.raw.getEdgeTarget(edge).getId()
                if target_id not in visited:
                    visited.add(target_id)
                    # Descend; the partly consumed iterator stays on the
                    # stack and is resumed once the child is finished.
                    stack.append((target_id, edges_of(target_id)))
                    break
            else:
                # All edges handled: every descendant is already emitted.
                stack.pop()
                result.append(self.__resolve(vid))

    visit(_get_unique_string(origin))
    # Pick up the vertices that were not reachable from the origin.
    for vertex in self.raw.vertexSet():
        if vertex.getId() not in visited:
            visit(vertex.getId())
    return result

vertex(vtx, name=None)

Get or create a vertex in this graph.

Parameters:
  • vtx

    The ID of the new vertex, or any "Vertexable" object that can be used to identify the vertex.

  • name

    The name of the vertex. If not provided, the ID will be used as the name.

Returns:
  • vtx parameter is returned

Source code in ghidralib.py
504
505
506
507
508
509
510
511
512
513
514
515
516
def vertex(self, vtx, name=None):  # type: (T, str|None) -> T
    """Get or create a vertex in this graph.

    :param vtx: The ID of the new vertex, or any "Vertexable" object
    that can be used to identify the vertex.
    :param name: The name of the vertex. If not provided (or empty),
    str(vtx) is used as the name.
    :returns: vtx parameter is returned"""
    key = _get_unique_string(vtx)
    label = name if name else str(vtx)
    self.raw.addVertex(key, label)
    # Remember the original object so a vertex ID can be mapped back to it.
    self.data[key] = vtx
    return vtx

HighFunction

Bases: GhidraWrapper

Source code in ghidralib.py
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
class HighFunction(GhidraWrapper):
    """Wraps a Ghidra HighFunction object."""

    @staticmethod
    def get(address):  # type: (JavaObject|str|Addr) -> HighFunction|None
        """Get a HighFunction at a given address, or None if there is none.

        :param address: Anything Function.get accepts, or a raw Ghidra
        HighFunction object, which is wrapped directly.
        :returns: The HighFunction, or None if no function was found."""
        if isinstance(address, GhHighFunction):
            return HighFunction(address)
        func = Function.get(address)
        if func is None:
            return None
        return func.high_function

    @property
    def function(self):  # type: () -> Function
        """Get the underlying function of this high function."""
        return Function(self.raw.getFunction())

    def get_pcode_at(self, address):  # type: (Addr) -> list[PcodeOp]
        """Get a list of PcodeOps at a given address.

        This list may be empty even if there are instructions at that address.

        :param address: The address to get the PcodeOps for.
        :returns: The PcodeOps at that address, each wrapped in PcodeOp."""
        address = resolve(address)
        return [PcodeOp(raw) for raw in self.raw.getPcodeOps(address)]

    @property
    def pcode(self):  # type: () -> list[PcodeOp]
        """Get a list of all high PcodeOps in this function.

        Note: high PcodeOps are called PcodeOpAST internally."""
        return [PcodeOp(raw) for raw in self.raw.getPcodeOps()]

    @property
    def data_flow(self):  # type: () -> Graph[Varnode]
        """Get a data flow graph of varnodes in this function.

        Note: I don't think this method is currently very useful, but you can
        use it to easily get information about all varnodes that impact a value of
        another varnode.

        Ops without an output contribute nothing to the graph.

        :returns: A graph where vertexes are varnodes, and edges mean that
        target varnode is a result of operation on source varnodes."""
        g = Graph.create()
        for op in self.pcode:
            # Read the property once instead of three times per input.
            output = op.output
            if output:
                for inp in op.inputs:
                    g.vertex(output)
                    g.vertex(inp)
                    g.edge(inp, output)
        return g

    @property
    def basicblocks(self):  # type: () -> list[PcodeBlock]
        """Get a list of basic blocks in this high function."""
        return [PcodeBlock(raw) for raw in self.raw.getBasicBlocks()]

    @property
    def pcode_tree(self):  # type: () -> BlockGraph
        """Get an AST-like representation of the function's Pcode.

        Warning: this method needs to decompile the function, and is therefore slow."""
        # Maps each raw basic block to its copy inside the input graph.
        edge_map = {}
        ingraph = GhBlockGraph()
        for block in self.basicblocks:
            gb = BlockCopy(block.raw, block.raw.getStart())
            ingraph.addBlock(gb)
            edge_map[block.raw] = gb

        # Second pass: recreate the edges between the copied blocks.
        for block in self.basicblocks:
            for edge in block.outgoing_edges:
                ingraph.addEdge(edge_map[block.raw], edge_map[edge.raw])

        ingraph.setIndices()
        decompiler = DecompInterface()
        decompiler.openProgram(Program.current())
        outgraph = decompiler.structureGraph(ingraph, 0, getMonitor())
        return BlockGraph(outgraph)

    @property
    def varnodes(self):  # type: () -> list[Varnode]
        """Get all varnodes used in this function."""
        return [Varnode(raw) for raw in self.raw.locRange()]

    @property
    def symbols(self):  # type: () -> list[HighSymbol]
        """Get high symbols used in this function (including parameters)."""
        sm = self.raw.getLocalSymbolMap()
        return [HighSymbol(symbol) for symbol in sm.getSymbols()]

    @property
    def variables(self):  # type: () -> list[HighVariable]
        """Get high variables defined in this function.

        Symbols without an associated high variable are skipped."""
        result = []
        for sym in self.symbols:
            var = sym.variable
            if var is not None:
                result.append(var)
        return result

    def __eq__(self, other):  # type: (object) -> bool
        """Compare two high functions.

        Two high functions are equal when their underlying functions are.

        Fun fact - Ghidra doesn't know how to do this."""
        if not isinstance(other, HighFunction):
            return False
        return self.function == other.function

    def __ne__(self, other):  # type: (object) -> bool
        """Inverse of __eq__.

        Defined explicitly because Python 2 (and therefore Jython) does not
        derive ``!=`` from ``__eq__``."""
        return not self.__eq__(other)

basicblocks property

Get a list of basic blocks in this high function.

data_flow property

Get a data flow graph of varnodes in this function.

Note: I don't think this method is currently very useful, but you can use it to easily get information about all varnodes that impact a value of another varnode.

Returns:
  • A graph where vertexes are varnodes, and edges mean that target varnode is a result of operation on source varnodes.

function property

Get the underlying function of this high function.

pcode property

Get a list of all high PcodeOps in this function.

Note: high PcodeOps are called PcodeOpAST internally.

pcode_tree property

Get an AST-like representation of the function's Pcode.

Warning: this method needs to decompile the function, and is therefore slow.

symbols property

Get high symbols used in this function (including parameters).

variables property

Get high variables defined in this function.

varnodes property

Get all varnodes used in this function.

__eq__(other)

Compare two high functions.

Fun fact - Ghidra doesn't know how to do this.

Source code in ghidralib.py
1324
1325
1326
1327
1328
1329
1330
def __eq__(self, other):  # type: (object) -> bool
    """Check whether another object is a high function of the same function.

    Anything that is not a HighFunction compares unequal; otherwise the
    underlying functions are compared.

    Fun fact - Ghidra doesn't know how to do this."""
    return isinstance(other, HighFunction) and self.function == other.function

get(address) staticmethod

Get a HighFunction at a given address, or None if there is none.

Source code in ghidralib.py
1228
1229
1230
1231
1232
1233
1234
1235
1236
@staticmethod
def get(address):  # type: (JavaObject|str|Addr) -> HighFunction|None
    """Look up the HighFunction for an address; None if there is none.

    A raw Ghidra HighFunction object passed in place of an address is simply
    wrapped."""
    if isinstance(address, GhHighFunction):
        return HighFunction(address)
    owner = Function.get(address)
    return None if owner is None else owner.high_function

get_pcode_at(address)

Get a list of PcodeOps at a given address.

This list may be empty even if there are instructions at that address.

Source code in ghidralib.py
1243
1244
1245
1246
1247
1248
def get_pcode_at(self, address):  # type: (Addr) -> list[PcodeOp]
    """Collect the PcodeOps located at the given address.

    The result can be empty even if there are instructions at that address."""
    raw_ops = self.raw.getPcodeOps(resolve(address))
    return [PcodeOp(raw_op) for raw_op in raw_ops]

HighSymbol

Bases: GhidraWrapper

Source code in ghidralib.py
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
class HighSymbol(GhidraWrapper):
    """Wraps a raw high symbol, e.g. one from a high function's local symbol map."""

    def rename(
        self, new_name, source=SourceType.USER_DEFINED
    ):  # type: (str, SourceType) -> None
        """Give this high symbol a new name.

        :param new_name: The new name of the symbol
        :param source: The source of the symbol"""
        wrapped = self.raw
        HighFunctionDBUtil.updateDBVariable(wrapped, new_name, None, source)

    @property
    def size(self):  # type: () -> int
        """Size of this symbol, in bytes."""
        byte_count = self.raw.getSize()
        return byte_count

    @property
    def data_type(self):  # type: () -> DataType
        """Data type of this symbol, wrapped in DataType."""
        raw_type = self.raw.getDataType()
        return DataType(raw_type)

    @property
    def variable(self):  # type: () -> HighVariable|None
        """High variable associated with this symbol, or None if there is none.

        The symbol may have multiple HighVariables associated with it.
        This method returns the biggest one."""
        high_var = self.raw.getHighVariable()
        return HighVariable(high_var) if high_var is not None else None

    @property
    def name(self):  # type: () -> str
        """Name of this symbol."""
        symbol_name = self.raw.getName()
        return symbol_name

    @property
    def symbol(self):  # type: () -> Symbol|None
        """Corresponding symbol, or None if it does not exist."""
        plain = self.raw.getSymbol()
        return Symbol(plain) if plain is not None else None

    @property
    def is_this_pointer(self):  # type: () -> bool
        """True if this symbol is a "this" pointer for a class."""
        flag = self.raw.isThisPointer()
        return flag

data_type property

Return the data type of this symbol

is_this_pointer property

Return True if this symbol is a "this" pointer for a class

name property

Return the name of this symbol

size property

Return the size of this symbol in bytes

symbol property

Get the corresponding symbol, if it exists.

variable property

Return the high variable associated with this symbol, if any.

The symbol may have multiple HighVariables associated with it. This method returns the biggest one.

rename(new_name, source=SourceType.USER_DEFINED)

Rename this high symbol.

Parameters:
  • new_name

    The new name of the symbol

  • source

    The source of the symbol

Source code in ghidralib.py
802
803
804
805
806
807
808
809
def rename(
    self, new_name, source=SourceType.USER_DEFINED
):  # type: (str, SourceType) -> None
    """Give this high symbol a new name.

    :param new_name: The new name of the symbol
    :param source: The source of the symbol"""
    wrapped = self.raw
    # NOTE(review): the third argument is passed as None; presumably this
    # leaves the data type untouched - confirm against the Ghidra API.
    HighFunctionDBUtil.updateDBVariable(wrapped, new_name, None, source)

HighVariable

Bases: GhidraWrapper

Source code in ghidralib.py
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
class HighVariable(GhidraWrapper):
    """Wraps a raw high variable."""

    @property
    def symbol(self):  # type: () -> HighSymbol
        """Return the high symbol of this variable, wrapped in HighSymbol."""
        return HighSymbol(self.raw.getSymbol())

    def rename(self, new_name):  # type: (str) -> None
        """Rename this high variable (by renaming its high symbol).

        :param new_name: The new name of the variable."""
        self.symbol.rename(new_name)

    @property
    def size(self):  # type: () -> int
        """Return the size of this variable in bytes"""
        return self.raw.getSize()

    @property
    def data_type(self):  # type: () -> DataType
        """Return the data type of this variable"""
        return DataType(self.raw.getDataType())

    @property
    def name(self):  # type: () -> str
        """Return the name of this variable"""
        return self.raw.getName()

    @property
    def varnode(self):  # type: () -> Varnode
        """Return the Varnode that represents this variable"""
        return Varnode(self.raw.getRepresentative())

    @property
    def varnodes(self):  # type: () -> list[Varnode]
        """Return all Varnodes that represent this variable at some point"""
        return [Varnode(vn) for vn in self.raw.getInstances()]

    # NOTE(review): the four flags below are true as soon as ONE varnode has
    # the flag (any), while is_free requires ALL of them (all). The docstrings
    # now describe what the code does; confirm that the difference is intended.

    @property
    def is_unaffected(self):  # type: () -> bool
        """Return True if ANY varnode of this variable is unaffected."""
        return any(vn.is_unaffected for vn in self.varnodes)

    @property
    def is_persistent(self):  # type: () -> bool
        """Return True if ANY varnode of this variable is persistent."""
        return any(vn.is_persistent for vn in self.varnodes)

    @property
    def is_addr_tied(self):  # type: () -> bool
        """Return True if ANY varnode of this variable is addr tied."""
        return any(vn.is_addr_tied for vn in self.varnodes)

    @property
    def is_input(self):  # type: () -> bool
        """Return True if ANY varnode of this variable is input."""
        return any(vn.is_input for vn in self.varnodes)

    @property
    def is_free(self):  # type: () -> bool
        """Return True if ALL varnodes of this variable are free.

        Also True when the variable has no varnodes at all."""
        return all(vn.is_free for vn in self.varnodes)

data_type property

Return the data type of this variable

is_addr_tied property

Return True if ANY varnode of this variable is addr tied.

is_free property

Return True if ALL varnodes of this variable are free.

is_input property

Return True if ANY varnode of this variable is input.

is_persistent property

Return True if ANY varnode of this variable is persistent.

is_unaffected property

Return True if ANY varnode of this variable is unaffected.

name property

Return the name of this variable

size property

Return the size of this variable in bytes

symbol property

varnode property

Return the Varnode that represents this variable

varnodes property

Return all Varnodes that represent this variable at some point

rename(new_name)

Rename this high variable.

Source code in ghidralib.py
746
747
748
def rename(self, new_name):  # type: (str) -> None
    """Give this high variable a new name by renaming its high symbol.

    :param new_name: The new name of the variable."""
    high_symbol = self.symbol
    high_symbol.rename(new_name)

Instruction

Bases: GhidraWrapper, BodyTrait

Wraps a Ghidra Instruction object

Source code in ghidralib.py
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
class Instruction(GhidraWrapper, BodyTrait):
    """Wraps a Ghidra Instruction object."""

    @staticmethod
    def get(address):  # type: (JavaObject|str|Addr) -> Instruction|None
        """Get an instruction at the address, or None if not found.

        Note: This will return None if the instruction is not defined in Ghidra
        at the given address. If you want to disassemble an address, not necessarily
        defined in Ghidra, try :func:`disassemble_at` instead.

        :param address: The address of the instruction.
        :return: The instruction at the address, or None if not found."""
        if can_resolve(address):
            raw = getInstructionAt(resolve(address))
        else:
            # Not resolvable to an address: treat the argument as a raw object.
            raw = address
        if raw is None:
            return None
        return Instruction(raw)

    @staticmethod
    def all():  # type: () -> list[Instruction]
        """Get all instructions defined in the current program."""
        raw_instructions = Program.current().getListing().getInstructions(True)
        return [Instruction(raw) for raw in raw_instructions]

    @staticmethod
    def create(address):  # type: (Addr) -> Instruction
        """Create an instruction at the given address.

        Note: this will force ghidra to disassemble at the given address,
        and return the created instruction. If you want to actually change the
        instruction at the given address, use `assemble_at` instead.

        If you want to just create an instruction object, use `assemble` method.

        :param address: The address where instruction should be created.
        :return: The instruction wrapper for the resolved address.
        """
        addr = resolve(address)
        disassemble(addr)
        return Instruction(addr)

    @property
    def mnemonic(self):  # type: () -> str
        """Get the mnemonic of this instruction."""
        return self.raw.getMnemonicString()

    @property
    def next(self):  # type: () -> Instruction
        """Get the next instruction."""
        return Instruction(self.raw.getNext())

    @property
    def previous(self):  # type: () -> Instruction
        """Get the previous instruction."""
        return Instruction(self.raw.getPrevious())

    prev = previous

    @property
    def flows(self):  # type: () -> list[int]
        """Get a list of possible flows (next executed addresses).

        Note: this DOES NOT INCLUDE a fallthrough. A strange design decision IMO,
        but I'm being faithful to Ghidra API."""
        return [addr.getOffset() for addr in self.raw.getFlows()]

    @property
    def all_flows(self):  # type: () -> list[int]
        """Get a list of possible flows (next executed addresses).

        Note: this INCLUDES fallthrough."""
        fallthrough = self.fallthrough
        # Compare against None explicitly: a fallthrough at offset 0 is falsy
        # but still a valid address.
        return self.flows + ([fallthrough] if fallthrough is not None else [])

    @property
    def pcode(self):  # type: () -> list[PcodeOp]
        """Get a list of Pcode operations that this instruction was parsed to."""
        return [PcodeOp(raw) for raw in self.raw.getPcode()]

    @property
    def output_varnodes(self):  # type: () -> list[Varnode]
        """Get a list of output (LOW) varnodes for this instruction.

        Pcode operations without an output are skipped."""
        return [op.output for op in self.pcode if op.output]

    @property
    def input_varnodes(self):  # type: () -> list[Varnode]
        """Get a list of input (LOW) varnodes for this instruction.

        Inputs of every Pcode operation are included, also for operations
        that produce no output."""
        return [inp for op in self.pcode for inp in op.inputs]

    @property
    def high_pcode(self):  # type: () -> list[PcodeOp]
        """Get high Pcode for this instruction.

        WARNING: do not use this in a loop. Use Function.high_pcode instead."""
        return PcodeOp.get_high_pcode_at(self.address)

    @property
    def xrefs_from(self):  # type: () -> list[Reference]
        """Get a list of references from this instruction."""
        return [Reference(raw) for raw in self.raw.getReferencesFrom()]

    @property
    def xrefs_to(self):  # type: () -> list[Reference]
        """Get a list of references to this instruction."""
        return [Reference(raw) for raw in self.raw.getReferenceIteratorTo()]

    @property
    def bytes(self):  # type: () -> bytes
        """Get the bytes of this instruction."""
        return _bytes_from_bytelist(self.raw.getBytes())

    @property
    def length(self):  # type: () -> int
        """Get the length of this instruction in bytes."""
        return self.raw.getLength()

    def __len__(self):  # type: () -> int
        """Get the length of this instruction in bytes."""
        return self.length

    def __convert_operand(self, operand):  # type: (JavaObject) -> Operand
        """Convert a raw operand object to an Operand.

        Registers become their name, addresses their offset, scalars their
        value. Arrays are converted element by element; a one-element array
        is unwrapped to its only element.

        :param operand: raw operand object (or array of them).
        :raises RuntimeError: if the operand type is not recognised."""
        from ghidra.program.model.address import Address  # type: ignore
        from ghidra.program.model.scalar import Scalar  # type: ignore

        if isinstance(operand, GhRegister):
            return Operand(_python_str(operand.getName()))
        elif isinstance(operand, Address):
            return Operand(operand.getOffset())
        elif isinstance(operand, Scalar):
            return Operand(operand.getValue())
        elif _is_array(operand):
            operands = [self.__convert_operand(o).raw for o in operand]  # type: ignore
            if len(operands) == 1:
                # Unwrap the operands if there is only one operand
                return Operand(operands[0])
            return Operand(operands)  # type: ignore (we know there are no nested lists)
        else:
            raise RuntimeError("Don't know how to read operand {}".format(operand))

    def operand(self, ndx):  # type: (int) -> Operand
        """Get the nth operand of this instruction as an object.

        :param ndx: index of the operand."""
        operand = self.raw.getOpObjects(ndx)
        return self.__convert_operand(operand)

    @property
    def address(self):  # type: () -> int
        """Get the address of this instruction."""
        return self.raw.getAddress().getOffset()

    @property
    def operands(self):  # type: () -> list[Operand]
        """Return the operands of this instruction as Operand objects.

        Each Operand wraps a primitive value: constants and addresses are
        converted into integers, and for registers the name is used.

        If you know the operand type, read its .scalar, .register or .list
        property instead."""
        return [self.operand(i) for i in range(self.raw.getNumOperands())]

    @property
    def operand_values(self):  # type: () -> list[int|str|list[int|str]]
        """Return operands as primitive values (int or a string representation).

        This is equivalent to reading .operands and then reading .value
        on each operand."""
        return [operand.value for operand in self.operands]

    @property
    def flow_type(self):  # type: () -> FlowType
        """Get the flow type of this instruction.

        For example, for x86 JMP this will return RefType.UNCONDITIONAL_JUMP"""
        return FlowType(self.raw.getFlowType())

    # int opIndex, Address refAddr, RefType type, SourceType sourceType
    def add_operand_reference(
        self, op_ndx, ref_addr, ref_type, src_type=SourceType.USER_DEFINED
    ):  # type: (int, Addr, RefType, SourceType) -> None
        """Add a reference to an operand of this instruction.

        :param op_ndx: index of the operand the reference is attached to.
        :param ref_addr: address the reference points to.
        :param ref_type: wrapped reference type (its .raw is passed to Ghidra).
        :param src_type: source type passed to Ghidra unchanged."""
        self.raw.addOperandReference(op_ndx, resolve(ref_addr), ref_type.raw, src_type)

    @property
    def body(self):  # type: () -> AddressSet
        """Get the address range of this instruction."""
        return AddressSet.create(self.address, self.length)

    @property
    def has_fallthrough(self):  # type: () -> bool
        """Return true if this instruction has a fallthrough."""
        return self.raw.hasFallthrough()

    @property
    def fallthrough(self):  # type: () -> int|None
        """Get the fallthrough address (next address executed), if any.

        For normal instruction, this is the next instruction address. For jumps,
        this is None. Can be overridden by fallthrough override."""
        fall = self.raw.getFallThrough()
        if not fall:
            return None
        return fall.getOffset()

    def set_fallthrough_override(self, value):  # type: (Addr) -> None
        """Override the fallthrough address for this instruction.

        This sets the next instruction that will be executed after this
        instruction, assuming the current instruction doesn't jump anywhere.
        You can clear this with clear_fallthrough_override

        :param value: new fallthrough address"""
        self.raw.setFallThrough(resolve(value))

    def clear_fallthrough_override(self):  # type: () -> None
        """Clear the fallthrough override for this instruction."""
        self.raw.clearFallThroughOverride()

    @property
    def has_fallthrough_override(self):  # type: () -> bool
        """Return true if this instruction fallthrough was overridden."""
        return self.raw.isFallThroughOverridden()

    def write_jumptable(self, targets):  # type: (list[Addr]) -> None
        """Provide a list of addresses where this instruction may jump.

        Warning: For this to work, the instruction must be a part of a function.

        This is useful for fixing unrecognised switches, for example.

        Note: the new switch instruction will use all references of type
        COMPUTED_JUMP already defined for the instruction
        (maybe we should clear them first?).

        :param targets: addresses this instruction may jump to.
        :raises RuntimeError: if the instruction is not part of a function."""

        # Check the precondition first, so a failed call does not leave
        # freshly disassembled code and references behind.
        func = Function.get(self.address)
        if func is None:
            raise RuntimeError("Instruction is not part of a function")

        targets = [resolve(addr) for addr in targets]

        for dest in targets:
            disassemble(dest)
            self.add_operand_reference(0, dest, RefType.COMPUTED_JUMP)

        targetlist = ArrayList(targets)
        jumpTab = JumpTable(toAddr(self.address), targetlist, True)
        jumpTab.writeOverride(func.raw)
        CreateFunctionCmd.fixupFunctionBody(Program.current(), func.raw, getMonitor())

address property

Get the address of this instruction.

all_flows property

Get a set of possible flows (next executed addresses).

Note: this INCLUDES fallthrough.

body property

Get the address range of this instruction.

bytes property

Get the bytes of this instruction.

fallthrough property

Get the fallthrough address (next address executed), if any.

For a normal instruction, this is the next instruction address. For jumps, this is None. Can be overridden by a fallthrough override.

flow_type property

Get the flow type of this instruction.

For example, for x86 JMP this will return RefType.UNCONDITIONAL_JUMP

flows property

Get a set of possible flows (next executed addresses).

Note: this DOES NOT INCLUDE a fallthrough. A strange design decision IMO, but I'm being faithful to Ghidra API.

has_fallthrough property

Return true if this instruction has a fallthrough.

has_fallthrough_override property

Return true if this instruction's fallthrough was overridden.

high_pcode property

Get high Pcode for this instruction.

WARNING: do not use this in a loop. Use Function.high_pcode instead.

input_varnodes property

Get a list of input (LOW) varnodes for this instruction.

length property

Get the length of this instruction in bytes.

mnemonic property

Get the mnemonic of this instruction.

next property

Get the next instruction.

operand_values property

Return operands as primitive values (int or a string representation).

This is equivalent to reading .operands and then reading .value on each operand (both are properties, not methods).

operands property

Return the operands of this instruction as Operand objects.

More specifically, each Operand wraps a primitive value: constants and addresses are converted into integers, and for registers the name is used.

If you know the operand type, read its .scalar, .register or .list property instead.

output_varnodes property

Get a list of output (LOW) varnodes for this instruction.

pcode property

Get a list of Pcode operations that this instruction was parsed to

prev = previous class-attribute instance-attribute

previous property

Get the previous instruction.

xrefs_from property

Get a list of references from this instruction.

xrefs_to property

Get a list of references to this instruction.

__convert_operand(operand)

Convert an operand to a scalar or address.

Source code in ghidralib.py
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
def __convert_operand(self, operand):  # type: (JavaObject) -> Operand
    """Turn a raw operand object into an Operand.

    Registers are represented by their name, addresses by their offset and
    scalars by their value. An array is converted element by element, and
    an array holding exactly one element collapses to that element.

    :param operand: raw operand object (or array of them).
    :raises RuntimeError: if the operand type is not recognised."""
    from ghidra.program.model.address import Address  # type: ignore
    from ghidra.program.model.scalar import Scalar  # type: ignore

    if isinstance(operand, GhRegister):
        return Operand(_python_str(operand.getName()))
    if isinstance(operand, Address):
        return Operand(operand.getOffset())
    if isinstance(operand, Scalar):
        return Operand(operand.getValue())
    if not _is_array(operand):
        raise RuntimeError("Don't know how to read operand {}".format(operand))

    # Array case: convert every element and keep only the primitive values.
    parts = []
    for item in operand:  # type: ignore
        parts.append(self.__convert_operand(item).raw)
    if len(parts) == 1:
        return Operand(parts[0])
    return Operand(parts)  # type: ignore (we know there are no nested lists)

__len__()

Get the length of this instruction in bytes.

Source code in ghidralib.py
1698
1699
1700
def __len__(self):  # type: () -> int
    """Support len(): report the size of this instruction in bytes."""
    size_in_bytes = self.length
    return size_in_bytes

add_operand_reference(op_ndx, ref_addr, ref_type, src_type=SourceType.USER_DEFINED)

Add a reference to an operand of this instruction.

Source code in ghidralib.py
1758
1759
1760
1761
1762
def add_operand_reference(
    self, op_ndx, ref_addr, ref_type, src_type=SourceType.USER_DEFINED
):  # type: (int, Addr, RefType, SourceType) -> None
    """Attach a reference to one operand of this instruction.

    :param op_ndx: index of the operand the reference is attached to.
    :param ref_addr: address the reference points to (resolved first).
    :param ref_type: wrapped reference type; its .raw is handed to Ghidra.
    :param src_type: source type, handed to Ghidra unchanged."""
    target = resolve(ref_addr)
    raw_ref_type = ref_type.raw
    self.raw.addOperandReference(op_ndx, target, raw_ref_type, src_type)

all() staticmethod

Get all instructions defined in the current program.

Source code in ghidralib.py
1601
1602
1603
1604
1605
@staticmethod
def all():  # type: () -> list[Instruction]
    """Collect every instruction defined in the current program.

    :return: list of wrapped instructions, in the order the listing yields them."""
    listing = Program.current().getListing()
    wrapped = []
    for raw in listing.getInstructions(True):
        wrapped.append(Instruction(raw))
    return wrapped

clear_fallthrough_override()

This clears the fallthrough override for this instruction.

Alias for del self.fallthrough_override

Source code in ghidralib.py
1795
1796
1797
1798
1799
def clear_fallthrough_override(self):  # type: () -> None
    """Remove the fallthrough override from this instruction."""
    raw_instruction = self.raw
    raw_instruction.clearFallThroughOverride()

create(address) staticmethod

Create an instruction at the given address.

Note: this will force ghidra to disassemble at the given address, and return the created instruction. If you want to actually change the instruction at the given address, use assemble_at instead.

If you want to just create an instruction object, use assemble method.

Parameters:
  • address

    The address where instruction should be created.

Source code in ghidralib.py
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
@staticmethod
def create(address):  # type: (Addr) -> Instruction
    """Disassemble at the given address and return the resulting instruction.

    Note: this forces ghidra to disassemble at the given address. To change
    the instruction stored at an address, use `assemble_at` instead.

    To only build an instruction object, use the `assemble` method.

    :param address: The address where the instruction should be created.
    :return: The instruction wrapper for the resolved address.
    """
    target = resolve(address)
    disassemble(target)
    created = Instruction(target)
    return created

get(address) staticmethod

Get an instruction at the address, or None if not found.

Note: This will return None if the instruction is not defined in Ghidra at the given address. If you want to disassemble an address, not necessarily defined in Ghidra, try :func:disassemble_at instead.

Parameters:
  • address

    The address of the instruction.

Returns:
  • The instruction at the address, or None if not found.

Source code in ghidralib.py
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
@staticmethod
def get(address):  # type: (JavaObject|str|Addr) -> Instruction|None
    """Look up the instruction at an address.

    Note: the result is None when Ghidra has no instruction defined at the
    given address. To disassemble an address that is not necessarily
    defined in Ghidra, try :func:`disassemble_at` instead.

    :param address: The address of the instruction. A value that cannot be
        resolved to an address is treated as a raw object and wrapped directly.
    :return: The instruction at the address, or None if not found."""
    raw = getInstructionAt(resolve(address)) if can_resolve(address) else address
    return None if raw is None else Instruction(raw)

operand(ndx)

Get the nth operand of this instruction as an object.

Source code in ghidralib.py
1722
1723
1724
1725
def operand(self, ndx):  # type: (int) -> Operand
    """Fetch the operand at position ``ndx`` and convert it to an object.

    :param ndx: index of the operand."""
    return self.__convert_operand(self.raw.getOpObjects(ndx))

set_fallthrough_override(value)

Override the fallthrough address for this instruction.

This sets the next instruction that will be executed after this instruction, assuming the current instruction doesn't jump anywhere. You can clear this with clear_fallthrough_override

Parameters:
  • value

    new fallthrough address

Source code in ghidralib.py
1785
1786
1787
1788
1789
1790
1791
1792
1793
def set_fallthrough_override(self, value):  # type: (Addr) -> None
    """Set a fallthrough override on this instruction.

    The override names the instruction executed after this one, assuming
    this instruction does not jump anywhere. Undo it with
    clear_fallthrough_override.

    :param value: new fallthrough address"""
    new_fallthrough = resolve(value)
    self.raw.setFallThrough(new_fallthrough)

write_jumptable(targets)

Provide a list of addresses where this instruction may jump.

Warning: For this to work, the instruction must be a part of a function.

This is useful for fixing unrecognised switches, for example.

Note: the new switch instruction will use all references of type COMPUTED_JUMP already defined for the instruction (maybe we should clear them first?).

Source code in ghidralib.py
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
def write_jumptable(self, targets):  # type: (list[Addr]) -> None
    """Provide a list of addresses where this instruction may jump.

    Warning: For this to work, the instruction must be a part of a function.

    This is useful for fixing unrecognised switches, for example.

    Note: the new switch instruction will use all references of type
    COMPUTED_JUMP already defined for the instruction
    (maybe we should clear them first?).

    :param targets: addresses this instruction may jump to.
    :raises RuntimeError: if the instruction is not part of a function."""

    # Check the precondition first, so a failed call does not leave
    # freshly disassembled code and references behind.
    func = Function.get(self.address)
    if func is None:
        raise RuntimeError("Instruction is not part of a function")

    targets = [resolve(addr) for addr in targets]

    for dest in targets:
        disassemble(dest)
        self.add_operand_reference(0, dest, RefType.COMPUTED_JUMP)

    targetlist = ArrayList(targets)
    jumpTab = JumpTable(toAddr(self.address), targetlist, True)
    jumpTab.writeOverride(func.raw)
    CreateFunctionCmd.fixupFunctionBody(Program.current(), func.raw, getMonitor())

JavaObject

A fake class, used for static type hints.

Source code in ghidralib.py
199
200
201
202
203
204
class JavaObject:
    """Placeholder type that only exists for static type hints."""

    def __getattribute__(self, name):  # type: (str) -> Any
        """Accept any attribute name so that mypy is satisfied.

        Every lookup evaluates to None."""
        return None

__getattribute__(name)

This attribute exists to make mypy happy.

Source code in ghidralib.py
202
203
204
def __getattribute__(self, name):  # type: (str) -> Any
    """Accept any attribute name so that mypy is satisfied.

    Every lookup evaluates to None."""
    return None

MemoryBlock

Bases: GhidraWrapper, BodyTrait

A Ghidra wrapper for a Ghidra MemoryBlock

Source code in ghidralib.py
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
class MemoryBlock(GhidraWrapper, BodyTrait):
    """A Ghidra wrapper for a Ghidra MemoryBlock"""

    @staticmethod
    def get(raw_or_name):  # type: (JavaObject|str|Addr) -> MemoryBlock|None
        """Gets a MemoryBlock by name or containing the given address.

        Note: for a string argument, this will try to get memoryblock by name, and
        if it fails, it will fall back to the regular behaviour of "resolve the symbol
        to the address, and get element by address".

        :param raw_or_name: name or address of MemoryBlock to get
        :return: the MemoryBlock, or None if not found
        """
        memory = Program.current().getMemory()
        if isinstance(raw_or_name, str):
            raw = memory.getBlock(raw_or_name)
            if raw is not None:
                return MemoryBlock(raw)
        addr = try_resolve(raw_or_name)
        if addr is None:
            return None
        raw = memory.getBlock(addr)
        if raw is None:
            # The address resolved, but no memory block contains it.
            return None
        return MemoryBlock(raw)

    @staticmethod
    def all():  # type: () -> list[MemoryBlock]
        """Get all MemoryBlocks in the current program"""
        raw_blocks = Program.current().getMemory().getBlocks()
        return [MemoryBlock(raw) for raw in raw_blocks]

    @property
    def comment(self):  # type: () -> str
        """Get the comment associated with this MemoryBlock"""
        return self.raw.getComment()

    @property
    def start(self):  # type: () -> int
        """Get the first address of this MemoryBlock"""
        return self.raw.getStart().getOffset()

    @property
    def name(self):  # type: () -> str
        """Get the name of this MemoryBlock"""
        return self.raw.getName()

    address = start

    @property
    def end(self):  # type: () -> int
        """Get the end address of this MemoryBlock, as reported by Ghidra"""
        return self.raw.getEnd().getOffset()

    @property
    def size(self):  # type: () -> int
        """Get the size of this MemoryBlock"""
        return int(self.raw.getSize())

    length = size

    @property
    def body(self):  # type: () -> AddressSet
        """Get the address range of this MemoryBlock."""
        return AddressSet.create(self.address, self.length)

    @property
    def bytes(self):  # type: () -> bytes
        """Get the bytes of this MemoryBlock."""
        return read_bytes(self.address, self.length)

address = start class-attribute instance-attribute

body property

Get the address range of this MemoryBlock.

bytes property

Get the bytes of this MemoryBlock.

comment property

Get the comment associated with this MemoryBlock

end property

length = size class-attribute instance-attribute

name property

Get the name of this MemoryBlock

size property

Get the size of this MemoryBlock

start property

Get the first address of this MemoryBlock

all() staticmethod

Get all MemoryBlocks in the current program

Source code in ghidralib.py
3727
3728
3729
3730
3731
@staticmethod
def all():  # type: () -> list[MemoryBlock]
    """Collect every MemoryBlock of the current program.

    :return: list of wrapped memory blocks, in the order Ghidra reports them."""
    memory = Program.current().getMemory()
    wrapped = []
    for raw in memory.getBlocks():
        wrapped.append(MemoryBlock(raw))
    return wrapped

get(raw_or_name) staticmethod

Gets a MemoryBlock by name or containing the given address.

Note: for a string argument, this will first try to get the memory block by name, and if that fails, it will fall back to the regular behaviour of "resolve the symbol to the address, and get the element by address".

Parameters:
  • raw_or_name

    name or address of MemoryBlock to get

Returns:
  • the MemoryBlock, or None if not found

Source code in ghidralib.py
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
@staticmethod
def get(raw_or_name):  # type: (JavaObject|str|Addr) -> MemoryBlock|None
    """Gets a MemoryBlock by name or containing the given address.

    Note: for a string argument, this will try to get memoryblock by name, and
    if it fails, it will fall back to the regular behaviour of "resolve the symbol
    to the address, and get element by address".

    :param raw_or_name: name or address of MemoryBlock to get
    :return: the MemoryBlock, or None if not found
    """
    memory = Program.current().getMemory()
    if isinstance(raw_or_name, str):
        raw = memory.getBlock(raw_or_name)
        if raw is not None:
            return MemoryBlock(raw)
    addr = try_resolve(raw_or_name)
    if addr is None:
        return None
    raw = memory.getBlock(addr)
    if raw is None:
        # The address resolved, but no memory block contains it.
        return None
    return MemoryBlock(raw)

Operand

Operand helper for instruction, may be a register, const or a list

Source code in ghidralib.py
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
class Operand:
    """Operand helper for instruction, may be a register, const or a list"""

    def __init__(self, operand):  # type: (str|int|list[int|str]) -> None
        """Store the primitive operand value.

        :param operand: register name, scalar value, or a list of those."""
        self.raw = operand

    @property
    def is_register(self):  # type: () -> bool
        """Return True if this operand is a register."""
        return isinstance(self.raw, Str)

    @property
    def is_scalar(self):  # type: () -> bool
        """Return True if this operand is a scalar."""
        return isinstance(self.raw, (int, long))

    @property
    def is_list(self):  # type: () -> bool
        """Return True if this operand is a list."""
        return isinstance(self.raw, list)

    @property
    def register(self):  # type: () -> str
        """Gets this operand value as a register name

        :raises RuntimeError: if this operand is not a register."""
        # Use the same string types as is_register, so both always agree.
        if not isinstance(self.raw, Str):
            raise RuntimeError("Operand {} is not a register".format(self.raw))
        return self.raw

    @property
    def scalar(self):  # type: () -> int
        """Gets this operand value as a scalar

        :raises RuntimeError: if this operand is not a scalar."""
        if not isinstance(self.raw, (int, long)):
            raise RuntimeError("Operand {} is not a scalar".format(self.raw))
        return self.raw

    @property
    def list(self):  # type: () -> list
        """Gets this operand value as a list

        :raises RuntimeError: if this operand is not a list."""
        if not isinstance(self.raw, list):
            raise RuntimeError("Operand {} is not a list".format(self.raw))
        return self.raw

    @property
    def value(self):  # type: () -> str|int|list[int|str]
        """Return internal representation of this operand - string, int or a list"""
        return self.raw

is_list property

Return True if this operand is a list.

is_register property

Return True if this operand is a register.

is_scalar property

Return True if this operand is a scalar.

list property

Gets this operand value as a list

raw = operand instance-attribute

register property

Gets this operand value as a register name

scalar property

Gets this operand value as a scalar

value property

Return internal representation of this operand - string, int or a list

__init__(operand)

Source code in ghidralib.py
1535
1536
def __init__(self, operand):  # type: (str|int|list[int|str]) -> None
    """Remember the primitive operand value on this object as ``raw``."""
    setattr(self, "raw", operand)

Parameter

Bases: Variable

Wraps a Ghidra Parameter object.

Source code in ghidralib.py
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
class Parameter(Variable):
    """Wrapper around a Ghidra Parameter object."""

    @property
    def ordinal(self):  # type: () -> int
        """The ordinal of this parameter, as reported by Ghidra."""
        position = self.raw.getOrdinal()
        return position

    @property
    def formal_data_type(self):  # type: () -> DataType
        """The formal data type of this parameter, wrapped as a DataType."""
        raw_type = self.raw.getFormalDataType()
        return DataType(raw_type)

formal_data_type property

Returns the formal data type of this parameter.

ordinal property

Returns the ordinal of this parameter.

PcodeBlock

Bases: GhidraWrapper

Source code in ghidralib.py
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
class PcodeBlock(GhidraWrapper):
    @property
    def outgoing_edges(self):  # type: () -> list[PcodeBlock]
        """Blocks reported by getOut(), one per outgoing slot, as wrapped nodes."""
        successors = []
        for index in range(self.raw.getOutSize()):
            successors.append(_pcode_node(self.raw.getOut(index)))
        return successors

    @property
    def incoming_edges(self):  # type: () -> list[PcodeBlock]
        """Blocks reported by getIn(), one per incoming slot, as wrapped nodes."""
        predecessors = []
        for index in range(self.raw.getInSize()):
            predecessors.append(_pcode_node(self.raw.getIn(index)))
        return predecessors

    @property
    def has_children(self):  # type: () -> bool
        """Tell whether this block has children and can be iterated over.

        Ghidra's code dispatches on the Java type with isinstance() checks,
        so this property exists to mirror that: it is True exactly for
        instances of the Java BlockGraph."""
        raw_block = self.raw
        return isinstance(raw_block, GhBlockGraph)

    @property
    def pcode(self):  # type: () -> list[PcodeOp]
        """Pcode operations obtained from this block's getRef().getIterator()."""
        wrapped_ops = []
        for raw in collect_iterator(self.raw.getRef().getIterator()):
            wrapped_ops.append(PcodeOp(raw))
        return wrapped_ops

has_children property

Returns True if this block has any children and can be iterated over.

This function is necessary because Ghidra's code uses isinstance() checks to dispatch types. We return true for instances of Java BlockGraph.

incoming_edges property

outgoing_edges property

pcode property

PcodeOp

Bases: GhidraWrapper

Pcode is Ghidra's low-level intermediate language. Instructions from any processor are transformed into Pcode before any analysis takes place. There is a finite number of possible operations.

While Ghidra doesn't define "High Pcode", this library refers to analysed Pcode as "High Pcode". While theoretically still the same object, Pcode is transformed significantly, for example before function parameter analysis "CALL" opcodes have no inputs.

Source code in ghidralib.py
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
class PcodeOp(GhidraWrapper):
    """Wrapper around a single Ghidra Pcode operation.

    Pcode is Ghidra's low-level intermediate language: instructions from any
    processor are transformed into Pcode before any analysis takes place, and
    the set of possible operations is finite.

    Ghidra itself does not define "High Pcode"; this library uses that name
    for analysed Pcode. It is theoretically still the same kind of object, but
    analysis transforms it significantly - for example, "CALL" opcodes have
    no inputs before function parameter analysis.
    """

    # Opcode numbers; compare them against the value of the `opcode` property.
    UNIMPLEMENTED = 0
    COPY = 1
    LOAD = 2
    STORE = 3
    BRANCH = 4
    CBRANCH = 5
    BRANCHIND = 6
    CALL = 7
    CALLIND = 8
    CALLOTHER = 9
    RETURN = 10
    INT_EQUAL = 11
    INT_NOTEQUAL = 12
    INT_SLESS = 13
    INT_SLESSEQUAL = 14
    INT_LESS = 15
    INT_LESSEQUAL = 16
    INT_ZEXT = 17
    INT_SEXT = 18
    INT_ADD = 19
    INT_SUB = 20
    INT_CARRY = 21
    INT_SCARRY = 22
    INT_SBORROW = 23
    INT_2COMP = 24
    INT_NEGATE = 25
    INT_XOR = 26
    INT_AND = 27
    INT_OR = 28
    INT_LEFT = 29
    INT_RIGHT = 30
    INT_SRIGHT = 31
    INT_MULT = 32
    INT_DIV = 33
    INT_SDIV = 34
    INT_REM = 35
    INT_SREM = 36
    BOOL_NEGATE = 37
    BOOL_XOR = 38
    BOOL_AND = 39
    BOOL_OR = 40
    FLOAT_EQUAL = 41
    FLOAT_NOTEQUAL = 42
    FLOAT_LESS = 43
    FLOAT_LESSEQUAL = 44
    # Slot 45 is unused
    FLOAT_NAN = 46
    FLOAT_ADD = 47
    FLOAT_DIV = 48
    FLOAT_MULT = 49
    FLOAT_SUB = 50
    FLOAT_NEG = 51
    FLOAT_ABS = 52
    FLOAT_SQRT = 53
    FLOAT_INT2FLOAT = 54
    FLOAT_FLOAT2FLOAT = 55
    FLOAT_TRUNC = 56
    FLOAT_CEIL = 57
    FLOAT_FLOOR = 58
    FLOAT_ROUND = 59
    MULTIEQUAL = 60
    INDIRECT = 61
    PIECE = 62
    SUBPIECE = 63
    CAST = 64
    PTRADD = 65
    PTRSUB = 66
    SEGMENTOP = 67
    CPOOLREF = 68
    NEW = 69
    INSERT = 70
    EXTRACT = 71
    POPCOUNT = 72
    LZCOUNT = 73
    PCODE_MAX = 74

    @staticmethod
    def get_high_pcode_at(address):  # type: (Addr) -> list[PcodeOp]
        """Return the high pcode of the instruction located at `address`.

        Shorthand for Function(address).get_high_pcode_at(address)."""
        function = Function(address)
        return function.get_high_pcode_at(address)

    @property
    def address(self):  # type: () -> int
        """Offset of the program address where this operation is located."""
        target = self.raw.getSeqnum().getTarget()
        return target.getOffset()

    @property
    def opcode(self):  # type: () -> int
        """Numeric opcode reported by the wrapped operation (getOpcode())."""
        return self.raw.getOpcode()

    @property
    def mnemonic(self):  # type: () -> str
        """Textual name of the operation, for example "COPY"."""
        return self.raw.getMnemonic()

    @property
    def inputs(self):  # type: () -> list[Varnode]
        """Inputs of this operation, each wrapped in a Varnode."""
        wrapped = []
        for raw_input in self.raw.getInputs():
            wrapped.append(Varnode(raw_input))
        return wrapped

    @property
    def inputs_simple(self):  # type: () -> list[int|str]
        """Inputs reduced to primitive values (ints or strings).

        Constants and addresses become integers, registers are given by
        name, and unique and hash varnodes use an ad-hoc string encoding
        (uniq:ID or hash:ID, where ID is the varnode identifier).
        """
        return [operand.simple for operand in self.inputs]

    @property
    def output(self):  # type: () -> Varnode|None
        """Output of this operation as a Varnode, or None if there is none."""
        if self.raw.getOutput() is not None:
            return Varnode(self.raw.getOutput())
        return None

    @property
    def result(self):  # type: () -> int|None
        """Attempt to evaluate this operation to a constant value.

        Support is intentionally minimal: only COPY is handled, yielding the
        value of its first input. Most opcodes are not implemented, mainly
        because Ghidra most likely already contains code that does this.

        :return: Result of this operation, or None if can't be evaluated as const."""
        if self.opcode != PcodeOp.COPY:
            return None
        return self.inputs[0].value

BOOL_AND = 39 class-attribute instance-attribute

BOOL_NEGATE = 37 class-attribute instance-attribute

BOOL_OR = 40 class-attribute instance-attribute

BOOL_XOR = 38 class-attribute instance-attribute

BRANCH = 4 class-attribute instance-attribute

BRANCHIND = 6 class-attribute instance-attribute

CALL = 7 class-attribute instance-attribute

CALLIND = 8 class-attribute instance-attribute

CALLOTHER = 9 class-attribute instance-attribute

CAST = 64 class-attribute instance-attribute

CBRANCH = 5 class-attribute instance-attribute

COPY = 1 class-attribute instance-attribute

CPOOLREF = 68 class-attribute instance-attribute

EXTRACT = 71 class-attribute instance-attribute

FLOAT_ABS = 52 class-attribute instance-attribute

FLOAT_ADD = 47 class-attribute instance-attribute

FLOAT_CEIL = 57 class-attribute instance-attribute

FLOAT_DIV = 48 class-attribute instance-attribute

FLOAT_EQUAL = 41 class-attribute instance-attribute

FLOAT_FLOAT2FLOAT = 55 class-attribute instance-attribute

FLOAT_FLOOR = 58 class-attribute instance-attribute

FLOAT_INT2FLOAT = 54 class-attribute instance-attribute

FLOAT_LESS = 43 class-attribute instance-attribute

FLOAT_LESSEQUAL = 44 class-attribute instance-attribute

FLOAT_MULT = 49 class-attribute instance-attribute

FLOAT_NAN = 46 class-attribute instance-attribute

FLOAT_NEG = 51 class-attribute instance-attribute

FLOAT_NOTEQUAL = 42 class-attribute instance-attribute

FLOAT_ROUND = 59 class-attribute instance-attribute

FLOAT_SQRT = 53 class-attribute instance-attribute

FLOAT_SUB = 50 class-attribute instance-attribute

FLOAT_TRUNC = 56 class-attribute instance-attribute

INDIRECT = 61 class-attribute instance-attribute

INSERT = 70 class-attribute instance-attribute

INT_2COMP = 24 class-attribute instance-attribute

INT_ADD = 19 class-attribute instance-attribute

INT_AND = 27 class-attribute instance-attribute

INT_CARRY = 21 class-attribute instance-attribute

INT_DIV = 33 class-attribute instance-attribute

INT_EQUAL = 11 class-attribute instance-attribute

INT_LEFT = 29 class-attribute instance-attribute

INT_LESS = 15 class-attribute instance-attribute

INT_LESSEQUAL = 16 class-attribute instance-attribute

INT_MULT = 32 class-attribute instance-attribute

INT_NEGATE = 25 class-attribute instance-attribute

INT_NOTEQUAL = 12 class-attribute instance-attribute

INT_OR = 28 class-attribute instance-attribute

INT_REM = 35 class-attribute instance-attribute

INT_RIGHT = 30 class-attribute instance-attribute

INT_SBORROW = 23 class-attribute instance-attribute

INT_SCARRY = 22 class-attribute instance-attribute

INT_SDIV = 34 class-attribute instance-attribute

INT_SEXT = 18 class-attribute instance-attribute

INT_SLESS = 13 class-attribute instance-attribute

INT_SLESSEQUAL = 14 class-attribute instance-attribute

INT_SREM = 36 class-attribute instance-attribute

INT_SRIGHT = 31 class-attribute instance-attribute

INT_SUB = 20 class-attribute instance-attribute

INT_XOR = 26 class-attribute instance-attribute

INT_ZEXT = 17 class-attribute instance-attribute

LOAD = 2 class-attribute instance-attribute

LZCOUNT = 73 class-attribute instance-attribute

MULTIEQUAL = 60 class-attribute instance-attribute

NEW = 69 class-attribute instance-attribute

PCODE_MAX = 74 class-attribute instance-attribute

PIECE = 62 class-attribute instance-attribute

POPCOUNT = 72 class-attribute instance-attribute

PTRADD = 65 class-attribute instance-attribute

PTRSUB = 66 class-attribute instance-attribute

RETURN = 10 class-attribute instance-attribute

SEGMENTOP = 67 class-attribute instance-attribute

STORE = 3 class-attribute instance-attribute

SUBPIECE = 63 class-attribute instance-attribute

UNIMPLEMENTED = 0 class-attribute instance-attribute

address property

Get an address in the program where this instruction is located

inputs property

inputs_simple property

Return inputs as primitive values (int or a string representation).

More specifically, constants and addresses are converted into integers, registers are returned by name, and unique and hash varnodes use an ad-hoc string encoding (hash:ID or uniq:ID, where ID is the varnode identifier).

mnemonic property

Get a string representation of the operation, for example "COPY"

opcode property

output property

result property

Try to evaluate the pcode operation to a constant value.

Right now this is very poor and doesn't try to implement most of the opcodes. Mostly because I suspect I'm reinventing the wheel, and there is code to do this already in Ghidra.

Returns:
  • Result of this operation, or None if can't be evaluated as const.

get_high_pcode_at(address) staticmethod

Get a high pcode for the instruction at a specified address

Convenience wrapper for Function(address).get_high_pcode_at(address).

Source code in ghidralib.py
1132
1133
1134
1135
1136
1137
@staticmethod
def get_high_pcode_at(address):  # type: (Addr) -> list[PcodeOp]
    """Return the high pcode of the instruction located at `address`.

    Shorthand for Function(address).get_high_pcode_at(address)."""
    function = Function(address)
    return function.get_high_pcode_at(address)

Program

Bases: GhidraWrapper

A static class that represents the current program

Source code in ghidralib.py
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
class Program(GhidraWrapper):
    """Static-only class representing the current program.

    All methods are staticmethods, so they are called on the class itself."""

    @staticmethod
    def create_data(address, datatype):  # type: (Addr, DataT) -> None
        """Define data of type `datatype` at `address`, replacing an old type.

        The data is created directly first; if that attempt raises, the
        listing at `address` is cleared and the creation is retried once.

        :param address: address of the data.
        :param datatype: datatype to use for the data at `address`."""
        wrapped_type = DataType(datatype)
        try:
            createData(resolve(address), unwrap(wrapped_type))
        except:
            # First attempt failed: drop what is defined here, then retry.
            clearListing(resolve(address))
            createData(resolve(address), unwrap(wrapped_type))

    @staticmethod
    def location():  # type: () -> int
        """Return the current location in the program as an address offset.

            >>> Program.location()
            0x1000

        :return: the current location in the program
        """
        current = getState().getCurrentLocation()
        return current.getAddress().getOffset()

    @staticmethod
    def call_graph():  # type: () -> Graph[Function]
        """Build the call graph of this program.

        Nodes are all functions; edges follow each function's `called`."""
        return Graph.construct(Function.all(), lambda func: func.called)

    @staticmethod
    def control_flow():  # type: () -> Graph[BasicBlock]
        """Build a graph of the control flow of the whole program.

        Nodes are all basic blocks; edges follow each block's `destinations`.

        Warning: This graph may be big, so don't try to display it."""
        return Graph.construct(BasicBlock.all(), lambda block: block.destinations)

    @staticmethod
    def basicblocks():  # type: () -> list[BasicBlock]
        """Return every basic block defined in the program."""
        blocks = BasicBlock.all()
        return blocks

    @staticmethod
    def memory_blocks():  # type: () -> list[MemoryBlock]
        """Return the memory blocks defined for the current program."""
        blocks = MemoryBlock.all()
        return blocks

    @staticmethod
    def functions():  # type: () -> list[Function]
        """Return every function defined in the program."""
        defined = Function.all()
        return defined

    @staticmethod
    def instructions():  # type: () -> list[Instruction]
        """Return every instruction defined in the program."""
        defined = Instruction.all()
        return defined

    @staticmethod
    def body():  # type: () -> AddressSet
        """Return the set of all addresses of the program.

        This is the body of the program's global namespace, wrapped in an
        AddressSet."""
        namespaces = Program.current().getNamespaceManager()
        whole_program = namespaces.getGlobalNamespace().getBody()
        return AddressSet(whole_program)

    @staticmethod
    def current():  # type: () -> JavaObject
        """Return the current program; equivalent to getCurrentProgram().

        Use this method rather than currentProgram, because the latter
        won't work well if the user is using multiple programs at the same
        time (for example, many tabs in the same tool)."""
        program = getCurrentProgram()
        return program

    @staticmethod
    def analyze():  # type: () -> None
        """Analyze changes. This will block while autoanalysis takes place.

        Run this when you have made changes that must be analyzed before the
        rest of the script can proceed."""
        program = Program.current()
        analyzeChanges(program)

analyze() staticmethod

Analyze changes. This will block while autoanalysis takes place.

Run this when you have made changes that must be analyzed before the rest of the script can proceed.

Source code in ghidralib.py
3848
3849
3850
3851
3852
3853
3854
@staticmethod
def analyze():  # type: () -> None
    """Analyze changes. This will block while autoanalysis takes place.

    Run this when you have made changes that must be analyzed before the
    rest of the script can proceed."""
    program = Program.current()
    analyzeChanges(program)

basicblocks() staticmethod

Get all the basic blocks defined in the program.

Source code in ghidralib.py
3813
3814
3815
3816
@staticmethod
def basicblocks():  # type: () -> list[BasicBlock]
    """Return every basic block defined in the program."""
    blocks = BasicBlock.all()
    return blocks

body() staticmethod

Get the set of all addresses of the program.

Source code in ghidralib.py
3833
3834
3835
3836
3837
@staticmethod
def body():  # type: () -> AddressSet
    """Return the set of all addresses of the program.

    This is the body of the program's global namespace, wrapped in an
    AddressSet."""
    namespaces = Program.current().getNamespaceManager()
    whole_program = namespaces.getGlobalNamespace().getBody()
    return AddressSet(whole_program)

call_graph() staticmethod

Get the call graph for this program.

Source code in ghidralib.py
3801
3802
3803
3804
@staticmethod
def call_graph():  # type: () -> Graph[Function]
    """Build the call graph of this program.

    Nodes are all functions; edges follow each function's `called`."""
    return Graph.construct(Function.all(), lambda func: func.called)

control_flow() staticmethod

Get a graph representing the whole program control flow.

Warning: This graph may be big, so don't try to display it.

Source code in ghidralib.py
3806
3807
3808
3809
3810
3811
@staticmethod
def control_flow():  # type: () -> Graph[BasicBlock]
    """Build a graph of the control flow of the whole program.

    Nodes are all basic blocks; edges follow each block's `destinations`.

    Warning: This graph may be big, so don't try to display it."""
    return Graph.construct(BasicBlock.all(), lambda block: block.destinations)

create_data(address, datatype) staticmethod

Force the type of the data defined at the given address to datatype.

This function will clear the old type if it already has one

Parameters:
  • address

    address of the data.

  • datatype

    datatype to use for the data at address.

Source code in ghidralib.py
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
@staticmethod
def create_data(address, datatype):  # type: (Addr, DataT) -> None
    """Define data of type `datatype` at `address`, replacing an old type.

    The data is created directly first; if that attempt raises, the
    listing at `address` is cleared and the creation is retried once.

    :param address: address of the data.
    :param datatype: datatype to use for the data at `address`."""
    wrapped_type = DataType(datatype)
    try:
        createData(resolve(address), unwrap(wrapped_type))
    except:
        # First attempt failed: drop what is defined here, then retry.
        clearListing(resolve(address))
        createData(resolve(address), unwrap(wrapped_type))

current() staticmethod

Get the current program. Equivalent to getCurrentProgram()

This method must be used instead of currentProgram, because the latter won't work well if user is using multiple programs at the same time (for example, many tabs in the same tool).

Source code in ghidralib.py
3839
3840
3841
3842
3843
3844
3845
3846
@staticmethod
def current():  # type: () -> JavaObject
    """Return the current program; equivalent to getCurrentProgram().

    Use this method rather than currentProgram, because the latter
    won't work well if the user is using multiple programs at the same
    time (for example, many tabs in the same tool)."""
    program = getCurrentProgram()
    return program

functions() staticmethod

Get all the functions defined in the program.

Source code in ghidralib.py
3823
3824
3825
3826
@staticmethod
def functions():  # type: () -> list[Function]
    """Return every function defined in the program."""
    defined = Function.all()
    return defined

instructions() staticmethod

Get all the instructions defined in the program.

Source code in ghidralib.py
3828
3829
3830
3831
@staticmethod
def instructions():  # type: () -> list[Instruction]
    """Return every instruction defined in the program."""
    defined = Instruction.all()
    return defined

location() staticmethod

Get the current location in the program.

>>> Program.location()
0x1000
Returns:
  • the current location in the program

Source code in ghidralib.py
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
@staticmethod
def location():  # type: () -> int
    """Return the current location in the program as an address offset.

        >>> Program.location()
        0x1000

    :return: the current location in the program
    """
    current = getState().getCurrentLocation()
    return current.getAddress().getOffset()

memory_blocks() staticmethod

Get memory blocks defined for the current program.

Source code in ghidralib.py
3818
3819
3820
3821
@staticmethod
def memory_blocks():  # type: () -> list[MemoryBlock]
    """Return the memory blocks defined for the current program."""
    blocks = MemoryBlock.all()
    return blocks

RefType

Bases: GhidraWrapper

Source code in ghidralib.py
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
class RefType(GhidraWrapper):
    """Wraps a Ghidra reference type object (as produced by Reference.reftype).

    Each property forwards to the matching method of the wrapped raw object."""

    # Named reference types. The values assigned here are placeholders only;
    # presumably they are swapped for real instances once the class exists
    # - TODO confirm against the code following this class.
    INVALID = _reftype_placeholder()
    FLOW = _reftype_placeholder()
    FALL_THROUGH = _reftype_placeholder()
    UNCONDITIONAL_JUMP = _reftype_placeholder()
    CONDITIONAL_JUMP = _reftype_placeholder()
    UNCONDITIONAL_CALL = _reftype_placeholder()
    CONDITIONAL_CALL = _reftype_placeholder()
    TERMINATOR = _reftype_placeholder()
    COMPUTED_JUMP = _reftype_placeholder()
    CONDITIONAL_TERMINATOR = _reftype_placeholder()
    COMPUTED_CALL = _reftype_placeholder()
    CALL_TERMINATOR = _reftype_placeholder()
    COMPUTED_CALL_TERMINATOR = _reftype_placeholder()
    CONDITIONAL_CALL_TERMINATOR = _reftype_placeholder()
    CONDITIONAL_COMPUTED_CALL = _reftype_placeholder()
    CONDITIONAL_COMPUTED_JUMP = _reftype_placeholder()
    JUMP_TERMINATOR = _reftype_placeholder()
    INDIRECTION = _reftype_placeholder()
    CALL_OVERRIDE_UNCONDITIONAL = _reftype_placeholder()
    JUMP_OVERRIDE_UNCONDITIONAL = _reftype_placeholder()
    CALLOTHER_OVERRIDE_CALL = _reftype_placeholder()
    CALLOTHER_OVERRIDE_JUMP = _reftype_placeholder()

    @property
    def has_fallthrough(self):  # type: () -> bool
        """Result of hasFallthrough() on the wrapped object."""
        return self.raw.hasFallthrough()

    @has_fallthrough.setter
    def has_fallthrough(self, value):  # type: (bool) -> None
        """Forward `value` to setHasFall() on the wrapped object."""
        self.raw.setHasFall(value)

    @property
    def is_call(self):  # type: () -> bool
        """Result of isCall() on the wrapped object."""
        return self.raw.isCall()

    @is_call.setter
    def is_call(self, value):  # type: (bool) -> None
        """Forward `value` to setIsCall() on the wrapped object."""
        self.raw.setIsCall(value)

    @property
    def is_jump(self):  # type: () -> bool
        """Result of isJump() on the wrapped object."""
        return self.raw.isJump()

    @is_jump.setter
    def is_jump(self, value):  # type: (bool) -> None
        """Forward `value` to setIsJump() on the wrapped object."""
        self.raw.setIsJump(value)

    @property
    def is_computed(self):  # type: () -> bool
        """Result of isComputed() on the wrapped object."""
        return self.raw.isComputed()

    @is_computed.setter
    def is_computed(self, value):  # type: (bool) -> None
        """Forward `value` to setIsComputed() on the wrapped object."""
        self.raw.setIsComputed(value)

    @property
    def is_conditional(self):  # type: () -> bool
        """Result of isConditional() on the wrapped object."""
        return self.raw.isConditional()

    @is_conditional.setter
    def is_conditional(self, value):  # type: (bool) -> None
        """Forward `value` to setIsConditional() on the wrapped object."""
        self.raw.setIsConditional(value)

    @property
    def is_unconditional(self):  # type: () -> bool
        """Logical negation of `is_conditional`."""
        conditional = self.is_conditional
        return not conditional

    @property
    def is_terminal(self):  # type: () -> bool
        """Result of isTerminal() on the wrapped object."""
        return self.raw.isTerminal()

    @property
    def is_data(self):  # type: () -> bool
        """Result of isData() on the wrapped object."""
        return self.raw.isData()

    @property
    def is_read(self):  # type: () -> bool
        """Result of isRead() on the wrapped object."""
        return self.raw.isRead()

    @property
    def is_write(self):  # type: () -> bool
        """Result of isWrite() on the wrapped object."""
        return self.raw.isWrite()

    @property
    def is_flow(self):  # type: () -> bool
        """Result of isFlow() on the wrapped object."""
        return self.raw.isFlow()

    @property
    def is_override(self):  # type: () -> bool
        """Result of isOverride() on the wrapped object."""
        return self.raw.isOverride()

CALLOTHER_OVERRIDE_CALL = _reftype_placeholder() class-attribute instance-attribute

CALLOTHER_OVERRIDE_JUMP = _reftype_placeholder() class-attribute instance-attribute

CALL_OVERRIDE_UNCONDITIONAL = _reftype_placeholder() class-attribute instance-attribute

CALL_TERMINATOR = _reftype_placeholder() class-attribute instance-attribute

COMPUTED_CALL = _reftype_placeholder() class-attribute instance-attribute

COMPUTED_CALL_TERMINATOR = _reftype_placeholder() class-attribute instance-attribute

COMPUTED_JUMP = _reftype_placeholder() class-attribute instance-attribute

CONDITIONAL_CALL = _reftype_placeholder() class-attribute instance-attribute

CONDITIONAL_CALL_TERMINATOR = _reftype_placeholder() class-attribute instance-attribute

CONDITIONAL_COMPUTED_CALL = _reftype_placeholder() class-attribute instance-attribute

CONDITIONAL_COMPUTED_JUMP = _reftype_placeholder() class-attribute instance-attribute

CONDITIONAL_JUMP = _reftype_placeholder() class-attribute instance-attribute

CONDITIONAL_TERMINATOR = _reftype_placeholder() class-attribute instance-attribute

FALL_THROUGH = _reftype_placeholder() class-attribute instance-attribute

FLOW = _reftype_placeholder() class-attribute instance-attribute

INDIRECTION = _reftype_placeholder() class-attribute instance-attribute

INVALID = _reftype_placeholder() class-attribute instance-attribute

JUMP_OVERRIDE_UNCONDITIONAL = _reftype_placeholder() class-attribute instance-attribute

JUMP_TERMINATOR = _reftype_placeholder() class-attribute instance-attribute

TERMINATOR = _reftype_placeholder() class-attribute instance-attribute

UNCONDITIONAL_CALL = _reftype_placeholder() class-attribute instance-attribute

UNCONDITIONAL_JUMP = _reftype_placeholder() class-attribute instance-attribute

has_fallthrough property writable

is_call property writable

is_computed property writable

is_conditional property writable

is_data property

is_flow property

is_jump property writable

is_override property

is_read property

is_terminal property

is_unconditional property

is_write property

Reference

Bases: GhidraWrapper

Source code in ghidralib.py
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
class Reference(GhidraWrapper):
    """Wraps a Ghidra reference object (a link from one address to another)."""

    @property
    def is_call(self):  # type: () -> bool
        """Tell whether this reference is a call, according to its reftype."""
        kind = self.reftype
        return kind.is_call

    @property
    def is_jump(self):  # type: () -> bool
        """Tell whether this reference is a jump, according to its reftype."""
        kind = self.reftype
        return kind.is_jump

    @property
    def reftype(self):  # type: () -> RefType
        """Type of this reference, wrapped as a RefType."""
        raw_type = self.raw.getReferenceType()
        return RefType(raw_type)

    @property
    def from_address(self):  # type: () -> int
        """Offset of the address this reference originates from (its source)."""
        origin = self.raw.getFromAddress()
        return origin.getOffset()

    @property
    def to_address(self):  # type: () -> int
        """Offset of the address this reference points to (its target)."""
        destination = self.raw.getToAddress()
        return destination.getOffset()

    @property
    def source(self):  # type: () -> SourceType
        """Value of getSource() for this reference, wrapped as a SourceType."""
        raw_source = self.raw.getSource()
        return SourceType(raw_source)

from_address property

Return the address of the source of the reference.

is_call property

Return True if the reference is a call.

is_jump property

Return True if the reference is a jump.

reftype property

Return the type of reference.

source property

to_address property

Return the address of the target of the reference.

Register

Bases: GhidraWrapper

Source code in ghidralib.py
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
class Register(GhidraWrapper):
    """Wraps a Ghidra register object."""

    @staticmethod
    def get(raw_or_name):  # type: (str|JavaObject) -> Register|None
        """Get a register by name, or wrap an already-raw register object.

        A string is looked up in the language of the current program; None
        is returned when that lookup finds nothing. Any other argument is
        wrapped as it is."""
        if not isinstance(raw_or_name, Str):
            return Register(raw_or_name)
        found = Program.current().getLanguage().getRegister(raw_or_name)
        if found is None:
            return None
        return Register(found)

    @property
    def name(self):  # type: () -> str
        """Name of this register."""
        return self.raw.getName()

    @property
    def size(self):  # type: () -> int
        """Size of this register in bytes.

        This is the total number of bytes the register contains; a byte
        count is reported because register values don't have to be
        byte-aligned."""
        return self.raw.getNumBytes()

    @property
    def varnode(self):  # type: () -> Varnode
        """Varnode associated with this register.

        Warning: registers that are not byte-aligned (for example, flag
        registers) are not supported; the address is rounded down to a byte.
        """
        location = self.raw.getAddress()
        byte_count = self.raw.getNumBytes()
        return Varnode(GhVarnode(location, byte_count))

name property

Return the name of this register

size property

Return the size of this register in bytes

This will tell the total number of bytes this register contains - because register values don't have to be byte-aligned

varnode property

Return the varnode associated with this register

Warning: this doesn't support registers that are not byte-aligned (for example, flag registers). It will round the address down to byte.

get(raw_or_name) staticmethod

Get a register by name

Source code in ghidralib.py
852
853
854
855
856
857
858
859
@staticmethod
def get(raw_or_name):  # type: (str|JavaObject) -> Register|None
    """Get a register by name, or wrap an already-raw register object.

    A string is looked up in the language of the current program; None
    is returned when that lookup finds nothing. Any other argument is
    wrapped as it is."""
    if not isinstance(raw_or_name, Str):
        return Register(raw_or_name)
    found = Program.current().getLanguage().getRegister(raw_or_name)
    if found is None:
        return None
    return Register(found)

Symbol

Bases: GhidraWrapper

Wraps a Ghidra Symbol object.

Source code in ghidralib.py
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
class Symbol(GhidraWrapper):
    """Wraps a Ghidra Symbol object."""

    @staticmethod
    def resolve_thunk_if_exists(external_symbol):  # type: (JavaObject) -> JavaObject
        """Returns a function thunk leading to a passed external symbol, if it exists.

        If there is no function thunk, original symbol is returned.

        Why is this ugly thing here? Well, we want to support external symbols,
        especially external functions. Thunks are much more useful for us when
        thinking in the context of the analysed program - when a Linux program
        calls `printf` it jumps to the appropriate `printf` thunk, not to libc
        directly. So this is the location that we want to patch/hook/trace/etc.
        when thinking about printf. But the thing is that the Ghidra SymbolTable
        API will not even return thunks! So we trace the external function
        references, and return the first (almost certainly only) Thunk reference.

        :param external_symbol: Symbol to find thunk for (if it exists).
        :return: the primary symbol at the source address of the first THUNK
            reference that has one, otherwise ``external_symbol`` itself.
        """
        xrefs = list(external_symbol.getReferences())
        for xref in xrefs:
            if xref.getReferenceType() == GhRefType.THUNK:
                addr = xref.getFromAddress()
                thunk = Program.current().getSymbolTable().getPrimarySymbol(addr)
                if thunk is not None:
                    return thunk
        return external_symbol

    @staticmethod
    def resolve_external(external_symbol):  # type: (JavaObject) -> int
        """Resolves an external address to a RAM location, if possible.

        If the symbol has no RAM location, just return its offset.

        Why is this ugly thing here? Again, we want to support external symbols, and
        we are interested in their RAM address in the program address space. In some
        cases, Ghidra will give an external address a "location" in the RAM space.
        So, for example, if the current program jumps to that external function (or
        reads that external variable, etc.), it will read that location as far as
        Ghidra is concerned (for example, Emulator will use it for calls). This is
        important for emulating Windows binaries, which use address tables for
        imports.

        :param external_symbol: External symbol to resolve.
        :return: offset of the external location's address when both the
            location and its address exist, otherwise the offset of the
            symbol's own address."""
        external_manager = Program.current().getExternalManager()
        location = external_manager.getExternalLocation(external_symbol)
        # Per the Ghidra API, getExternalLocation() can return null when the
        # symbol has no associated external location; guard against that
        # instead of failing on the attribute access.
        if location is not None:
            ram_addr = location.getAddress()
            if ram_addr is not None:
                return ram_addr.getOffset()
        return external_symbol.getAddress().getOffset()

    @staticmethod
    def get(raw_or_name):  # type: (JavaObject|str|Addr) -> Symbol|None
        """Get a symbol with the provided name or at the provided address.

        Return None if the symbol was not found.

        Note: when resolving by name, local symbols take precedence over external ones
        (in particular for function thunks - in contrast to Ghidra default behaviour).

        :param raw_or_name: a Ghidra Java object, a string, or an address.
        :return: the wrapped symbol, or None if nothing matched."""
        if isinstance(raw_or_name, str):
            # Name lookup: take the first match, and swap an external symbol
            # for its thunk when one exists.
            symbol_iterator = Program.current().getSymbolTable().getSymbols(raw_or_name)
            symbols = collect_iterator(symbol_iterator)
            if not symbols:
                return None
            raw = symbols[0]
            if raw.isExternal():
                raw = Symbol.resolve_thunk_if_exists(raw)
        elif can_resolve(raw_or_name):
            # Address lookup: use the primary symbol at the resolved address.
            raw = (
                Program.current()
                .getSymbolTable()
                .getPrimarySymbol(resolve(raw_or_name))
            )
            if not raw:
                return None
        else:
            # Anything else is wrapped directly.
            raw = raw_or_name
        return Symbol(raw)

    @staticmethod
    def all():  # type: () -> list[Symbol]
        """Get all symbols defined in the program."""
        symbol_iterator = Program.current().getSymbolTable().getAllSymbols(True)
        symbols = collect_iterator(symbol_iterator)
        return [Symbol(s) for s in symbols]

    @staticmethod
    def create(
        address, name, source=SourceType.USER_DEFINED
    ):  # type: (Addr, str, SourceType) -> Symbol
        """Create a new symbol (also called label) at the given address.

        :param address: the address where to create the symbol.
        :param name: the name of the symbol.
        :param source: the source type for the new symbol.
        :return: the newly created symbol, wrapped."""
        raw = createLabel(resolve(address), name, False, source)
        return Symbol(raw)

    @staticmethod
    def remove(address, name):  # type: (Addr, str) -> None
        """Remove the symbol with the given name at the given address.

        :param address: the address of the symbol to remove.
        :param name: the name of the symbol to remove."""
        removeSymbol(resolve(address), name)

    @property
    def address(self):  # type: () -> int
        """Get the address of this symbol.

        External symbols are passed through ``resolve_external`` first."""
        if self.is_external:
            return Symbol.resolve_external(self.raw)
        return self.raw.getAddress().getOffset()

    @property
    def name(self):  # type: () -> str
        """Get the name of this symbol."""
        return self.raw.getName()

    @property
    def name_with_namespace(self):  # type: () -> str
        """Get the fully qualified name of this symbol."""
        return self.raw.getName(True)

    @property
    def xrefs(self):  # type: () -> list[Reference]
        """Get a list of references to this symbol."""
        return [Reference(raw) for raw in self.raw.getReferences()]

    xrefs_to = xrefs

    @property
    def xref_addrs(self):  # type: () -> list[int]
        """Get the addresses of all references to this symbol."""
        return [xref.from_address for xref in self.xrefs]

    def set_type(self, datatype):  # type: (DataT) -> None
        """Set the data type of this symbol.

        :param datatype: the data type to create at this symbol's address."""
        Program.create_data(self.address, datatype)

    def delete(self):  # type: () -> None
        """Delete this symbol."""
        self.raw.delete()

    def rename(
        self, new_name, source=SourceType.USER_DEFINED
    ):  # type: (str, SourceType) -> None
        """Rename this symbol.

            >>> main = Symbol.get("main")
            >>> main.rename("main_renamed")
            >>> main.name
            'main_renamed'

        :param new_name: the new name of the symbol.
        :param source: the source type recorded for the new name."""
        self.raw.setName(new_name, source)

    @property
    def is_external(self):  # type: () -> bool
        """Return true if this symbol is external, otherwise false.

        Note: ``Symbol.get`` swaps an external symbol found by name for its
        thunk when one exists, so this reports on whichever symbol is
        actually wrapped.

        :return: true if the symbol is external"""
        return self.raw.isExternal()

address property

Get the address of this symbol.

is_external property

Return true if this symbol is external, otherwise false.

Note: when resolving by name, local symbols take precedence over external ones (in particular for function thunks - in contrast to Ghidra default behaviour).

Returns:
  • true if the symbol is external

name property

Get the name of this symbol.

name_with_namespace property

Get the fully qualified name of this symbol.

xref_addrs property

Get the addresses of all references to this symbol.

xrefs property

Get a list of references to this symbol.

xrefs_to = xrefs class-attribute instance-attribute

all() staticmethod

Get all symbols defined in the program.

Source code in ghidralib.py
2979
2980
2981
2982
2983
2984
@staticmethod
def all():  # type: () -> list[Symbol]
    """Return every symbol in the current program, each wrapped as a Symbol."""
    table = Program.current().getSymbolTable()
    raw_symbols = collect_iterator(table.getAllSymbols(True))
    return [Symbol(raw) for raw in raw_symbols]

create(address, name, source=SourceType.USER_DEFINED) staticmethod

Create a new symbol (also called label) at the given address.

Parameters:
  • address

    the address where to create the symbol.

  • name

    the name of the symbol.

  • source

    the source type for the new symbol.

Source code in ghidralib.py
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
@staticmethod
def create(
    address, name, source=SourceType.USER_DEFINED
):  # type: (Addr, str, SourceType) -> Symbol
    """Create a label (symbol) at an address and return it wrapped.

    :param address: where the new symbol should be placed.
    :param name: the name to give the new symbol.
    :param source: the source type to record for the new symbol.
    :return: the created symbol."""
    return Symbol(createLabel(resolve(address), name, False, source))

delete()

Delete this symbol.

Source code in ghidralib.py
3039
3040
3041
def delete(self):  # type: () -> None
    """Remove this symbol by calling ``delete()`` on the wrapped object."""
    wrapped = self.raw
    wrapped.delete()

get(raw_or_name) staticmethod

Get a symbol with the provided name or at the provided address.

Return None if the symbol was not found.

Note: when resolving by name, local symbols take precedence over external ones (in particular for function thunks - in contrast to Ghidra default behaviour).

Parameters:
  • raw_or_name

    a Ghidra Java object, a string, or an address.

Source code in ghidralib.py
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
@staticmethod
def get(raw_or_name):  # type: (JavaObject|str|Addr) -> Symbol|None
    """Find a symbol by name or by address, or wrap a raw object.

    A string is looked up as a symbol name: the first match is used, and if
    that match is external it is replaced by its thunk when one exists (so
    local thunks win over external symbols, unlike Ghidra's default).
    A resolvable address yields the primary symbol at that address.
    Anything else is wrapped directly.

    :param raw_or_name: a Ghidra Java object, a string, or an address.
    :return: the wrapped symbol, or None if the lookup found nothing."""
    if isinstance(raw_or_name, str):
        by_name = Program.current().getSymbolTable().getSymbols(raw_or_name)
        matches = collect_iterator(by_name)
        if not matches:
            return None
        chosen = matches[0]
        if chosen.isExternal():
            chosen = Symbol.resolve_thunk_if_exists(chosen)
        return Symbol(chosen)

    if not can_resolve(raw_or_name):
        return Symbol(raw_or_name)

    primary = (
        Program.current().getSymbolTable().getPrimarySymbol(resolve(raw_or_name))
    )
    if not primary:
        return None
    return Symbol(primary)

remove(address, name) staticmethod

Remove the symbol with the given name at the given address.

Parameters:
  • address

    the address of the symbol to remove.

  • name

    the name of the symbol to remove.

Source code in ghidralib.py
2998
2999
3000
3001
3002
3003
3004
@staticmethod
def remove(address, name):  # type: (Addr, str) -> None
    """Delete the named symbol located at the given address.

    :param address: where the symbol to remove lives.
    :param name: which symbol at that address to remove."""
    target = resolve(address)
    removeSymbol(target, name)

rename(new_name, source=SourceType.USER_DEFINED)

Rename this symbol.

>>> main = Symbol.get("main")
>>> main.rename("main_renamed")
>>> main.name
'main_renamed'
Parameters:
  • new_name

    the new name of the symbol.

Source code in ghidralib.py
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
def rename(
    self, new_name, source=SourceType.USER_DEFINED
):  # type: (str, SourceType) -> None
    """Give this symbol a new name.

        >>> main = Symbol.get("main")
        >>> main.rename("main_renamed")
        >>> main.name
        'main_renamed'

    :param new_name: the name the symbol should have afterwards.
    :param source: the source type passed along with the new name."""
    wrapped = self.raw
    wrapped.setName(new_name, source)

resolve_external(external_symbol) staticmethod

Resolves an external address to a RAM location, if possible.

If the symbol has no RAM location, just return its offset.

Why is this ugly thing here? Again, we want to support external symbols, and we are interested in their RAM address in the program address space. In some cases, Ghidra will give an external address a "location" in the RAM space. So, for example, if the current program jumps to that external function (or reads that external variable, etc.), it will read that location as far as Ghidra is concerned (for example, Emulator will use it for calls). This is important for emulating Windows binaries, which use address tables for imports.

Parameters:
  • external_symbol

    External symbol to resolve.

Source code in ghidralib.py
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
@staticmethod
def resolve_external(external_symbol):  # type: (JavaObject) -> int
    """Resolves an external address to a RAM location, if possible.

    If the symbol has no RAM location, just return its offset.

    Why is this ugly thing here? Again, we want to support external symbols, and
    we are interested in their RAM address in the program address space. In some
    cases, Ghidra will give an external address a "location" in the RAM space.
    So, for example, if the current program jumps to that external function (or
    reads that external variable, etc.), it will read that location as far as
    Ghidra is concerned (for example, Emulator will use it for calls). This is
    important for emulating Windows binaries, which use address tables for
    imports.

    :param external_symbol: External symbol to resolve.
    :return: offset of the external location's address when both the location
        and its address exist, otherwise the offset of the symbol's own
        address."""
    external_manager = Program.current().getExternalManager()
    location = external_manager.getExternalLocation(external_symbol)
    # Per the Ghidra API, getExternalLocation() can return null when the
    # symbol has no associated external location; guard against that instead
    # of failing on the attribute access.
    if location is not None:
        ram_addr = location.getAddress()
        if ram_addr is not None:
            return ram_addr.getOffset()
    return external_symbol.getAddress().getOffset()

resolve_thunk_if_exists(external_symbol) staticmethod

Returns a function thunk leading to a passed external symbol, if it exists.

If there is no function thunk, original symbol is returned.

Why is this ugly thing here? Well, we want to support external symbols, especially external functions. Thunks are much more useful for us when thinking in the context of the analysed program - when a Linux program calls printf it jumps to the appropriate printf thunk, not to libc directly. So this is the location that we want to patch/hook/trace/etc. when thinking about printf. But the thing is that the Ghidra SymbolTable API will not even return thunks! So we trace the external function references, and return the first (almost certainly only) Thunk reference.

Parameters:
  • external_symbol

    Symbol to find thunk for (if it exists).

Source code in ghidralib.py
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
@staticmethod
def resolve_thunk_if_exists(external_symbol):  # type: (JavaObject) -> JavaObject
    """Return the thunk symbol that leads to an external symbol, when there is one.

    Falls back to returning the symbol that was passed in.

    Rationale: for external functions, the thunk inside the analysed program
    is usually the interesting location - a Linux program calling `printf`
    jumps to its own `printf` thunk rather than straight into libc, so the
    thunk is what you want to patch, hook or trace. The Ghidra SymbolTable
    API does not hand thunks back, so the references to the external symbol
    are scanned and the first THUNK reference (almost certainly the only one)
    is used.

    :param external_symbol: Symbol to find thunk for (if it exists).
    :return: the primary symbol at the source address of the first THUNK
        reference that has one, otherwise ``external_symbol``.
    """
    for ref in list(external_symbol.getReferences()):
        if not (ref.getReferenceType() == GhRefType.THUNK):
            continue
        source_addr = ref.getFromAddress()
        candidate = Program.current().getSymbolTable().getPrimarySymbol(source_addr)
        if candidate is None:
            # No primary symbol at this thunk reference; keep looking.
            continue
        return candidate
    return external_symbol

set_type(datatype)

Set the data type of this symbol.

Source code in ghidralib.py
3035
3036
3037
def set_type(self, datatype):  # type: (DataT) -> None
    """Apply a data type at this symbol's address via ``Program.create_data``.

    :param datatype: the data type to create at this symbol's address."""
    Program.create_data(self.address, datatype)

SymbolicPropogator

Bases: GhidraWrapper

Wraps SymbolicPropogator. Can be used to get known values at various locations in a given function (or outside of a function)

Source code in ghidralib.py
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
class SymbolicPropogator(GhidraWrapper):
    """Wrapper around Ghidra's SymbolicPropogator.

    Lets you query values that are known at particular locations, either
    inside a given function or outside of one."""

    @staticmethod
    def create():
        """Create a wrapper around a new GhSymbolicPropogator for the current program."""
        raw_propagator = GhSymbolicPropogator(Program.current())
        return SymbolicPropogator(raw_propagator)

    def flow_constants(
        self, addr, body, evaluator
    ):  # type: (Addr, AddressSet, JavaObject) -> None
        """Propagate constants starting at an address, within an address set.

        :param addr: The address to start from
        :param body: The body where constants should be propagated
        :param evaluator: The evaluator to use for the propagation"""
        start = resolve(addr)
        self.raw.flowConstants(start, body.raw, evaluator, False, getMonitor())

    def register_at(self, addr, register):  # type: (Addr, Reg) -> int|None
        """Look up the known value of a register at an address.

        Warning: this value is signed.

        :param addr: The address to get a register value at
        :param register: The register to get a value for
        :return: The value of the register at the given address, or None if the
        register is not known at that address"""
        location = resolve(addr)
        wrapped_register = Register(register)
        known = self.raw.getRegisterValue(location, wrapped_register.raw)
        if not known:
            return None
        if known.isRegisterRelativeValue():
            # This never happens in my tests, so I just won't handle register-relative
            # values. I don't know when this can ever happen.
            return None
        return known.value

create() staticmethod

Source code in ghidralib.py
2418
2419
2420
@staticmethod
def create():
    """Create a wrapper around a new GhSymbolicPropogator for the current program."""
    raw_propagator = GhSymbolicPropogator(Program.current())
    return SymbolicPropogator(raw_propagator)

flow_constants(addr, body, evaluator)

Flow constants from the given address in the given body

Parameters:
  • addr

    The address to start from

  • body

    The body where constants should be propagated

  • evaluator

    The evaluator to use for the propagation

Source code in ghidralib.py
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
def flow_constants(
    self, addr, body, evaluator
):  # type: (Addr, AddressSet, JavaObject) -> None
    """Propagate constants starting at an address, within an address set.

    :param addr: The address to start from
    :param body: The body where constants should be propagated
    :param evaluator: The evaluator to use for the propagation"""
    start = resolve(addr)
    self.raw.flowConstants(start, body.raw, evaluator, False, getMonitor())

register_at(addr, register)

Get a known register value at the given address (or None)

Warning: this value is signed.

Parameters:
  • addr

    The address to get a register value at

  • register

    The register to get a value for

Returns:
  • The value of the register at the given address, or None if the register is not known at that address

Source code in ghidralib.py
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
def register_at(self, addr, register):  # type: (Addr, Reg) -> int|None
    """Look up the known value of a register at an address.

    Warning: this value is signed.

    :param addr: The address to get a register value at
    :param register: The register to get a value for
    :return: The value of the register at the given address, or None if the
    register is not known at that address"""
    location = resolve(addr)
    wrapped_register = Register(register)
    known = self.raw.getRegisterValue(location, wrapped_register.raw)
    if not known:
        return None
    if known.isRegisterRelativeValue():
        # This never happens in my tests, so I just won't handle register-relative
        # values. I don't know when this can ever happen.
        return None
    return known.value

Variable

Bases: GhidraWrapper

Wraps a Ghidra Variable object

Source code in ghidralib.py
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
class Variable(GhidraWrapper):
    """Wraps a Ghidra Variable object"""

    @property
    def name(self):  # type: () -> str
        """Get the name of this variable"""
        return self.raw.getName()

    @name.setter
    def name(self, name):  # type: (str) -> None
        """Rename this variable"""
        self.rename(name, SourceType.USER_DEFINED)

    def rename(
        self, name, source=SourceType.USER_DEFINED
    ):  # type: (str, SourceType) -> None
        """Rename this variable

        :param name: the new name of the variable.
        :param source: the source type recorded for the new name."""
        self.raw.setName(name, source)

    @property
    def data_type(self):  # type: () -> DataType
        """Get the data type of this variable"""
        return DataType(self.raw.getDataType())

    @data_type.setter
    def data_type(
        self, data_type, source=SourceType.USER_DEFINED
    ):  # type: (DataType, SourceType) -> None
        """Set the data type of this variable"""
        # Property assignment only passes the value, so `source` always takes
        # its default here.
        self.raw.setDataType(data_type.raw, source)

    @property
    def is_valid(self):  # type: () -> bool
        """Check if this variable is valid"""
        return self.raw.isValid()

    @property
    def comment(self):  # type: () -> str|None
        """Get the comment for this variable"""
        return self.raw.getComment()

    @comment.setter
    def comment(self, comment):  # type: (str|None) -> None
        """Set the comment for this variable"""
        self.set_comment(comment)

    def set_comment(self, comment):  # type: (str|None) -> None
        """Set the comment for this variable

        :param comment: the new comment text (or None)."""
        self.raw.setComment(comment)

    @property
    def is_auto(self):  # type: () -> bool
        """Check if this variable is an automatic parameter.

        Some parameters are "hidden parameters" dictated by the calling
        convention. This method returns true for such parameters."""
        return self.raw.getVariableStorage().isAutoStorage()

    @property
    def is_forced_indirect(self):  # type: () -> bool
        """Check if this variable was forced to be a pointer by calling convention"""
        return self.raw.getVariableStorage().isForcedIndirect()

    @property
    def has_bad_storage(self):  # type: () -> bool
        """Check if this variable has bad storage (could not be resolved)"""
        return self.raw.getVariableStorage().isBadStorage()

    @property
    def is_unassigned_storage(self):  # type: () -> bool
        """Check if this variable has no assigned storage (varnodes)"""
        return self.raw.getVariableStorage().isUnassignedStorage()

    @property
    def is_void(self):  # type: () -> bool
        """Check if this variable is of type void"""
        return self.raw.getVariableStorage().isVoidStorage()

    @property
    def stack_offset(self):  # type: () -> int
        """Get the stack offset of this variable."""
        return self.raw.getVariableStorage().getStackOffset()

    # Backward-compatible alias: this property was originally published under
    # the misspelled name, so keep it working for existing scripts.
    stack_offfset = stack_offset

    @property
    def is_constant(self):  # type: () -> bool
        """Check if this variable consists of a single constant-space varnode"""
        return self.raw.getVariableStorage().isConstantStorage()

    @property
    def is_hash(self):  # type: () -> bool
        """Check if this variable consists of a single hash-space varnode."""
        return self.raw.getVariableStorage().isHashStorage()

    @property
    def is_stack(self):  # type: () -> bool
        """Check if this variable is a stack variable"""
        return self.raw.isStackVariable()

    @property
    def is_memory(self):  # type: () -> bool
        """Check if this variable is stored in memory"""
        return self.raw.isMemoryVariable()

    @property
    def is_unique(self):  # type: () -> bool
        """Check if this variable is of type unique"""
        return self.raw.isUniqueVariable()

    @property
    def is_compound(self):  # type: () -> bool
        """Check if this variable is a compound variable"""
        return self.raw.isCompoundVariable()

    @property
    def symbol(self):  # type: () -> Symbol
        """Get the symbol for this variable"""
        return Symbol(self.raw.getSymbol())

    @property
    def source(self):  # type: () -> SourceType
        """Get the source type of this variable"""
        return SourceType(self.raw.getSource())

    @property
    def varnode(self):  # type: () -> Varnode
        """Get the first varnode associated with this variable.

        Warning: there may be more than one varnode associated with a variable."""
        return Varnode(self.raw.getFirstStorageVarnode())

    @property
    def varnodes(self):  # type: () -> list[Varnode]
        """Get all varnodes associated with this variable."""
        storage = self.raw.getVariableStorage()
        return [Varnode(x) for x in storage.getVarnodes()]

    @property
    def is_register(self):  # type: () -> bool
        """Check if this variable consists of a single register."""
        return self.raw.isRegisterVariable()

    @property
    def register(self):  # type: () -> str
        """Get the name of the register associated with this variable.

        :raises ValueError: if this variable is not a register variable."""
        reg = self.raw.getRegister()
        if not reg:
            raise ValueError("Variable is not a register variable")
        return reg.getName()

    @property
    def function(self):  # type: () -> Function
        """Get the function associated with this variable."""
        return Function(self.raw.getFunction())

comment property writable

Get the comment for this variable

data_type property writable

Get the data type of this variable

function property

Get the function associated with this variable.

has_bad_storage property

Check if this variable has bad storage (could not be resolved)

is_auto property

Check if this variable is an automatic parameter.

Some parameters are "hidden parameters" dictated by the calling convention. This method returns true for such parameters.

is_compound property

Check if this variable is a compound variable

is_constant property

Check if this variable consists of a single constant-space varnode

is_forced_indirect property

Check if this variable was forced to be a pointer by calling convention

is_hash property

Check if this variable consists of a single hash-space varnode.

is_memory property

Check if this variable is stored in memory

is_register property

Check if this variable consists of a single register.

is_stack property

Check if this variable is a stack variable

is_unassigned_storage property

Check if this variable has no assigned storage (varnodes)

is_unique property

Check if this variable is of type unique

is_valid property

Check if this variable is valid

is_void property

Check if this variable is of type void

name property writable

Get the name of this variable

register property

Get the register associated with this variable.

Raises an exception if this variable is not a register variable.

source property

Get the source type of this variable

stack_offfset property

Get the stack offset of this variable.

symbol property

Get the symbol for this variable

varnode property

Get the first varnode associated with this variable.

Warning: there may be more than one varnode associated with a variable.

varnodes property

Get all varnodes associated with this variable.

rename(name, source=SourceType.USER_DEFINED)

Rename this variable

Source code in ghidralib.py
2120
2121
2122
2123
2124
def rename(
    self, name, source=SourceType.USER_DEFINED
):  # type: (str, SourceType) -> None
    """Give this variable a new name.

    :param name: the name the variable should have afterwards.
    :param source: the source type passed along with the new name."""
    wrapped = self.raw
    wrapped.setName(name, source)

set_comment(comment)

Set the comment for this variable

Source code in ghidralib.py
2153
2154
2155
def set_comment(self, comment):  # type: (str|None) -> None
    """Store a comment on this variable by calling ``setComment`` on the wrapped object.

    :param comment: the new comment text (or None)."""
    wrapped = self.raw
    wrapped.setComment(comment)

Varnode

Bases: GhidraWrapper

Source code in ghidralib.py
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
class Varnode(GhidraWrapper):
    """Wraps a Ghidra Varnode object."""

    @property
    def has_value(self):  # type: () -> bool
        """Return true if this varnode can be converted to an integer value.

        In particular, this will return true for Address and Constant varnodes"""
        return self.value is not None

    @property
    def value(self):  # type: () -> int|None
        """Get the value of this varnode. Traverse defining pcodeops if necessary.

        Address and constant varnodes return their offset. Any other varnode
        returns the result of its defining pcodeop, or None when it has no
        defining pcodeop."""
        if self.is_address or self.is_constant:
            return self.offset
        # Look the defining op up once; every access builds a new wrapper.
        definition = self.defining_pcodeop
        if definition is None:
            return None
        return definition.result

    @property
    def offset(self):  # type: () -> int
        """Return the offset of this varnode as a Python integer."""
        return int(self.raw.getOffset())

    @property
    def size(self):  # type: () -> int
        """Return the size of this varnode, as reported by Ghidra."""
        return self.raw.getSize()

    @property
    def high(self):  # type: () -> HighVariable
        """Return the high variable of this varnode, wrapped as a HighVariable."""
        return HighVariable(self.raw.getHigh())

    @property
    def symbol(self):  # type: () -> HighSymbol
        """Return the symbol of the high variable of this varnode."""
        return self.high.symbol

    @property
    def is_constant(self):  # type: () -> bool
        """Note: addresses are not constants in Ghidra-speak.
        Use has_value to check if the varnode has a predictable value."""
        return self.raw.isConstant()

    @property
    def is_register(self):  # type: () -> bool
        """Return True if this varnode is stored entirely in a register.

        Warning: this does not mean that it can be cast to a register! This may
        be, for example, upper 32 bits of RAX. Use is_named_register instead."""
        return self.raw.isRegister()

    def _lookup_register(self):  # type: () -> JavaObject|None
        """Ask the current program's language for the register that matches
        this varnode's address and size. Returns None if there is none."""
        language = Program.current().getLanguage()
        return language.getRegister(self.raw.getAddress(), self.size)

    @property
    def is_named_register(self):  # type: () -> bool
        """Return True if this varnode is stored entirely in a named register.

        "Named" in this context means that it has a conventional name, like RAX.
        Not all register varnodes are named, for example, the upper 32 bits of RAX
        have no commonly used name."""
        return self._lookup_register() is not None

    @property
    def as_register(self):  # type: () -> str
        """Return the name of the register this varnode is stored in.

        Warning: even if is_register returns true, this does not mean you can use
        this method safely. Use is_named_register to make sure (when no
        matching register exists, the lookup yields None and this fails)."""
        return self._lookup_register().getName()

    @property
    def is_address(self):  # type: () -> bool
        """Return the result of Ghidra's isAddress() for this varnode."""
        return self.raw.isAddress()

    @property
    def is_unique(self):  # type: () -> bool
        """Return the result of Ghidra's isUnique() for this varnode."""
        return self.raw.isUnique()

    @property
    def is_hash(self):  # type: () -> bool
        """Return the result of Ghidra's isHash() for this varnode."""
        return self.raw.isHash()

    @property
    def is_stack(self):  # type: () -> bool
        """Return True if the address space of this varnode has the stack type."""
        # The space type is encoded in the bits selected by ID_TYPE_MASK.
        spaceid = self.raw.getSpace()
        spacetype = AddressSpace.ID_TYPE_MASK & spaceid
        return spacetype == AddressSpace.TYPE_STACK

    def rename(self, new_name):  # type: (str) -> None
        """Try to rename the current varnode. This only makes sense for variables."""
        self.symbol.rename(new_name)

    @property
    def free(self):  # type: () -> Varnode
        """Return a new Varnode built only from this varnode's address and size."""
        return Varnode(GhVarnode(self.raw.getAddress(), self.raw.getSize()))

    @property
    def simple(self):  # type: () -> int|str
        """Convert Varnode to a primitive value (int or a string representation)

        More specifically, a varnode that has a value (see `value`) is converted
        to that integer, for named registers the register name is returned, and
        for other register, unique, hash and stack varnodes an ad-hoc string
        encoding is used: reg:OFFSET:SIZE, uniq:OFFSET:SIZE, hash:OFFSET:SIZE or
        stack:OFFSET:SIZE, with both numbers in hexadecimal.

        This is useful for simple analyses when programmer already knows what
        type of value is expected at the given position.

        Raises RuntimeError if the varnode matches none of the above."""
        value = self.value
        if value is not None:
            return value
        elif self.is_register:
            if self.is_named_register:
                return self.as_register
            return "reg:{:x}:{:x}".format(self.offset, self.size)
        elif self.is_unique:
            return "uniq:{:x}:{:x}".format(self.offset, self.size)
        elif self.is_hash:
            return "hash:{:x}:{:x}".format(self.offset, self.size)
        elif self.is_stack:
            return "stack:{:x}:{:x}".format(self.offset, self.size)
        raise RuntimeError("Unknown varnode type")

    @property
    def is_unaffected(self):  # type: () -> bool
        """Return the result of Ghidra's isUnaffected() for this varnode."""
        return self.raw.isUnaffected()

    @property
    def is_persistent(self):  # type: () -> bool
        """Return the result of Ghidra's isPersistent() for this varnode."""
        return self.raw.isPersistent()

    @property
    def is_addr_tied(self):  # type: () -> bool
        """Return the result of Ghidra's isAddrTied() for this varnode."""
        return self.raw.isAddrTied()

    @property
    def is_input(self):  # type: () -> bool
        """Return the result of Ghidra's isInput() for this varnode."""
        return self.raw.isInput()

    @property
    def is_free(self):  # type: () -> bool
        """Return the result of Ghidra's isFree() for this varnode."""
        return self.raw.isFree()

    @property
    def defining_pcodeop(self):  # type: () -> PcodeOp|None
        """Return a PcodeOp that defined this varnode, or None if there is none"""
        raw = self.raw.getDef()
        if raw is None:
            return None
        return PcodeOp(raw)

    @property
    def descendants(self):  # type: () -> list[PcodeOp]
        """Return a list of all descendants of this varnode"""
        # Fetch once: the same object is checked for None and then iterated.
        raw_descendants = self.raw.getDescendants()
        if raw_descendants is None:
            return []
        return [PcodeOp(x) for x in raw_descendants]

    def intersects(self, other):  # type: (Varnode) -> bool
        """Return true if this varnode intersects other"""
        return self.raw.intersects(other.raw)

as_register property

Return the name of the register this varnode is stored in.

Warning: even if is_register returns true, this does not mean you can use this method safely. Use is_named_register to make sure.

defining_pcodeop property

Return a PcodeOp that defined this varnode

descendants property

Return a list of all descendants of this varnode

free property

has_value property

Return true if this varnode can be converted to a integer value.

In particular, this will return true for Address and Constant varnodes

high property

is_addr_tied property

is_address property

is_constant property

Note: addresses are not constants in Ghidra-speak. Use has_value to check if the varnode has a predictable value.

is_free property

is_hash property

is_input property

is_named_register property

Return True if this varnode is stored entirely in a named register.

"Named" in this context means that it has a conventional name, like RAX. Not all register varnodes are named, for example, the upper 32 bits of RAX have no commonly used name.

is_persistent property

is_register property

Return True if this varnode is stored entirely in a register.

Warning: this does not mean that it can be cast to a register! This may be, for example, upper 32 bits of RAX. Use is_named_register instead.

is_stack property

is_unaffected property

is_unique property

offset property

simple property

Convert Varnode to a primitive value (int or a string representation)

More specifically, this will convert constants and addresses into integers, for named registers the register name is returned, and for other register, unique, hash and stack varnodes an ad-hoc string encoding is used (reg:OFFSET:SIZE, uniq:OFFSET:SIZE, hash:OFFSET:SIZE or stack:OFFSET:SIZE, with both numbers in hexadecimal).

This is useful for simple analyses when programmer already knows what type of value is expected at the given position.

size property

symbol property

value property

Get the value of this varnode. Traverse defining pcodeops if necessary.

intersects(other)

Return true if this varnode intersects other

Source code in ghidralib.py
1039
1040
1041
def intersects(self, other):  # type: (Varnode) -> bool
    """Return true if this varnode intersects other.

    :param other: the varnode to check against
    :return: the result of Ghidra's intersects() on the two raw varnodes"""
    return self.raw.intersects(other.raw)

rename(new_name)

Try to rename the current varnode. This only makes sense for variables.

Source code in ghidralib.py
971
972
973
def rename(self, new_name):  # type: (str) -> None
    """Rename the symbol behind this varnode.

    This only makes sense for varnodes that belong to a variable.

    :param new_name: the name to give to the symbol"""
    target = self.symbol
    target.rename(new_name)

unicode

A fake stub class, to keep type-checker relatively happy

Source code in ghidralib.py
156
157
158
159
160
161
class unicode:
    """Placeholder class that only exists to keep the type-checker happy"""

    def encode(self):  # type: () -> str
        """Placeholder method; calling it always raises NotImplementedError"""
        message = "This method should never be called"
        raise NotImplementedError(message)

encode()

A fake method, to keep type-checker relatively happy

Source code in ghidralib.py
159
160
161
def encode(self):  # type: () -> str
    """Placeholder method; calling it always raises NotImplementedError"""
    message = "This method should never be called"
    raise NotImplementedError(message)

assemble(instructions, address=0)

Assemble the given instructions and return them as a list of instructions.

Note: Address is important, because instruction meaning may depend on the location.

Parameters:
  • address

    the address where the instructions would be written

  • instructions

    a list of instructions, or a single instruction to assemble

Returns:
  • the newly assembled instructions

Source code in ghidralib.py
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
def assemble(
    instructions, address=0
):  # type: (str|list[str], Addr) -> list[Instruction]
    """Assemble the given instructions and return them as a list of instructions.

    The instructions are first assembled to bytes, and those bytes are then
    disassembled again at the same address.

    Note: Address is important, because instruction meaning may depend on the location.

    :param instructions: a list of instructions, or a single instruction to assemble
    :param address: the address where the instructions would be written
    :return: the newly assembled instructions"""
    machine_code = assemble_to_bytes(instructions, address)
    decoded = disassemble_bytes(machine_code, address)
    return decoded

assemble_at(address, instructions, pad_to=0)

Assemble the given instructions and write them at the given address.

Note: Ghidra is a bit picky, and case-sensitive when it comes to opcodes. For example, use "MOV EAX, EBX" instead of "mov eax, ebx".

>>> assemble_at(Function("exit").entrypoint, "RET")
Parameters:
  • address

    the address where to write the instructions

  • instructions

    a list of instructions, or a single instruction to assemble

  • pad_to

    optionally, pad the code with NOPs to reach this size

Returns:
  • the newly assembled instructions

Source code in ghidralib.py
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
def assemble_at(
    address, instructions, pad_to=0
):  # type: (Addr, str|list[str], int) -> list[Instruction]
    """Assemble the given instructions and write them at the given address.

    Note: Ghidra is a bit picky, and case-sensitive when it comes to opcodes.
    For example, use "MOV EAX, EBX" instead of "mov eax, ebx".

        >>> assemble_at(Function("exit").entrypoint, "RET")

    After assembling, targets of calls and jumps that originate from the new
    instructions are disassembled as well.

    :param address: the address where to write the instructions
    :param instructions: a list of instructions, or a single instruction to assemble
    :param pad_to: optionally, pad the code with NOPs to reach this size
    :return: the newly assembled instructions (padding NOPs are not included)"""
    # Note: Assembler API is actually quite user-friendly and doesn't require
    # wrapping. But let's wrap it for consistency.
    addr = resolve(address)
    asm = Assemblers.getAssembler(Program.current())
    result = [Instruction(i) for i in asm.assemble(addr, instructions)]

    # Append NOPs at the end, if length is shorter than pad_to.
    # This is purely to make the assembled code look nicer.
    # NOTE(review): one "NOP" is emitted per missing byte, which assumes a
    # one-byte NOP encoding - confirm for non-x86 targets.
    if result:
        last = result[-1]
        end_addr = last.address + last.length
        code_size = end_addr - addr.getOffset()
        if pad_to > code_size:
            asm.assemble(addr.add(code_size), ["NOP"] * (pad_to - code_size))

    # Do what Ghidra should do automatically, and try to disassemble
    # jump targets from the newly assembled instructions.
    for instr in result:
        for xref in instr.xrefs_from:
            if xref.is_call or xref.is_jump:
                # Go through resolve() instead of toAddr(int): resolve() formats
                # integers as hex strings, because Java cannot handle large
                # (unsigned) integers.
                disassemble(resolve(xref.to_address))

    return result

assemble_to_bytes(instructions, address=0)

Assemble the given instructions and return them as an array of bytes.

Note: Ghidra is a bit picky, and case-sensitive when it comes to opcodes. For example, use "MOV EAX, EBX" instead of "mov eax, ebx".

Note: Address is important, because instruction bytes may depend on the location.

>>> assemble_to_bytes("ADD EAX, EAX")
"\x01\xc0"
>>> assemble_to_bytes(["ADD EAX, EAX", "ADD EAX, EAX"])
"\x01\xc0\x01\xc0"
Parameters:
  • address

    the address to use as a base for instructions

  • instructions

    a list of instructions, or a single instruction to assemble

Source code in ghidralib.py
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
def assemble_to_bytes(instructions, address=0):  # type: (str|list[str], Addr) -> bytes
    """Assemble the given instructions and return them as an array of bytes.

    Note: Ghidra is a bit picky, and case-sensitive when it comes to opcodes.
    For example, use "MOV EAX, EBX" instead of "mov eax, ebx".

    Note: Address is important, because instruction bytes may depend on the location.

        >>> assemble_to_bytes("ADD EAX, EAX")
        "\\x01\\xc0"
        >>> assemble_to_bytes(["ADD EAX, EAX", "ADD EAX, EAX"])
        "\\x01\\xc0\\x01\\xc0"

    :param instructions: a list of instructions, or a single instruction to assemble
    :param address: the address to use as a base for instructions
    :return: the assembled bytes of all instructions, concatenated in order"""
    # Note: Assembler API is actually quite user-friendly and doesn't require
    # wrapping. But let's wrap it for consistency.
    addr_obj = resolve(address)
    asm = Assemblers.getAssembler(Program.current())
    if isinstance(instructions, Str):
        # A single instruction was passed as a string.
        return _bytes_from_bytelist(asm.assembleLine(addr_obj, instructions))
    result = _asbytes("")
    for instr in instructions:
        # Each instruction is assembled at the address right after the bytes
        # produced so far.
        next_addr = addr_obj.add(len(result))
        result += _bytes_from_bytelist(asm.assembleLine(next_addr, instr))
    return result

can_resolve(addr)

Check if the passed value can be resolved to an address.

This is useful for checking if resolve() will succeed. See resolve documentation for more details.

Source code in ghidralib.py
406
407
408
409
410
411
def can_resolve(addr):  # type: (Addr) -> bool
    """Tell whether the passed value has a type that `resolve()` accepts.

    Only the type of the value is inspected here.
    See `resolve` documentation for more details."""
    accepted_types = (GenericAddress, int, long, unicode, str)
    return isinstance(addr, accepted_types)

collect_iterator(iterator)

Collect a Java iterator to a Python list.

Source code in ghidralib.py
421
422
423
424
425
426
def collect_iterator(iterator):  # type: (JavaObject) -> list
    """Drain a Java iterator (hasNext/next) into a Python list.

    :param iterator: an object with Java-style hasNext() and next() methods
    :return: a list with all remaining elements, in iteration order"""
    items = []
    while True:
        if not iterator.hasNext():
            return items
        items.append(iterator.next())

disassemble_at(address, max_instr=None, max_bytes=None)

Disassemble the bytes from the program memory at the given address.

If neither max_bytes nor max_instr are specified, this function will disassemble one instruction. If at least one of them is specified, this function will disassemble until one of the conditions occurs.

>>> disassemble_at(0x0403ED0)
[INC ESI]
Parameters:
  • address

    the address where to start disassembling

  • max_bytes

    maximum number of bytes to disassemble (None for no limit)

  • max_instr

    maximum number of instructions to disassemble (None for no limit)

Returns:
  • a list of Instruction objects

Source code in ghidralib.py
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
def disassemble_at(
    address, max_instr=None, max_bytes=None
):  # type: (Addr, int|None, int|None) -> list[Instruction]
    """Disassemble the bytes from the program memory at the given address.

    With neither `max_instr` nor `max_bytes` given, a single instruction
    is disassembled. If at least one of them is given, disassembly goes on
    until one of the limits is reached.

        >>> disassemble_at(0x0403ED0)
        [INC ESI]

    :param address: the address where to start disassembling
    :param max_instr: maximum number of instructions to disassemble (None for no limit)
    :param max_bytes: maximum number of bytes to disassemble (None for no limit)
    :return: a list of Instruction objects"""
    start = resolve(address)

    # Instruction limit: explicit value, else the byte limit, else one
    # instruction.
    if max_instr is not None:
        instr_limit = max_instr
    elif max_bytes is not None:
        instr_limit = max_bytes
    else:
        instr_limit = 1

    if max_bytes is not None:
        byte_limit = max_bytes
    else:
        block_remaining = MemoryBlock(start).end - start.getOffset()
        # Hacky and inefficient, but good enough for now (and correct):
        # budget 16 bytes per instruction, capped at the end of the block.
        byte_limit = min(block_remaining, instr_limit * 16)

    return disassemble_bytes(read_bytes(start, byte_limit), start, instr_limit)

disassemble_bytes(data, addr=0, max_instr=None)

Disassemble the given bytes and return a list of Instructions.

This function will return early if an exception during disassembly occurs.

>>> disassemble_bytes('F')
[INC ESI]

Note: Address is important, because instruction meaning may depend on the location.

Parameters:
  • data

    the bytes to disassemble

  • addr

    the (virtual) address of the first instruction

  • max_instr

    the maximum number of instructions to disassemble, or None to disassemble until the end of the data

Returns:
  • a list of Instruction objects

Source code in ghidralib.py
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
def disassemble_bytes(
    data, addr=0, max_instr=None
):  # type: (bytes, Addr, int|None) -> list[Instruction]
    """Disassemble the given bytes and return a list of Instructions.

    This function will return early if an exception during disassembly occurs.
    An instruction that would extend past the end of `data` is not returned.

        >>> disassemble_bytes('F')
        [INC ESI]

    Note: Address is important, because instruction meaning may depend on the location.

    :param data: the bytes to disassemble
    :param addr: the (virtual) address of the first instruction
    :param max_instr: the maximum number of instructions to disassemble, or
    None to disassemble until the end of the data
    :return: a list of Instruction objects"""
    dis = PseudoDisassembler(Program.current())
    address = resolve(addr)
    total = len(data)
    offset = 0
    result = []
    # Stop as soon as all bytes are consumed or the instruction limit is hit.
    # (No range() here: with no limit it would have to cover a huge count.)
    while offset < total and (max_instr is None or len(result) < max_instr):
        try:
            # The disassembler is handed a window of at most 16 bytes.
            window = data[offset : offset + 16]
            rawinstr = dis.disassemble(address.add(offset), window)
            instr = Instruction(rawinstr)
            length = instr.length
        except:
            # Best effort by design: any failure ends disassembly and the
            # instructions decoded so far are returned.
            # NOTE(review): bare except kept on purpose; presumably needed so
            # that Java-side exceptions are caught too - confirm.
            break
        if length <= 0 or offset + length > total:
            # Truncated instruction at the end of data (or no progress).
            break
        result.append(instr)
        offset += length
    return result

enhex(s)

Convert raw bytes to a hex string.

>>> enhex([0x01, 0x02])
'0102'
Parameters:
  • s

    raw bytes to encode.

Source code in ghidralib.py
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
def enhex(s):  # type: (bytes | list[int]) -> str
    """Hex-encode raw bytes (or a list of byte values).

        >>> enhex([0x01, 0x02])
        '0102'

    :param s: raw bytes to encode, or a list of integer byte values."""
    if isinstance(s, Str):
        return _enhex(s)
    # Not a string type: treat it as a sequence of integer byte values.
    packed = "".join(chr(c) for c in s)
    return _enhex(_asbytes(packed))

findall_pattern(byte_pattern)

Find all occurrences of a byte pattern in the program.

>>> findall_pattern("01 02 ?? 04")
[0x1000, 0x1004]
Parameters:
  • byte_pattern

    the pattern string.

Returns:
  • iterator over all addresses of all occurrences.

Source code in ghidralib.py
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
def findall_pattern(byte_pattern):  # type: (str) -> Iterator[int]
    """Yield the address of every occurrence of a byte pattern in the program.

        >>> list(findall_pattern("01 02 ?? 04"))
        [0x1000, 0x1004]

    :param byte_pattern: the pattern string.
    :return: iterator over all addresses of all occurrences."""
    cursor = 0
    while True:
        hit = findone_pattern(byte_pattern, start=cursor)
        if hit is None:
            return
        yield hit
        # Resume the search one byte after the previous match.
        cursor = hit + 1

findone_pattern(byte_pattern, start=0)

Find the first occurrence of a byte pattern in the program (or None).

>>> findone_pattern("01 02 ?? 04")
0x1000
Parameters:
  • byte_pattern

    the pattern string.

  • start

    the address to start searching from.

Returns:
  • address of the first occurrence, or None if not found.

Source code in ghidralib.py
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
def findone_pattern(byte_pattern, start=0):  # type: (str, Addr) -> int|None
    """Find the first occurrence of a byte pattern in the program (or None).

        >>> findone_pattern("01 02 ?? 04")
        0x1000

    :param byte_pattern: the pattern string.
    :param start: the address to start searching from.
    :return: address of the first occurrence, or None if not found."""
    start_addr = resolve(start)
    # The pattern string is turned into a byte sequence and a matching mask.
    pattern_bytes = _pattern_to_bytes(byte_pattern)
    pattern_mask = _pattern_to_mask(byte_pattern)
    memory = Program.current().getMemory()
    found = memory.findBytes(
        start_addr, pattern_bytes, pattern_mask, True, getMonitor()
    )
    # Compare with None explicitly instead of relying on the truthiness of
    # a Java object.
    if found is None:
        return None
    return found.getOffset()

from_bytes(b)

Decode bytes as a little-endian integer.

>>> from_bytes('ab')
25185
Parameters:
  • b

    byte stream to decode.

Source code in ghidralib.py
4124
4125
4126
4127
4128
4129
4130
4131
4132
def from_bytes(b):  # type: (bytes) -> int
    """Decode bytes as a little-endian integer.

        >>> from_bytes('ab')
        25185

    :param b: byte stream to decode.
    :return: the decoded integer (0 for an empty input)."""
    total = 0
    # The byte at position i contributes its value shifted left by 8*i bits.
    for position, byte_value in enumerate(_bytes_as_list(b)):
        total += byte_value << (position * 8)
    return total

get_string(address)

Get the string defined at the given address.

This function will return None if the data defined in Ghidra at the given address is not a string. This function will also return None if the string at address was not defined in Ghidra. To read a null-terminated string from Ghidra memory, use read_cstring instead.

>>> get_string(0x1000)
'Hello, world!'
Parameters:
  • address

    address where string should be located.

Source code in ghidralib.py
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
def get_string(address):  # type: (Addr) -> str|None
    """Get the string defined at the given address.

    None is returned when Ghidra has no data defined at `address`, and
    also when the data defined there has no string value. To read a
    null-terminated string straight from memory, use `read_cstring` instead.

        >>> get_string(0x1000)
        'Hello, world!'

    :param address: address where string should be located.
    :return: the string value, or None."""
    data = getDataAt(resolve(address))
    if not data or not data.hasStringValue():
        return None
    return data.getValue()

read_bytes(address, length)

Read a byte stream from program at address.

>>> read_bytes(0x1000, 4)
'test'
Parameters:
  • address

    address from which to read.

  • length

    number of bytes to read.

Source code in ghidralib.py
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
def read_bytes(address, length):  # type: (Addr, int) -> bytes
    """Read `length` bytes from the program, starting at `address`.

        >>> read_bytes(0x1000, 4)
        'test'

    :param address: address from which to read.
    :param length: number of bytes to read.
    :return: the bytes that were read."""
    source = resolve(address)
    raw = getBytes(source, length)
    return _bytes_from_bytelist(raw)

read_cstring(address)

Read a null-terminated string from Ghidra memory.

This function ignores metadata available to Ghidra and just reads the bytes until a nullbyte is encountered.

>>> read_cstring(0x1000)
'Hello, world!'
Parameters:
  • address

    address from which to start reading.

Source code in ghidralib.py
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
def read_cstring(address):  # type: (Addr) -> str
    """Read a null-terminated string from Ghidra memory.

    Metadata available to Ghidra is ignored: bytes are read one at a time
    until a null byte is found. The null byte is not part of the result.

        >>> read_cstring(0x1000)
        'Hello, world!'

    :param address: address from which to start reading.
    :return: the characters read before the terminating null byte."""
    cursor = resolve(address)
    chars = []
    c = read_u8(cursor)
    while c != 0:
        chars.append(chr(c))
        cursor = cursor.add(1)
        c = read_u8(cursor)
    return "".join(chars)

read_u16(address)

Read a 16bit integer from program at address.

>>> read_u16(0x1000)
0x0102
Parameters:
  • address

    address from which to read.

Source code in ghidralib.py
4082
4083
4084
4085
4086
4087
4088
4089
def read_u16(address):  # type: (Addr) -> int
    """Read two bytes at address and decode them with `from_bytes`.

        >>> read_u16(0x1000)
        0x0102

    :param address: address from which to read.
    :return: the decoded 16bit integer."""
    raw = read_bytes(address, 2)
    return from_bytes(raw)

read_u32(address)

Read a 32bit integer from program at address.

>>> read_u32(0x1000)
0x01020304
Parameters:
  • address

    address from which to read.

Source code in ghidralib.py
4092
4093
4094
4095
4096
4097
4098
4099
def read_u32(address):  # type: (Addr) -> int
    """Read four bytes at address and decode them with `from_bytes`.

        >>> read_u32(0x1000)
        0x01020304

    :param address: address from which to read.
    :return: the decoded 32bit integer."""
    raw = read_bytes(address, 4)
    return from_bytes(raw)

read_u64(address)

Read a 64bit integer from program at address.

>>> read_u64(0x1000)
0x0102030405060708
Parameters:
  • address

    address from which to read.

Source code in ghidralib.py
4102
4103
4104
4105
4106
4107
4108
4109
def read_u64(address):  # type: (Addr) -> int
    """Read eight bytes at address and decode them with `from_bytes`.

        >>> read_u64(0x1000)
        0x0102030405060708

    :param address: address from which to read.
    :return: the decoded 64bit integer."""
    raw = read_bytes(address, 8)
    return from_bytes(raw)

read_u8(address)

Read a byte from program at address.

>>> read_u8(0x1000)
0x01
Parameters:
  • address

    address from which to read.

Source code in ghidralib.py
4072
4073
4074
4075
4076
4077
4078
4079
def read_u8(address):  # type: (Addr) -> int
    """Read a single byte at address and decode it with `from_bytes`.

        >>> read_u8(0x1000)
        0x01

    :param address: address from which to read.
    :return: the value of the byte as an integer."""
    raw = read_bytes(address, 1)
    return from_bytes(raw)

read_unicode(address)

Read a null-terminated utf-16 string from Ghidra memory.

This function ignores metadata available to Ghidra and just reads the bytes until a null character is encountered.

>>> read_unicode(0x1000)
'Hello, world!'
Parameters:
  • address

    address from which to start reading.

Source code in ghidralib.py
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
def read_unicode(address):  # type: (Addr) -> str
    """Read a null-terminated utf-16 string from Ghidra memory.

    This function ignores metadata available to Ghidra and just reads
    16bit values until a null character is encountered. Every 16bit value
    is converted to one character on its own.

        >>> read_unicode(0x1000)
        'Hello, world!'

    :param address: address from which to start reading.
    :return: the characters read before the terminating null character."""
    addr = resolve(address)
    units = []
    while True:
        c = read_u16(addr)
        if c == 0:
            break
        units.append(c)
        addr = addr.add(2)

    if all(c < 0x100 for c in units):
        # chr() handles these values on every Python version.
        return "".join(chr(c) for c in units)

    # On Python 2 (Jython) chr() rejects values above 255, so characters
    # outside that range need unichr(). Python 3 has no unichr; chr() is
    # enough there.
    try:
        wide_chr = unichr  # noqa: F821 - only defined on Python 2
    except NameError:
        wide_chr = chr
    return u"".join(wide_chr(c) for c in units)

resolve(addr)

Convert an arbitrary addressable value to a Ghidra Address object.

This library accepts one of three things as addresses:

  1. A Ghidra Address object
  2. An integer representing an address
  3. A string representing a symbol name

This function is responsible for converting the addressable values (Addr) to Ghidra addresses (GenericAddress).

>>> resolve(0x1234)
0x1234
>>> resolve("main")
0x1234
>>> resolve(toAddr(0x1234))
0x1234
Parameters:
  • addr

    An addressable value.

Returns:
  • A GenericAddress object representing the passed address.

Source code in ghidralib.py
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
def resolve(addr):  # type: (Addr) -> GenericAddress
    """Convert an arbitrary addressable value to a Ghidra Address object.

    This library accepts one of three things as addresses:

    1. A Ghidra Address object (returned unchanged)
    2. An integer representing an address
    3. A string representing a symbol name

    This function is responsible for converting the addressable values (`Addr`)
    to Ghidra addresses (`GenericAddress`).

        >>> resolve(0x1234)
        0x1234
        >>> resolve("main")
        0x1234
        >>> resolve(toAddr(0x1234))
        0x1234

    :param addr: An addressable value.
    :return: A GenericAddress object representing the passed address.
    :raises TypeError: if the value is none of the accepted types.

    """
    # Why, Ghidra? Unicode strings are encoded before the type dispatch.
    candidate = addr.encode() if isinstance(addr, unicode) else addr

    if isinstance(candidate, GenericAddress):
        return candidate
    if isinstance(candidate, str):
        # Strings are looked up as symbol names.
        return toAddr(Symbol(candidate).address)
    if isinstance(candidate, (int, long)):
        # Why convert to string? Java cannot handle large (unsigned) integers :/
        return toAddr("{:x}".format(candidate))
    raise TypeError("Address must be a ghidra Address, int, or str")

to_bytes(value, length)

Encode an integer as a little-endian byte stream.

>>> to_bytes(0x0102, 2)
'\x02\x01'
Parameters:
  • value

    integer to encode.

  • length

    number of bytes of the result.

Source code in ghidralib.py
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
def to_bytes(value, length):  # type: (int, int) -> bytes
    """Encode an integer as a little-endian byte stream.

    The least significant byte comes first. Only the lowest `length` bytes
    of the value are encoded.

        >>> to_bytes(0x0102, 2)
        '\\x02\\x01'

    :param value: integer to encode.
    :param length: number of bytes of the result.
    :return: the encoded bytes."""
    # The byte at position i holds bits 8*i .. 8*i+7 of the value.
    chars = [chr((value >> (8 * i)) & 0xFF) for i in range(length)]
    return _asbytes("".join(chars))

try_resolve(addr)

Convert an arbitrary addressable value to a Ghidra Address object.

See resolve documentation for more details.

Parameters:
  • addr

    An addressable value.

Returns:
  • A GenericAddress representing the value, or None if resolving failed.

Source code in ghidralib.py
393
394
395
396
397
398
399
400
401
402
403
def try_resolve(addr):  # type: (Addr) -> GenericAddress | None
    """Like `resolve`, but return None instead of raising when it fails.

    See `resolve` documentation for more details.

    :param addr: An addressable value.
    :return: A GenericAddress representing the value, or None if resolving failed."""
    resolved = None
    try:
        resolved = resolve(addr)
    except:
        # Any failure at all is reported as None.
        pass
    return resolved

unhex(s)

Decode a hex string.

>>> unhex("01 02")
'\x01\x02'
Parameters:
  • s

    hex string to decode.

Source code in ghidralib.py
4167
4168
4169
4170
4171
4172
4173
4174
def unhex(s):  # type: (str) -> bytes
    """Turn a hex string back into raw bytes.

        >>> unhex("01 02")
        '\\x01\\x02'

    :param s: hex string to decode.
    :return: the decoded bytes."""
    decoded = _unhex(s)
    return decoded

unwrap(wrapper_or_java_type)

If the argument is a GhidraWrapper, return the underlying Java object.

Source code in ghidralib.py
414
415
416
417
418
def unwrap(wrapper_or_java_type):  # type: (JavaObject|GhidraWrapper) -> JavaObject
    """If the argument is a GhidraWrapper, return the underlying Java object.

    Any other value is returned unchanged."""
    if not isinstance(wrapper_or_java_type, GhidraWrapper):
        return wrapper_or_java_type
    return wrapper_or_java_type.raw

write_bytes(address, data)

Write the provided bytes at a given address.

>>> write_bytes(0x1000, "test")
>>> read_bytes(0x1000, 4)
'test'
Parameters:
  • address

    address where bytes should be written.

  • data

    bytes to write.

Source code in ghidralib.py
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
def write_bytes(address, data):  # type: (Addr, bytes) -> None
    """Write the provided bytes at a given address.

    If the first write attempt fails, the listing covering the written
    range is cleared and the write is attempted once more.

        >>> write_bytes(0x1000, "test")
        >>> read_bytes(0x1000, 4)
        'test'

    :param address: address where bytes should be written.
    :param data: bytes to write."""
    target = resolve(address)
    try:
        setBytes(target, data)
    except:
        # Clear everything from the first to the last written byte, then retry.
        last_byte = target.add(len(data) - 1)
        clearListing(target, last_byte)
        setBytes(target, data)

xor(a, b)

XOR two bytestrings together.

If two bytestrings are not the same length, the result will be truncated to the length of the shorter string.

>>> xor("\x01\x02", "\x03\x04")
'\x02\x06'
Parameters:
  • a

    the first bytestring.

  • b

    the second bytestring.

Source code in ghidralib.py
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
def xor(a, b):  # type: (bytes, bytes) -> bytes
    """XOR two bytestrings together, byte by byte.

    When the inputs differ in length, the result is as long as the
    shorter one.

        >>> xor("\\x01\\x02", "\\x03\\x04")
        '\\x02\\x06'

    :param a: the first bytestring.
    :param b: the second bytestring.
    :return: the XOR of both inputs."""
    pairs = zip(_bytes_as_list(a), _bytes_as_list(b))
    mixed = [chr(left ^ right) for left, right in pairs]
    return _asbytes("".join(mixed))