Skip to content

Commit

Permalink
Merge pull request #224 from roblabla/ghidra-devenv
Browse files Browse the repository at this point in the history
Ghidra devenv
  • Loading branch information
roblabla committed Sep 2, 2024
2 parents 6f09bde + ca51cbd commit 1c1a2d5
Show file tree
Hide file tree
Showing 10 changed files with 193 additions and 37 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ jobs:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v5
with:
python-version: '3.4'
- name: Get toolchain from cache
id: cache-toolchain
uses: actions/cache/restore@v3
Expand Down Expand Up @@ -44,6 +47,9 @@ jobs:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v5
with:
python-version: '3.4'
- name: Get toolchain from cache
id: cache-toolchain
uses: actions/cache/restore@v3
Expand Down Expand Up @@ -79,6 +85,9 @@ jobs:
- uses: actions/checkout@v3
with:
submodules: true
- uses: actions/setup-python@v5
with:
python-version: '3.4'
- name: Get toolchain from cache
id: cache-toolchain
uses: actions/cache/restore@v3
Expand Down
33 changes: 30 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,42 @@ python3 ./scripts/build.py
This will automatically generate a ninja build script `build.ninja`, and run
ninja on it.

## Reverse Engineering
## Contributing

### Reverse Engineering

You can find an XML export of our Ghidra RE in the companion repository
[th06-re]. This repo is updated nightly through [`scripts/export_ghidra_database.py`],
and its history matches the checkin history from our team's Ghidra Server.
[th06-re], in the `xml` branch. This repo is updated nightly through
[`scripts/export_ghidra_database.py`], and its history matches the checkin
history from our team's Ghidra Server.

If you wish to help us in our Reverse Engineering effort, please contact
@roblabla on discord so we can give you an account on the Ghidra Server.

### Reimplementation

The easiest way to work on the reimplementation is through the use of
[`objdiff`](https://github.com/encounter/objdiff). Here's how to get started:

1. First, follow the instructions above to get a devenv set up.
1. Copy the original `東方紅魔郷.exe` file (version 1.02h) to
`resources/game.exe`. This file will be used as the reference that the
reimplementations are compared against.
1. Download the latest version of objdiff.
1. Run `python3 scripts/export_ghidra_objs.py --import-xml`. This will extract
from `resources/game.exe` the object files that objdiff can compare against.
1. Finally, run objdiff and open the th06 project.

#### Choosing a function to decompile

The easiest way is to look at the `config/stubbed.csv` file, which lists all the
functions that are automatically stubbed out. You should pick one of them, open
the associated object file in objdiff, and click on the function of interest.

Then, open the correct `cpp` file, copy/paste the declaration, and start
hacking! It may be useful to take the ghidra decompiler output as a base. You
can find this output in the [th06-re] repository.

# Credits

We would like to extend our thanks to the following individuals for their
Expand Down
2 changes: 1 addition & 1 deletion scripts/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def main():

if args.object_name is not None:
object_name = Path(args.object_name).name
target = f"build/objdiff/reimpl/{object_name}"
target = "build/objdiff/reimpl/" + object_name
elif args.target is not None:
target = args.target

Expand Down
45 changes: 43 additions & 2 deletions scripts/create_devenv.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import stat
import subprocess
import sys
import zipfile

try:
from typing import Optional
Expand Down Expand Up @@ -127,8 +128,8 @@ def parse_arguments() -> Namespace:
parser.add_argument(
"--only",
action="append",
choices=["vs", "dx8", "py", "pragma", "ninja", "satsuki"],
help="Only run certain steps. Possible values are vs, dx8, py, pragma, ninja and satsuki.",
choices=["vs", "dx8", "py", "pragma", "ninja", "satsuki", "ghidra"],
help="Only run certain steps. Possible values are vs, dx8, py, pragma, ninja, satsuki and ghidra.",
)
parser.add_argument("dl_cache_path", help="Path to download the requirements in")
parser.add_argument("output_path", help="The output directory")
Expand Down Expand Up @@ -341,6 +342,20 @@ def download_requirements(dl_cache_path, steps, should_torrent):
"filename": "satsuki",
"sha256": "e7a5f586b0f8febe5a1a6a3a0178486ec124c5dabc8ffb17bf0b892194dd8116",
},
{
"name": "ghidra",
"only": "ghidra",
"url": "https://github.com/happyhavoc/ghidra-ci/releases/download/2024-08-31/release.zip",
"filename": "ghidra.zip",
"sha256": "524f6bdfa134afbe722498953eb21efacd93a876842e31fd04f93592270976a3",
},
{
"name": "ghidra-delinker",
"only": "ghidra",
"url": "https://github.com/happyhavoc/ghidra-delinker-extension/releases/download/v0.5.0-th06.1/ghidra_11.1_PUBLIC_20240831_ghidra-delinker-extension.zip",
"filename": "ghidra-delinker.zip",
"sha256": "a9b063294412fb095d749d06905a05cdd42714b82818141d6844955f11680691",
},
]

if should_torrent:
Expand Down Expand Up @@ -536,6 +551,30 @@ def install_satsuki(dl_cache_path, output_path):
os.chmod(str(install_path / satsuki_name), mode)


def install_ghidra(dl_cache_path, tmp_dir, output_path):
    """Install Ghidra and the ghidra-delinker extension into the devenv.

    Extracts ``ghidra.zip`` from ``dl_cache_path`` into ``tmp_dir``, moves the
    extracted ``ghidra_*`` folder to ``output_path / "ghidra"`` (replacing any
    previous install found there), then unpacks ``ghidra-delinker.zip`` into
    the ``Ghidra/Extensions`` folder of that install.

    All three arguments are expected to be ``pathlib.Path`` objects.

    Raises:
        RuntimeError: if the archive did not produce a ``ghidra_*`` folder.
    """
    install_path = output_path / "ghidra"

    with zipfile.ZipFile(str(dl_cache_path / "ghidra.zip")) as ghidra_zip:
        ghidra_zip.extractall(str(tmp_dir))
        # Re-apply the Unix permission bits stored in the upper 16 bits of
        # external_attr, so that the launcher scripts keep their exec bit.
        for item in ghidra_zip.infolist():
            if item.filename.endswith("/"):
                continue
            file_mode = stat.S_IMODE(item.external_attr >> 16)
            # Entries without Unix mode information report 0 here. Applying
            # that would strip every permission from the file, so keep the
            # mode chosen by extractall() instead.
            if file_mode:
                os.chmod(str(tmp_dir / item.filename), file_mode)

    # Find the ghidra folder.
    extracted = None
    for item in tmp_dir.iterdir():
        if item.name.startswith("ghidra_") and item.is_dir():
            extracted = item
            break

    if extracted is None:
        raise RuntimeError(
            "No ghidra_* folder found in " + str(tmp_dir) + " after extracting ghidra.zip"
        )

    # shutil.move() would nest the new tree *inside* an existing destination
    # folder, so drop any previous install first.
    if install_path.exists():
        shutil.rmtree(str(install_path))

    print(str(extracted) + "->" + str(install_path))
    shutil.move(str(extracted), str(install_path))

    # Next, install ghidra-delinker-extension
    shutil.unpack_archive(
        str(dl_cache_path / "ghidra-delinker.zip"),
        str(install_path / "Ghidra" / "Extensions"),
        format="zip",
    )


def main(args: Namespace) -> int:
dl_cache_path = Path(args.dl_cache_path).absolute()
output_path = Path(args.output_path).absolute()
Expand Down Expand Up @@ -573,6 +612,8 @@ def main(args: Namespace) -> int:
install_ninja(ninja_zip_path, output_path)
if "satsuki" in steps:
install_satsuki(dl_cache_path, output_path)
if "ghidra" in steps:
install_ghidra(dl_cache_path, tmp_dir, output_path)

return 0

Expand Down
6 changes: 3 additions & 3 deletions scripts/export_ghidra_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ def fetchVersions(args):
with tempfile.NamedTemporaryFile(prefix="versions") as f:
ghidra_helpers.runAnalyze(
args.GHIDRA_REPO_NAME,
program=args.program,
process=args.program,
username=args.username,
ssh_key=args.ssh_key,
extraArgs=["-preScript", "ExportFileVersions.java", f.name],
pre_scripts=[["ExportFileVersions.java", f.name]],
)
versions = json.loads(f.read())
versions.sort(key=lambda x: x["version"])
Expand Down Expand Up @@ -62,7 +62,7 @@ def export(args, version: dict):
program=args.program,
username=args.username,
ssh_key=args.ssh_key,
extraArgs=["-preScript", script, str(out), str(version["version"])],
pre_scripts=[[script, str(out), str(version["version"])]],
)

if args.EXPORT_TYPE == XML:
Expand Down
29 changes: 14 additions & 15 deletions scripts/export_ghidra_objs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def main():
parser.add_argument("--program", help="Program to export", default="th06_102h.exe")
args = parser.parse_args()

os.makedirs(str(SCRIPT_PATH.parent / "build" / "objdiff" / "reimpl"), exist_ok=True)
os.makedirs(str(SCRIPT_PATH.parent / "build" / "objdiff" / "orig"), exist_ok=True)

if args.import_xml:
with tempfile.TemporaryDirectory() as tempdir:
Expand All @@ -32,16 +32,14 @@ def main():
ghidra_helpers.runAnalyze(
str(tempdir),
"Touhou 06",
import_file=str(SCRIPT_PATH.parent / "resources" / "game.exe"),
analysis=True,
extraArgs=[
"-import",
SCRIPT_PATH.parent / "resources" / "game.exe",
"-postScript",
SCRIPT_PATH / "ghidra" / "ImportFromXml.java",
filename,
"-postScript",
SCRIPT_PATH / "ghidra" / "ExportDelinker.java",
str(SCRIPT_PATH.parent / "build" / "objdiff" / "orig"),
post_scripts=[
["ImportFromXml.java", filename],
[
"ExportDelinker.java",
str(SCRIPT_PATH.parent / "build" / "objdiff" / "orig"),
],
],
)
else:
Expand All @@ -52,11 +50,12 @@ def main():
ghidra_helpers.runAnalyze(
repo,
project_name,
program,
extraArgs=[
"-preScript",
SCRIPT_PATH / "ghidra" / "ExportDelinker.java",
str(SCRIPT_PATH.parent / "build" / "objdiff" / "orig"),
process=program,
pre_scripts=[
[
"ExportDelinker.java",
str(SCRIPT_PATH.parent / "build" / "objdiff" / "orig"),
]
],
)

Expand Down
2 changes: 1 addition & 1 deletion scripts/generate_objdiff_objs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def rename_symbols(filename):
elif namespace != class_name.encode("utf8"):
continue

offset = obj.string_table.append(func_name)
offset = obj.string_table.append(namespace + b"::" + func_name)
sym_obj.name = b"\0\0\0\0" + struct.pack("I", offset)

if not reimpl_folder.exists():
Expand Down
55 changes: 55 additions & 0 deletions scripts/ghidra/ImportFromXml.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* LICENSE
*/
// Imports the contents of a Ghidra XML export into the current program.
//@author roblabla
//@category exports
//@keybinding
//@menupath Skeleton
//@toolbar Skeleton
import ghidra.app.script.GhidraScript;
import ghidra.app.util.Option;
import ghidra.app.util.bin.ByteProvider;
import ghidra.app.util.importer.MessageLog;
import ghidra.app.util.opinion.LoadSpec;
import ghidra.app.util.opinion.XmlLoader;
import ghidra.formats.gfilesystem.FSRL;
import ghidra.formats.gfilesystem.FileSystemService;
import ghidra.framework.model.DomainFile;
import ghidra.framework.model.DomainObject;
import ghidra.program.model.mem.Memory;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;

/**
 * Ghidra script that loads an XML file into {@code currentProgram} using
 * Ghidra's {@link XmlLoader}.
 */
public class ImportFromXml extends GhidraScript
{
    /**
     * Asks for the input XML file, resolves the single load spec the XML
     * loader reports for it, loads it into the current program and prints the
     * loader's message log.
     *
     * @throws Exception if the loader reports zero or more than one load spec
     *                   for the file, or if any of the Ghidra calls fail.
     */
    @Override protected void run() throws Exception
    {
        // NOTE(review): when run headless, askFile is presumably answered from
        // the script arguments passed on the analyzeHeadless command line.
        File inFile = askFile("Input XML", "");

        XmlLoader loader = new XmlLoader();

        FileSystemService fsService = FileSystemService.getInstance();
        FSRL fsrl = fsService.getLocalFSRL(inFile);

        // ByteProvider is Closeable: release it once loading is done, even if
        // one of the checks below throws.
        try (ByteProvider bp = fsService.getByteProvider(fsrl, false, monitor))
        {
            Collection<LoadSpec> specs = loader.findSupportedLoadSpecs(bp);
            if (specs.isEmpty())
            {
                throw new Exception("No specs found");
            }
            if (specs.size() > 1)
            {
                throw new Exception("More than 1 spec found");
            }

            LoadSpec loadSpec = specs.iterator().next();
            MessageLog messageLog = new MessageLog();

            // Turn the loader's "Memory Blocks" option off.
            // NOTE(review): presumably so the memory of the already-imported
            // executable is kept as-is; confirm against the XmlLoader options.
            ArrayList<Option> opts = new ArrayList<>();
            opts.add(new Option("Memory Blocks", false));
            loader.loadInto(bp, loadSpec, opts, messageLog, currentProgram, monitor);

            this.println(messageLog.toString());
        }
    }
}
45 changes: 35 additions & 10 deletions scripts/ghidra_helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from pathlib import Path
import re
import shlex
import shutil
import subprocess

Expand All @@ -9,16 +10,22 @@


def findAnalyzeHeadless():
ghidra_home = None
if (SCRIPT_PATH / "prefix" / "ghidra").exists():
ghidra_home = SCRIPT_PATH / "prefix" / "ghidra"

# The standard way to locate ghidra is to look at the GHIDRA_HOME
# environment variable, which points to the ghidra installation folder.
if os.getenv("GHIDRA_HOME") is not None:
elif os.getenv("GHIDRA_HOME") is not None:
ghidra_home = Path(os.getenv("GHIDRA_HOME"))

if ghidra_home is not None:
if os.name == "nt":
analyze_headless = ghidra_home / "support" / "analyzeHeadless.bat"
else:
analyze_headless = ghidra_home / "support" / "analyzeHeadless"
if analyze_headless.exists():
return analyze_headless
return str(analyze_headless)

# ArchLinux and Nix add a ghidra-analyzeHeadless symlink that points to the
# analyzeHeadless script of the ghidra installation.
Expand All @@ -38,18 +45,27 @@ def findAnalyzeHeadless():
def runAnalyze(
ghidra_repo_name,
project_name="Touhou 06",
program=None,
process=None,
import_file=None,
analysis=False,
username=None,
ssh_key=None,
extraArgs=[],
pre_scripts=[],
post_scripts=[],
):
commonAnalyzeHeadlessArgs = [findAnalyzeHeadless(), ghidra_repo_name]

if not re.match("^ghidra://", ghidra_repo_name):
# Set a project name
commonAnalyzeHeadlessArgs += [project_name]

if process and import_file:
raise Exception("Cannot provide both import and process")
elif process:
commonAnalyzeHeadlessArgs += ["-process", process]
elif import_file:
commonAnalyzeHeadlessArgs += ["-import", import_file]

commonAnalyzeHeadlessArgs += [
"-readOnly",
"-scriptPath",
Expand All @@ -62,16 +78,25 @@ def runAnalyze(
if ssh_key:
commonAnalyzeHeadlessArgs += ["-keystore", ssh_key]

# TODO: If program is not provided, export all files from server.
if program:
commonAnalyzeHeadlessArgs += ["-process", program]
for pre_script in pre_scripts:
if isinstance(pre_script, list):
commonAnalyzeHeadlessArgs += ["-prescript"] + pre_script
elif isinstance(pre_script, str):
commonAnalyzeHeadlessArgs += ["-prescript", pre_script]

for post_script in post_scripts:
if isinstance(post_script, list):
commonAnalyzeHeadlessArgs += ["-postscript"] + post_script
elif isinstance(post_script, str):
commonAnalyzeHeadlessArgs += ["-postscript", post_script]

commonAnalyzeHeadlessEnv = os.environ.copy()
if username is not None:
commonAnalyzeHeadlessEnv["_JAVA_OPTIONS"] = (
f"-Duser.name={username} " + os.environ.get("_JAVA_OPTIONS", "")
)

allArgs = commonAnalyzeHeadlessArgs + extraArgs
print("Running " + str(allArgs))
return subprocess.run(allArgs, env=commonAnalyzeHeadlessEnv, check=True)
print("Running " + " ".join(shlex.quote(x) for x in commonAnalyzeHeadlessArgs))
return subprocess.run(
commonAnalyzeHeadlessArgs, env=commonAnalyzeHeadlessEnv, check=True
)
Loading

0 comments on commit 1c1a2d5

Please sign in to comment.