From f40a84eda13d467e10766a68952da71c32447b13 Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Sun, 26 May 2024 20:53:45 +0800 Subject: [PATCH 01/14] Add `hash_type` for `search_by_symbol_offsets` --- pwnlib/libcdb.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index 3685465ed..3ce1943b6 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -583,7 +583,7 @@ def _handle_multiple_matching_libcs(matching_libcs): selected_index = options("Select the libc version to use:", [libc['id'] for libc in matching_libcs]) return matching_libcs[selected_index] -def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False): +def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False, hash_type="md5"): """ Lookup possible matching libc versions based on leaked function addresses. @@ -626,6 +626,8 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as >>> for buildid in matched_libcs: # doctest +SKIP ... libc = ELF(search_by_build_id(buildid)) # doctest +SKIP """ + assert hash_type in HASHES, hash_type + for symbol, address in symbols.items(): if isinstance(address, int): symbols[symbol] = hex(address) @@ -661,21 +663,24 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as if return_as_list: return [libc['buildid'] for libc in matching_list] + # replace 'build_id' to 'buildid' + match_type = hash_type.replace("_", "") + # If there's only one match, return it directly if len(matching_list) == 1: - return search_by_build_id(matching_list[0]['buildid'], unstrip=unstrip, offline_only=offline_only) + return search_by_hash(matching_list[0][match_type], hash_type=hash_type, unstrip=unstrip, offline_only=offline_only) # If a specific index is provided, validate it and return the selected libc if select_index is not None: if select_index > 0 and select_index <= len(matching_list): - return search_by_build_id(matching_list[select_index - 1]['buildid'], unstrip=unstrip, offline_only=offline_only) + return search_by_hash(matching_list[select_index - 1][match_type], hash_type=hash_type, unstrip=unstrip, offline_only=offline_only) else: log.error('Invalid selected libc index. %d is not in the range of 1-%d.', select_index, len(matching_list)) return None # Handle multiple matches interactively if no index is specified selected_libc = _handle_multiple_matching_libcs(matching_list) - return search_by_build_id(selected_libc['buildid'], unstrip=unstrip, offline_only=offline_only) + return search_by_hash(selected_libc[match_type], hash_type=hash_type, unstrip=unstrip, offline_only=offline_only) def search_by_build_id(hex_encoded_id, unstrip=True, offline_only=False): """ From c9efa2db952855d4eeb2d8d78e7699af2fb01bb3 Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Sun, 26 May 2024 21:13:07 +0800 Subject: [PATCH 02/14] Add docs --- pwnlib/libcdb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index 3ce1943b6..ebb86256c 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -608,6 +608,8 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as offline_only(bool): When pass `offline_only=True`, restricts search mode to offline sources only, disable online lookup. Defaults to `False`, and enable both offline and online providers. + hash_type(str): + An option to select searched hash. Returns: Path to the downloaded library on disk, or :const:`None`. From 62cdf0ef9d82d31fa2bd2ddf9d2d7e9efd66173b Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Sun, 26 May 2024 21:14:11 +0800 Subject: [PATCH 03/14] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 312af8267..9963f342f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -83,6 +83,7 @@ The table below shows which release corresponds to each branch, and what date th - [#2376][2376] Return buffered data on first EOF in tube.readline() - [#2387][2387] Convert apport_corefile() output from bytes-like object to string - [#2388][2388] libcdb: add `offline_only` to `search_by_symbol_offsets` +- [#2413][2413] libcdb: improve the search speed of `search_by_symbol_offsets` [2360]: https://github.com/Gallopsled/pwntools/pull/2360 [2356]: https://github.com/Gallopsled/pwntools/pull/2356 @@ -95,6 +96,7 @@ The table below shows which release corresponds to each branch, and what date th [2376]: https://github.com/Gallopsled/pwntools/pull/2376 [2387]: https://github.com/Gallopsled/pwntools/pull/2387 [2388]: https://github.com/Gallopsled/pwntools/pull/2388 +[2413]: https://github.com/Gallopsled/pwntools/pull/2413 ## 4.13.0 (`beta`) From f7db717eae71b58a0144683ccbfaab36250c8078 Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Fri, 7 Jun 2024 11:00:10 +0800 Subject: [PATCH 04/14] Allow search `id` in search_by_hash --- pwnlib/libcdb.py | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index ebb86256c..18c268245 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -24,6 +24,7 @@ log = getLogger(__name__) HASHES = { + 'id': None, 'build_id': lambda path: enhex(ELF(path, checksec=False).buildid or b''), 'sha1': sha1filehex, 'sha256': sha256filehex, @@ -43,6 +44,9 @@ # https://gitlab.com/libcdb/libcdb wasn't updated after 2019, # but still is a massive database of older libc binaries. def provider_libcdb(hex_encoded_id, hash_type): + if hash_type == 'id': + return None + # Deferred import because it's slow import requests from six.moves import urllib @@ -136,6 +140,12 @@ def provider_local_database(hex_encoded_id, hash_type): if not localdb.is_dir(): return None + if hash_type == "id": + libc_list = list(localdb.rglob(f"{hex_encoded_id}.so")) + if len(libc_list) == 0: + return None + return read(libc_list[0]) + log.debug("Searching local libc database, %s: %s", hash_type, hex_encoded_id) for libc_path in localdb.rglob("*.so"): if hex_encoded_id == HASHES[hash_type](libc_path): @@ -583,7 +593,7 @@ def _handle_multiple_matching_libcs(matching_libcs): selected_index = options("Select the libc version to use:", [libc['id'] for libc in matching_libcs]) return matching_libcs[selected_index] -def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False, hash_type="md5"): +def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False, hash_type="id"): """ Lookup possible matching libc versions based on leaked function addresses. @@ -684,6 +694,32 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as selected_libc = _handle_multiple_matching_libcs(matching_list) return search_by_hash(selected_libc[match_type], hash_type=hash_type, unstrip=unstrip, offline_only=offline_only) +def search_by_id(libs_id, unstrip=True, offline_only=False): + """ + Given a hex-encoded Build ID, attempt to download a matching libc from libcdb. + + Arguments: + libs_id(str): + Libs ID (e.g. 'ABCDEF...') of the library + unstrip(bool): + Try to fetch debug info for the libc and apply it to the downloaded file. + offline_only(bool): + When pass `offline_only=True`, restricts search mode to offline sources only, + disable online lookup. Defaults to `False`, and enable both offline and online providers. + + Returns: + Path to the downloaded library on disk, or :const:`None`. + + Examples: + + >>> None == search_by_id('XX') + True + >>> filename = search_by_id('libc6_2.31-3_amd64') + >>> hex(ELF(filename).symbols.read) + '0xeef40' + """ + return search_by_hash(libs_id, 'id', unstrip, offline_only) + def search_by_build_id(hex_encoded_id, unstrip=True, offline_only=False): """ Given a hex-encoded Build ID, attempt to download a matching libc from libcdb. @@ -827,6 +863,9 @@ def _pack_libs_info(path, libs_id, libs_url, syms): info["download_url"] = "" for hash_type, hash_func in HASHES.items(): + if hash_type == "id": + continue + # replace 'build_id' to 'buildid' info[hash_type.replace("_", "")] = hash_func(path) From cf52dc93c6de4ac160b564a3392f1dcc601ce338 Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Fri, 7 Jun 2024 11:03:56 +0800 Subject: [PATCH 05/14] Fix py2.7 test --- pwnlib/libcdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index 18c268245..185d700cd 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -141,7 +141,7 @@ def provider_local_database(hex_encoded_id, hash_type): return None if hash_type == "id": - libc_list = list(localdb.rglob(f"{hex_encoded_id}.so")) + libc_list = list(localdb.rglob("%s.so" % hex_encoded_id)) if len(libc_list) == 0: return None return read(libc_list[0]) @@ -700,7 +700,7 @@ def search_by_id(libs_id, unstrip=True, offline_only=False): Arguments: libs_id(str): - Libs ID (e.g. 'ABCDEF...') of the library + Libs ID (e.g. 'libc6_...') of the library unstrip(bool): Try to fetch debug info for the libc and apply it to the downloaded file. offline_only(bool): From 3984b3348a54fe9b76087e3aeb907c56c3f7cda9 Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:54:36 +0800 Subject: [PATCH 06/14] Rename `hash_type` to `search_type` --- pwnlib/libcdb.py | 72 ++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index 185d700cd..ce5310535 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -23,7 +23,7 @@ log = getLogger(__name__) -HASHES = { +TYPES = { 'id': None, 'build_id': lambda path: enhex(ELF(path, checksec=False).buildid or b''), 'sha1': sha1filehex, @@ -43,8 +43,8 @@ # https://gitlab.com/libcdb/libcdb wasn't updated after 2019, # but still is a massive database of older libc binaries. -def provider_libcdb(hex_encoded_id, hash_type): - if hash_type == 'id': +def provider_libcdb(hex_encoded_id, search_type): + if search_type == 'id': return None # Deferred import because it's slow @@ -52,7 +52,7 @@ def provider_libcdb(hex_encoded_id, hash_type): from six.moves import urllib # Build the URL using the requested hash type - url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % hash_type + url_base = "https://gitlab.com/libcdb/libcdb/raw/master/hashes/%s/" % search_type url = urllib.parse.urljoin(url_base, hex_encoded_id) data = b"" @@ -62,7 +62,7 @@ def provider_libcdb(hex_encoded_id, hash_type): data = wget(url, timeout=20) if not data: - log.warn_once("Could not fetch libc for %s %s from libcdb", hash_type, hex_encoded_id) + log.warn_once("Could not fetch libc for %s %s from libcdb", search_type, hex_encoded_id) break # GitLab serves up symlinks with @@ -70,7 +70,7 @@ def provider_libcdb(hex_encoded_id, hash_type): url = os.path.dirname(url) + '/' url = urllib.parse.urljoin(url.encode('utf-8'), data) except requests.RequestException as e: - log.warn_once("Failed to fetch libc for %s %s from libcdb: %s", hash_type, hex_encoded_id, e) + log.warn_once("Failed to fetch libc for %s %s from libcdb: %s", search_type, hex_encoded_id, e) return data def query_libc_rip(params): @@ -90,16 +90,16 @@ def query_libc_rip(params): return None # https://libc.rip/ -def provider_libc_rip(hex_encoded_id, hash_type): +def provider_libc_rip(hex_encoded_id, search_type): # Build the request for the hash type # https://github.com/niklasb/libc-database/blob/master/searchengine/api.yml - if hash_type == 'build_id': - hash_type = 'buildid' - params = {hash_type: hex_encoded_id} + if search_type == 'build_id': + search_type = 'buildid' + params = {search_type: hex_encoded_id} libc_match = query_libc_rip(params) if not libc_match: - log.warn_once("Could not find libc info for %s %s on libc.rip", hash_type, hex_encoded_id) + log.warn_once("Could not find libc info for %s %s on libc.rip", search_type, hex_encoded_id) return None if len(libc_match) > 1: @@ -111,13 +111,13 @@ def provider_libc_rip(hex_encoded_id, hash_type): data = wget(url, timeout=20) if not data: - log.warn_once("Could not fetch libc binary for %s %s from libc.rip", hash_type, hex_encoded_id) + log.warn_once("Could not fetch libc binary for %s %s from libc.rip", search_type, hex_encoded_id) return None return data # Check if the local system libc matches the requested hash. -def provider_local_system(hex_encoded_id, hash_type): - if hash_type == 'id': +def provider_local_system(hex_encoded_id, search_type): + if search_type == 'id': return None shell_path = os.environ.get('SHELL', None) or '/bin/sh' if not os.path.exists(shell_path): @@ -127,12 +127,12 @@ def provider_local_system(hex_encoded_id, hash_type): if not local_libc: log.debug('Cannot lookup libc from shell %r. Skipping local system libc matching.', shell_path) return None - if HASHES[hash_type](local_libc.path) == hex_encoded_id: + if TYPES[search_type](local_libc.path) == hex_encoded_id: return local_libc.data return None # Offline search https://github.com/niklasb/libc-database for hash type -def provider_local_database(hex_encoded_id, hash_type): +def provider_local_database(hex_encoded_id, search_type): if not context.local_libcdb: return None @@ -140,15 +140,15 @@ def provider_local_database(hex_encoded_id, hash_type): if not localdb.is_dir(): return None - if hash_type == "id": + if search_type == "id": libc_list = list(localdb.rglob("%s.so" % hex_encoded_id)) if len(libc_list) == 0: return None return read(libc_list[0]) - log.debug("Searching local libc database, %s: %s", hash_type, hex_encoded_id) + log.debug("Searching local libc database, %s: %s", search_type, hex_encoded_id) for libc_path in localdb.rglob("*.so"): - if hex_encoded_id == HASHES[hash_type](libc_path): + if hex_encoded_id == TYPES[search_type](libc_path): return read(libc_path) return None @@ -195,11 +195,11 @@ def query_local_database(params): "online": [provider_libcdb, provider_libc_rip] } -def search_by_hash(hex_encoded_id, hash_type='build_id', unstrip=True, offline_only=False): - assert hash_type in HASHES, hash_type +def search_by_hash(hex_encoded_id, search_type='build_id', unstrip=True, offline_only=False): + assert search_type in TYPES, search_type # Ensure that the libcdb cache directory exists - cache, cache_valid = _check_elf_cache('libcdb', hex_encoded_id, hash_type) + cache, cache_valid = _check_elf_cache('libcdb', hex_encoded_id, search_type) if cache_valid: return cache @@ -213,12 +213,12 @@ def search_by_hash(hex_encoded_id, hash_type='build_id', unstrip=True, offline_o # Run through all available libc database providers to see if we have a match. for provider in providers: - data = provider(hex_encoded_id, hash_type) + data = provider(hex_encoded_id, search_type) if data and data.startswith(b'\x7FELF'): break if not data: - log.warn_once("Could not find libc for %s %s anywhere", hash_type, hex_encoded_id) + log.warn_once("Could not find libc for %s %s anywhere", search_type, hex_encoded_id) # Save whatever we got to the cache write(cache, data or b'') @@ -267,7 +267,7 @@ def _search_debuginfo_by_hash(base_url, hex_encoded_id): return cache -def _check_elf_cache(cache_type, hex_encoded_id, hash_type): +def _check_elf_cache(cache_type, hex_encoded_id, search_type): """ Check if there already is an ELF file for this hash in the cache. @@ -280,7 +280,7 @@ def _check_elf_cache(cache_type, hex_encoded_id, hash_type): True """ # Ensure that the cache directory exists - cache_dir = os.path.join(context.cache_dir, cache_type, hash_type) + cache_dir = os.path.join(context.cache_dir, cache_type, search_type) if not os.path.isdir(cache_dir): os.makedirs(cache_dir) @@ -593,7 +593,7 @@ def _handle_multiple_matching_libcs(matching_libcs): selected_index = options("Select the libc version to use:", [libc['id'] for libc in matching_libcs]) return matching_libcs[selected_index] -def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False, hash_type="id"): +def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False, search_type="id"): """ Lookup possible matching libc versions based on leaked function addresses. @@ -618,7 +618,7 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as offline_only(bool): When pass `offline_only=True`, restricts search mode to offline sources only, disable online lookup. Defaults to `False`, and enable both offline and online providers. - hash_type(str): + search_type(str): An option to select searched hash. Returns: @@ -638,7 +638,7 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as >>> for buildid in matched_libcs: # doctest +SKIP ... libc = ELF(search_by_build_id(buildid)) # doctest +SKIP """ - assert hash_type in HASHES, hash_type + assert search_type in TYPES, search_type for symbol, address in symbols.items(): if isinstance(address, int): @@ -676,23 +676,23 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as return [libc['buildid'] for libc in matching_list] # replace 'build_id' to 'buildid' - match_type = hash_type.replace("_", "") + match_type = search_type.replace("_", "") # If there's only one match, return it directly if len(matching_list) == 1: - return search_by_hash(matching_list[0][match_type], hash_type=hash_type, unstrip=unstrip, offline_only=offline_only) + return search_by_hash(matching_list[0][match_type], search_type=search_type, unstrip=unstrip, offline_only=offline_only) # If a specific index is provided, validate it and return the selected libc if select_index is not None: if select_index > 0 and select_index <= len(matching_list): - return search_by_hash(matching_list[select_index - 1][match_type], hash_type=hash_type, unstrip=unstrip, offline_only=offline_only) + return search_by_hash(matching_list[select_index - 1][match_type], search_type=search_type, unstrip=unstrip, offline_only=offline_only) else: log.error('Invalid selected libc index. %d is not in the range of 1-%d.', select_index, len(matching_list)) return None # Handle multiple matches interactively if no index is specified selected_libc = _handle_multiple_matching_libcs(matching_list) - return search_by_hash(selected_libc[match_type], hash_type=hash_type, unstrip=unstrip, offline_only=offline_only) + return search_by_hash(selected_libc[match_type], search_type=search_type, unstrip=unstrip, offline_only=offline_only) def search_by_id(libs_id, unstrip=True, offline_only=False): """ @@ -862,12 +862,12 @@ def _pack_libs_info(path, libs_id, libs_url, syms): info["libs_url"] = libs_url info["download_url"] = "" - for hash_type, hash_func in HASHES.items(): - if hash_type == "id": + for search_type, hash_func in TYPES.items(): + if search_type == "id": continue # replace 'build_id' to 'buildid' - info[hash_type.replace("_", "")] = hash_func(path) + info[search_type.replace("_", "")] = hash_func(path) default_symbol_list = [ "__libc_start_main_ret", "dup2", "printf", "puts", "read", "system", "str_bin_sh" From 34bbcc8742ce40855ebe9efc517aa169c53f881f Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Thu, 15 Aug 2024 18:09:36 +0800 Subject: [PATCH 07/14] Rename `TYPES['id']` to `TYPES['libs_id']` --- pwnlib/libcdb.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index ce5310535..d008ca2ef 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -24,7 +24,7 @@ log = getLogger(__name__) TYPES = { - 'id': None, + 'libs_id': None, 'build_id': lambda path: enhex(ELF(path, checksec=False).buildid or b''), 'sha1': sha1filehex, 'sha256': sha256filehex, @@ -44,7 +44,7 @@ # https://gitlab.com/libcdb/libcdb wasn't updated after 2019, # but still is a massive database of older libc binaries. def provider_libcdb(hex_encoded_id, search_type): - if search_type == 'id': + if search_type == 'libs_id': return None # Deferred import because it's slow @@ -117,7 +117,7 @@ def provider_libc_rip(hex_encoded_id, search_type): # Check if the local system libc matches the requested hash. def provider_local_system(hex_encoded_id, search_type): - if search_type == 'id': + if search_type == 'libs_id': return None shell_path = os.environ.get('SHELL', None) or '/bin/sh' if not os.path.exists(shell_path): @@ -140,7 +140,8 @@ def provider_local_database(hex_encoded_id, search_type): if not localdb.is_dir(): return None - if search_type == "id": + # Handle the specific search type 'libs_id' + if search_type == 'libs_id': libc_list = list(localdb.rglob("%s.so" % hex_encoded_id)) if len(libc_list) == 0: return None @@ -593,7 +594,7 @@ def _handle_multiple_matching_libcs(matching_libcs): selected_index = options("Select the libc version to use:", [libc['id'] for libc in matching_libcs]) return matching_libcs[selected_index] -def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False, search_type="id"): +def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as_list=False, offline_only=False, search_type='build_id'): """ Lookup possible matching libc versions based on leaked function addresses. @@ -694,7 +695,7 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as selected_libc = _handle_multiple_matching_libcs(matching_list) return search_by_hash(selected_libc[match_type], search_type=search_type, unstrip=unstrip, offline_only=offline_only) -def search_by_id(libs_id, unstrip=True, offline_only=False): +def search_by_libs_id(libs_id, unstrip=True, offline_only=False): """ Given a hex-encoded Build ID, attempt to download a matching libc from libcdb. @@ -718,7 +719,7 @@ def search_by_id(libs_id, unstrip=True, offline_only=False): >>> hex(ELF(filename).symbols.read) '0xeef40' """ - return search_by_hash(libs_id, 'id', unstrip, offline_only) + return search_by_hash(libs_id, 'libs_id', unstrip, offline_only) def search_by_build_id(hex_encoded_id, unstrip=True, offline_only=False): """ @@ -863,11 +864,15 @@ def _pack_libs_info(path, libs_id, libs_url, syms): info["download_url"] = "" for search_type, hash_func in TYPES.items(): - if search_type == "id": + # pass libs_id + if search_type == 'libs_id': continue # replace 'build_id' to 'buildid' - info[search_type.replace("_", "")] = hash_func(path) + if search_type == 'build_id': + search_type = search_type.replace("_", "") + + info[search_type] = hash_func(path) default_symbol_list = [ "__libc_start_main_ret", "dup2", "printf", "puts", "read", "system", "str_bin_sh" From 541a83151890099fa2646555f475cbd61efd9c88 Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Thu, 15 Aug 2024 18:24:37 +0800 Subject: [PATCH 08/14] Rename part `hex_encoded_id` to `search_target` --- pwnlib/libcdb.py | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index d008ca2ef..db50095ef 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -90,16 +90,16 @@ def query_libc_rip(params): return None # https://libc.rip/ -def provider_libc_rip(hex_encoded_id, search_type): +def provider_libc_rip(search_target, search_type): # Build the request for the hash type # https://github.com/niklasb/libc-database/blob/master/searchengine/api.yml if search_type == 'build_id': search_type = 'buildid' - params = {search_type: hex_encoded_id} + params = {search_type: search_target} libc_match = query_libc_rip(params) if not libc_match: - log.warn_once("Could not find libc info for %s %s on libc.rip", search_type, hex_encoded_id) + log.warn_once("Could not find libc info for %s %s on libc.rip", search_type, search_target) return None if len(libc_match) > 1: @@ -111,7 +111,7 @@ def provider_libc_rip(hex_encoded_id, search_type): data = wget(url, timeout=20) if not data: - log.warn_once("Could not fetch libc binary for %s %s from libc.rip", search_type, hex_encoded_id) + log.warn_once("Could not fetch libc binary for %s %s from libc.rip", search_type, search_target) return None return data @@ -132,7 +132,7 @@ def provider_local_system(hex_encoded_id, search_type): return None # Offline search https://github.com/niklasb/libc-database for hash type -def provider_local_database(hex_encoded_id, search_type): +def provider_local_database(search_target, search_type): if not context.local_libcdb: return None @@ -142,14 +142,14 @@ def provider_local_database(hex_encoded_id, search_type): # Handle the specific search type 'libs_id' if search_type == 'libs_id': - libc_list = list(localdb.rglob("%s.so" % hex_encoded_id)) + libc_list = list(localdb.rglob("%s.so" % search_target)) if len(libc_list) == 0: return None return read(libc_list[0]) - log.debug("Searching local libc database, %s: %s", search_type, hex_encoded_id) + log.debug("Searching local libc database, %s: %s", search_type, search_target) for libc_path in localdb.rglob("*.so"): - if hex_encoded_id == TYPES[search_type](libc_path): + if search_target == TYPES[search_type](libc_path): return read(libc_path) return None @@ -196,11 +196,28 @@ def query_local_database(params): "online": [provider_libcdb, provider_libc_rip] } -def search_by_hash(hex_encoded_id, search_type='build_id', unstrip=True, offline_only=False): +def search_by_hash(search_target, search_type='build_id', unstrip=True, offline_only=False): + """search_by_hash(str, str, bool, bool) -> bytes + Arguments: + search_target(str): + The identifier used for searching the libc. This could be a hex encoded ID (`hex_encoded_id`), + a library name (`libs_id`). Depending on `search_type`, this can represent different types of + encoded values or names. + search_type(str): + The type of the search to be performed, it shoule be one of the keys in the `TYPES` dictionary. + unstrip(bool): + Try to fetch debug info for the libc and apply it to the downloaded file. + offline_only(bool): + If True, restricts the search to offline providers only (local database). If False, it will also + search online providers. Default is False. + + Returns: + The path to the cached directory containing the downloaded libraries. + """ assert search_type in TYPES, search_type # Ensure that the libcdb cache directory exists - cache, cache_valid = _check_elf_cache('libcdb', hex_encoded_id, search_type) + cache, cache_valid = _check_elf_cache('libcdb', search_target, search_type) if cache_valid: return cache @@ -214,12 +231,12 @@ def search_by_hash(hex_encoded_id, search_type='build_id', unstrip=True, offline # Run through all available libc database providers to see if we have a match. for provider in providers: - data = provider(hex_encoded_id, search_type) + data = provider(search_target, search_type) if data and data.startswith(b'\x7FELF'): break if not data: - log.warn_once("Could not find libc for %s %s anywhere", search_type, hex_encoded_id) + log.warn_once("Could not find libc for %s %s anywhere", search_type, search_target) # Save whatever we got to the cache write(cache, data or b'') @@ -268,7 +285,7 @@ def _search_debuginfo_by_hash(base_url, hex_encoded_id): return cache -def _check_elf_cache(cache_type, hex_encoded_id, search_type): +def _check_elf_cache(cache_type, search_target, search_type): """ Check if there already is an ELF file for this hash in the cache. @@ -288,7 +305,7 @@ def _check_elf_cache(cache_type, hex_encoded_id, search_type): # If we already downloaded the file, and it looks even passingly like # a valid ELF file, return it. - cache = os.path.join(cache_dir, hex_encoded_id) + cache = os.path.join(cache_dir, search_target) if not os.path.exists(cache): return cache, False @@ -300,7 +317,7 @@ def _check_elf_cache(cache_type, hex_encoded_id, search_type): # Retry failed lookups after some time if time.time() > os.path.getmtime(cache) + NEGATIVE_CACHE_EXPIRY: return cache, False - log.info_once("Skipping invalid cached ELF %s", hex_encoded_id) + log.info_once("Skipping invalid cached ELF %s", search_target) return None, False log.info_once("Using cached data from %r", cache) From 0f7d4ae4af3bf73785bd3a878044bb5346c40592 Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Thu, 15 Aug 2024 21:44:59 +0800 Subject: [PATCH 09/14] Turbofast extract build id --- pwnlib/libcdb.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index db50095ef..bb71e9d68 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -8,6 +8,7 @@ import time import six import tempfile +import struct from pwnlib.context import context from pwnlib.elf import ELF @@ -23,9 +24,35 @@ log = getLogger(__name__) + +def _turbofast_extract_build_id(path): + """ + Elf_External_Note: + + 0x00 +--------+ + | namesz | <- Size of entry's owner string + 0x04 +--------+ + | descsz | <- Size of the note descriptor + 0x08 +--------+ + | type | <- Interpretation of the descriptor + 0x0c +--------+ + | name | <- Start of the name+desc data + ... +-------- + | desc | + ... +--------+ + """ + data = read(path, 0x1000) + # search NT_GNU_BUILD_ID and b"GNU\x00" + idx = data.find(bytes.fromhex("03000000 474e5500")) + if idx == -1: + return enhex(b'') + descsz, = struct.unpack(" bytes Arguments: search_target(str): - The identifier used for searching the libc. This could be a hex encoded ID (`hex_encoded_id`), + The identifier used for searching the libc. This could be a hex encoded ID (`hex_encoded_id`) or a library name (`libs_id`). Depending on `search_type`, this can represent different types of encoded values or names. search_type(str): From 788d82d59e8de4e6b4b9ddd2e9d73ebcb98d1a03 Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Thu, 15 Aug 2024 22:19:00 +0800 Subject: [PATCH 10/14] Fix docs --- pwnlib/libcdb.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index bb71e9d68..9f484e899 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -42,7 +42,7 @@ def _turbofast_extract_build_id(path): ... +--------+ """ data = read(path, 0x1000) - # search NT_GNU_BUILD_ID and b"GNU\x00" + # search NT_GNU_BUILD_ID and b"GNU\x00" (type+name) idx = data.find(bytes.fromhex("03000000 474e5500")) if idx == -1: return enhex(b'') @@ -224,14 +224,14 @@ def query_local_database(params): } def search_by_hash(search_target, search_type='build_id', unstrip=True, offline_only=False): - """search_by_hash(str, str, bool, bool) -> bytes + """search_by_hash(str, str, bool, bool) -> str Arguments: search_target(str): - The identifier used for searching the libc. This could be a hex encoded ID (`hex_encoded_id`) or - a library name (`libs_id`). Depending on `search_type`, this can represent different types of - encoded values or names. + Use for searching the libc. This could be a hex encoded ID (`hex_encoded_id`) or a library + name (`libs_id`). Depending on `search_type`, this can represent different types of encoded + values or names. search_type(str): - The type of the search to be performed, it shoule be one of the keys in the `TYPES` dictionary. + The type of the search to be performed, it should be one of the keys in the `TYPES` dictionary. unstrip(bool): Try to fetch debug info for the libc and apply it to the downloaded file. offline_only(bool): From 335b3b9ebb0bf4a33085a59a0583986967f4394c Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Fri, 16 Aug 2024 17:10:26 +0800 Subject: [PATCH 11/14] Add a map for types key --- pwnlib/libcdb.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index 9f484e899..fb146e6be 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -57,6 +57,13 @@ def _turbofast_extract_build_id(path): 'sha256': sha256filehex, 'md5': md5filehex, } + +# mapping for search result +MAP_TYPES = { + 'libs_id': 'id', + 'build_id': 'buildid' +} + DEBUGINFOD_SERVERS = [ 'https://debuginfod.elfutils.org/', ] @@ -120,8 +127,9 @@ def query_libc_rip(params): def provider_libc_rip(search_target, search_type): # Build the request for the hash type # https://github.com/niklasb/libc-database/blob/master/searchengine/api.yml - if search_type == 'build_id': - search_type = 'buildid' + if search_type in MAP_TYPES.keys(): + search_type = MAP_TYPES[search_type] + params = {search_type: search_target} libc_match = query_libc_rip(params) @@ -720,8 +728,8 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as if return_as_list: return [libc['buildid'] for libc in matching_list] - # replace 'build_id' to 'buildid' - match_type = search_type.replace("_", "") + if search_type in MAP_TYPES.keys(): + match_type = MAP_TYPES[search_type] # If there's only one match, return it directly if len(matching_list) == 1: @@ -912,9 +920,9 @@ def _pack_libs_info(path, libs_id, libs_url, syms): if search_type == 'libs_id': continue - # replace 'build_id' to 'buildid' - if search_type == 'build_id': - search_type = search_type.replace("_", "") + # replace search_type + if search_type in MAP_TYPES.keys(): + search_type = MAP_TYPES[search_type] info[search_type] = hash_func(path) From 13f2753947a5331636f588425870ce7d5155b704 Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Fri, 16 Aug 2024 17:12:49 +0800 Subject: [PATCH 12/14] Extract proper buildid --- pwnlib/libcdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index fb146e6be..b5ced486f 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -45,7 +45,7 @@ def _turbofast_extract_build_id(path): # search NT_GNU_BUILD_ID and b"GNU\x00" (type+name) idx = data.find(bytes.fromhex("03000000 474e5500")) if idx == -1: - return enhex(b'') + return enhex(ELF(path, checksec=False).buildid or b'') descsz, = struct.unpack(" Date: Fri, 16 Aug 2024 17:16:43 +0800 Subject: [PATCH 13/14] Fix docs --- pwnlib/libcdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index b5ced486f..2324a2718 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -58,7 +58,7 @@ def _turbofast_extract_build_id(path): 'md5': md5filehex, } -# mapping for search result +# mapping for search result (same as libc.rip) MAP_TYPES = { 'libs_id': 'id', 'build_id': 'buildid' From 263f67e6b9cbd7ea54bcbf1e4d86036303cd7ad5 Mon Sep 17 00:00:00 2001 From: Th3S <46804083+the-soloist@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:20:01 +0800 Subject: [PATCH 14/14] Fix E0606 --- pwnlib/libcdb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pwnlib/libcdb.py b/pwnlib/libcdb.py index 2324a2718..7833b2be1 100644 --- a/pwnlib/libcdb.py +++ b/pwnlib/libcdb.py @@ -730,6 +730,8 @@ def search_by_symbol_offsets(symbols, select_index=None, unstrip=True, return_as if search_type in MAP_TYPES.keys(): match_type = MAP_TYPES[search_type] + else: + match_type = search_type # If there's only one match, return it directly if len(matching_list) == 1: