Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GitHub] Authorization by GitHub repository access #650

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 88 additions & 13 deletions oauthenticator/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from jupyterhub.auth import LocalAuthenticator
from requests.utils import parse_header_links
from tornado.httpclient import HTTPClientError
from traitlets import Bool, Set, Unicode, default

from .oauth2 import OAuthenticator
Expand Down Expand Up @@ -114,6 +115,16 @@ def _userdata_url_default(self):
""",
)

allowed_repositories = Set(
config=True,
help="""
Allow users with read access to specified repositories by specifying
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need to think about future maintainability. Could you say a bit about your design decisions to make this conditional on read access as opposed to e.g. write access, making it configurable etc?

Copy link
Author

@koendelaat koendelaat Jul 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I thought about it and had the following observations:

  • The used app can be either a GitHub OAuth app, or a 'normal' GitHub App
    • When using a OAuth app, repo access is completely depending on the users permissions
    • When using a GitHub App, repo access is an intersection of the apps permissions and the users permissions
  • Read access to a repo is the minimal, and is also relatively easy to verify.
  • Write access is harder to verify

Our use-case is an internal (private) repo with example code. We would like to co-create with outside collaborators and for that we give them read-access to this example repository. Also we would give them a JupyterHub environment where they can use these examples. That is why this change is proposed.

repositories like `org-a/repo-1`.

Requires `repo` to be set in `scope`.
""",
)

populate_teams_in_auth_state = Bool(
False,
config=True,
Expand Down Expand Up @@ -184,6 +195,17 @@ async def check_allowed(self, username, auth_model):
message = f"User {username} is not part of allowed_organizations"
self.log.warning(message)

if self.allowed_repositories:
access_token = auth_model["auth_state"]["token_response"]["access_token"]
token_type = auth_model["auth_state"]["token_response"]["token_type"]
for repo in self.allowed_repositories:
if await self._check_membership_allowed_repository(
repo, username, access_token, token_type
):
return True
message = f"User {username} does not have access to allowed_repositories"
self.log.warning(message)

# users should be explicitly allowed via config, otherwise they aren't
return False

Expand All @@ -205,23 +227,38 @@ async def update_auth_model(self, auth_model):
# - about /user/emails: https://docs.github.com/en/rest/reference/users#list-email-addresses-for-the-authenticated-user
#
# Note that the read:user scope does not imply the user:emails scope!
# Note that the retrieved scopes will be empty for GitHub Apps https://docs.github.com/en/developers/apps
access_token = auth_model["auth_state"]["token_response"]["access_token"]
token_type = auth_model["auth_state"]["token_response"]["token_type"]
granted_scopes = auth_model["auth_state"].get("scope", [])
granted_scopes = [
scope for scope in auth_model["auth_state"].get("scope", []) if scope
]
if not user_info["email"] and (
"user" in granted_scopes or "user:email" in granted_scopes
"user" in granted_scopes
or "user:email" in granted_scopes
or not granted_scopes
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain this change? If this is called with empty scopes won't it fail to get the user's emails?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The retrieved scopes will be empty for GitHub Apps https://docs.github.com/en/developers/apps.
So to facilitate this behavior, empty scopes are also allowed.
This is also why we need a change in error handling of the email fetch. This might cause an error, but we don't know upfront what is causing this.

):
resp_json = await self.httpfetch(
f"{self.github_api}/user/emails",
"fetching user emails",
method="GET",
headers=self.build_userdata_request_headers(access_token, token_type),
validate_cert=self.validate_server_cert,
)
for val in resp_json:
if val["primary"]:
user_info["email"] = val["email"]
break
try:
resp_json = await self.httpfetch(
f"{self.github_api}/user/emails",
"fetching user emails",
method="GET",
headers=self.build_userdata_request_headers(
access_token, token_type
),
validate_cert=self.validate_server_cert,
)
for val in resp_json:
if val["primary"]:
user_info["email"] = val["email"]
break
except HTTPClientError as e:
if e.code == 403 and not granted_scopes:
# This means granted_scopes is empty (GitHub App) but we don't have permission to
# read users email
pass
else:
raise e

if self.populate_teams_in_auth_state:
if "read:org" not in self.scope:
Expand Down Expand Up @@ -332,6 +369,44 @@ async def _check_membership_allowed_organizations(
)
return False

async def _check_membership_allowed_repository(
self, owner_repo, username, access_token, token_type
):
"""
Checks if a user is allowed to read the repo via
GitHub's REST API. The `repo` scope is required to check access.

The `owner_repo` parameter accepts values like `OWNER/REPO`.
"""
headers = self.build_userdata_request_headers(access_token, token_type)

# https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#get-a-repository
owner, repo = owner_repo.split("/")
api_url = f"{self.github_api}/repos/{owner}/{repo}"

self.log.debug(f"Checking GitHub repo access: {username} for {owner}/{repo}?")
resp = await self.httpfetch(
api_url,
parse_json=False,
raise_error=False,
method="GET",
headers=headers,
validate_cert=self.validate_server_cert,
)
if resp.code == 200:
self.log.debug(f"Allowing {username} as access of {owner}/{repo}")
return True
else:
try:
resp_json = json.loads((resp.body or b'').decode('utf8', 'replace'))
message = resp_json.get('message', '')
except ValueError:
message = ''
self.log.debug(
f"{username} does not appear to have access to {owner}/{repo} (status={resp.code}): {message}",
)
return False


class LocalGitHubOAuthenticator(LocalAuthenticator, GitHubOAuthenticator):
"""A version that mixes in local system user creation"""
33 changes: 20 additions & 13 deletions oauthenticator/tests/mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,19 @@ def fetch_impl(self, request, response_callback):
response_callback(response)


def extract_token(request):
auth_header = request.headers.get('Authorization')
if auth_header:
token = auth_header.split(None, 1)[1]
else:
query = parse_qs(urlparse(request.url).query)
if 'access_token' in query:
token = query['access_token'][0]
else:
token = None
return token


def setup_oauth_mock(
client,
host,
Expand Down Expand Up @@ -170,19 +183,13 @@ def access_token(request):

def get_user(request):
assert request.method == 'GET', request.method
auth_header = request.headers.get('Authorization')
if auth_header:
token = auth_header.split(None, 1)[1]
else:
query = parse_qs(urlparse(request.url).query)
if 'access_token' in query:
token = query['access_token'][0]
else:
return HTTPResponse(
request=request,
code=403,
reason='Missing Authorization header',
)
token = extract_token(request)
if token is None:
return HTTPResponse(
request=request,
code=403,
reason='Missing Authorization header',
)
if token not in access_tokens:
return HTTPResponse(
request=request,
Expand Down
62 changes: 61 additions & 1 deletion oauthenticator/tests/test_github.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from traitlets.config import Config

from ..github import GitHubOAuthenticator
from .mocks import setup_oauth_mock
from .mocks import extract_token, setup_oauth_mock


def user_model(username):
Expand Down Expand Up @@ -117,6 +117,7 @@ async def test_allowed_org_membership(github_client):
"team1": ["user1"],
},
}
allowed_repo_members = {"org1": {"repo1": ["user2"]}}

member_regex = re.compile(r'/orgs/(.*)/members')

Expand Down Expand Up @@ -192,17 +193,43 @@ def team_membership(request):
return HTTPResponse(request, 404)
return HTTPResponse(request, 204)

repo_membership_regex = re.compile(r'/repos/(.*)/(.*)')

def repo_membership(request):
token = extract_token(request)
user = github_client.access_tokens.get(token)
urlinfo = urlparse(request.url)
urlmatch = repo_membership_regex.match(urlinfo.path)
owner = urlmatch.group(1)
repo = urlmatch.group(2)
username = user.get('login')
print(f"Request owner = {owner}, repo = {repo} username = {username}")
if owner not in allowed_repo_members:
print(f"Owner not found: owner = {owner}")
return HTTPResponse(request, 404)
if repo not in allowed_repo_members[owner]:
print(f"Repo not found in owner: repo = {repo}, owner = {owner}")
return HTTPResponse(request, 404)
if username not in allowed_repo_members[owner][repo]:
print(
f"Member not found: owner = {owner}, repo = {repo}, username = {username}"
)
return HTTPResponse(request, 404)
return HTTPResponse(request, 200)

## Perform tests

client_hosts = github_client.hosts['api.github.com']
client_hosts.append((team_membership_regex, team_membership))
client_hosts.append((org_membership_regex, org_membership))
client_hosts.append((repo_membership_regex, repo_membership))

# Run tests twice, once with paginate and once without
for paginate in (False, True):
client_hosts.append((member_regex, functools.partial(org_members, paginate)))

# test org membership
authenticator.allowed_repositories = []
authenticator.allowed_organizations = ["org1"]

handled_user_model = user_model("user1")
Expand All @@ -228,6 +255,39 @@ def team_membership(request):
auth_model = await authenticator.get_authenticated_user(handler, None)
assert auth_model is None

# test repo membership
authenticator.allowed_organizations = []
authenticator.allowed_repositories = ["org1/repo1"]

handled_user_model = user_model("user2")
handler = github_client.handler_for_user(handled_user_model)
auth_model = await authenticator.get_authenticated_user(handler, None)
assert auth_model

handled_user_model = user_model("user-not-in-repo")
handler = github_client.handler_for_user(handled_user_model)
auth_model = await authenticator.get_authenticated_user(handler, None)
assert auth_model is None

# test repo membership
authenticator.allowed_organizations = ["org1:team1"]
authenticator.allowed_repositories = ["org1/repo1"]

handled_user_model = user_model("user1")
handler = github_client.handler_for_user(handled_user_model)
auth_model = await authenticator.get_authenticated_user(handler, None)
assert auth_model

handled_user_model = user_model("user2")
handler = github_client.handler_for_user(handled_user_model)
auth_model = await authenticator.get_authenticated_user(handler, None)
assert auth_model

handled_user_model = user_model("user-not-in-repo")
handler = github_client.handler_for_user(handled_user_model)
auth_model = await authenticator.get_authenticated_user(handler, None)
assert auth_model is None

client_hosts.pop()


Expand Down