diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..097f9f9 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,9 @@ +# +# https://help.github.com/articles/dealing-with-line-endings/ +# +# Linux start script should use lf +/gradlew text eol=lf + +# These are Windows script files and should use crlf +*.bat text eol=crlf + diff --git a/.github/workflows/build-and-publish.yaml b/.github/workflows/build-and-publish.yaml new file mode 100644 index 0000000..d3909cb --- /dev/null +++ b/.github/workflows/build-and-publish.yaml @@ -0,0 +1,50 @@ +name: Build and publish artifacts +on: + push: + branches: + # Run on all branches (but not tags); branch builds publish snapshots + - '*' +jobs: + build-and-publish: + runs-on: ubuntu-22.04 # LTS EoL Apr 2025 + + env: + ORG_GRADLE_PROJECT_signingKey: ${{ secrets.SONATYPE_PGP_KEY }} + ORG_GRADLE_PROJECT_signingPassword: ${{ secrets.SONATYPE_PGP_PASSWORD }} + ORG_GRADLE_PROJECT_sonatypeUsername: ${{ secrets.SONATYPE_PUBLISH_USERNAME }} + ORG_GRADLE_PROJECT_sonatypePassword: ${{ secrets.SONATYPE_PUBLISH_PASSWORD }} + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-java@v3 + with: + java-version: '8.x' + java-package: jdk + architecture: x64 + distribution: temurin + + - uses: actions/cache@v3 + with: + path: ~/.gradle + key: ${{ runner.OS }}-gradle-${{ hashFiles('**/build.gradle') }} + restore-keys: | + ${{ runner.OS }}-gradle- + ${{ runner.OS }} + + - name: Install Dependencies + run: | + ./gradlew dependencies + + - name: Show Versions + run: | + echo "java: $(java -version)" + + - name: Extract branch name + shell: bash + run: echo "BRANCH=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_ENV + + - name: Build and test + run: ./gradlew test --console plain + + - name: Publish artifacts + run: ./gradlew publish -x test -Pbranch="$BRANCH" diff --git a/.gitignore b/.gitignore index a5b1113..6ceb3c3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,23 @@ **/build/ !src/**/build/ +# Temp 
files +**/*.tmp +**/*.swp + +# Merge conflict resolution backup files +**/*.orig + +# Eclipse +.classpath +.project +.settings/ + +# IDEA +.idea +.run +*.iml + # Ignore Gradle GUI config gradle-app.setting @@ -19,3 +36,6 @@ gradle-app.setting .project # JDT-specific (Eclipse Java Development Tools) .classpath + +# Ignore Gradle build output directory +build diff --git a/README.md b/README.md index f0e59fe..9cd178d 100644 --- a/README.md +++ b/README.md @@ -1 +1,313 @@ -# pac-interpreter \ No newline at end of file + + +# pac-interpreter + +A [Proxy Auto-Configuration](https://developer.mozilla.org/en-US/docs/Web/HTTP/Proxy_servers_and_tunneling/Proxy_Auto-Configuration_PAC_file) +(PAC) script interpreter for Java with minimal dependencies. + +## Installation + +### Gradle + +``` +implementation group: 'com.mabl', name: 'pac-interpreter', version: '1.+' +``` + +### Maven + +``` +<dependency> + <groupId>com.mabl</groupId> + <artifactId>pac-interpreter</artifactId> + <version>[1.0,2.0)</version> +</dependency> +``` + +## Quickstart + +Please see the full documentation below this example for more information. + +``` +import com.mabl.net.proxy.FindProxyDirective; +import com.mabl.net.proxy.FindProxyResult; +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.ReloadablePacInterpreter; +import java.net.URL; +import java.time.Duration; +... 
+ +// Initialize the interpreter: +PacInterpreter interpreter = ReloadablePacInterpreter.forUrl(new URL("https://example.com/proxy.pac")); + +// Reload the PAC from https://example.com/proxy.pac every 5 minutes +interpreter.start(Duration.ofMinutes(5)); + +// Find the proxy that should be used for the URL https://www.example.com/foo : +FindProxyResult result = interpreter.findProxyForUrl("https://www.example.com/foo"); +FindProxyDirective firstDirective = result.first(); + +// Connect as described by the directive: +switch (firstDirective.connectionType()) { + case DIRECT: + System.out.println("Connecting without proxy"); + break; + case HTTP: + case HTTPS: + case PROXY: + System.out.println("Connecting via HTTP(S) proxy " + firstDirective.proxyHostAndPort()); + break; + case SOCKS: + case SOCKS4: + case SOCKS5: + System.out.println("Connecting via SOCKS proxy " + firstDirective.proxyHostAndPort()); + break; +} + +... + +interpreter.stop(); // Stop automatic PAC reloads +``` + +## Usage + +This package provides two PAC interpreter implementations, both of which implement +the [PacInterpreter](/blob/main/src/main/java/com/mabl/net/proxy/PacInterpreter.java) interface: + +1. [SimplePacInterpreter](/blob/main/src/main/java/com/mabl/net/proxy/SimplePacInterpreter.java) +1. [ReloadablePacInterpreter](/blob/main/src/main/java/com/mabl/net/proxy/ReloadablePacInterpreter.java) + +### `SimplePacInterpreter` + +The [SimplePacInterpreter](/blob/main/src/main/java/com/mabl/net/proxy/SimplePacInterpreter.java) implementation loads a +specified PAC script once and provides a method to execute the PAC's `FindProxyForURL` function as many times as needed. + +The PAC script can be loaded into the interpreter in several ways: + +#### Passed directly as a `String` + +``` +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.SimplePacInterpreter; +... 
+String script = "function FindProxyForURL(url, host) { return \"DIRECT\"; }"; +PacInterpreter interpreter = SimplePacInterpreter.forScript(script); +``` + +#### Loaded from a `File` + +``` +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.SimplePacInterpreter; +import java.io.File; +... +PacInterpreter interpreter = SimplePacInterpreter.forFile(new File("/path/to/proxy.pac")); +``` + +#### Loaded from a URL + +``` +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.SimplePacInterpreter; +import java.net.URL; +... +PacInterpreter interpreter = SimplePacInterpreter.forUrl(new URL("https://example.com/proxy.pac")); +``` + +### `ReloadablePacInterpreter` + +The [ReloadablePacInterpreter](/blob/main/src/main/java/com/mabl/net/proxy/ReloadablePacInterpreter.java) implementation +is similar to `SimplePacInterpreter` except that it allows the PAC to be reloaded, either explicitly by executing +the `reload()` method or automatically in the background at a specified period using the `start(Duration)` method. To +stop automatic reloading, execute the `stop()` method. + +The `ReloadablePacInterpreter` is initialized in a similar way to `SimplePacInterpreter`, with various options for +loading the PAC script: + +#### Passed directly as a `String` + +Passing the script directly to `ReloadablePacInterpreter` is slightly different than +with `SimplePacInterpreter`. `ReloadablePacInterpreter` takes a `Supplier` rather than a `String` in order to +allow the underlying script to change between reloads: + +``` +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.ReloadablePacInterpreter; +... +private String loadScript() { + // TODO Get the script from somewhere + return "function FindProxyForURL(url, host) { return \"DIRECT\"; }"; +} +PacInterpreter interpreter = ReloadablePacInterpreter.forScript(this::loadScript); +``` + +#### Loaded from a `File` + +Each time the interpreter is reloaded, the file will be re-read. 
+ +``` +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.ReloadablePacInterpreter; +import java.io.File; +... +PacInterpreter interpreter = ReloadablePacInterpreter.forFile(new File("/path/to/proxy.pac")); +``` + +#### Loaded from a URL + +Each time the interpreter is reloaded the PAC script will be re-fetched from the specified URL. + +``` +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.ReloadablePacInterpreter; +import java.net.URL; +... +PacInterpreter interpreter = ReloadablePacInterpreter.forUrl(new URL("https://example.com/proxy.pac")); +``` + +#### Triggering a reload manually + +To reload the PAC from the underlying source manually, call the `reload()` method: + +``` +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.ReloadablePacInterpreter; +import java.net.URL; +... +PacInterpreter interpreter = ReloadablePacInterpreter.forUrl(new URL("https://example.com/proxy.pac")); +interpreter.reload(); +``` + +#### Automatic reloads + +To start automatic reloads, use the `start(Duration)` method. To stop, call `stop()`: + +``` +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.ReloadablePacInterpreter; +import java.net.URL; +import java.time.Duration; +... +PacInterpreter interpreter = ReloadablePacInterpreter.forUrl(new URL("https://example.com/proxy.pac")); +interpreter.start(Duration.ofMinutes(5)); +... +interpreter.stop(); +``` + +### Using the interpreter to select a proxy + +Once you have chosen an interpreter implementation and successfully initialized it, you can use that interpreter to +invoke the PAC script's `FindProxyForURL` function. + +The easiest way to do this is to call the interpreter's `findProxyForUrl(String url)` method: + +``` +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.FindProxyResult; +... 
+PacInterpreter interpreter = initializeInterpreter(); +FindProxyResult result = interpreter.findProxyForUrl("https://www.example.com"); +``` + +The PAC script's `FindProxyForURL` actually takes two arguments, the full URL and the +host ([defined](https://developer.mozilla.org/en-US/docs/Web/HTTP/Proxy_servers_and_tunneling/Proxy_Auto-Configuration_PAC_file#parameters) +as "the string between `://` and the first `:` or `/` after that"). The `PacInterpreter` will automatically parse the +host from the URL for you, but if you prefer to pass a custom value for `host`, you can call the overloaded version of +this method that takes two parameters: + +``` +import com.mabl.net.proxy.PacInterpreter; +import com.mabl.net.proxy.FindProxyResult; +... +PacInterpreter interpreter = initializeInterpreter(); +FindProxyResult result = interpreter.findProxyForUrl("https://www.example.com", "www.example.com"); +``` + +### The `FindProxyResult` + +The PAC script's `FindProxyForURL` function returns a string which might contain multiple proxy directives separated +by `;`. For example: + +``` +"PROXY 4.5.6.7:8080; PROXY 7.8.9.10:8080; DIRECT" +``` + +To make this output easier to use, the interpreter automatically parses it and returns it as an instance +of `FindProxyResult`. The `FindProxyResult` class includes several methods for exploring these directives: + +#### `size()` + +Returns the number of proxy directives. + +#### `all()` + +Returns a `List<FindProxyDirective>` containing all directives that were parsed from the result in the order in which +the `FindProxyForURL` function returned them. + +#### `first()` + +Returns the first `FindProxyDirective`. + +#### `random()` + +Returns a random `FindProxyDirective`. + +#### `get(int index)` + +Returns the `FindProxyDirective` at the specified (zero-based) index. + +#### Iterating + +`FindProxyResult` is also an `Iterable<FindProxyDirective>`: + +``` +for (FindProxyDirective directive : findProxyResult) { + ... 
+} +``` + +### The `FindProxyDirective` + +`FindProxyDirective` allows you to obtain the connection type and the `host:port` of the proxy (if any). + +The connection type is returned by the `connectionType()` method and is represented as an `enum` with one of the +following values: + +* `DIRECT` +* `HTTP` +* `HTTPS` +* `PROXY` +* `SOCKS` +* `SOCKS4` +* `SOCKS5` + +The hostname and port of the proxy (if any) can be obtained by calling the `proxyHostAndPort()` method. The host and +port will be returned as a `String` with the form `host:port`. + +_Note: `proxyHostAndPort()` will return `null` if the connection type is `DIRECT`._ + +### GraalVM optimization + +This PAC interpreter uses [GraalVM](https://www.graalvm.org/latest/reference-manual/polyglot-programming/) to execute +the JavaScript-based PAC scripts. To maximize GraalVM performance it is necessary to add certain arguments when starting +the JVM. See GraalVM's documentation +on [Running GraalVM JavaScript on a Stock JDK](https://www.graalvm.org/latest/reference-manual/js/RunOnJDK/) for more +information. + +## Building + +Use the `gradlew` script to build locally: + +``` +./gradlew build +``` + +## Contributing + +Please feel free to file [issues](/issues) and submit [pull requests](/pulls) if you would like to contribute to this +project. + +## License + +This code is released under the [GNU Lesser General Public License v2.1](/blob/main/LICENSE), mainly because +the [PAC utility functions](/blob/main/src/main/resources/pacUtils.js) are derived from the original Mozilla +implementation which was released under that license. 
diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000..54426f0 --- /dev/null +++ b/build.gradle @@ -0,0 +1,118 @@ +plugins { + id 'java-library' + id 'maven-publish' + id 'signing' + id 'eclipse' + id 'idea' +} + +repositories { + mavenCentral() +} + +ext { + baseVersion = '1.0.0' + + getBranch = { + return project.findProperty('branch') ?: '' + } + + determineProjectVersion = { baseVersion, branch -> + projectVersion = baseVersion + if (!branch.equals("main")) { + if (!branch.isEmpty()) { + projectVersion = "${projectVersion}-${branch}" + } + projectVersion = "${projectVersion}-SNAPSHOT" + } + return projectVersion + } +} + +group = 'com.mabl' +archivesBaseName = 'pac-interpreter' +version = determineProjectVersion(baseVersion, getBranch()) + +java { + toolchain { + languageVersion = JavaLanguageVersion.of(8) + } +} + +ext { + graalVersion = '21.3.7' +} + +dependencies { + implementation group: 'org.slf4j', name: 'slf4j-api', version: '1.7.36' + + // GraalVM dependencies for interpreting PAC scripts: + implementation group: 'org.graalvm.sdk', name: 'graal-sdk', version: graalVersion + implementation group: 'org.graalvm.truffle', name: 'truffle-api', version: graalVersion + implementation group: 'org.graalvm.js', name: 'js', version: graalVersion + implementation group: 'org.graalvm.regex', name: 'regex', version: graalVersion + + // ICU4J is a GraalVM dependency: + implementation group: 'com.ibm.icu', name: 'icu4j', version: '73.2' + + // Test deps: + testImplementation group: 'junit', name: 'junit', version: '4.+' + testImplementation group: 'ch.qos.logback', name: 'logback-classic', version: '1.3.+' + testImplementation group: 'io.undertow', name: 'undertow-core', version: '2.2.+' +} + +java { + withJavadocJar() + withSourcesJar() +} + +ext { + releasesUrl = "https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/" + snapshotsUrl = "https://s01.oss.sonatype.org/content/repositories/snapshots/" +} +publishing { + repositories { + 
maven { + name = 'sonatype' + credentials(PasswordCredentials) + url = version.endsWith('SNAPSHOT') ? snapshotsUrl : releasesUrl + } + } + publications { + mavenJava(MavenPublication) { + from components.java + // The pom properties below are all required for publishing to Maven Central: + pom { + name = 'PAC Interpreter' + description = 'Interpreter for Proxy Auto-Configuration scripts' + url = 'https://github.com/mablhq/pac-interpreter' + licenses { + license { + name = 'GNU Lesser General Public License v2.1' + url = 'https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html' + } + } + scm { + connection = 'scm:git:git://github.com/mablhq/pac-interpreter.git' + developerConnection = 'scm:git:ssh://github.com/mablhq/pac-interpreter.git' + url = 'https://github.com/mablhq/pac-interpreter' + } + developers { + developer { + name = 'James Baldassari' + email = 'james@mabl.com' + organization = 'mabl' + organizationUrl = 'https://www.mabl.com' + } + } + } + } + } +} + +signing { + def signingKey = findProperty("signingKey") + def signingPassword = findProperty("signingPassword") + useInMemoryPgpKeys(signingKey, signingPassword) + sign publishing.publications.mavenJava +} diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..c1962a7 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..37aef8d --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.1.1-bin.zip +networkTimeout=10000 +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..aeb74cb --- /dev/null +++ b/gradlew @@ -0,0 +1,245 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the 
original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. 
+# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! 
-x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
+ +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
+# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..93e3f59 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,92 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. 
+@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 0000000..2edb3fe --- /dev/null +++ b/settings.gradle @@ -0,0 +1,6 @@ +plugins { + // Apply the foojay-resolver plugin to allow automatic download of JDKs + id 'org.gradle.toolchains.foojay-resolver-convention' version '0.4.0' +} + +rootProject.name = 'pac-interpreter' diff --git a/src/main/java/com/mabl/io/IoUtils.java b/src/main/java/com/mabl/io/IoUtils.java new file mode 100644 index 0000000..346d599 --- /dev/null +++ b/src/main/java/com/mabl/io/IoUtils.java @@ -0,0 +1,40 @@ +package com.mabl.io; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URL; + +public class IoUtils { + public static String readClasspathFileToString(final String path) throws IOException { + try (final InputStream resourceIn = IoUtils.class.getResourceAsStream(path)) { + return readInputStreamToString(resourceIn); + } + } + + public static String readUrlToString(final URL pacUrl) throws IOException { + try (final InputStream urlIn = pacUrl.openConnection().getInputStream()) { + return readInputStreamToString(urlIn); + } + } + + public static String readFileToString(final File file) throws IOException { + try (final InputStream fileIn = new FileInputStream(file)) { + return readInputStreamToString(fileIn); + } + } + + /** Reads {@code in} to exhaustion, decoding it as UTF-8 and terminating every line read with "\n"; {@code in} is closed before returning. */ public static String readInputStreamToString(final InputStream in) throws IOException { + /* Local accumulator used by one thread only, so the unsynchronized StringBuilder is sufficient. */ final StringBuilder buffer = new StringBuilder(); + /* Decode explicitly as UTF-8 so the result does not depend on the JVM's default charset. */ try (final BufferedReader utilsIn = new BufferedReader(new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8))) { + String line; + while ((line = utilsIn.readLine()) != null) { + buffer.append(line).append("\n"); + } + } + return buffer.toString(); + } +} 
diff --git a/src/main/java/com/mabl/net/proxy/ConnectionType.java b/src/main/java/com/mabl/net/proxy/ConnectionType.java new file mode 100644 index 0000000..bd42324 --- /dev/null +++ b/src/main/java/com/mabl/net/proxy/ConnectionType.java @@ -0,0 +1,24 @@ +package com.mabl.net.proxy; + +import java.util.Arrays; + +/** + * Represents the connection type returned by the PAC function + * + * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Proxy_servers_and_tunneling/Proxy_Auto-Configuration_PAC_file#return_value_format" + */ +public enum ConnectionType { + DIRECT, + HTTP, + HTTPS, + PROXY, + SOCKS, + SOCKS4, + SOCKS5; + + /** Returns the constant whose name equals {@code value}, ignoring case; throws IllegalArgumentException if no constant matches. */ public static ConnectionType fromValue(final String value) { + return Arrays.stream(values()).filter(ct -> ct.name().equalsIgnoreCase(value)).findAny().orElseThrow(() -> + new IllegalArgumentException(String.format("\"%s\" is not a valid %s", value, ConnectionType.class.getSimpleName())) + ); + } +} diff --git a/src/main/java/com/mabl/net/proxy/FindProxyDirective.java b/src/main/java/com/mabl/net/proxy/FindProxyDirective.java new file mode 100644 index 0000000..f29cc79 --- /dev/null +++ b/src/main/java/com/mabl/net/proxy/FindProxyDirective.java @@ -0,0 +1,184 @@ +package com.mabl.net.proxy; + +import java.net.InetSocketAddress; +import java.util.Arrays; +import java.util.Objects; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Represents a single proxy directive (e.g. DIRECT, HTTP 10.1.1.1:8080, etc.). 
+ */ +public class FindProxyDirective { + private static final String CONNECTION_TYPES_UNION = Arrays.stream(ConnectionType.values()).map(ConnectionType::name).collect(Collectors.joining("|")); + /* Group 1 is the connection type, optional group 2 the host:port token. The flag belongs to Pattern.compile (String.format would silently ignore it), which keeps matching case-insensitive like ConnectionType.fromValue. */ private static final Pattern RESULT_PATTERN = Pattern.compile(String.format("(%s)(?:\\s+([^\\s;]+))?", CONNECTION_TYPES_UNION), Pattern.CASE_INSENSITIVE); + private static final String HOST_PORT_DELIMITER = ":"; + private final ConnectionType connectionType; + private final Optional<String> proxyHostAndPort; + + private FindProxyDirective(final ConnectionType connectionType) { + this(connectionType, Optional.empty()); + } + + private FindProxyDirective(final ConnectionType connectionType, final String proxyHostAndPort) { + this(connectionType, Optional.of(proxyHostAndPort)); + } + + /* Canonical constructor: rejects null arguments and requires a proxy for every connection type except DIRECT. */ private FindProxyDirective(final ConnectionType connectionType, final Optional<String> proxyHostAndPort) { + if (connectionType == null) { + throw new IllegalArgumentException("Connection type must not be null"); + } + if (proxyHostAndPort == null) { + throw new IllegalArgumentException("Proxy must not be null"); + } + if (connectionType != ConnectionType.DIRECT && !proxyHostAndPort.isPresent()) { + throw new IllegalArgumentException(String.format("When connection type is not %s proxy is required", ConnectionType.DIRECT)); + } + this.connectionType = connectionType; + this.proxyHostAndPort = proxyHostAndPort; + } + + /** + * Gets the connection type component of the directive, e.g. SOCKS + * + * @return the connection type for this directive. 
+ */ + public ConnectionType connectionType() { + return connectionType; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FindProxyDirective that = (FindProxyDirective) o; + return connectionType == that.connectionType && Objects.equals(proxyHostAndPort, that.proxyHostAndPort); + } + + @Override + public int hashCode() { + return Objects.hash(connectionType, proxyHostAndPort); + } + + /** + * Tests whether this directive has connection type {@link ConnectionType#DIRECT}. + * + * @return true if this directive has a direct connection type; false otherwise. + */ + public boolean isDirect() { + return connectionType == ConnectionType.DIRECT; + } + + /** + * Tests whether this directive has connection type other than {@link ConnectionType#DIRECT}. + * + * @return true if this directive has a proxy connection type; false if the connection type is direct. + */ + public boolean isProxy() { + return !isDirect(); + } + + /** + * Gets the proxy host component of the directive, e.g. "192.168.1.1" + * + * @return the proxy host for this directive, or null if the connection type is {@link ConnectionType#DIRECT}. + */ + public String proxyHost() { + return Optional.ofNullable(unresolvedProxyAddress()).map(InetSocketAddress::getHostString) + .orElse(null); + } + + /** + * Gets the proxy port component of the directive, e.g. 8080. + * + * @return the proxy port for this directive, or null if the connection type is {@link ConnectionType#DIRECT}. + */ + public Integer proxyPort() { + return Optional.ofNullable(unresolvedProxyAddress()).map(InetSocketAddress::getPort) + .orElse(null); + } + + /** + * Gets the proxy and host component of the directive, e.g. "10.1.1.1:8080" + * + * @return the proxy:host for this directive, or null if the connection type is {@link ConnectionType#DIRECT}. 
+ */ + public String proxyHostAndPort() { + return proxyHostAndPort.orElse(null); + } + + /** + * Get the proxy address associated with this directive. + *

+ * Note that if the proxy host is a hostname, it will be resolved to an IP address by this method. + * To create an unresolved {@link InetSocketAddress}, use {@link #unresolvedProxyAddress()} instead. + * + * @return the proxy address, or null if the connection type is {@link ConnectionType#DIRECT}. + * @see #unresolvedProxyAddress() + */ + public InetSocketAddress resolvedProxyAddress() { + return Optional.ofNullable(unresolvedProxyAddress()) + .map(unresolved -> new InetSocketAddress(unresolved.getHostString(), unresolved.getPort())) + .orElse(null); + } + + @Override + public String toString() { + final StringBuilder builder = new StringBuilder(connectionType.name()); + proxyHostAndPort.ifPresent(proxy -> builder.append(" ").append(proxy)); + return builder.toString(); + } + + /** + * Get the proxy address associated with this directive. + *

+ * Note that the {@link InetSocketAddress} returned by this method is created via + * {@link InetSocketAddress#createUnresolved(String, int)}. To create a resolved {@link InetSocketAddress} + * use {@link #resolvedProxyAddress()} instead. + * + * @return the proxy address, or null if the connection type is {@link ConnectionType#DIRECT}. + * @see #resolvedProxyAddress() + */ + public InetSocketAddress unresolvedProxyAddress() { + return proxyHostAndPort.map(hostAndPort -> { + final String[] hostPortParts = hostAndPort.split(HOST_PORT_DELIMITER); + final String host = hostPortParts[0]; + final int port = Integer.parseInt(hostPortParts[1]); + return InetSocketAddress.createUnresolved(host, port); + }).orElse(null); + } + + /** + * Parses a single proxy directive. + * + * @param value the value to parse. + * @return the parsed @{@link FindProxyDirective}. + * @throws PacInterpreterException if the given value cannot be parsed. + */ + public static FindProxyDirective parse(final String value) throws PacInterpreterException { + if (value == null) { + return new FindProxyDirective(ConnectionType.DIRECT); + } + final Matcher matcher = RESULT_PATTERN.matcher(value.trim()); + if (!matcher.matches()) { + throw new PacInterpreterException(String.format("Invalid proxy find result: \"%s\"", value)); + } + + final ConnectionType connectionType; + try { + connectionType = ConnectionType.fromValue(matcher.group(1)); + } catch (IllegalArgumentException e) { + // This shouldn't really happen because the regular expression is built to only accept valid connection types + throw new PacInterpreterException(String.format("Failed to parse connection type from \"%s\"", value), e); + } + + if (connectionType == ConnectionType.DIRECT) { + return new FindProxyDirective(connectionType); + } + + final String proxyHostAndPort = matcher.group(2).trim(); + return new FindProxyDirective(connectionType, proxyHostAndPort); + } +} diff --git a/src/main/java/com/mabl/net/proxy/FindProxyResult.java 
b/src/main/java/com/mabl/net/proxy/FindProxyResult.java new file mode 100644 index 0000000..2665d68 --- /dev/null +++ b/src/main/java/com/mabl/net/proxy/FindProxyResult.java @@ -0,0 +1,138 @@ +package com.mabl.net.proxy; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.Random; +import java.util.stream.Collectors; + +/** + * Represents the result from invoking the FindProxyForURL function with a given URL. + */ +public class FindProxyResult implements Iterable { + private static final String PROXY_RESULT_SEPARATOR = ";"; + private static final Random random = new Random(); + private final List directives; + + private FindProxyResult(final List directives) { + if (directives == null) { + throw new IllegalArgumentException("Directives must not be null"); + } + this.directives = Collections.unmodifiableList(directives); + } + + /** + * Gets all proxy directives contained in this result. + * + * @return the list of all directives. + */ + public List all() { + return directives; + } + + /** + * Gets the first proxy directive contained in this result. + * + * @return the first directive. + */ + public FindProxyDirective first() { + return directives.get(0); + } + + /** + * Finds the first proxy directive in this result with a connection type other than {@link ConnectionType#DIRECT}. + * + * @return the first directive with a non-direct connection type, if any. + */ + public Optional firstProxy() { + return directives.stream() + .filter(directive -> directive.connectionType() != ConnectionType.DIRECT) + .findFirst(); + } + + /** + * Gets the proxy directive with the given index. + * + * @param index the index of the directive to retrieve (valid values: [0, size() - 1]) + * @return the directive at the given index. 
+ */ + public FindProxyDirective get(final int index) { + return directives.get(index); + } + + @Override + public Iterator iterator() { + return directives.iterator(); + } + + /** + * Creates a normalized copy of this {@link FindProxyResult} by removing all non-unique proxy directives + * while maintaining the original relative ordering of the directives. + * + * @return a normalized copy of this result. + */ + public FindProxyResult normalize() { + return new FindProxyResult(new ArrayList<>(new LinkedHashSet<>(directives))); + } + + /** + * Gets a random proxy directive from this result. + * + * @return a random directive. + */ + public FindProxyDirective random() { + return get(random.nextInt(size())); + } + + /** + * Gets the number of proxy directives contained in this result. + * + * @return the number of directives. + */ + public int size() { + return directives.size(); + } + + @Override + public String toString() { + return directives.stream() + .map(FindProxyDirective::toString) + .collect(Collectors.joining(PROXY_RESULT_SEPARATOR + " ")); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FindProxyResult that = (FindProxyResult) o; + return Objects.equals(directives, that.directives); + } + + @Override + public int hashCode() { + return Objects.hash(directives); + } + + /** + * Parses the result from the output of the FindProxyForURL function. + * + * @param result the output from the FindProxyForURL function. + * @return the @{@link FindProxyResult} obtained from parsing the result. + * @throws PacInterpreterException if the given result cannot be parsed. 
+ */ + public static FindProxyResult parse(final String result) throws PacInterpreterException { + final List directives = new ArrayList<>(); + if (result != null) { + for (final String directive : result.split(PROXY_RESULT_SEPARATOR)) { + directives.add(FindProxyDirective.parse(directive)); + } + } else { + directives.add(FindProxyDirective.parse(null)); + } + return new FindProxyResult(directives); + } +} diff --git a/src/main/java/com/mabl/net/proxy/PacInterpreter.java b/src/main/java/com/mabl/net/proxy/PacInterpreter.java new file mode 100644 index 0000000..eb6f364 --- /dev/null +++ b/src/main/java/com/mabl/net/proxy/PacInterpreter.java @@ -0,0 +1,33 @@ +package com.mabl.net.proxy; + +import java.net.MalformedURLException; + +public interface PacInterpreter { + /** + * Gets the PAC that is in use by this @{@link SimplePacInterpreter}. + * + * @return the PAC contents. + */ + String getPac(); + + /** + * Evaluates the PAC script for the given URL. + * Automatically parses the host from the URL before passing it to the PAC script. + * + * @param url the URL to evaluate. + * @return the result of executing the PAC script with the given URL. + * @throws MalformedURLException if the URL cannot be parsed. + * @throws PacInterpreterException if an error occurs evaluating the PAC script or parsing the results. + */ + FindProxyResult findProxyForUrl(final String url) throws MalformedURLException, PacInterpreterException; + + /** + * Evaluates the PAC script for the given URL and host. + * + * @param url the URL to evaluate. + * @param host the host component of the URL (the URL substring between :// and the first : or /). + * @return the result of executing the PAC script with the given URL and host. + * @throws PacInterpreterException if an error occurs evaluating the PAC script or parsing the results. 
/**
 * Checked exception used by this package to report failures while evaluating a PAC script
 * or parsing its results.
 */
public class PacInterpreterException extends Exception {
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception with a detail message and the underlying cause.
     *
     * @param message the detail message.
     * @param cause the underlying cause.
     */
    public PacInterpreterException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception with a detail message and no cause.
     *
     * @param message the detail message.
     */
    public PacInterpreterException(final String message) {
        super(message);
    }
}

+ * After creating a {@link ReloadablePacInterpreter}, use the {@link #reload()} method to immediately reload the PAC. + * Alternatively, use the {@link #start(Duration)} method to begin automatic reloads and the {@link #stop()} method to terminate the reload timer. + *

+ *

+ * To silence GraalVM warnings set the "polyglot.engine.WarnInterpreterOnly" system property to "false" e.g. -Dpolyglot.engine.WarnInterpreterOnly=false + *

+ * + * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Proxy_servers_and_tunneling/Proxy_Auto-Configuration_PAC_file" + * @see "https://www.graalvm.org/latest/reference-manual/js/FAQ/#warning-implementation-does-not-support-runtime-compilation" + */ +public class ReloadablePacInterpreter implements PacInterpreter { + private static final Logger logger = LoggerFactory.getLogger(ReloadablePacInterpreter.class); + private final Supplier pacInterpreterSupplier; + private volatile PacInterpreter pacInterpreter; + private ScheduledExecutorService timer; // All access must be synchronized on AutoReloadingPacInterpreter.this + + protected ReloadablePacInterpreter(final Supplier pacInterpreterSupplier) throws PacInterpreterException { + if (pacInterpreterSupplier == null) { + throw new IllegalArgumentException("PAC interpreter supplier cannot be null"); + } + this.pacInterpreterSupplier = pacInterpreterSupplier; + this.pacInterpreter = getPacInterpreter(); + } + + /** + * Starts auto-updates with the given period. + * + * @param updatePeriod how frequently the PAC should be reloaded. + */ + synchronized public void start(final Duration updatePeriod) { + if (timer != null) { + return; + } + timer = Executors.newSingleThreadScheduledExecutor((final Runnable runnable) -> { + final Thread thread = new Thread(runnable, ReloadablePacInterpreter.class.getSimpleName() + " Reload Timer"); + thread.setDaemon(true); + return thread; + }); + timer.scheduleWithFixedDelay(this::reloadSafe, updatePeriod.toMillis(), updatePeriod.toMillis(), TimeUnit.MILLISECONDS); + } + + /** + * Forces an immediate reload of the backing PAC source. + * Calling this method has no effect on the timing of the next scheduled reload or whether the timer is started or stopped. + * + * @throws PacInterpreterException if an error occurs when reinitializing the underlying {@link PacInterpreter}. 
+ */ + public void reload() throws PacInterpreterException { + logger.debug("Reloading PAC"); + pacInterpreter = getPacInterpreter(); + logger.debug("PAC reloaded successfully"); + } + + protected void reloadSafe() { + try { + reload(); + } catch (Exception e) { + logger.error("Failed to reload PAC: " + e, e); + } + } + + protected PacInterpreter getPacInterpreter() throws PacInterpreterException { + try { + return pacInterpreterSupplier.get(); + } catch (Exception e) { + throw new PacInterpreterException(e.getMessage(), e.getCause()); + } + } + + /** + * Stops auto-updates. + */ + synchronized public void stop() { + if (timer == null) { + return; + } + timer.shutdownNow(); + timer = null; + } + + @Override + public String getPac() { + return pacInterpreter.getPac(); + } + + @Override + public FindProxyResult findProxyForUrl(final String url) throws MalformedURLException, PacInterpreterException { + return pacInterpreter.findProxyForUrl(url); + } + + @Override + public FindProxyResult findProxyForUrl(final String url, final String host) throws PacInterpreterException { + return pacInterpreter.findProxyForUrl(url, host); + } + + /** + * Creates an {@link ReloadablePacInterpreter} using the given PAC script supplier. + * + * @param pacScript supplier for the PAC script. + * @return a {@link ReloadablePacInterpreter} for the given PAC script. + * @throws PacInterpreterException if an error occurs evaluating the PAC script. + */ + public static ReloadablePacInterpreter forScript(final Supplier pacScript) throws PacInterpreterException { + return new ReloadablePacInterpreter(() -> { + try { + return SimplePacInterpreter.forScript(pacScript.get()); + } catch (Exception e) { + throw new RuntimePacInterpreterException(e.getMessage(), e.getCause()); + } + }); + } + + /** + * Creates a {@link ReloadablePacInterpreter} using the given PAC file. + * + * @param pacFile the PAC file. + * @return a {@link ReloadablePacInterpreter} for the given PAC file. 
+ * @throws PacInterpreterException if an error occurs evaluating the PAC file. + */ + public static ReloadablePacInterpreter forFile(final File pacFile) throws PacInterpreterException { + return new ReloadablePacInterpreter(() -> { + try { + return SimplePacInterpreter.forFile(pacFile); + } catch (Exception e) { + throw new RuntimePacInterpreterException(e.getMessage(), e.getCause()); + } + }); + } + + /** + * Creates an {@link ReloadablePacInterpreter} using the given PAC URL. + * + * @param pacUrl the PAC URL. + * @return a {@link ReloadablePacInterpreter} for the given PAC URL. + * @throws PacInterpreterException if an error occurs evaluating the PAC URL. + */ + public static ReloadablePacInterpreter forUrl(final URL pacUrl) throws PacInterpreterException { + return new ReloadablePacInterpreter(() -> { + try { + return SimplePacInterpreter.forUrl(pacUrl); + } catch (Exception e) { + throw new RuntimePacInterpreterException(e.getMessage(), e.getCause()); + } + }); + } +} diff --git a/src/main/java/com/mabl/net/proxy/RuntimePacInterpreterException.java b/src/main/java/com/mabl/net/proxy/RuntimePacInterpreterException.java new file mode 100644 index 0000000..9bde872 --- /dev/null +++ b/src/main/java/com/mabl/net/proxy/RuntimePacInterpreterException.java @@ -0,0 +1,13 @@ +package com.mabl.net.proxy; + +public class RuntimePacInterpreterException extends RuntimeException { + private static final long serialVersionUID = 1L; + + public RuntimePacInterpreterException(final String message) { + super(message); + } + + public RuntimePacInterpreterException(final String message, final Throwable cause) { + super(message, cause); + } +} diff --git a/src/main/java/com/mabl/net/proxy/SimplePacInterpreter.java b/src/main/java/com/mabl/net/proxy/SimplePacInterpreter.java new file mode 100644 index 0000000..c836997 --- /dev/null +++ b/src/main/java/com/mabl/net/proxy/SimplePacInterpreter.java @@ -0,0 +1,159 @@ +package com.mabl.net.proxy; + +import com.mabl.io.IoUtils; +import 
org.graalvm.polyglot.Context; +import org.graalvm.polyglot.Engine; +import org.graalvm.polyglot.HostAccess; +import org.graalvm.polyglot.Value; + +import java.io.File; +import java.io.IOException; +import java.net.InetAddress; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +/** + * An interpreter for Proxy Auto-Configuration files/URLs. + *

+ * To silence GraalVM warnings set the "polyglot.engine.WarnInterpreterOnly" system property to "false" e.g. -Dpolyglot.engine.WarnInterpreterOnly=false + *

+ * + * @see "https://developer.mozilla.org/en-US/docs/Web/HTTP/Proxy_servers_and_tunneling/Proxy_Auto-Configuration_PAC_file" + * @see "https://www.graalvm.org/latest/reference-manual/js/FAQ/#warning-implementation-does-not-support-runtime-compilation" + */ +public class SimplePacInterpreter implements PacInterpreter { + private static final String PAC_UTILS_PATH = "/pacUtils.js"; + private static final String PAC_LANGUAGE_ID = "js"; + private static final String PAC_FUNCTION_NAME = "FindProxyForURL"; + private static final List> ALLOWED_JAVA_CLASSES = Collections.unmodifiableList(Arrays.asList( + // Allows JavaScript to invoke InetAddress methods (required for DNS/IP utility functions) + InetAddress.class + )); + private static final String PAC_UTILS = readPacUtils(); + private static final Engine engine = initializeEngine(); + private final String pac; + private final Value findProxyForUrlFunction; + + protected SimplePacInterpreter(final String pac) throws PacInterpreterException { + this.pac = validatePac(pac); + final Context context = initializeContext(); + + // Evaluate the PAC content, and extract a reference to the PAC function: + try { + context.eval(PAC_LANGUAGE_ID, pac); + final Value jsBindings = context.getBindings(PAC_LANGUAGE_ID); + this.findProxyForUrlFunction = jsBindings.getMember(PAC_FUNCTION_NAME); + } catch (Exception e) { + throw new PacInterpreterException("Error evaluating PAC script", e); + } + } + + private static String validatePac(final String pac) { + if (pac == null) { + throw new IllegalArgumentException("PAC cannot be null"); + } + if (pac.length() == 0) { + throw new IllegalArgumentException("PAC cannot be empty"); + } + if (!pac.contains(PAC_FUNCTION_NAME)) { + throw new IllegalArgumentException(String.format("PAC must contain \"%s\" function", PAC_FUNCTION_NAME)); + } + return pac; + } + + private static Engine initializeEngine() { + return Engine.newBuilder() + .build(); + } + + private static Context initializeContext() { + 
final Context context = Context.newBuilder(PAC_LANGUAGE_ID) + .engine(engine) + .allowHostAccess(HostAccess.ALL) + .allowHostClassLoading(true) + .allowHostClassLookup(clazz -> ALLOWED_JAVA_CLASSES.stream() + .map(Class::getCanonicalName) + .anyMatch(clazz::equals)) + .allowIO(true) + .build(); + + // Make PAC utility functions available to the context: + context.eval(PAC_LANGUAGE_ID, PAC_UTILS); + + return context; + } + + private static String readPacUtils() { + try { + return IoUtils.readClasspathFileToString(PAC_UTILS_PATH); + } catch (IOException e) { + // This file is included in the jar, so if we can't open/read it something is seriously wrong. + // There is likely nothing the caller can do to handle this, so just rethrow as a runtime exception. + throw new RuntimePacInterpreterException(String.format("Failed to read \"%s\" from classpath", PAC_UTILS_PATH), e); + } + } + + @Override + public String getPac() { + return pac; + } + + @Override + public FindProxyResult findProxyForUrl(final String url) throws MalformedURLException, PacInterpreterException { + return findProxyForUrl(url, new URL(url).getHost()); + } + + @Override + public FindProxyResult findProxyForUrl(final String url, final String host) throws PacInterpreterException { + final String result; + try { + // Call the PAC function with the given URL: + result = findProxyForUrlFunction.execute( + Optional.ofNullable(url).orElse(""), + Optional.ofNullable(host).orElse("")) + .asString(); + } catch (Exception e) { + throw new PacInterpreterException(String.format("Error executing %s", PAC_FUNCTION_NAME), e); + } + return FindProxyResult.parse(result); + } + + /** + * Creates a {@link SimplePacInterpreter} using the given PAC script. + * + * @param pacScript the PAC script. + * @return a {@link SimplePacInterpreter} for the given PAC script. + * @throws PacInterpreterException if an error occurs evaluating the PAC script. 
+ */ + public static SimplePacInterpreter forScript(final String pacScript) throws PacInterpreterException { + return new SimplePacInterpreter(pacScript); + } + + /** + * Creates a {@link SimplePacInterpreter} using the given PAC file. + * + * @param pacFile the PAC file. + * @return a {@link SimplePacInterpreter} for the given PAC file. + * @throws IOException if an error occurs reading the PAC script from the given file. + * @throws PacInterpreterException if an error occurs evaluating the PAC file. + */ + public static SimplePacInterpreter forFile(final File pacFile) throws IOException, PacInterpreterException { + return forScript(IoUtils.readFileToString(pacFile)); + } + + /** + * Creates a {@link SimplePacInterpreter} using the given PAC URL. + * + * @param pacUrl the PAC URL. + * @return a {@link SimplePacInterpreter} for the given PAC URL. + * @throws IOException if an error occurs reading the PAC script from the given URL. + * @throws PacInterpreterException if an error occurs evaluating the PAC URL. + */ + public static SimplePacInterpreter forUrl(final URL pacUrl) throws IOException, PacInterpreterException { + return forScript(IoUtils.readUrlToString(pacUrl)); + } +} diff --git a/src/main/resources/pacUtils.js b/src/main/resources/pacUtils.js new file mode 100644 index 0000000..7b3cdfe --- /dev/null +++ b/src/main/resources/pacUtils.js @@ -0,0 +1,252 @@ +// Source: https://github.com/manugarg/pactester/blob/master/pac_utils.js + +/* This file is an adaption of netwerk/base/src/nsProxyAutoConfig.js file in + * mozilla source code. + * + * **** BEGIN LICENSE BLOCK **** + * Version: LGPL 2.1 + + * This file is a free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
// Pure-JavaScript PAC helper functions. dnsResolve() is provided separately by the host integration.

// Returns true when host ends with domain.
function dnsDomainIs(host, domain) {
    return (host.length >= domain.length &&
            host.substring(host.length - domain.length) == domain);
}
// Returns the number of dots in host.
function dnsDomainLevels(host) {
    return host.split('.').length-1;
}
// Packs a dotted-quad string into a 32-bit integer (one byte per component).
function convert_addr(ipchars) {
    var bytes = ipchars.split('.');
    var result = ((bytes[0] & 0xff) << 24) |
                 ((bytes[1] & 0xff) << 16) |
                 ((bytes[2] & 0xff) <<  8) |
                  (bytes[3] & 0xff);
    return result;
}
// Returns true when ipaddr (a dotted quad, or a host name that is resolved first) matches pattern under maskstr.
function isInNet(ipaddr, pattern, maskstr) {
    var test = new RegExp("^(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})$").exec(ipaddr);
    if (test == null) {
        // dnsResolve may signal an unresolvable host by throwing, or by returning null / 'null';
        // none of these can be in the network.
        try {
            ipaddr = dnsResolve(ipaddr);
        } catch (e) {
            return false;
        }
        if (ipaddr == null || ipaddr == 'null')
            return false;
    } else if (test[1] > 255 || test[2] > 255 ||
               test[3] > 255 || test[4] > 255) {
        return false;    // not an IP address
    }
    var host = convert_addr(ipaddr);
    var pat  = convert_addr(pattern);
    var mask = convert_addr(maskstr);
    return ((host & mask) == (pat & mask));
}
// Returns true when host contains no dot.
function isPlainHostName(host) {
    return (host.search('\\.') == -1);
}
// Returns true when dnsResolve yields an address for host, false when resolution fails.
function isResolvable(host) {
    var ip;
    try {
        ip = dnsResolve(host);
    } catch (e) {
        return false;
    }
    return (ip != null && ip != 'null');
}
// Returns true when host equals hostdom, or hostdom starts with host followed by a dot.
function localHostOrDomainIs(host, hostdom) {
    return (host == hostdom) ||
           (hostdom.lastIndexOf(host + '.', 0) == 0);
}
// Matches url against a shell expression in which '*' and '?' are wildcards and '.' is literal.
function shExpMatch(url, pattern) {
    pattern = pattern.replace(/\./g, '\\.');
    pattern = pattern.replace(/\*/g, '.*');
    pattern = pattern.replace(/\?/g, '.');
    var newRe = new RegExp('^'+pattern+'$');
    return newRe.test(url);
}
var wdays = new Array('SUN', 'MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT');
var monthes = new Array('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC');
// weekdayRange(wd1 [, wd2] [, 'GMT']): true when the current weekday lies in [wd1, wd2] (wd2 defaults to wd1).
function weekdayRange() {
    function getDay(weekday) {
        // Scan the whole table; a fixed bound of 6 never matched 'SAT'.
        for (var i = 0; i < wdays.length; i++) {
            if (weekday == wdays[i])
                return i;
        }
        return -1;
    }
    var date = new Date();
    var argc = arguments.length;
    var wday;
    if (argc < 1)
        return false;
    if (arguments[argc - 1] == 'GMT') {
        argc--;
        wday = date.getUTCDay();
    } else {
        wday = date.getDay();
    }
    var wd1 = getDay(arguments[0]);
    var wd2 = (argc == 2) ? getDay(arguments[1]) : wd1;
    return (wd1 == -1 || wd2 == -1) ? false
                                    : (wd1 <= wday && wday <= wd2);
}
// dateRange(...): each argument is a day of month (< 32), a month name, or a year; an optional trailing 'GMT'
// selects UTC. The first half of the arguments forms the lower bound, the second half the upper bound.
function dateRange() {
    function getMonth(name) {
        // Scan the whole table; a fixed bound of 6 never matched 'JUL' through 'DEC'.
        for (var i = 0; i < monthes.length; i++) {
            if (name == monthes[i])
                return i;
        }
        return -1;
    }
    var date = new Date();
    var argc = arguments.length;
    if (argc < 1) {
        return false;
    }
    var isGMT = (arguments[argc - 1] == 'GMT');

    if (isGMT) {
        argc--;
    }
    // function will work even without explicit handling of this case
    if (argc == 1) {
        var tmp = parseInt(arguments[0]);
        if (isNaN(tmp)) {
            return ((isGMT ? date.getUTCMonth() : date.getMonth()) ==
                    getMonth(arguments[0]));
        } else if (tmp < 32) {
            return ((isGMT ? date.getUTCDate() : date.getDate()) == tmp);
        } else {
            return ((isGMT ? date.getUTCFullYear() : date.getFullYear()) ==
                    tmp);
        }
    }
    var year = date.getFullYear();
    var date1, date2;
    date1 = new Date(year,  0,  1,  0,  0,  0);
    date2 = new Date(year, 11, 31, 23, 59, 59);
    var adjustMonth = false;
    for (var i = 0; i < (argc >> 1); i++) {
        var tmp = parseInt(arguments[i]);
        if (isNaN(tmp)) {
            var mon = getMonth(arguments[i]);
            date1.setMonth(mon);
        } else if (tmp < 32) {
            adjustMonth = (argc <= 2);
            date1.setDate(tmp);
        } else {
            date1.setFullYear(tmp);
        }
    }
    // NOTE(review): date2 starts on day 31, so setMonth() with a shorter month rolls over into the
    // following month; confirm whether the upper bound should be clamped to the month's last day.
    for (var i = (argc >> 1); i < argc; i++) {
        var tmp = parseInt(arguments[i]);
        if (isNaN(tmp)) {
            var mon = getMonth(arguments[i]);
            date2.setMonth(mon);
        } else if (tmp < 32) {
            date2.setDate(tmp);
        } else {
            date2.setFullYear(tmp);
        }
    }
    if (adjustMonth) {
        date1.setMonth(date.getMonth());
        date2.setMonth(date.getMonth());
    }
    if (isGMT) {
        var tmp = date;
        tmp.setFullYear(date.getUTCFullYear());
        tmp.setMonth(date.getUTCMonth());
        tmp.setDate(date.getUTCDate());
        tmp.setHours(date.getUTCHours());
        tmp.setMinutes(date.getUTCMinutes());
        tmp.setSeconds(date.getUTCSeconds());
        date = tmp;
    }
    return ((date1 <= date) && (date <= date2));
}
// timeRange(h), (h1, h2), (h1, m1, h2, m2) or (h1, m1, s1, h2, m2, s2), each optionally followed by 'GMT'.
// Throws for any other number of arguments.
function timeRange() {
    var argc = arguments.length;
    var date = new Date();
    var isGMT= false;

    if (argc < 1) {
        return false;
    }
    if (arguments[argc - 1] == 'GMT') {
        isGMT = true;
        argc--;
    }

    var hour = isGMT ? date.getUTCHours() : date.getHours();
    var date1, date2;
    date1 = new Date();
    date2 = new Date();

    if (argc == 1) {
        return (hour == arguments[0]);
    } else if (argc == 2) {
        return ((arguments[0] <= hour) && (hour <= arguments[1]));
    } else {
        switch (argc) {
        case 6:
            date1.setSeconds(arguments[2]);
            date2.setSeconds(arguments[5]);
            // intentional fall-through: the 6-argument form also sets hours and minutes below
        case 4:
            var middle = argc >> 1;
            date1.setHours(arguments[0]);
            date1.setMinutes(arguments[1]);
            date2.setHours(arguments[middle]);
            date2.setMinutes(arguments[middle + 1]);
            if (middle == 2) {
                date2.setSeconds(59);
            }
            break;
        default:
            throw 'timeRange: bad number of arguments'
        }
    }

    if (isGMT) {
        date.setFullYear(date.getUTCFullYear());
        date.setMonth(date.getUTCMonth());
        date.setDate(date.getUTCDate());
        date.setHours(date.getUTCHours());
        date.setMinutes(date.getUTCMinutes());
        date.setSeconds(date.getUTCSeconds());
    }
    return ((date1 <= date) && (date <= date2));
}
// The following functions rely on GraalVM's Java integration because they require
// functionality that is not available in pure JavaScript (e.g. host => IP resolution):

// Returns the address string that java.net.InetAddress reports for the given host.
function dnsResolve(host) {
    var InetAddress = Java.type('java.net.InetAddress');
    var address = InetAddress.getByName(host);
    return address.getHostAddress();
}
// Returns the address string that java.net.InetAddress reports for the local host.
function myIpAddress() {
    var InetAddress = Java.type('java.net.InetAddress');
    var localHost = InetAddress.getLocalHost();
    return localHost.getHostAddress();
}
assertEquals("10.0.0.1", directive.unresolvedProxyAddress().getHostString()); + assertEquals(8080, directive.unresolvedProxyAddress().getPort()); + assertEquals("PROXY 10.0.0.1:8080", directive.toString()); + assertEquals(directive, FindProxyDirective.parse(" PROXY 10.0.0.1:8080 ")); + } +} diff --git a/src/test/java/com/mabl/net/proxy/FindProxyResultTest.java b/src/test/java/com/mabl/net/proxy/FindProxyResultTest.java new file mode 100644 index 0000000..dfe1061 --- /dev/null +++ b/src/test/java/com/mabl/net/proxy/FindProxyResultTest.java @@ -0,0 +1,144 @@ +package com.mabl.net.proxy; + +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class FindProxyResultTest { + @Test + public void all() throws Exception { + final FindProxyResult result = FindProxyResult.parse("PROXY 10.0.0.1:8080; SOCKS 10.0.0.1:1080; DIRECT"); + final List directives = result.all(); + + final FindProxyDirective directive1 = directives.get(0); + assertEquals(ConnectionType.PROXY, directive1.connectionType()); + assertEquals("10.0.0.1:8080", directive1.proxyHostAndPort()); + + final FindProxyDirective directive2 = directives.get(1); + assertEquals(ConnectionType.SOCKS, directive2.connectionType()); + assertEquals("10.0.0.1:1080", directive2.proxyHostAndPort()); + + final FindProxyDirective directive3 = directives.get(2); + assertEquals(ConnectionType.DIRECT, directive3.connectionType()); + assertNull(directive3.proxyHostAndPort()); + } + + @Test + public void first() throws Exception { + final FindProxyResult result = FindProxyResult.parse("PROXY 10.0.0.1:8080; SOCKS 10.0.0.1:1080; DIRECT"); + final FindProxyDirective first = result.first(); + 
assertEquals(ConnectionType.PROXY, first.connectionType()); + assertEquals("10.0.0.1:8080", first.proxyHostAndPort()); + } + + @Test + public void firstProxyWithProxyPresent() throws Exception { + final FindProxyResult result = FindProxyResult.parse("DIRECT; PROXY 10.0.0.1:8080; SOCKS 10.0.0.1:1080"); + final Optional maybeFirstProxy = result.firstProxy(); + assertTrue(maybeFirstProxy.isPresent()); + final FindProxyDirective firstProxy = maybeFirstProxy.get(); + assertEquals(ConnectionType.PROXY, firstProxy.connectionType()); + assertEquals("10.0.0.1:8080", firstProxy.proxyHostAndPort()); + } + + @Test + public void firstProxyWithNoProxyPresent() throws Exception { + final FindProxyResult result = FindProxyResult.parse("DIRECT"); + final Optional maybeFirstProxy = result.firstProxy(); + assertFalse(maybeFirstProxy.isPresent()); + } + + @Test + public void get() throws Exception { + final FindProxyResult result = FindProxyResult.parse("PROXY 10.0.0.1:8080; SOCKS 10.0.0.1:1080; DIRECT"); + + final FindProxyDirective directive1 = result.get(0); + assertEquals(ConnectionType.PROXY, directive1.connectionType()); + assertEquals("10.0.0.1:8080", directive1.proxyHostAndPort()); + + final FindProxyDirective directive2 = result.get(1); + assertEquals(ConnectionType.SOCKS, directive2.connectionType()); + assertEquals("10.0.0.1:1080", directive2.proxyHostAndPort()); + + final FindProxyDirective directive3 = result.get(2); + assertEquals(ConnectionType.DIRECT, directive3.connectionType()); + assertNull(directive3.proxyHostAndPort()); + } + + @Test(expected = PacInterpreterException.class) + public void invalid() throws Exception { + FindProxyResult.parse("FOO"); + fail("Parsing should have failed"); + } + + @Test + public void iterator() throws Exception { + final FindProxyResult result = FindProxyResult.parse("PROXY 10.0.0.1:8080; SOCKS 10.0.0.1:1080; DIRECT"); + final List directives = new ArrayList<>(result.size()); + result.iterator().forEachRemaining(directives::add); + + 
final FindProxyDirective directive1 = directives.get(0); + assertEquals(ConnectionType.PROXY, directive1.connectionType()); + assertEquals("10.0.0.1:8080", directive1.proxyHostAndPort()); + + final FindProxyDirective directive2 = directives.get(1); + assertEquals(ConnectionType.SOCKS, directive2.connectionType()); + assertEquals("10.0.0.1:1080", directive2.proxyHostAndPort()); + + final FindProxyDirective directive3 = directives.get(2); + assertEquals(ConnectionType.DIRECT, directive3.connectionType()); + assertNull(directive3.proxyHostAndPort()); + } + + @Test + public void normalize() throws Exception { + final FindProxyResult result = FindProxyResult.parse("PROXY 10.0.0.1:8080; PROXY 10.0.0.1:8080; DIRECT; SOCKS 10.0.0.1:1080; DIRECT; DIRECT"); + assertEquals(6, result.size()); + + final FindProxyResult normalized = result.normalize(); + assertEquals(3, normalized.size()); + + final FindProxyDirective directive1 = normalized.get(0); + assertEquals(ConnectionType.PROXY, directive1.connectionType()); + assertEquals("10.0.0.1:8080", directive1.proxyHostAndPort()); + + final FindProxyDirective directive2 = normalized.get(1); + assertEquals(ConnectionType.DIRECT, directive2.connectionType()); + assertNull(directive2.proxyHostAndPort()); + + final FindProxyDirective directive3 = normalized.get(2); + assertEquals(ConnectionType.SOCKS, directive3.connectionType()); + assertEquals("10.0.0.1:1080", directive3.proxyHostAndPort()); + } + + @Test + public void random() throws Exception { + final FindProxyResult result = FindProxyResult.parse("PROXY 10.0.0.1:8080; SOCKS 10.0.0.1:1080; DIRECT"); + final Set directives = new HashSet<>(result.all()); + for (int ii = 0; ii < 10; ii++) { + assertTrue(directives.contains(result.random())); + } + } + + @Test + public void resultToString() throws Exception { + final FindProxyResult result = FindProxyResult.parse("PROXY 10.0.0.1:8080; SOCKS 10.0.0.1:1080; DIRECT "); + assertEquals("PROXY 10.0.0.1:8080; SOCKS 10.0.0.1:1080; DIRECT", 
result.toString()); + } + + @Test + public void size() throws Exception { + final FindProxyResult result = FindProxyResult.parse("PROXY 10.0.0.1:8080; SOCKS 10.0.0.1:1080; DIRECT"); + assertEquals(3, result.size()); + } +} diff --git a/src/test/java/com/mabl/net/proxy/PacInterpreterTest.java b/src/test/java/com/mabl/net/proxy/PacInterpreterTest.java new file mode 100644 index 0000000..f1fee8f --- /dev/null +++ b/src/test/java/com/mabl/net/proxy/PacInterpreterTest.java @@ -0,0 +1,119 @@ +package com.mabl.net.proxy; + +import com.mabl.io.IoUtils; +import io.undertow.Undertow; +import io.undertow.util.Headers; +import org.junit.After; +import org.junit.Before; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.net.MalformedURLException; +import java.util.HashSet; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +abstract public class PacInterpreterTest { + protected static final String PAC_1 = readFromClasspath("/pac1.js"); + protected static final String PAC_2 = readFromClasspath("/pac2.js"); + protected static final String PAC_3 = readFromClasspath("/pac3.js"); + protected Undertow pacServer; + private volatile String pacServerContent; + + @Before + public void silenceGraalvmWarnings() { + System.setProperty("polyglot.engine.WarnInterpreterOnly", Boolean.TRUE.toString()); + } + + @After + public void tearDown() { + if (pacServer != null) { + pacServer.stop(); + pacServer = null; + pacServerContent = null; + } + } + + protected static String readFromClasspath(final String path) { + try { + return IoUtils.readClasspathFileToString(path); + } catch (IOException e) { + throw new RuntimeException(String.format("Failed to read \"%s\" from classpath", path), e); + } + } + + protected static File writePacContentToFile(final String pacContent) 
throws IOException { + final File pacFile = File.createTempFile("pac", ".js"); + pacFile.deleteOnExit(); + return writePacContentToFile(pacContent, pacFile); + } + + protected static File writePacContentToFile(final String pacContent, final File pacFile) throws IOException { + try (final BufferedWriter writer = new BufferedWriter(new FileWriter(pacFile))) { + writer.write(pacContent); + writer.flush(); + } + return pacFile; + } + + protected Undertow startPacServer(final String pacContent) { + updatePacServerContent(pacContent); + pacServer = Undertow.builder() + .addHttpListener(0, "localhost") + .setHandler(exchange -> { + exchange.getResponseHeaders().put(Headers.CONTENT_TYPE, "application/javascript"); + exchange.getResponseSender().send(pacServerContent); + }).build(); + pacServer.start(); + return pacServer; + } + + protected void updatePacServerContent(final String pacContent) { + assertNotNull(pacContent); + this.pacServerContent = pacContent; + } + + protected void assertPac1Correct(final PacInterpreter interpreter) throws PacInterpreterException, MalformedURLException { + final FindProxyResult results = interpreter.findProxyForUrl("https://example.com"); + assertEquals(2, results.size()); + + final FindProxyDirective first = results.first(); + assertEquals(ConnectionType.PROXY, first.connectionType()); + assertEquals("4.5.6.7:8080", first.proxyHostAndPort()); + assertTrue(interpreter.getPac().contains(first.toString())); + + final FindProxyDirective second = results.get(1); + assertEquals(ConnectionType.PROXY, second.connectionType()); + assertEquals("7.8.9.10:8080", second.proxyHostAndPort()); + assertTrue(interpreter.getPac().contains(second.toString())); + + final Set directives = new HashSet<>(results.all()); + assertTrue(directives.contains(results.random())); + } + + protected void assertPac2Correct(final PacInterpreter interpreter) throws PacInterpreterException, MalformedURLException { + final FindProxyResult results = 
interpreter.findProxyForUrl("https://example.com"); + assertEquals(1, results.size()); + + final FindProxyDirective first = results.first(); + assertEquals(ConnectionType.PROXY, first.connectionType()); + assertEquals("wcg1.example.com:8080", first.proxyHostAndPort()); + assertTrue(interpreter.getPac().contains(first.toString())); + } + + protected void assertPac3Correct(final PacInterpreter interpreter) throws PacInterpreterException, MalformedURLException { + final FindProxyResult results = interpreter.findProxyForUrl("https://example.com"); + assertEquals(1, results.size()); + + final FindProxyDirective first = results.first(); + assertEquals(ConnectionType.DIRECT, first.connectionType()); + assertNull(first.proxyHostAndPort()); + } + +} diff --git a/src/test/java/com/mabl/net/proxy/ReloadablePacInterpreterTest.java b/src/test/java/com/mabl/net/proxy/ReloadablePacInterpreterTest.java new file mode 100644 index 0000000..80d0d01 --- /dev/null +++ b/src/test/java/com/mabl/net/proxy/ReloadablePacInterpreterTest.java @@ -0,0 +1,74 @@ +package com.mabl.net.proxy; + +import org.junit.After; +import org.junit.Test; + +import java.io.File; +import java.net.InetSocketAddress; +import java.net.URL; +import java.time.Duration; +import java.util.concurrent.atomic.AtomicReference; + +public class ReloadablePacInterpreterTest extends PacInterpreterTest { + private ReloadablePacInterpreter pacInterpreter; + + @After + public void stopReload() { + if (pacInterpreter != null) { + pacInterpreter.stop(); + pacInterpreter = null; + } + } + + @Test + public void forScript() throws Exception { + final AtomicReference script = new AtomicReference<>(PAC_1); + + pacInterpreter = ReloadablePacInterpreter.forScript(script::get); + assertPac1Correct(pacInterpreter); + + script.set(PAC_2); + pacInterpreter.reload(); + assertPac2Correct(pacInterpreter); + } + + @Test + public void forFile() throws Exception { + final File pacFile = writePacContentToFile(PAC_1); + + pacInterpreter = 
ReloadablePacInterpreter.forFile(pacFile); + assertPac1Correct(pacInterpreter); + + writePacContentToFile(PAC_2, pacFile); + pacInterpreter.reload(); + assertPac2Correct(pacInterpreter); + } + + @Test + public void forServer() throws Exception { + final InetSocketAddress serverAddress = (InetSocketAddress) startPacServer(PAC_2).getListenerInfo().get(0).getAddress(); + + pacInterpreter = ReloadablePacInterpreter.forUrl(new URL(String.format("http://%s:%d/pac.js", serverAddress.getAddress().getHostAddress(), serverAddress.getPort()))); + assertPac2Correct(pacInterpreter); + + updatePacServerContent(PAC_3); + pacInterpreter.reload(); + assertPac3Correct(pacInterpreter); + } + + @Test + public void timer() throws Exception { + final AtomicReference script = new AtomicReference<>(PAC_1); + + pacInterpreter = ReloadablePacInterpreter.forScript(script::get); + assertPac1Correct(pacInterpreter); + + final Duration reloadPeriod = Duration.ofSeconds(1); + script.set(PAC_2); + pacInterpreter.start(reloadPeriod); + assertPac1Correct(pacInterpreter); + + Thread.sleep(reloadPeriod.toMillis() * 2); + assertPac2Correct(pacInterpreter); + } +} diff --git a/src/test/java/com/mabl/net/proxy/SimplePacInterpreterTest.java b/src/test/java/com/mabl/net/proxy/SimplePacInterpreterTest.java new file mode 100644 index 0000000..588a892 --- /dev/null +++ b/src/test/java/com/mabl/net/proxy/SimplePacInterpreterTest.java @@ -0,0 +1,58 @@ +package com.mabl.net.proxy; + +import org.junit.Test; + +import java.net.InetSocketAddress; +import java.net.URL; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +public class SimplePacInterpreterTest extends PacInterpreterTest { + + @Test + public void forScript() throws Exception { + final SimplePacInterpreter interpreter = SimplePacInterpreter.forScript(PAC_1); + assertPac1Correct(interpreter); + } + + @Test + public void forFile() throws Exception { + final SimplePacInterpreter interpreter = 
SimplePacInterpreter.forFile(writePacContentToFile(PAC_2)); + assertPac2Correct(interpreter); + } + + @Test + public void forUrl() throws Exception { + final InetSocketAddress serverAddress = (InetSocketAddress) startPacServer(PAC_3).getListenerInfo().get(0).getAddress(); + final SimplePacInterpreter interpreter = SimplePacInterpreter.forUrl(new URL(String.format("http://%s:%d/pac.js", serverAddress.getAddress().getHostAddress(), serverAddress.getPort()))); + assertPac3Correct(interpreter); + } + + @Test + public void nullMapsToDirect() throws Exception { + final String pacFileContent = "function FindProxyForURL(url, host) { return null; }"; + final SimplePacInterpreter interpreter = SimplePacInterpreter.forScript(pacFileContent); + final FindProxyResult results = interpreter.findProxyForUrl("https://example.com"); + assertEquals(1, results.size()); + + // null should map to DIRECT + final FindProxyDirective first = results.first(); + assertEquals(ConnectionType.DIRECT, first.connectionType()); + assertNull(first.proxyHostAndPort()); + } + + @Test + public void undefinedMapsToDirect() throws Exception { + final String pacFileContent = "function FindProxyForURL(url, host) { return undefined; }"; + final SimplePacInterpreter interpreter = SimplePacInterpreter.forScript(pacFileContent); + final FindProxyResult results = interpreter.findProxyForUrl("https://example.com"); + assertEquals(1, results.size()); + + // undefined should map to DIRECT + final FindProxyDirective first = results.first(); + assertEquals(ConnectionType.DIRECT, first.connectionType()); + assertNull(first.proxyHostAndPort()); + } + +} diff --git a/src/test/resources/pac1.js b/src/test/resources/pac1.js new file mode 100644 index 0000000..a5a8bac --- /dev/null +++ b/src/test/resources/pac1.js @@ -0,0 +1,30 @@ +// Source: http://findproxyforurl.com/example-pac-file/ + +function FindProxyForURL(url, host) { + // If the hostname matches, send direct. 
+ if (dnsDomainIs(host, "intranet.domain.com") || + shExpMatch(host, "(*.abcdomain.com|abcdomain.com)")) + return "DIRECT"; + + // If the protocol or URL matches, send direct. + if (url.substring(0, 4)=="ftp:" || + shExpMatch(url, "http://abcdomain.com/folder/*")) + return "DIRECT"; + + // If the requested website is hosted within the internal network, send direct. + if (isPlainHostName(host) || + shExpMatch(host, "*.local") || + isInNet(dnsResolve(host), "10.0.0.0", "255.0.0.0") || + isInNet(dnsResolve(host), "172.16.0.0", "255.240.0.0") || + isInNet(dnsResolve(host), "192.168.0.0", "255.255.0.0") || + isInNet(dnsResolve(host), "127.0.0.0", "255.255.255.0")) + return "DIRECT"; + + // If the IP address of the local machine is within a defined + // subnet, send to a specific proxy. + if (isInNet(myIpAddress(), "10.10.5.0", "255.255.255.0")) + return "PROXY 1.2.3.4:8080"; + + // DEFAULT RULE: All other traffic, use below proxies, in fail-over order. + return "PROXY 4.5.6.7:8080; PROXY 7.8.9.10:8080"; +} diff --git a/src/test/resources/pac2.js b/src/test/resources/pac2.js new file mode 100644 index 0000000..798edbf --- /dev/null +++ b/src/test/resources/pac2.js @@ -0,0 +1,67 @@ +// Source: https://www.websense.com/content/support/library/web/v76/pac_file_best_practices/PAC_file_sample.aspx + +function FindProxyForURL(url, host) { + /* Normalize the URL for pattern matching */ + url = url.toLowerCase(); + host = host.toLowerCase(); + + /* Don't proxy local hostnames */ + if (isPlainHostName(host)) { + return 'DIRECT'; + } + + /* Don't proxy local domains */ + if (dnsDomainIs(host, ".example1.com") || + (host == "example1.com") || + dnsDomainIs(host, ".example2.com") || + (host == "example2.com") || + dnsDomainIs(host, ".example3.com") || + (host == "example3.com")) { + return 'DIRECT'; + } + + /* Don't proxy Windows Update */ + if ((host == "download.microsoft.com") || + (host == "ntservicepack.microsoft.com") || + (host == "cdm.microsoft.com") || + (host == 
"wustat.windows.com") || + (host == "windowsupdate.microsoft.com") || + (dnsDomainIs(host, ".windowsupdate.microsoft.com")) || + (host == "update.microsoft.com") || + (dnsDomainIs(host, ".update.microsoft.com")) || + (dnsDomainIs(host, ".windowsupdate.com"))) { + return 'DIRECT'; + } + + if (isResolvable(host)) { + var hostIP = dnsResolve(host); + + /* Don't proxy non-routable addresses (RFC 3330) */ + if (isInNet(hostIP, '0.0.0.0', '255.0.0.0') || + isInNet(hostIP, '10.0.0.0', '255.0.0.0') || + isInNet(hostIP, '127.0.0.0', '255.0.0.0') || + isInNet(hostIP, '169.254.0.0', '255.255.0.0') || + isInNet(hostIP, '172.16.0.0', '255.240.0.0') || + isInNet(hostIP, '192.0.2.0', '255.255.255.0') || + isInNet(hostIP, '192.88.99.0', '255.255.255.0') || + isInNet(hostIP, '192.168.0.0', '255.255.0.0') || + isInNet(hostIP, '198.18.0.0', '255.254.0.0') || + isInNet(hostIP, '224.0.0.0', '240.0.0.0') || + isInNet(hostIP, '240.0.0.0', '240.0.0.0')) { + return 'DIRECT'; + } + + /* Don't proxy local addresses.*/ + if (false) { + return 'DIRECT'; + } + } + + if (url.substring(0, 5) == 'http:' || + url.substring(0, 6) == 'https:' || + url.substring(0, 4) == 'ftp:') { + return 'PROXY wcg1.example.com:8080'; + } + + return 'DIRECT'; +} diff --git a/src/test/resources/pac3.js b/src/test/resources/pac3.js new file mode 100644 index 0000000..ebcc93e --- /dev/null +++ b/src/test/resources/pac3.js @@ -0,0 +1,20 @@ +// Source: https://www.websense.com/content/support/library/web/v76/pac_file_best_practices/PAC_file_sample.aspx + +function FindProxyForURL(url, host) { + if (isInNet(myIpAddress(), "1.1.0.0", "255.0.0.0")) { + return "PROXY wcg1.example.com:8080; " + "PROXY wcg2.example.com:8080"; + } + + if (isInNet(myIpAddress(), "1.2.0.0", "255.0.0.0")) { + return "PROXY wcg1.example.com:8080; " + "PROXY wcg2.example.com:8080"; + } + + if (isInNet(myIpAddress(), "1.3.0.0", "255.0.0.0")) { + return "PROXY wcg2.example.com:8080; " + "PROXY wcg1.example.com:8080"; + } + + if (isInNet(myIpAddress(), 
"1.4.0.0", "255.0.0.0")) { + return "PROXY wcg2.example.com:8080; " + "PROXY wcg1.example.com:8080"; + } + else return "DIRECT"; +}