Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/checklist_comment_on_new_pr.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: Comment on new Pull Request with checklist
on:
pull_request:
# safe as long as this workflow doesn't access code from the PR branch
pull_request_target:
types: opened

jobs:
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/run-bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ on:
- '**/src/main/java/**'
- 'pom.xml'
- '**/pom.xml'
- '.github/workflows/run-bench.yml'

jobs:
# Job to generate the matrix configuration
Expand All @@ -41,8 +42,8 @@ jobs:

# Default branches based on event type
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "Pull request detected. Using main and PR branch: ${{ github.head_ref }}"
BRANCHES='["main", "${{ github.head_ref }}"]'

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be a change from "branch" to "sha", rather than also adding support for the sha. Don't we need it to work for both?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes; but this works for two reasons:

  • Git refs can be either branch names or SHAs.
  • The elif condition just after this handles cases where the workflow is manually dispatched by providing a branch name (or SHA). This conditional branch only deals with PRs, and that gives us the freedom to pick how we want to refer to the head commit, either by branch or SHA

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the latest PR (#688) I've changed this to use the Github ref instead of the SHA

echo "Pull request detected. Using main and PR ref: ${{ github.ref }}"
BRANCHES='["main", "${{ github.ref }}"]'
elif [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.branches }}" ]]; then
# Parse space-separated branches input into JSON array
echo "Workflow dispatch with branches input detected"
Expand Down Expand Up @@ -213,7 +214,8 @@ jobs:
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} dpr-gemma-1m
-cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML \
--match-all-datasets --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} openai-1536-1m
else
java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,20 @@ public static void main(String[] args) throws IOException {
String finalOutputPath = outputPath;
String configPath = null;
int diagnostic_level = 0;
for (int i = 0; i < args.length - 1; i++) {
if (args[i].equals("--config")) configPath = args[i+1];
if (args[i].equals("--diag")) diagnostic_level = Integer.parseInt(args[i+1]);
boolean matchAllDatasets = false;
for (int i = 0; i < args.length; i++) {
if (i < args.length - 1 && args[i].equals("--config")) configPath = args[i+1];
if (i < args.length - 1 && args[i].equals("--diag")) diagnostic_level = Integer.parseInt(args[i+1]);
if (args[i].equals("--match-all-datasets")) matchAllDatasets = true;
}
if (diagnostic_level > 0) {
Grid.setDiagnosticLevel(diagnostic_level);
}
String finalConfigPath = configPath;
String[] filteredArgs = Arrays.stream(args)
.filter(arg -> !arg.equals("--output") && !arg.equals(finalOutputPath) &&
!arg.equals("--config") && !arg.equals(finalConfigPath))
.filter(arg -> !arg.equals("--output") && !arg.equals(finalOutputPath) &&
!arg.equals("--config") && !arg.equals(finalConfigPath) &&
!arg.equals("--match-all-datasets"))
.toArray(String[]::new);

// Log the filtered arguments for debugging
Expand All @@ -100,7 +103,12 @@ public static void main(String[] args) throws IOException {
var pattern = Pattern.compile(regex);

var datasetCollection = DatasetCollection.load();
var datasetNames = datasetCollection.getSection(REGRESSION_TEST_KEY).stream().filter(dn -> pattern.matcher(dn).find()).collect(Collectors.toList());
var candidateDatasets = matchAllDatasets ? datasetCollection.getAll() : datasetCollection.getSection(REGRESSION_TEST_KEY);
var datasetNames = candidateDatasets.stream().filter(dn -> pattern.matcher(dn).find()).collect(Collectors.toList());

if (datasetNames.size() == 0) {
throw new RuntimeException("No datasets matched the given patterns, nothing to do");
}

logger.info("Executing the following datasets: {}", datasetNames);
List<BenchResult> results = new ArrayList<>();
Expand Down
Loading