diff --git a/drivers/src/main/java/org/gorpipe/s3/driver/S3MultipartOutputStream.java b/drivers/src/main/java/org/gorpipe/s3/driver/S3MultipartOutputStream.java
index 65f4a7c5..6defc70d 100644
--- a/drivers/src/main/java/org/gorpipe/s3/driver/S3MultipartOutputStream.java
+++ b/drivers/src/main/java/org/gorpipe/s3/driver/S3MultipartOutputStream.java
@@ -64,11 +64,30 @@ private String initiateMultipartUpload() throws IOException {
                 .bucket(bucket)
                 .key(key)
                 .build();
-        try {
-            return sendCreateMultipartUploadRequest(req).uploadId();
-        } catch (Exception e) {
-            throw new IOException("Failed to initiate multipart upload", e);
+        // Try up to MAX_RETRIES times; each failure is logged and the last one is rethrown.
+        for (int attempt = 1; attempt <= MAX_RETRIES; attempt++) {
+            try {
+                return sendCreateMultipartUploadRequest(req).uploadId();
+            } catch (Exception e) {
+                String errorMsg = String.format("Failed to initiate multipart upload for %s/%s on attempt %d/%d: %s",
+                        bucket, key, attempt, MAX_RETRIES, e.getMessage());
+                logger.warn(errorMsg, e);
+                if (attempt == MAX_RETRIES) {
+                    throw new IOException("Failed to initiate multipart upload after retries", e);
+                }
+                try {
+                    // Back off linearly (base delay times the attempt number) before retrying.
+                    Thread.sleep(RETRY_SLEEP_BASE_MS * attempt);
+                } catch (InterruptedException ie) {
+                    // Restore the interrupt flag and stop retrying: with the flag set, a later
+                    // Thread.sleep throws at once, so the loop would retry with no backoff.
+                    Thread.currentThread().interrupt();
+                    throw new IOException("Interrupted while waiting to retry multipart upload initiation", e);
+                }
+            }
         }
+        // Reached only if MAX_RETRIES < 1, i.e. the loop body never ran.
+        throw new IOException("Failed to initiate multipart upload after retries");
     }
 
     @Override
diff --git a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala
index efa2103e..6731855c 100644
--- a/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala
+++ b/gortools/src/main/scala/gorsat/Analysis/ForkWrite.scala
@@ -316,7 +316,10 @@ case class ForkWrite(forkCol: Int,
         sh.fileOpen = false
       }
     })
-    if (options.useFolder.isEmpty && !somethingToWrite && !useFork) {
+
+    val isInsideGordFolder = DataUtil.isGord(PathUtils.getParent(fullFileName))
+
+    if (options.useFolder.isEmpty && !somethingToWrite && !useFork && !isInsideGordFolder) {
       val out = createOutFile(fullFileName, false)
       out.setup()
       out.finish()
@@ -340,7
+343,7 @@ case class ForkWrite(forkCol: Int, } // Only write links for files that are NOT inside gord - if (options.useFolder.isEmpty && !singleFileHolder.fileName.contains(".gord/")) { + if (options.useFolder.isEmpty && !isInsideGordFolder) { if (useFork) { forkMap.values.foreach(sh => { val linkData = LinkFileUtil.extractLink(session.getProjectContext.getFileReader, sh.fileName, diff --git a/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala b/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala index cc4d4f7b..91471fc4 100644 --- a/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala +++ b/gortools/src/main/scala/gorsat/QueryHandlers/GeneralQueryHandler.scala @@ -138,7 +138,7 @@ class GeneralQueryHandler(context: GorContext, header: Boolean) extends GorParal var cacheFile = fileCache.lookupFile(commandSignature) cacheFile = GorJavaUtilities.verifyLinkFileLastModified(context.getSession.getProjectContext,cacheFile) // Do this if we have result cache active or if we are running locally and the local cacheFile does not exist. 
-          fileNames(i) = if (cacheFile == null) {
+          fileNames(i) = if (cacheFile == null || !fileReader.exists(cacheFile)) {
             val writeLocationPath = cacheFiles(i)
             if (writeLocationPath != null) {
               runAndStoreLinkFileInCache(nested, writeLocationPath, fileCache, useMd5)
diff --git a/gortools/src/test/java/gorsat/UTestGorWriteFolder.java b/gortools/src/test/java/gorsat/UTestGorWriteFolder.java
index b85d98e8..b0a1be6e 100644
--- a/gortools/src/test/java/gorsat/UTestGorWriteFolder.java
+++ b/gortools/src/test/java/gorsat/UTestGorWriteFolder.java
@@ -184,4 +184,49 @@ public void testPartgorWriteOverGordFolder() throws IOException {
         Assert.assertEquals(UTestGorWriteExplicit.WRONG_RESULT, "chrom\tbpStart\tbpStop\tx\n" +
                 "chr1\t1\t1\t'PN1'\n" , results);
     }
+
+    /**
+     * Runs the same pgor write into a .gord folder twice and checks that the dictionary
+     * exists, that no gorz file in the folder is empty, and that reading the folder
+     * returns every chr2 and chr19 row.
+     */
+    @Test
+    public void testOverwritePgorWriteGordFolderWithGorrowsMerge() throws IOException {
+        var folderpath = workDirPath.resolve("ref_af.gord");
+
+        var query = "create #x# = gorrows -p chr2:1000-16001|merge <(gorrows -p chr19:910100-920102);\n" +
+                "pgor [#x#]|write " + folderpath;
+        TestUtils.runGorPipe(
+                query,
+                "-gorroot", workDirPath.toAbsolutePath().toString(),
+                "-cachedir", cachePath.toString());
+
+        // Overwrite
+        TestUtils.runGorPipe(
+                query,
+                "-gorroot", workDirPath.toAbsolutePath().toString(),
+                "-cachedir", cachePath.toString());
+
+        // Dictionary must exist
+        Assert.assertTrue("Dictionary file must be created", Files.exists(folderpath.resolve(DEFAULT_FOLDER_DICTIONARY_NAME)));
+
+        // Only gorz files with actual data should be in the folder — no empty gorz files.
+        List<Path> gorzFiles;
+        // Files.list keeps a directory handle open, so close the stream once it is consumed.
+        try (var entries = Files.list(folderpath)) {
+            gorzFiles = entries
+                    .filter(p -> p.toString().endsWith(".gorz"))
+                    .collect(Collectors.toList());
+        }
+        for (Path gorzFile : gorzFiles) {
+            long rowCount = TestUtils.runGorPipeCount("gor " + gorzFile);
+            Assert.assertTrue("No empty gorz files should exist in gord folder: " + gorzFile.getFileName(), rowCount > 0);
+        }
+
+        // Reading the dict must return all chr2 and chr19 rows
+        long rowCount = TestUtils.runGorPipeCount("gor " + folderpath,
+                "-gorroot", workDirPath.toAbsolutePath().toString(),
+                "-cachedir", cachePath.toString());
+        Assert.assertEquals("pgor write gord should contain chr2 + chr19 rows", 15001L + 10002L, rowCount);
+    }
 }