Skip to content

Commit

Permalink
Implement VCF bundles. (#1703)
Browse files Browse the repository at this point in the history
* Implement VariantBundles.
  • Loading branch information
cmnbroad authored Sep 3, 2024
1 parent 204a0db commit 46f6f0f
Show file tree
Hide file tree
Showing 10 changed files with 708 additions and 92 deletions.
65 changes: 64 additions & 1 deletion src/main/java/htsjdk/beta/io/IOPathUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
import htsjdk.beta.exception.HtsjdkIOException;
import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import htsjdk.utils.ValidationUtils;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Optional;
import java.util.function.Function;

public class IOPathUtils {

Expand Down Expand Up @@ -69,4 +71,65 @@ public static void writeStringToPath(final IOPath ioPath, final String contents)
e);
}
}

/**
* Takes an IOPath and returns a new IOPath object that keeps the same basename as the original but with
* a new extension. Only the last component of the extension will be replaced, e.g. ("my.fasta.gz", ".tmp") ->
* "my.fasta.tmp". If the original path has no extension, an exception will be thrown.
*
* If the input IOPath was created from a rawInputString that specifies a relative local path, the new path will
* have a rawInputString that specifies an absolute path.
*
* Examples
* - ("test_na12878.bam", ".bai") -> "test_na12878.bai"
* - ("test_na12878.bam", "bai") -> "test_na12878.bai"
* - ("test_na12878.ext.bam", ".bai") -> "test_na12878.ext.bai"
*
* @param path The original path
* @param newExtension The new file extension. If no leading "." is provided as part of the new extension, one will be added.
* @param ioPathConstructor a function that takes a string and returns an IOPath-derived class of type <T>
* @return A new IOPath object with the new extension
* @throws RuntimeException if the original path has no extension
*/
public static <T extends IOPath> T replaceExtension(
        final IOPath path,
        final String newExtension,
        final Function<String, T> ioPathConstructor){
    // Normalize the requested extension so that it always carries a leading "."
    final String extensionToUse = newExtension.startsWith(".") ?
            newExtension :
            "." + newExtension;
    final Optional<String> oldExtension = path.getExtension();
    if (oldExtension.isEmpty()){
        throw new RuntimeException("The original path has no extension to replace: " + path.getURIString());
    }
    final Path originalPath = path.toPath();
    final String oldFileName = originalPath.getFileName().toString();
    final String oldExtensionString = oldExtension.get();
    // Replace the trailing extension literally rather than through String.replaceAll: the old
    // extension is not a regular expression (it contains at least a ".", and may contain other
    // metacharacters such as "+"), and the new extension must not be interpreted as a regex
    // replacement string (where "$" and "\" are special).
    // NOTE(review): this presumes getExtension() returns the extension including its leading "." -
    // the documented examples only work under that assumption; confirm against IOPath.
    final String newFileName = oldFileName.endsWith(oldExtensionString) ?
            oldFileName.substring(0, oldFileName.length() - oldExtensionString.length()) + extensionToUse :
            // the reported extension is not a literal suffix of the file name; leave the name unchanged
            oldFileName;
    // resolveSibling keeps the parent directory; the result is handed to the constructor as a URI string
    return ioPathConstructor.apply(originalPath.resolveSibling(newFileName).toUri().toString());
}

/**
* Takes an IOPath and returns a new IOPath object that keeps the same name as the original, but with
* the new extension added. If no leading "." is provided as part of the new extension, one will be added.
*
* If the input IOPath was created from a rawInputString that specifies a relative local path, the new path will
* have a rawInputString that specifies an absolute path.
*
* Examples:
* - ("test_na12878.bam", ".bai") -> "test_na12878.bam.bai"
* - ("test_na12878.bam", "md5") -> "test_na12878.bam.md5"
*
* @param path The original path
* @param extension The file extension to add. If no leading "." is provided as part of the extension, one will be added.
* @param ioPathConstructor a function that takes a string and returns an IOPath-derived class of type <T>
* @return A new IOPath object with the new extension
*/
public static <T extends IOPath> T appendExtension(
        final IOPath path,
        final String extension,
        final Function<String, T> ioPathConstructor){
    // The last path component is the name that the extension gets appended to
    final Path originalPath = path.toPath();
    final String currentName = originalPath.getFileName().toString();

    // Make sure the appended suffix starts with exactly the "." the caller may have omitted
    final String suffix;
    if (extension.startsWith(".")) {
        suffix = extension;
    } else {
        suffix = "." + extension;
    }

    // Keep the parent directory, swap in the extended name, and rebuild the IOPath from its URI string
    final Path extendedPath = originalPath.resolveSibling(currentName + suffix);
    return ioPathConstructor.apply(extendedPath.toUri().toString());
}
}
27 changes: 17 additions & 10 deletions src/main/java/htsjdk/beta/io/bundle/BundleJSON.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ public class BundleJSON {
public static final String BUNDLE_EXTENSION = ".json";
private static final Log LOG = Log.getInstance(BundleJSON.class);

public static final String JSON_PROPERTY_SCHEMA = "schema";
public static final String JSON_PROPERTY_SCHEMA_NAME = "schemaName";
public static final String JSON_PROPERTY_SCHEMA_VERSION = "schemaVersion";
public static final String JSON_PROPERTY_PRIMARY = "primary";
Expand All @@ -34,8 +35,7 @@ public class BundleJSON {
public static final String JSON_SCHEMA_NAME = "htsbundle";
public static final String JSON_SCHEMA_VERSION = "0.1.0"; // TODO: bump this to 1.0.0

final private static Set<String> TOP_LEVEL_PROPERTIES =
Set.of(JSON_PROPERTY_SCHEMA_NAME, JSON_PROPERTY_SCHEMA_VERSION, JSON_PROPERTY_PRIMARY);
final private static Set<String> TOP_LEVEL_PROPERTIES = Set.of(JSON_PROPERTY_SCHEMA, JSON_PROPERTY_PRIMARY);

/**
* Serialize a bundle to a JSON string representation. All resources in the bundle must
Expand All @@ -46,9 +46,14 @@ public class BundleJSON {
* @throws IllegalArgumentException if any resource in the bundle is not an IOPathResource.
*/
public static String toJSON(final Bundle bundle) {
ValidationUtils.validateArg(
!bundle.getPrimaryContentType().equals(JSON_PROPERTY_PRIMARY),
"Primary content type cannot be named 'primary'");
final JSONObject schemaMap = new JSONObject()
.put(JSON_PROPERTY_SCHEMA_NAME, JSON_SCHEMA_NAME)
.put(JSON_PROPERTY_SCHEMA_VERSION, JSON_SCHEMA_VERSION);
final JSONObject outerJSON = new JSONObject()
.put(JSON_PROPERTY_SCHEMA_NAME, JSON_SCHEMA_NAME)
.put(JSON_PROPERTY_SCHEMA_VERSION, JSON_SCHEMA_VERSION)
.put(JSON_PROPERTY_SCHEMA, schemaMap)
.put(JSON_PROPERTY_PRIMARY, bundle.getPrimaryContentType());

bundle.forEach(bundleResource -> {
Expand Down Expand Up @@ -120,7 +125,7 @@ public static <T extends IOPath> Bundle toBundle(
String.format("A JSON string with more than one bundle was provided but only a single bundle is allowed in this context (%s)",
e.getMessage()));
}
return bundles.stream().findFirst().get();
return bundles.get(0);
} catch (JSONException | UnsupportedOperationException e2) {
throw new IllegalArgumentException(
String.format("The JSON can be interpreted neither as an individual bundle (%s) nor as a bundle collection (%s)",
Expand Down Expand Up @@ -188,15 +193,17 @@ private static <T extends IOPath> Bundle toBundle(
final JSONObject jsonObject, // must be a single Bundle object
final Function<String, T> ioPathConstructor) {
try {
// validate the schema name
final String schemaName = getRequiredPropertyAsString(jsonObject, JSON_PROPERTY_SCHEMA_NAME);
// validate the schema name and version
final JSONObject schemaMap = jsonObject.getJSONObject(JSON_PROPERTY_SCHEMA);
if (schemaMap == null) {
throw new IllegalArgumentException("JSON bundle is missing the required schema property");
}
final String schemaName = getRequiredPropertyAsString(schemaMap, JSON_PROPERTY_SCHEMA_NAME);
if (!schemaName.equals(JSON_SCHEMA_NAME)) {
throw new IllegalArgumentException(
String.format("Expected bundle schema name %s but found %s", JSON_SCHEMA_NAME, schemaName));
}

// validate the schema version
final String schemaVersion = getRequiredPropertyAsString(jsonObject, JSON_PROPERTY_SCHEMA_VERSION);
final String schemaVersion = getRequiredPropertyAsString(schemaMap, JSON_PROPERTY_SCHEMA_VERSION);
if (!schemaVersion.equals(JSON_SCHEMA_VERSION)) {
throw new IllegalArgumentException(String.format("Expected bundle schema version %s but found %s",
JSON_SCHEMA_VERSION, schemaVersion));
Expand Down
7 changes: 3 additions & 4 deletions src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
import htsjdk.beta.plugin.variants.VariantsFormats;

/**
* Constants for specifying standard primary content types, secondary content types, and (optional) formats
* for resources contained in a {@link Bundle}.
* Constants for specifying standard content types and formats for resources contained in a {@link Bundle}.
*
* Bundles generally contain one primary resource, plus one or more secondary resources such as an index or md5 file.
* Each resource has an associated content type, and optionally a format. The bundle itself has a primary content
Expand All @@ -17,8 +16,8 @@
* correspond to one of the enum values in {@link htsjdk.beta.plugin.HtsContentType}, since each of these has a
* corresponding {@link htsjdk.beta.plugin.HtsCodec} that handles that type of resource, such as reads or variants.
*
* Secondary resources can also be any string, but the standard secondary content types are defined here, i.e., for
* primary content type "READS", a secondary content type might be "READS_INDEX".
* Secondary resource content types can also be any string, but the standard secondary content types are defined
* here, e.g., for primary content type "READS", a secondary content type might be "READS_INDEX".
*
* Finally, each resource in a bundle can have an optional format, which is a string that corresponds to the format
* for that resource. For example, a primary content type of "READS" might have a format of "BAM".
Expand Down
14 changes: 3 additions & 11 deletions src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ public class ReadsBundle<T extends IOPath> extends Bundle implements Serializabl
* @param reads An {@link IOPath}-derived object that represents a source of reads.
*/
public ReadsBundle(final T reads) {
this(Arrays.asList(toInputResource(
BundleResourceType.CT_ALIGNED_READS,
ValidationUtils.nonNull(reads, BundleResourceType.CT_ALIGNED_READS))));
this(Arrays.asList(toInputResource(BundleResourceType.CT_ALIGNED_READS, reads)));
}

/**
Expand All @@ -59,10 +57,8 @@ public ReadsBundle(final T reads) {
*/
public ReadsBundle(final T reads, final T index) {
this(Arrays.asList(
toInputResource(BundleResourceType.CT_ALIGNED_READS, ValidationUtils.nonNull(reads, BundleResourceType.CT_ALIGNED_READS)),
toInputResource(
BundleResourceType.CT_READS_INDEX,
ValidationUtils.nonNull(index, BundleResourceType.CT_READS_INDEX))));
toInputResource(BundleResourceType.CT_ALIGNED_READS, reads),
toInputResource(BundleResourceType.CT_READS_INDEX, index)));
}

/**
Expand Down Expand Up @@ -174,10 +170,6 @@ public static <T extends IOPath> ReadsBundle<T> resolveIndex(
return new ReadsBundle<>(reads);
}

/**
 * Determine whether a path appears to refer to a serialized bundle, judged solely by whether the path
 * component of its URI ends with {@link BundleJSON#BUNDLE_EXTENSION}. The contents are not inspected.
 *
 * @param rawReadPath the path to test
 * @return true if the URI path of {@code rawReadPath} ends with the bundle file extension
 */
public static boolean looksLikeAReadsBundle(final IOPath rawReadPath) {
    final String uriPath = rawReadPath.getURI().getPath();
    return uriPath.endsWith(BundleJSON.BUNDLE_EXTENSION);
}

private static <T extends IOPath> IOPathResource toInputResource(final String providedContentType, final T ioPath) {
ValidationUtils.nonNull(ioPath, "ioPath");
final Optional<Tuple<String, String>> typePair = getInferredContentTypes(ioPath);
Expand Down
Loading

0 comments on commit 46f6f0f

Please sign in to comment.