Skip to content

Commit

Permalink
Add subject type option
Browse files Browse the repository at this point in the history
  • Loading branch information
lszeremeta committed Apr 3, 2021
1 parent 33ab519 commit 00ed6fb
Show file tree
Hide file tree
Showing 5 changed files with 365 additions and 65 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ Running SDFEater without parameters displays help.
* `-i,--input <arg>` - input SDF file path (required)
* `-f,--format <arg>` - output format (e.g. `cypher`, `jsonld`, `cvme`, `smiles`, `inchi`) (required; full list below)
* `-p,--periodic` - add additional atoms data from [periodic table](https://github.com/lszeremeta/SDFEater/blob/master/src/main/resources/pl/edu/uwb/ii/sdfeater/periodic_table.json) (for `cypher` output format)
* `-s,--subject <arg>` - subject type (`iri`, `uuid`, `bnode`; `iri` by default; for all formats excluding `cypher`, `cvme`, `smiles`, `inchi`)
* `-u,--urls` - try to generate full database URLs instead of IDs (for `cypher` output format, always enabled in `cvme`)

Remember to use the appropriate file path when using the Docker image. Suppose you mounted your local directory `/home/user/input` under `/app/input` and the path to the SDF file you want to use in SDFEater is `/home/user/input/file.sdf`. In this case, enter the path `/app/input/file.sdf` or `input/file.sdf` as the value of the `-i` argument.
Expand Down
11 changes: 6 additions & 5 deletions src/main/java/pl/edu/uwb/ii/sdfeater/File.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,10 @@ class File {
* appropriate program structures
*
* @param molecule Molecule object to which values from the file will be entered
* @param format Output format from Format enum
* @param format Output format
* @param subject Subject type
*/
void parse(Molecule molecule, SDFEater.Format format) {
void parse(Molecule molecule, SDFEater.Format format, SDFEater.Subject subject) {
try {
FileInputStream fstream = new FileInputStream(filename);
BufferedReader br = new BufferedReader(new InputStreamReader(fstream));
Expand Down Expand Up @@ -156,13 +157,13 @@ void parse(Molecule molecule, SDFEater.Format format) {
case jsonldhtml:
case rdfxml:
case rdfthrift:
molecule.addToJenaModel();
molecule.addToJenaModel(subject);
break;
case rdfa:
molecule.printRDFaMolecule();
molecule.printRDFaMolecule(subject);
break;
case microdata:
molecule.printMicrodataMolecule();
molecule.printMicrodataMolecule(subject);
break;
default:
break;
Expand Down
43 changes: 35 additions & 8 deletions src/main/java/pl/edu/uwb/ii/sdfeater/Molecule.java
Original file line number Diff line number Diff line change
Expand Up @@ -324,9 +324,20 @@ void printChemSKOSMolecule() {

/**
* Add main molecule data to Jena model
*
* @param subject subject type
*/
void addToJenaModel() {
Resource me = ResourceFactory.createResource("http://example.com/molecule#entity" + createID());
void addToJenaModel(SDFEater.Subject subject) {
Resource me = ResourceFactory.createResource();

if (subject == SDFEater.Subject.iri) {
me = ResourceFactory.createResource("http://example.com/molecule#entity" + createID());
} else if (subject == SDFEater.Subject.uuid) {
me = ResourceFactory.createResource("urn:uuid:" + uuid);
} else if (subject == SDFEater.Subject.bnode) {
me = ResourceFactory.createResource();
}

for (Map.Entry<String, List<String>> entry : properties.entrySet()) {

String key = entry.getKey();
Expand Down Expand Up @@ -381,7 +392,7 @@ void addToJenaModel() {
/**
* Print main molecule data in RDFa
*
* @param subject subject type
*/
void printRDFaMolecule() {
void printRDFaMolecule(SDFEater.Subject subject) {
StringBuilder output_str = new StringBuilder();
for (Map.Entry<String, List<String>> entry : properties.entrySet()) {
String key = entry.getKey();
Expand Down Expand Up @@ -421,8 +432,15 @@ void printRDFaMolecule() {
}

if (output_str.length() > 0) {
String mID = createID();
System.out.println(" <div typeof='schema:MolecularEntity' about='http://example.com/molecule#entity" + mID + "' id='entity" + mID + "'>");
if (subject == SDFEater.Subject.iri) {
String mID = createID();
System.out.println(" <div typeof='schema:MolecularEntity' about='http://example.com/molecule#entity" + mID + "' id='entity" + mID + "'>");
} else if (subject == SDFEater.Subject.uuid) {
System.out.println(" <div typeof='schema:MolecularEntity' about='urn:uuid:" + uuid + "'>");
} else if (subject == SDFEater.Subject.bnode) {
System.out.println(" <div typeof='schema:MolecularEntity' about='_:b" + createID() + "'>");
}

System.out.print(output_str);
System.out.println(" </div>");
}
Expand All @@ -431,8 +449,10 @@ void printRDFaMolecule() {

/**
* Print main molecule data in Microdata
*
* @param subject subject type
*/
void printMicrodataMolecule() {
void printMicrodataMolecule(SDFEater.Subject subject) {
StringBuilder output_str = new StringBuilder();
for (Map.Entry<String, List<String>> entry : properties.entrySet()) {
String key = entry.getKey();
Expand Down Expand Up @@ -472,8 +492,15 @@ void printMicrodataMolecule() {
}

if (output_str.length() > 0) {
String mID = createID();
System.out.println(" <div itemscope itemtype='http://schema.org/MolecularEntity' itemid='http://example.com/molecule#entity" + mID + "' id='entity" + mID + "'>");
if (subject == SDFEater.Subject.iri) {
String mID = createID();
System.out.println(" <div itemscope itemtype='http://schema.org/MolecularEntity' itemid='http://example.com/molecule#entity" + mID + "' id='entity" + mID + "'>");
} else if (subject == SDFEater.Subject.uuid) {
System.out.println(" <div itemscope itemtype='http://schema.org/MolecularEntity' itemid='urn:uuid:" + uuid + "'>");
} else if (subject == SDFEater.Subject.bnode) {
System.out.println(" <div itemscope itemtype='http://schema.org/MolecularEntity' itemid='_:b" + createID() + "'>");
}

System.out.print(output_str);
System.out.println(" </div>");
}
Expand Down
50 changes: 34 additions & 16 deletions src/main/java/pl/edu/uwb/ii/sdfeater/SDFEater.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ public static void main(String[] args) {
Option formatarg = new Option("f", "format", true, "output format (cypher, cvme, smiles, inchi, turtle, ntriples, rdfxml, rdfthrift, jsonldhtml, jsonld, rdfa, microdata)");
formatarg.setRequired(true);
options.addOption(formatarg);
Option subject = new Option("s", "subject", true, "subject type (iri, uuid, bnode; iri by default); for all formats excluding cypher, cvme, smiles, inchi");
subject.setRequired(false);
options.addOption(subject);
Option urls = new Option("u", "urls", false, "try to generate full database URLs instead of IDs (for cypher output format, always enabled in cvme)");
urls.setRequired(false);
options.addOption(urls);
Expand All @@ -103,51 +106,57 @@ public static void main(String[] args) {
if (cmd.hasOption("format")) {
String format = cmd.getOptionValue("format");
if (format.equalsIgnoreCase("cypher") && !cmd.hasOption("urls") && !cmd.hasOption("periodic")) {
file.parse(molecule, Format.cypher);
file.parse(molecule, Format.cypher, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("cypher") && cmd.hasOption("urls") && !cmd.hasOption("periodic")) {
file.parse(molecule, Format.cypheru);
file.parse(molecule, Format.cypheru, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("cypher") && cmd.hasOption("periodic") && !cmd.hasOption("urls")) {
loadPeriodicTableData();
file.parse(molecule, Format.cypherp);
file.parse(molecule, Format.cypherp, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("cypher") && cmd.hasOption("urls") && cmd.hasOption("periodic")) {
loadPeriodicTableData();
file.parse(molecule, Format.cypherup);
file.parse(molecule, Format.cypherup, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("cvme")) {
file.parse(molecule, Format.cvme);
file.parse(molecule, Format.cvme, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("smiles")) {
file.parse(molecule, Format.smiles);
file.parse(molecule, Format.smiles, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("inchi")) {
file.parse(molecule, Format.inchi);
file.parse(molecule, Format.inchi, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("turtle")) {
initializeJenaModel();
file.parse(molecule, Format.turtle);
file.parse(molecule, Format.turtle, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("ntriples")) {
initializeJenaModel();
file.parse(molecule, Format.ntriples);
file.parse(molecule, Format.ntriples, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("jsonldhtml")) {
initializeJenaModel();
file.parse(molecule, Format.jsonldhtml);
file.parse(molecule, Format.jsonldhtml, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("jsonld")) {
initializeJenaModel();
file.parse(molecule, Format.jsonld);
file.parse(molecule, Format.jsonld, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("rdfxml")) {
initializeJenaModel();
file.parse(molecule, Format.rdfxml);
file.parse(molecule, Format.rdfxml, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("rdfthrift")) {
initializeJenaModel();
file.parse(molecule, Format.rdfthrift);
file.parse(molecule, Format.rdfthrift, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("rdfa")) {
file.parse(molecule, Format.rdfa);
file.parse(molecule, Format.rdfa, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else if (format.equalsIgnoreCase("microdata")) {
file.parse(molecule, Format.microdata);
file.parse(molecule, Format.microdata, Subject.valueOf(cmd.getOptionValue("subject", Subject.iri.toString())));
} else {
System.err.println("The selected format is not supported");
formatter.printHelp("SDFEater.jar", options);
}

}
} catch (IllegalArgumentException e) {
System.err.println("Incorrect option selected");
formatter.printHelp("SDFEater.jar", options);
} catch (ParseException e) {
System.err.println(e.getMessage());
System.err.println("Parse error: " + e.getMessage());
formatter.printHelp("SDFEater.jar", options);
} catch (Exception e) {
System.err.println("Error: " + e.getMessage());
formatter.printHelp("SDFEater.jar", options);
}
}
Expand All @@ -172,4 +181,13 @@ public enum Format {
rdfa,
microdata
}

/**
* Subject type: selects how the subject identifier of each molecule is generated.
* Chosen with the -s/--subject command-line option; iri is used when the option is absent.
*/
public enum Subject {
// IRI subject: "http://example.com/molecule#entity" followed by the value returned by createID()
iri,
// UUID subject: "urn:uuid:" followed by the molecule's uuid
uuid,
// Blank node subject: anonymous resource in the Jena model; "_:b" + createID() in RDFa and Microdata output
bnode
}
}
Loading

0 comments on commit 00ed6fb

Please sign in to comment.