diff --git a/components-core/pom.xml b/components-core/pom.xml
index 8319a8e129..48a6506bdb 100644
--- a/components-core/pom.xml
+++ b/components-core/pom.xml
@@ -519,6 +519,11 @@
 			<groupId>org.apache.commons</groupId>
 			<artifactId>commons-math3</artifactId>
 		</dependency>
+		<dependency>
+			<groupId>org.apache.commons</groupId>
+			<artifactId>commons-pool2</artifactId>
+		</dependency>
+
 		<dependency>
 			<groupId>org.jgrapht</groupId>
 			<artifactId>jgrapht-core</artifactId>
diff --git a/components-core/src/main/java/org/dllearner/algorithms/celoe/AccuracyBasedComparator.java b/components-core/src/main/java/org/dllearner/algorithms/celoe/AccuracyBasedComparator.java
new file mode 100644
index 0000000000..5ac2fa46d7
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/celoe/AccuracyBasedComparator.java
@@ -0,0 +1,72 @@
+/**
+ * Copyright (C) 2007 - 2016, Jens Lehmann
+ *
+ * This file is part of DL-Learner.
+ *
+ * DL-Learner is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * DL-Learner is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.dllearner.algorithms.celoe;
+
+import org.dllearner.utilities.owl.OWLClassExpressionLengthMetric;
+import org.dllearner.utilities.owl.OWLClassExpressionUtils;
+
+import java.util.Comparator;
+
+/**
+ * Orders {@link OENode}s ascending by accuracy; at equal accuracy the node
+ * with the shorter class expression is considered greater (i.e. better), so
+ * in an ascending-sorted collection the worst candidate comes first.
+ */
+public class AccuracyBasedComparator implements Comparator<OENode> {
+
+    /** Metric used to measure the length of a node's class expression. */
+    private final OWLClassExpressionLengthMetric lengthMetric;
+
+    public AccuracyBasedComparator(OWLClassExpressionLengthMetric lengthMetric) {
+        this.lengthMetric = lengthMetric;
+    }
+
+    @Override
+    public int compare(OENode node1, OENode node2) {
+        // primary criterion: accuracy (ascending)
+        int result = compareByAccuracy(node1, node2);
+
+        if (result != 0) {
+            return result;
+        }
+
+        // tie-breaker: expression length
+        return compareByLength(node1, node2);
+    }
+
+    private int compareByAccuracy(OENode node1, OENode node2) {
+        double node1Accuracy = node1.getAccuracy();
+        double node2Accuracy = node2.getAccuracy();
+
+        return Double.compare(node1Accuracy, node2Accuracy);
+    }
+
+    private int compareByLength(OENode node1, OENode node2) {
+        int node1Length = OWLClassExpressionUtils.getLength(node1.getDescription(), lengthMetric);
+        int node2Length = OWLClassExpressionUtils.getLength(node2.getDescription(), lengthMetric);
+
+        // arguments deliberately reversed: the shorter expression sorts as "greater"
+        return Integer.compare(node2Length, node1Length);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        // every instance imposes the same ordering scheme
+        return (o instanceof AccuracyBasedComparator);
+    }
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java b/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java
index 369d0d944e..e3bc6ea4d6 100644
--- a/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java
+++ b/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java
@@ -27,10 +27,7 @@
import org.dllearner.core.owl.DatatypePropertyHierarchy;
import org.dllearner.core.owl.ObjectPropertyHierarchy;
import org.dllearner.kb.OWLAPIOntology;
-import org.dllearner.learningproblems.ClassAsInstanceLearningProblem;
-import org.dllearner.learningproblems.ClassLearningProblem;
-import org.dllearner.learningproblems.PosNegLP;
-import org.dllearner.learningproblems.PosOnlyLP;
+import org.dllearner.learningproblems.*;
import org.dllearner.reasoning.ClosedWorldReasoner;
import org.dllearner.reasoning.OWLAPIReasoner;
import org.dllearner.reasoning.ReasonerImplementation;
@@ -50,6 +47,8 @@
import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
import java.io.File;
+import java.text.DecimalFormat;
+import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.TimeUnit;
@@ -171,6 +170,22 @@ public class CELOE extends AbstractCELA implements Cloneable{
private boolean stopOnFirstDefinition = false;
private int expressionTestCountLastImprovement;
+
+ OWLClassExpressionLengthMetric lengthMetric = OWLClassExpressionLengthMetric.getDefaultMetric();
+
+ private TreeMap<OENode, Double> solutionCandidates;
+ private final double solutionCandidatesMinAccuracyDiff = 0.0001;
+
+ @ConfigOption(defaultValue = "0.0", description = "determines a lower bound on noisiness of an expression with respect to noisePercentage " +
+ "in order to be considered a reasonable solution candidate (must be non-negative), e.g. for noisePercentage = 15 and noisePercentageMargin = 5, " +
+ "the algorithm will suggest expressions with the number of misclassified positives less than or equal to 20% of all examples " +
+ "as solution candidates as well; note: difference between accuracies of any two candidates must be at least 0.01% to ensure diversity")
+ private double noisePercentageMargin = 0.0;
+
+ @ConfigOption(defaultValue = "20", description = "the number of solution candidates within margin to be presented, sorted in descending order by accuracy")
+ private int maxNrOfResultsWithinMargin = 20;
+
+ private double noiseWithMargin;
@SuppressWarnings("unused")
@@ -228,6 +243,9 @@ public CELOE(CELOE celoe){
setWriteSearchTree(celoe.writeSearchTree);
setReplaceSearchTree(celoe.replaceSearchTree);
+
+ setMaxNrOfResultsWithinMargin(celoe.maxNrOfResultsWithinMargin);
+ setNoisePercentageMargin(celoe.noisePercentageMargin);
}
public CELOE(AbstractClassExpressionLearningProblem problem, AbstractReasonerComponent reasoner) {
@@ -313,6 +331,17 @@ public void init() throws ComponentInitException {
if (!((AbstractRefinementOperator) operator).isInitialized())
operator.init();
+
+ operator.setLengthMetric(lengthMetric);
+
+ AccuracyBasedComparator solutionComparator = new AccuracyBasedComparator(lengthMetric);
+ solutionCandidates = new TreeMap<>(solutionComparator);
+
+ if (noisePercentageMargin < 0) {
+ noisePercentageMargin = 0.0;
+ }
+
+ noiseWithMargin = (noisePercentage + noisePercentageMargin) / 100.0;
initialized = true;
}
@@ -327,6 +356,8 @@ public void start() {
currentHighestAccuracy = 0.0;
OENode nextNode;
+ String timeStamp = new SimpleDateFormat("HH.mm.ss").format(new Date());
+ logger.info("Time " + getCurrentCpuMillis() / 1000.0 + "s; " + timeStamp);
logger.info("start class:" + startClass);
addNode(startClass, null);
@@ -372,9 +403,17 @@ public void start() {
// print some stats
printAlgorithmRunStats();
+
+ printSolutionCandidates();
// print solution(s)
logger.info("solutions:\n" + getSolutionString());
+
+ if (learningProblem instanceof PosNegLP) {
+ ((PosNegLP) learningProblem).printTestEvaluation(bestEvaluatedDescriptions.getBest().getDescription());
+ }
+
+ printBestConceptsTimesAndAccuracies();
isRunning = false;
}
@@ -506,7 +545,7 @@ private TreeSet refineNode(OENode node) {
MonitorFactory.getTimeMonitor("refineNode").stop();
return refinements;
}
-
+
/**
* Add node to search tree if it is not too weak.
* @return TRUE if node was added and FALSE otherwise
@@ -528,27 +567,27 @@ private boolean addNode(OWLClassExpression description, OENode parentNode) {
logger.trace(sparql_debug, sparql_debug_out + "NOT ALLOWED");
return false;
}
-
+
// quality of class expression (return if too weak)
Monitor mon = MonitorFactory.start("lp");
logger.trace(sparql_debug, sparql_debug_out);
double accuracy = learningProblem.getAccuracyOrTooWeak(description, noise);
logger.trace(sparql_debug, "`acc:"+accuracy);
mon.stop();
-
+
// issue a warning if accuracy is not between 0 and 1 or -1 (too weak)
if(accuracy > 1.0 || (accuracy < 0.0 && accuracy != -1)) {
throw new RuntimeException("Invalid accuracy value " + accuracy + " for class expression " + description +
". This could be caused by a bug in the heuristic measure and should be reported to the DL-Learner bug tracker.");
}
-
+
expressionTests++;
-
+
// return FALSE if 'too weak'
if(accuracy == -1) {
return false;
}
-
+
OENode node = new OENode(description, accuracy);
searchTree.addNode(parentNode, node);
@@ -616,7 +655,23 @@ private boolean addNode(OWLClassExpression description, OENode parentNode) {
// System.out.println(bestEvaluatedDescriptions.getSet().size());
}
-
+
+ if (accuracy >= 1 - noiseWithMargin) {
+ if (solutionCandidates.isEmpty()
+ || (accuracy > solutionCandidates.firstKey().getAccuracy()
+ && solutionCandidates.keySet().stream().allMatch(
+ n -> Math.abs(accuracy - n.getAccuracy()) > solutionCandidatesMinAccuracyDiff
+ )
+ )
+ ) {
+ solutionCandidates.put(node, getCurrentCpuMillis() / 1000.0);
+ }
+
+ if (solutionCandidates.size() > maxNrOfResultsWithinMargin) {
+ solutionCandidates.pollFirstEntry();
+ }
+ }
+
return true;
}
@@ -747,8 +802,8 @@ private boolean terminationCriteriaSatisfied() {
stop ||
(maxClassExpressionTestsAfterImprovement != 0 && (expressionTests - expressionTestCountLastImprovement >= maxClassExpressionTestsAfterImprovement)) ||
(maxClassExpressionTests != 0 && (expressionTests >= maxClassExpressionTests)) ||
- (maxExecutionTimeInSecondsAfterImprovement != 0 && ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSecondsAfterImprovement* 1000000000L))) ||
- (maxExecutionTimeInSeconds != 0 && ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds* 1000000000L))) ||
+ (maxExecutionTimeInSecondsAfterImprovement != 0 && ((getCurrentCpuMillis() - timeLastImprovement) >= (maxExecutionTimeInSecondsAfterImprovement * 1000L))) ||
+ (maxExecutionTimeInSeconds != 0 && (getCurrentCpuMillis() >= (maxExecutionTimeInSeconds * 1000L))) ||
(terminateOnNoiseReached && (100*getCurrentlyBestAccuracy()>=100-noisePercentage)) ||
(stopOnFirstDefinition && (getCurrentlyBestAccuracy() >= 1));
}
@@ -761,34 +816,82 @@ private void reset() {
bestEvaluatedDescriptions.getSet().clear();
expressionTests = 0;
runtimeVsBestScore.clear();
+
+ solutionCandidates.clear();
}
private void printAlgorithmRunStats() {
+ String timeStamp = new SimpleDateFormat("HH.mm.ss").format(new Date());
+ logger.info("Time " + getCurrentCpuMillis() / 1000.0 + "s; " + timeStamp);
+
if (stop) {
logger.info("Algorithm stopped ("+expressionTests+" descriptions tested). " + searchTree.size() + " nodes in the search tree.\n");
+ logger.info(reasoner.toString());
} else {
- totalRuntimeNs = System.nanoTime()-nanoStartTime;
- logger.info("Algorithm terminated successfully (time: " + Helper.prettyPrintNanoSeconds(totalRuntimeNs) + ", "+expressionTests+" descriptions tested, " + searchTree.size() + " nodes in the search tree).\n");
+ logger.info("Algorithm terminated successfully ("+expressionTests+" descriptions tested, " + searchTree.size() + " nodes in the search tree).\n");
logger.info(reasoner.toString());
}
}
-
+
+ private void printSolutionCandidates() {
+ DecimalFormat df = new DecimalFormat();
+
+ if (solutionCandidates.size() > 0) {
+ // we do not need to print the best node if we display the top 20 solutions below anyway
+ logger.info("solutions within margin (at most " + maxNrOfResultsWithinMargin + " are shown):");
+ int show = 1;
+ for (OENode c : solutionCandidates.descendingKeySet()) {
+ int tpTest = learningProblem instanceof PosNegLP
+ ? ((PosNegLP) learningProblem).getTestCoverage(c.getDescription())
+ : 0;
+
+ logger.info(show + ": " + renderer.render(c.getDescription())
+ + " (accuracy " + df.format(100 * c.getAccuracy()) + "% / "
+ + df.format(100 * computeTestAccuracy(c.getDescription())) + "%"
+ + ", coverage " + c.getNumberOfCoveredPositiveExamples() + " / " + tpTest
+ + ", length " + OWLClassExpressionUtils.getLength(c.getDescription())
+ + ", depth " + OWLClassExpressionUtils.getDepth(c.getDescription())
+ + ", time " + df.format(solutionCandidates.get(c)) + "s)");
+ if (show >= maxNrOfResultsWithinMargin) {
+ break;
+ }
+ show++;
+ }
+ } else {
+ logger.info("no appropriate solutions within margin found (try increasing the noisePercentageMargin)");
+ }
+ }
+
private void showIfBetterSolutionsFound() {
if(!singleSuggestionMode && bestEvaluatedDescriptions.getBestAccuracy() > currentHighestAccuracy) {
currentHighestAccuracy = bestEvaluatedDescriptions.getBestAccuracy();
expressionTestCountLastImprovement = expressionTests;
- timeLastImprovement = System.nanoTime();
+ timeLastImprovement = getCurrentCpuMillis();
long durationInMillis = getCurrentRuntimeInMilliSeconds();
String durationStr = getDurationAsString(durationInMillis);
+ double cpuTime = getCurrentCpuMillis() / 1000.0;
+
+ OWLClassExpression bestDescription = bestEvaluatedDescriptions.getBest().getDescription();
+ double testAccuracy = computeTestAccuracy(bestDescription);
+
// track new best accuracy if enabled
if(keepTrackOfBestScore) {
runtimeVsBestScore.put(getCurrentRuntimeInMilliSeconds(), currentHighestAccuracy);
}
- logger.info("more accurate (" + dfPercent.format(currentHighestAccuracy) + ") class expression found after " + durationStr + ": " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription()));
+
+ logger.info(
+ "Time " + cpuTime +
+ "s: more accurate (training: " + dfPercent.format(currentHighestAccuracy) +
+ ", test: " + dfPercent.format(testAccuracy) +
+ ") class expression found after " + durationStr + ": " +
+ descriptionToString(bestEvaluatedDescriptions.getBest().getDescription())
+ );
+
+ recordBestConceptTimeAndAccuracy(cpuTime, currentHighestAccuracy, testAccuracy);
}
}
-
+
private void writeSearchTree(TreeSet refinements) {
StringBuilder treeString = new StringBuilder("best node: ").append(bestEvaluatedDescriptions.getBest()).append("\n");
if (refinements.size() > 1) {
@@ -1100,6 +1203,22 @@ public SortedMap getRuntimeVsBestScore(long ticksIntervalTimeValue
return map;
}
+ public int getMaxNrOfResultsWithinMargin() {
+ return maxNrOfResultsWithinMargin;
+ }
+
+ public void setMaxNrOfResultsWithinMargin(int maxNrOfResultsWithinMargin) {
+ this.maxNrOfResultsWithinMargin = maxNrOfResultsWithinMargin;
+ }
+
+ public double getNoisePercentageMargin() {
+ return noisePercentageMargin;
+ }
+
+ public void setNoisePercentageMargin(double noisePercentageMargin) {
+ this.noisePercentageMargin = noisePercentageMargin;
+ }
+
/* (non-Javadoc)
* @see java.lang.Object#clone()
*/
diff --git a/components-core/src/main/java/org/dllearner/algorithms/celoe/OENode.java b/components-core/src/main/java/org/dllearner/algorithms/celoe/OENode.java
index 8c1423c7e2..aff8edf4ca 100644
--- a/components-core/src/main/java/org/dllearner/algorithms/celoe/OENode.java
+++ b/components-core/src/main/java/org/dllearner/algorithms/celoe/OENode.java
@@ -19,13 +19,17 @@
package org.dllearner.algorithms.celoe;
import org.dllearner.core.AbstractSearchTreeNode;
+import org.dllearner.core.LearningProblem;
+import org.dllearner.learningproblems.PosNegLP;
import org.dllearner.utilities.datastructures.SearchTreeNode;
import org.dllearner.utilities.owl.OWLAPIRenderers;
import org.dllearner.utilities.owl.OWLClassExpressionUtils;
import org.semanticweb.owlapi.model.OWLClassExpression;
+import org.semanticweb.owlapi.model.OWLIndividual;
import java.text.DecimalFormat;
-import java.util.Map;
+import java.util.*;
+import java.util.stream.Collectors;
/**
* A node in the search tree of the ontology engineering algorithm.
@@ -54,7 +58,21 @@ public class OENode extends AbstractSearchTreeNode implements SearchTree
// OWLClassExpression in this node - it is a better heuristic indicator than child count
// (and avoids the problem that adding children changes the heuristic value)
private int refinementCount = 0;
-
+
+ private static boolean useCompactedCoverage = false;
+
+ private static OWLIndividual[] allPositiveExamples;
+ private static OWLIndividual[] allNegativeExamples;
+
+ private static Map<OWLIndividual, Integer> positiveExamplesIndices;
+ private static Map<OWLIndividual, Integer> negativeExamplesIndices;
+
+ private int[] coveredPositiveExamplesCompact;
+ private int[] coveredNegativeExamplesCompact;
+
+ private final Set<OWLIndividual> coveredPositiveExamples = new TreeSet<>();
+ private final Set<OWLIndividual> coveredNegativeExamples = new TreeSet<>();
+
private static DecimalFormat dfPercent = new DecimalFormat("0.00%");
public OENode(OWLClassExpression description, double accuracy) {
@@ -135,4 +153,120 @@ public int getRefinementCount() {
public void setRefinementCount(int refinementCount) {
this.refinementCount = refinementCount;
}
+
+ public Set<OWLIndividual> getCoveredPositiveExamples() {
+ if (useCompactedCoverage) {
+ return getCoveredPositiveExamplesCompact();
+ }
+
+ return coveredPositiveExamples;
+ }
+
+ private Set<OWLIndividual> getCoveredPositiveExamplesCompact() {
+ return Arrays.stream(coveredPositiveExamplesCompact).mapToObj(i -> allPositiveExamples[i])
+ .collect(Collectors.toSet());
+ }
+
+ public int getNumberOfCoveredPositiveExamples() {
+ return useCompactedCoverage ? coveredPositiveExamplesCompact.length : coveredPositiveExamples.size();
+ }
+
+ public Set<OWLIndividual> getCoveredNegativeExamples() {
+ if (useCompactedCoverage) {
+ return getCoveredNegativeExamplesCompact();
+ }
+
+ return coveredNegativeExamples;
+ }
+
+ private Set<OWLIndividual> getCoveredNegativeExamplesCompact() {
+ return Arrays.stream(coveredNegativeExamplesCompact).mapToObj(i -> allNegativeExamples[i])
+ .collect(Collectors.toSet());
+ }
+
+ public int getNumberOfCoveredNegativeExamples() {
+ return useCompactedCoverage ? coveredNegativeExamplesCompact.length : coveredNegativeExamples.size();
+ }
+
+ public void setCoveredPositiveExamples(Set<OWLIndividual> coveredPositiveExamples) {
+ if (useCompactedCoverage) {
+ setCoveredPositiveExamplesCompact(coveredPositiveExamples);
+ return;
+ }
+
+ this.coveredPositiveExamples.clear();
+
+ if (coveredPositiveExamples != null) {
+ this.coveredPositiveExamples.addAll(coveredPositiveExamples);
+ }
+ }
+
+ private void setCoveredPositiveExamplesCompact(Set<OWLIndividual> coveredPositiveExamples) {
+ coveredPositiveExamplesCompact = new int[coveredPositiveExamples.size()];
+
+ int ind = 0;
+ for (OWLIndividual ex : coveredPositiveExamples) {
+ coveredPositiveExamplesCompact[ind] = positiveExamplesIndices.get(ex);
+ ind++;
+ }
+ }
+
+ public void setCoveredNegativeExamples(Set<OWLIndividual> coveredNegativeExamples) {
+ if (useCompactedCoverage) {
+ setCoveredNegativeExamplesCompact(coveredNegativeExamples);
+ return;
+ }
+
+ this.coveredNegativeExamples.clear();
+
+ if (coveredNegativeExamples != null) {
+ this.coveredNegativeExamples.addAll(coveredNegativeExamples);
+ }
+ }
+
+ private void setCoveredNegativeExamplesCompact(Set<OWLIndividual> coveredNegativeExamples) {
+ coveredNegativeExamplesCompact = new int[coveredNegativeExamples.size()];
+
+ int ind = 0;
+ for (OWLIndividual ex : coveredNegativeExamples) {
+ coveredNegativeExamplesCompact[ind] = negativeExamplesIndices.get(ex);
+ ind++;
+ }
+ }
+
+ public static void enableCompactCoverageRepresentation(LearningProblem learningProblem) {
+ if (!(learningProblem instanceof PosNegLP)) {
+ throw new UnsupportedOperationException("Compacted coverage representation is only supported for PosNegLP learning problems.");
+ }
+
+ Set<OWLIndividual> positives = ((PosNegLP) learningProblem).getPositiveExamples();
+ Set<OWLIndividual> negatives = ((PosNegLP) learningProblem).getNegativeExamples();
+
+ enableCompactCoverageRepresentation(positives, negatives);
+ }
+
+ protected static void enableCompactCoverageRepresentation(Set<OWLIndividual> allPositiveExamples, Set<OWLIndividual> allNegativeExamples) {
+ OENode.allPositiveExamples = allPositiveExamples.toArray(OWLIndividual[]::new);
+ OENode.allNegativeExamples = allNegativeExamples.toArray(OWLIndividual[]::new);
+
+ Map<OWLIndividual, Integer> positiveExamplesIndices = new TreeMap<>();
+ Map<OWLIndividual, Integer> negativeExamplesIndices = new TreeMap<>();
+
+ int ind = 0;
+ for (OWLIndividual ex : OENode.allPositiveExamples) {
+ positiveExamplesIndices.put(ex, ind);
+ ind++;
+ }
+
+ ind = 0;
+ for (OWLIndividual ex : OENode.allNegativeExamples) {
+ negativeExamplesIndices.put(ex, ind);
+ ind++;
+ }
+
+ OENode.positiveExamplesIndices = positiveExamplesIndices;
+ OENode.negativeExamplesIndices = negativeExamplesIndices;
+
+ useCompactedCoverage = true;
+ }
}
\ No newline at end of file
diff --git a/components-core/src/main/java/org/dllearner/algorithms/ocel/ExampleBasedNode.java b/components-core/src/main/java/org/dllearner/algorithms/ocel/ExampleBasedNode.java
index 5a34f8e10e..3d900ba660 100644
--- a/components-core/src/main/java/org/dllearner/algorithms/ocel/ExampleBasedNode.java
+++ b/components-core/src/main/java/org/dllearner/algorithms/ocel/ExampleBasedNode.java
@@ -20,10 +20,12 @@
import org.dllearner.core.AbstractCELA;
import org.dllearner.core.AbstractSearchTreeNode;
+import org.dllearner.core.Heuristic;
import org.dllearner.core.StringRenderer;
import org.dllearner.learningproblems.PosNegLP;
import org.dllearner.utilities.datastructures.SearchTreeNode;
import org.dllearner.utilities.datastructures.WeakSearchTreeNode;
+import org.dllearner.utilities.owl.OWLClassExpressionLengthMetric;
import org.semanticweb.owlapi.model.OWLClassExpression;
import org.semanticweb.owlapi.model.OWLIndividual;
@@ -71,12 +73,14 @@ public enum QualityEvaluationMethod { START, REASONER, TOO_WEAK_LIST, OVERLY_GEN
private boolean isPosOnlyCandidate = true;
private OCEL learningAlgorithm;
+ private ExampleBasedHeuristic heuristic;
- public ExampleBasedNode(OWLClassExpression concept, AbstractCELA learningAlgorithm) {
+ public ExampleBasedNode(OWLClassExpression concept, AbstractCELA learningAlgorithm, ExampleBasedHeuristic heuristic) {
this.concept = concept;
horizontalExpansion = 0;
isQualityEvaluated = false;
this.learningAlgorithm = (OCEL) learningAlgorithm;
+ this.heuristic = heuristic;
}
public void setHorizontalExpansion(int horizontalExpansion) {
@@ -145,8 +149,7 @@ public String getStats() {
// comment this out to display the heuristic score with default parameters
// learningAlgorithm.getHeuristic()
int nrOfPositiveExamples = ((PosNegLP) learningAlgorithm.getLearningProblem()).getPositiveExamples().size();
- int nrOfNegativeExamples = ((PosNegLP) learningAlgorithm.getLearningProblem()).getNegativeExamples().size();
- double heuristicScore = MultiHeuristic.getNodeScore(this, nrOfPositiveExamples, nrOfNegativeExamples, learningAlgorithm.getNegativeWeight(), learningAlgorithm.getStartNodeBonus(), learningAlgorithm.getExpansionPenaltyFactor(), learningAlgorithm.getNegationPenalty());
+ double heuristicScore = heuristic.getNodeScore(this);
ret += "h:" +df.format(heuristicScore) + " ";
int wrongPositives = nrOfPositiveExamples - coveredPositives.size();
diff --git a/components-core/src/main/java/org/dllearner/algorithms/ocel/MultiHeuristic.java b/components-core/src/main/java/org/dllearner/algorithms/ocel/MultiHeuristic.java
index b6def2af70..69e91784f5 100644
--- a/components-core/src/main/java/org/dllearner/algorithms/ocel/MultiHeuristic.java
+++ b/components-core/src/main/java/org/dllearner/algorithms/ocel/MultiHeuristic.java
@@ -23,10 +23,11 @@
import org.dllearner.core.ComponentInitException;
import org.dllearner.core.annotations.NoConfigOption;
import org.dllearner.core.config.ConfigOption;
-import org.semanticweb.owlapi.model.OWLClassExpression;
-import org.semanticweb.owlapi.model.OWLDataSomeValuesFrom;
-import org.semanticweb.owlapi.model.OWLObjectComplementOf;
+import org.dllearner.utilities.owl.ExpressionDecomposer;
+import org.dllearner.utilities.owl.OWLClassExpressionLengthMetric;
+import org.semanticweb.owlapi.model.*;
+import java.util.List;
import java.util.Set;
/**
@@ -100,6 +101,10 @@ public class MultiHeuristic implements ExampleBasedHeuristic, Component {
private int nrOfNegativeExamples;
@NoConfigOption
private int nrOfExamples;
+
+ private final ExpressionDecomposer decomposer = new ExpressionDecomposer();
+
+ private OWLClassExpressionLengthMetric lengthMetric = OWLClassExpressionLengthMetric.getDefaultMetric();
@Deprecated
public MultiHeuristic(int nrOfPositiveExamples, int nrOfNegativeExamples) {
@@ -161,7 +166,7 @@ public double getNodeScore(ExampleBasedNode node) {
} else {
accuracy += startNodeBonus;
}
- int he = node.getHorizontalExpansion() - getHeuristicLengthBonus(node.getConcept());
+ double he = node.getHorizontalExpansion() - getHeuristicLengthBonus(node.getConcept());
return accuracy + gainBonusFactor * gain - expansionPenaltyFactor * he - nodeChildPenalty * node.getChildren().size();
}
@@ -176,34 +181,30 @@ public static double getNodeScore(ExampleBasedNode node, int nrOfPositiveExample
// this function can be used to give some constructs a length bonus
// compared to their syntactic length
- private int getHeuristicLengthBonus(OWLClassExpression description) {
-
-
- int bonus = 0;
-
- Set<OWLClassExpression> nestedClassExpressions = description.getNestedClassExpressions();
- for (OWLClassExpression expression : nestedClassExpressions) {
- // do not count TOP symbols (in particular in ALL r.TOP and EXISTS r.TOP)
+ private double getHeuristicLengthBonus(OWLClassExpression description) {
+ double bonus = 0.0;
+
+ for (OWLClassExpression expression : decomposer.decompose(description)) {
+ // encourage the algorithm to refine EXISTS r.TOP and MIN n r.TOP
+ // as they provide little extra information
+ if ((expression instanceof OWLObjectSomeValuesFrom && ((OWLObjectSomeValuesFrom) expression).getFiller().isOWLThing())
+ || (expression instanceof OWLObjectMinCardinality && ((OWLObjectMinCardinality) expression).getFiller().isOWLThing())
+ ) {
+ bonus += lengthMetric.getClassLength() / 2.0;
+ }
+ // do not count TOP symbols in ALL r.TOP and MAX n r.BOTTOM
// as they provide no extra information
- if(expression.isOWLThing())
- bonus = 1; //2;
-
- // we put a penalty on negations, because they often overfit
- // (TODO: make configurable)
- else if(expression instanceof OWLObjectComplementOf) {
- bonus = -negationPenalty;
+ else if ((expression instanceof OWLObjectAllValuesFrom && ((OWLObjectAllValuesFrom) expression).getFiller().isOWLThing())
+ || (expression instanceof OWLObjectMaxCardinality && ((OWLObjectMaxCardinality) expression).getFiller().isOWLNothing())
+ ) {
+ bonus += lengthMetric.getClassLength();
}
-
-// if(OWLClassExpression instanceof BooleanValueRestriction)
-// bonus = -1;
-
- // some bonus for doubles because they are already penalised by length 3
- else if(expression instanceof OWLDataSomeValuesFrom) {
-// System.out.println(description);
- bonus = 3; //2;
+ // optionally, penalize negations (they may cause the algorithm to overfit)
+ else if(expression instanceof OWLObjectComplementOf) {
+ bonus -= negationPenalty;
}
}
-
+
return bonus;
}
@@ -270,4 +271,12 @@ public int getNegationPenalty() {
public void setNegationPenalty(int negationPenalty) {
this.negationPenalty = negationPenalty;
}
+
+ public OWLClassExpressionLengthMetric getLengthMetric() {
+ return lengthMetric;
+ }
+
+ public void setLengthMetric(OWLClassExpressionLengthMetric lengthMetric) {
+ this.lengthMetric = lengthMetric;
+ }
}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/ocel/OCEL.java b/components-core/src/main/java/org/dllearner/algorithms/ocel/OCEL.java
index 997bd3c68a..c7086c2910 100644
--- a/components-core/src/main/java/org/dllearner/algorithms/ocel/OCEL.java
+++ b/components-core/src/main/java/org/dllearner/algorithms/ocel/OCEL.java
@@ -48,6 +48,7 @@
import java.io.File;
import java.text.DecimalFormat;
+import java.text.SimpleDateFormat;
import java.util.*;
/**
@@ -83,14 +84,18 @@ public class OCEL extends AbstractCELA {
// often the learning problems needn't be accessed directly; instead
// use the example sets below and the posonly variable
- private OWLClassExpression startDescription;
private int nrOfExamples;
private int nrOfPositiveExamples;
private Set<OWLIndividual> positiveExamples;
private int nrOfNegativeExamples;
private Set<OWLIndividual> negativeExamples;
+ private Set<OWLIndividual> positiveTestExamples;
+ private Set<OWLIndividual> negativeTestExamples;
+ private boolean anyTestExamples = false;
+
private int allowedMisclassifications = 0;
+ private int allowedMisclassificationsWithinMargin = 0;
// search tree options
@ConfigOption(defaultValue = "false", description = "specifies whether to write a search tree")
@@ -149,8 +154,13 @@ public class OCEL extends AbstractCELA {
@ConfigOption(defaultValue = "30", description = "maximum number of candidates to retain")
private int candidatePostReductionSize = 30;
+ @ConfigOption(defaultValue = "300", description = "the number of seconds between two consecutive candidate reductions")
+ private long candidateReductionInterval = 300L;
+
// solution protocol
- private List solutions = new LinkedList<>();
+ private TreeMap solutions;
+ private TreeMap solutionCandidates;
+ private final double solutionCandidatesMinAccuracyDiff = 0.0001;
@ConfigOption(defaultValue = "false", description = "specifies whether to compute and log benchmark information")
private boolean computeBenchmarkInformation = false;
@@ -243,6 +253,20 @@ public class OCEL extends AbstractCELA {
@ConfigOption(defaultValue = "0.0", description = "noise regulates how many positives can be misclassified and when " +
"the algorithm terminates")
private double noisePercentage = noisePercentageDefault;
+ private double noise = noisePercentage / 100.0;
+
+ @ConfigOption(defaultValue = "0.0", description = "determines a lower bound on noisiness of an expression with respect to noisePercentage " +
+ "in order to be considered a reasonable solution candidate (must be non-negative), e.g. for noisePercentage = 15 and noisePercentageMargin = 5, " +
+ "the algorithm will suggest expressions with the number of misclassified positives less than or equal to 20% of all examples " +
+ "as solution candidates as well; note: difference between accuracies of any two candidates must be at least 0.01% to ensure diversity")
+ private double noisePercentageMargin = 0.0;
+
+ @ConfigOption(defaultValue = "20", description = "the number of solutions to be presented, sorted in descending order by accuracy")
+ private int maxNrOfResults = 20;
+
+ @ConfigOption(defaultValue = "20", description = "the number of solution candidates within margin to be presented, sorted in descending order by accuracy")
+ private int maxNrOfResultsWithinMargin = 20;
+
@ConfigOption(
defaultValue = "owl:Thing",
description = "You can specify a start class for the algorithm",
@@ -358,6 +382,10 @@ public void init() throws ComponentInitException {
}
operator.setLengthMetric(lengthMetric);
+ if (heuristic instanceof MultiHeuristic) {
+ ((MultiHeuristic) heuristic).setLengthMetric(lengthMetric);
+ }
+
// create an algorithm object and pass all configuration
// options to it
@@ -372,6 +400,20 @@ public void init() throws ComponentInitException {
// note: used concepts and roles do not need to be passed
// as argument, because it is sufficient to prepare the
// concept and role hierarchy accordingly
+
+ noise = noisePercentage / 100.0;
+
+ positiveTestExamples = ((PosNegLP) learningProblem).getPositiveTestExamples();
+ negativeTestExamples = ((PosNegLP) learningProblem).getNegativeTestExamples();
+ anyTestExamples = positiveTestExamples.size() > 0 || negativeTestExamples.size() > 0;
+
+ QualityBasedComparator solutionComparator = new QualityBasedComparator(lengthMetric);
+ solutions = new TreeMap<>(solutionComparator);
+ solutionCandidates = new TreeMap<>(solutionComparator);
+
+ if (noisePercentageMargin < 0) {
+ noisePercentageMargin = 0.0;
+ }
initialized = true;
}
@@ -386,6 +428,7 @@ public void start() {
searchTree = new SearchTreeNonWeakPartialSet<>(heuristic);
searchTreeStable = new SearchTreeNonWeak<>(nodeComparatorStable);
solutions.clear();
+ solutionCandidates.clear();
maxExecutionTimeAlreadyReached = false;
minExecutionTimeAlreadyReached = false;
guaranteeXgoodAlreadyReached = false;
@@ -409,17 +452,18 @@ public void start() {
*/
// calculate quality threshold required for a solution
- allowedMisclassifications = (int) Math.round(noisePercentage * nrOfExamples / 100);
+ allowedMisclassifications = (int) Math.round(noisePercentage * nrOfExamples / 100.0);
+ allowedMisclassificationsWithinMargin = (int) Math.round((noisePercentage + noisePercentageMargin) * nrOfExamples / 100.0);
// start search with start class
ExampleBasedNode startNode;
- if (startDescription == null) {
- startNode = new ExampleBasedNode(dataFactory.getOWLThing(), this);
+ if (startClass == null) {
+ startNode = new ExampleBasedNode(dataFactory.getOWLThing(), this, heuristic);
startNode.setCoveredExamples(positiveExamples, negativeExamples);
} else {
- startNode = new ExampleBasedNode(startDescription, this);
- Set coveredNegatives = reasoner.hasType(startDescription, negativeExamples);
- Set coveredPositives = reasoner.hasType(startDescription, positiveExamples);
+ startNode = new ExampleBasedNode(startClass, this, heuristic);
+ Set coveredNegatives = reasoner.hasType(startClass, negativeExamples);
+ Set coveredPositives = reasoner.hasType(startClass, positiveExamples);
startNode.setCoveredExamples(coveredPositives, coveredNegatives);
}
@@ -430,6 +474,8 @@ public void start() {
ExampleBasedNode bestNode = startNode;
ExampleBasedNode bestNodeStable = startNode;
+ String timeStamp = new SimpleDateFormat("HH.mm.ss").format(new Date());
+ logger.info("Time " + getCurrentCpuMillis() / 1000.0 + "s; " + timeStamp);
logger.info("starting top down refinement with: " + renderer.render(startNode.getConcept()) + " (" + df.format(100 * startNode.getAccuracy()) + "% accuracy)");
int loop = 0;
@@ -440,7 +486,7 @@ public void start() {
long lastReductionTime = System.nanoTime();
// try a traversal after x seconds
long traversalInterval = 300L * 1000000000L;
- long reductionInterval = 300L * 1000000000L;
+ long reductionInterval = candidateReductionInterval * 1000000000L;
long currentTime;
while (!isTerminationCriteriaReached()) {
@@ -468,13 +514,24 @@ public void start() {
// we record when a more accurate node is found and log it
if (bestNodeStable.getCovPosMinusCovNeg() < searchTreeStable.best()
.getCovPosMinusCovNeg()) {
- String acc = new DecimalFormat(".00%").format((searchTreeStable.best().getAccuracy()));
+ double time = getCurrentCpuMillis() / 1000.0;
+ double acc = (searchTreeStable.best().getAccuracy());
+ double testAcc = computeTestAccuracy(searchTreeStable.best().getConcept());
+
+ DecimalFormat percentFormatter = new DecimalFormat(".00%");
// no handling needed, it will just look ugly in the output
- logger.info("more accurate (" + acc + ") class expression found: " + renderer.render(searchTreeStable.best().getConcept()));
+ logger.info(
+ "Time " + time +
+ "s: more accurate (training: " + percentFormatter.format(acc) + ", test: " + percentFormatter.format(testAcc) +
+ ") class expression found: " + renderer.render(searchTreeStable.best().getConcept())
+ );
if (logger.isTraceEnabled()) {
logger.trace(Sets.difference(positiveExamples, bestNodeStable.getCoveredNegatives()).toString());
logger.trace(Sets.difference(negativeExamples, bestNodeStable.getCoveredNegatives()).toString());
}
+
+ recordBestConceptTimeAndAccuracy(time, acc, testAcc);
+
printBestSolutions(5);
printStatistics(false);
bestNodeStable = searchTreeStable.best();
@@ -512,17 +569,51 @@ public void start() {
loop++;
}// end while
+ timeStamp = new SimpleDateFormat("HH.mm.ss").format(new Date());
+ logger.info("Time " + getCurrentCpuMillis() / 1000.0 + "s; " + timeStamp);
+
+ if (solutionCandidates.size() > 0) {
+ // we do not need to print the best node if we display the top 20 solutions below anyway
+ logger.info("solutions within margin (at most " + maxNrOfResultsWithinMargin + " are shown):");
+ int show = 1;
+ for (ExampleBasedNode c : solutionCandidates.descendingKeySet()) {
+ int tpTest = learningProblem instanceof PosNegLP
+ ? ((PosNegLP) learningProblem).getTestCoverage(c.getConcept())
+ : 0;
+
+ logger.info(show + ": " + renderer.render(c.getConcept())
+ + " (accuracy " + df.format(100 * c.getAccuracy()) + "% / "
+ + df.format(100 * computeTestAccuracy(c.getConcept())) + "%"
+ + ", coverage " + c.getCoveredPositives().size() + " / " + tpTest
+ + ", length " + OWLClassExpressionUtils.getLength(c.getConcept())
+ + ", depth " + OWLClassExpressionUtils.getDepth(c.getConcept())
+ + ", time " + df.format(solutionCandidates.get(c)) + "s)");
+ if (show >= maxNrOfResultsWithinMargin) {
+ break;
+ }
+ show++;
+ }
+ } else {
+ logger.info("no appropriate solutions within margin found (try increasing the noisePercentageMargin)");
+ }
+
if (solutions.size() > 0) {
- int solutionLimit = 20;
// we do not need to print the best node if we display the top 20 solutions below anyway
- logger.info("solutions (at most " + solutionLimit + " are shown):");
+ logger.info("solutions (at most " + maxNrOfResults + " are shown):");
int show = 1;
- for (ExampleBasedNode c : solutions) {
+ for (ExampleBasedNode c : solutions.descendingKeySet()) {
+ int tpTest = learningProblem instanceof PosNegLP
+ ? ((PosNegLP) learningProblem).getTestCoverage(c.getConcept())
+ : 0;
+
logger.info(show + ": " + renderer.render(c.getConcept())
- + " (accuracy " + df.format(100 * c.getAccuracy()) + "%, length "
- + OWLClassExpressionUtils.getLength(c.getConcept())
- + ", depth " + OWLClassExpressionUtils.getDepth(c.getConcept()) + ")");
- if (show >= solutionLimit) {
+ + " (accuracy " + df.format(100 * c.getAccuracy()) + "% / "
+ + df.format(100 * computeTestAccuracy(c.getConcept())) + "%"
+ + ", coverage " + c.getCoveredPositives().size() + " / " + tpTest
+ + ", length " + OWLClassExpressionUtils.getLength(c.getConcept())
+ + ", depth " + OWLClassExpressionUtils.getDepth(c.getConcept())
+ + ", time " + df.format(solutions.get(c)) + "s)");
+ if (show >= maxNrOfResults) {
break;
}
show++;
@@ -531,6 +622,12 @@ public void start() {
logger.info("no appropriate solutions found (try increasing the noisePercentage parameter to what was reported as most accurate expression found above)");
}
+ if (learningProblem instanceof PosNegLP) {
+ ((PosNegLP) learningProblem).printTestEvaluation(bestNodeStable.getConcept());
+ }
+
+ printBestConceptsTimesAndAccuracies();
+
logger.debug("size of candidate set: " + searchTree.size());
printBestSolutions(20);
@@ -539,6 +636,7 @@ public void start() {
int conceptTests = conceptTestsReasoner + conceptTestsTooWeakList + conceptTestsOverlyGeneralList;
if (stop) {
logger.info("Algorithm stopped (" + conceptTests + " descriptions tested).\n");
+ logger.info(reasoner.toString());
} else {
logger.info("Algorithm terminated successfully (" + conceptTests + " descriptions tested).\n");
logger.info(reasoner.toString());
@@ -634,7 +732,7 @@ private void extendNodeProper(ExampleBasedNode node, OWLClassExpression concept,
properRefinements.add(refinement);
tooWeakList.add(refinement);
- ExampleBasedNode newNode = new ExampleBasedNode(refinement, this);
+ ExampleBasedNode newNode = new ExampleBasedNode(refinement, this, heuristic);
newNode.setHorizontalExpansion(OWLClassExpressionUtils.getLength(refinement, lengthMetric) - 1);
newNode.setTooWeak(true);
newNode.setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.TOO_WEAK_LIST);
@@ -691,7 +789,7 @@ private void extendNodeProper(ExampleBasedNode node, OWLClassExpression concept,
if (nonRedundant) {
// newly created node
- ExampleBasedNode newNode = new ExampleBasedNode(refinement, this);
+ ExampleBasedNode newNode = new ExampleBasedNode(refinement, this, heuristic);
// die -1 ist wichtig, da sonst keine gleich langen Refinements
// für den neuen Knoten erlaubt wären z.B. person => male
newNode.setHorizontalExpansion(OWLClassExpressionUtils.getLength(refinement, lengthMetric) - 1);
@@ -732,17 +830,18 @@ private void extendNodeProper(ExampleBasedNode node, OWLClassExpression concept,
// are performed => rely on fast instance checker)
for (OWLIndividual i : coveredPositives) {
// TODO: move code to a separate function
- if (quality != -1) {
- boolean covered = reasoner.hasType(refinement, i);
- if (!covered)
- misclassifiedPositives++;
- else
- newlyCoveredPositives.add(i);
+ if (quality == -1) {
+ break;
+ }
- if (misclassifiedPositives > allowedMisclassifications)
- quality = -1;
+ boolean covered = reasoner.hasType(refinement, i);
+ if (!covered)
+ misclassifiedPositives++;
+ else
+ newlyCoveredPositives.add(i);
- }
+ if (misclassifiedPositives > allowedMisclassifications)
+ quality = -1;
}
Set newlyCoveredNegatives = null;
@@ -757,6 +856,13 @@ private void extendNodeProper(ExampleBasedNode node, OWLClassExpression concept,
}
}
+// Set newlyCoveredPositives = reasoner.hasType(refinement, positiveExamples);
+// Set newlyCoveredNegatives = reasoner.hasType(refinement, negativeExamples);
+// int misclassifiedPositives = nrOfPositiveExamples - newlyCoveredPositives.size();
+//
+// if (misclassifiedPositives > allowedMisclassifications)
+// quality = -1;
+
propernessCalcReasoningTimeNs += System.nanoTime() - propCalcReasoningStart2;
newNode.setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.REASONER);
if (quality != -1 && !(((PosNegLP) learningProblem).getAccuracyMethod() instanceof AccMethodNoWeakness) &&
@@ -782,8 +888,30 @@ private void extendNodeProper(ExampleBasedNode node, OWLClassExpression concept,
tooWeakList.add(refinement);
} else {
// Lösung gefunden
- if (quality >= 0 && quality <= allowedMisclassifications) {
- solutions.add(newNode);
+ if (quality >= 0) {
+ if (quality <= allowedMisclassifications) {
+ solutions.put(newNode, getCurrentCpuMillis() / 1000.0);
+
+ if (solutions.size() > maxNrOfResults) {
+ solutions.pollFirstEntry();
+ }
+ }
+
+ if (quality <= allowedMisclassificationsWithinMargin) {
+ if (solutionCandidates.isEmpty()
+ || (newNode.getAccuracy() > solutionCandidates.firstKey().getAccuracy()
+ && solutionCandidates.keySet().stream().allMatch(
+ n -> Math.abs(newNode.getAccuracy() - n.getAccuracy()) > solutionCandidatesMinAccuracyDiff
+ )
+ )
+ ) {
+ solutionCandidates.put(newNode, getCurrentCpuMillis() / 1000.0);
+ }
+
+ if (solutionCandidates.size() > maxNrOfResultsWithinMargin) {
+ solutionCandidates.pollFirstEntry();
+ }
+ }
}
// we need to make sure that all positives are covered
@@ -983,7 +1111,7 @@ private void traverseTree() {
//noinspection UnusedAssignment
currentAccuracy = accuracy;
- if (accuracy > 1 - (noisePercentage / 100)) {
+ if (accuracy > 1 - noise) {
logger.info("traversal found " + mc);
logger.info("accuracy: " + accuracy);
System.exit(0);
@@ -1136,7 +1264,7 @@ private boolean isTerminationCriteriaReached() {
return true;
}
- long totalTimeNeeded = System.currentTimeMillis() - this.runtime;
+ long totalTimeNeeded = getCurrentCpuMillis();
long maxMilliSeconds = maxExecutionTimeInSeconds * 1000;
long minMilliSeconds = minExecutionTimeInSeconds * 1000;
int conceptTests = conceptTestsReasoner + conceptTestsTooWeakList + conceptTestsOverlyGeneralList;
@@ -1149,7 +1277,7 @@ private boolean isTerminationCriteriaReached() {
else if (maxExecutionTimeAlreadyReached)
return true;
//test
- else if (maxMilliSeconds < totalTimeNeeded) {
+ else if (maxMilliSeconds <= totalTimeNeeded) {
this.stop();
logger.info("Maximum time (" + maxExecutionTimeInSeconds
+ " seconds) reached, stopping now...");
@@ -1431,4 +1559,36 @@ public void setHeuristic(ExampleBasedHeuristic heuristic) {
public ExampleBasedHeuristic getHeuristic() {
return heuristic;
}
+
+ public long getCandidateReductionInterval() {
+ return candidateReductionInterval;
+ }
+
+ public void setCandidateReductionInterval(long candidateReductionInterval) {
+ this.candidateReductionInterval = candidateReductionInterval;
+ }
+
+ public double getNoisePercentageMargin() {
+ return noisePercentageMargin;
+ }
+
+ public void setNoisePercentageMargin(double noisePercentageMargin) {
+ this.noisePercentageMargin = noisePercentageMargin;
+ }
+
+ public double getMaxNrOfResults() {
+ return maxNrOfResults;
+ }
+
+ public void setMaxNrOfResults(int maxNrOfResults) {
+ this.maxNrOfResults = maxNrOfResults;
+ }
+
+ public double getMaxNrOfResultsWithinMargin() {
+ return maxNrOfResultsWithinMargin;
+ }
+
+ public void setMaxNrOfResultsWithinMargin(int maxNrOfResultsWithinMargin) {
+ this.maxNrOfResultsWithinMargin = maxNrOfResultsWithinMargin;
+ }
}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/ocel/QualityBasedComparator.java b/components-core/src/main/java/org/dllearner/algorithms/ocel/QualityBasedComparator.java
new file mode 100644
index 0000000000..cf94923d4d
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/ocel/QualityBasedComparator.java
@@ -0,0 +1,87 @@
+/**
+ * Copyright (C) 2007 - 2016, Jens Lehmann
+ *
+ * This file is part of DL-Learner.
+ *
+ * DL-Learner is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * DL-Learner is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.dllearner.algorithms.ocel;
+
+import org.dllearner.utilities.owl.OWLClassExpressionLengthMetric;
+import org.dllearner.utilities.owl.OWLClassExpressionUtils;
+
+import java.util.Comparator;
+
+public class QualityBasedComparator implements Comparator {
+
+ private final OWLClassExpressionLengthMetric lengthMetric;
+
+ public QualityBasedComparator(OWLClassExpressionLengthMetric lengthMetric) {
+ this.lengthMetric = lengthMetric;
+ }
+
+ @Override
+ public int compare(ExampleBasedNode node1, ExampleBasedNode node2) {
+ if (node1.isQualityEvaluated() && node2.isQualityEvaluated()) {
+ return compareByQuality(node1, node2);
+ }
+
+ throw new RuntimeException("Nodes with not evaluated quality are incomparable.");
+ }
+
+ private int compareByQuality(ExampleBasedNode node1, ExampleBasedNode node2) {
+ int result = Boolean.compare(node2.isTooWeak(), node1.isTooWeak());
+
+ if (result != 0) {
+ return result;
+ }
+
+ result = compareWithNonWeakQuality(node1, node2);
+
+ if (result != 0) {
+ return result;
+ }
+
+ return node1.getConcept().compareTo(node2.getConcept());
+ }
+
+ private int compareWithNonWeakQuality(ExampleBasedNode node1, ExampleBasedNode node2) {
+ int result = compareByAccuracy(node1, node2);
+
+ if (result != 0) {
+ return result;
+ }
+
+ return compareByLength(node1, node2);
+ }
+
+ private int compareByAccuracy(ExampleBasedNode node1, ExampleBasedNode node2) {
+ double node1Accuracy = node1.getAccuracy();
+ double node2Accuracy = node2.getAccuracy();
+
+ return Double.compare(node1Accuracy, node2Accuracy);
+ }
+
+ private int compareByLength(ExampleBasedNode node1, ExampleBasedNode node2) {
+ int node1Length = OWLClassExpressionUtils.getLength(node1.getConcept(), lengthMetric);
+ int mode2Length = OWLClassExpressionUtils.getLength(node2.getConcept(), lengthMetric);
+
+ return Integer.compare(mode2Length, node1Length);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ return (o instanceof QualityBasedComparator);
+ }
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELAbstract.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELAbstract.java
new file mode 100644
index 0000000000..9034120fdf
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELAbstract.java
@@ -0,0 +1,1056 @@
+package org.dllearner.algorithms.parcel;
+
+import com.google.common.collect.Sets;
+import org.apache.log4j.Logger;
+import org.dllearner.algorithms.celoe.OENode;
+import org.dllearner.algorithms.parcel.reducer.ParCELImprovedCoverageGreedyReducer;
+import org.dllearner.algorithms.parcel.reducer.ParCELReducer;
+import org.dllearner.algorithms.parcel.split.ParCELDoubleSplitterAbstract;
+import org.dllearner.core.*;
+import org.dllearner.core.config.ConfigOption;
+import org.dllearner.core.owl.ClassHierarchy;
+import org.dllearner.core.owl.DatatypePropertyHierarchy;
+import org.dllearner.core.owl.OWLObjectUnionOfImplExt;
+import org.dllearner.core.owl.ObjectPropertyHierarchy;
+import org.dllearner.refinementoperators.*;
+import org.dllearner.utilities.owl.EvaluatedDescriptionComparator;
+import org.dllearner.utilities.owl.OWLAPIRenderers;
+import org.dllearner.utilities.owl.OWLClassExpressionUtils;
+import org.semanticweb.owlapi.model.OWLClass;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+import org.semanticweb.owlapi.model.OWLDataProperty;
+import org.semanticweb.owlapi.model.OWLIndividual;
+
+import java.lang.invoke.MethodHandles;
+import java.text.DecimalFormat;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.stream.Collectors;
+
+/**
+ * Abstract class for all ParCEL algorithms family
+ *
+ * @author An C. Tran
+ *
+ */
+public abstract class ParCELAbstract extends AbstractCELA implements ParCELearnerMBean {
+
+ protected static final Logger logger = Logger.getLogger(MethodHandles.lookup().lookupClass());
+
+ // ----------------------------------
+ // configuration options
+ // ----------------------------------
+ @ConfigOption(defaultValue = "4", description = "Number of workers will be created to serve the learner")
+ protected int numberOfWorkers = 4; //
+
+ @ConfigOption(defaultValue = "0.0", description = "The percentage of noise within the examples")
+ protected double noisePercentage = 0.0;
+ protected double noiseAllowed; // = this.noisePercentage/100d;
+
+ @ConfigOption(defaultValue = "10", description = "Max number of splits will be applied for data properties with double range. This parameter is not used if a Splitter is provided")
+ protected int maxNoOfSplits = 10;
+
+ @ConfigOption(defaultValue = "0", description = "Minimal coverage that a partial definition must approach so that it can be used")
+ protected double minimalCoverage = 0; //0 means no constrain on this condition
+
+ @ConfigOption(defaultValue = "false", description = "Use value restriction or not")
+ protected boolean useHasValue = false;
+
+ @ConfigOption(defaultValue = "true", description = "Use negation or not")
+ protected boolean useNegation = true;
+
+ @ConfigOption(defaultValue = "false", description = "Use data restriction or not")
+ protected boolean useHasData = false;
+
+ @ConfigOption(defaultValue = "true", description = "Use cardinality restrictions or not")
+ protected boolean useCardinalityRestrictions = true;
+
+ @ConfigOption(defaultValue = "5", description = "Cardinality limit")
+ protected int cardinalityLimit = 5;
+
+ @ConfigOption(description="support of disjunction (owl:unionOf) within a qualified number restriction or a universal quantification", defaultValue="false")
+ protected boolean useRestrictedDisjunction = false;
+
+ @ConfigOption(defaultValue = "owl:Thing",
+ description = "You can specify a start class for the algorithm. To do this, you have to use Manchester OWL syntax either with full IRIs or prefixed IRIs.",
+ exampleValue = "ex:Male or http://example.org/ontology/Female")
+ protected OWLClassExpression startClass; // description of the root node
+
+ protected ParCELDoubleSplitterAbstract splitter = null;
+
+
+ protected int maxHorizExp = 0;
+
+
+ //-------------------------------------------
+ //common variables for the learner
+ //-------------------------------------------
+
+ /**
+ * Hold all generated description to avoid the duplicated descriptions (this may contains only
+ * the weak description but it may take time to check both in this data structure and the search
+ * tree to check for the duplication). Redundancy in this case may help increasing performance.
+ */
+ protected ConcurrentSkipListSet allDescriptions = null;
+
+
+ /**
+ * The search tree holds all evaluated descriptions that are not correct and not weak ==>
+ * candidate for partial definitions. Nodes in the search tree must be sorted so that it can
+ * help the searching more efficiently (best search rather than 'blind' breath first or depth
+ * first)
+ * NOTE: node = (description + accuracy/correctness/completeness/... values)
+ */
+ protected ConcurrentSkipListSet searchTree = null;
+
+
+ /**
+ * partial definitions (they should be sorted so that we can get the best
+ * partial definition at any time)
+ */
+ protected SortedSet partialDefinitions = null;
+
+
+
+ /**
+ * Heuristic used in the searching expansion (choosing node for expansion)
+ */
+ protected ParCELHeuristic heuristic = null;
+
+
+ /**
+ * Reducer which will be used to reduce the partial definitions
+ */
+ protected ParCELReducer reducer = null;
+
+
+ /**
+ * Pool of workers
+ */
+ protected ThreadPoolExecutor workerPool;
+
+ // configuration for worker pool
+ protected int minNumberOfWorker = 2;
+ protected int maxNumberOfWorker = 4; // max number of workers will be created
+ protected final int maxTaskQueueLength = 2000;
+ protected final long keepAliveTime = 100; // ms
+
+ //examples
+ protected Set positiveExamples;
+ protected Set negativeExamples;
+ protected Set positiveTestExamples;
+ protected Set negativeTestExamples;
+
+ /**
+ * Refinement operator pool which provides refinement operators
+ */
+ protected ParCELRefinementOperatorPool refinementOperatorPool;
+
+ @ConfigOption(description = "The refinement operator to use (currently only rho is supported)")
+ protected RefinementOperator operator;
+
+ /**
+ * contains tasks submitted to thread pool
+ */
+ protected BlockingQueue taskQueue;
+
+
+ // just for pretty representation of description
+ protected String baseURI;
+ protected Map prefix;
+
+ protected final DecimalFormat df = new DecimalFormat();
+
+ /**
+ * This may be considered as the noise allowed in learning, i.e. the maximum number of positive
+ * examples can be discard (uncovered)
+ */
+ protected int uncoveredPositiveExampleAllowed = 0;
+
+ /**
+ * Holds the uncovered positive example, this will be updated when the worker found a partial
+ * definition since the callback method "definitionFound" is synchronized", there is no need to
+ * create a thread-safe for this set
+ */
+ protected Set uncoveredPositiveExamples;
+
+ /**
+ * Holds the covered negative examples, this will be updated when the worker found a partial definition
+ * since the callback method "partialDefinitionFound" is synchronized,
+ * there is no need to create a thread-safe for this set
+ */
+ protected Set coveredNegativeExamples;
+
+ // ---------------------------------------------------------
+ // flags to indicate the status of the application
+ // ---------------------------------------------------------
+ /**
+ * The learner is stopped (reasons: done, timeout, out of memory, etc.)
+ */
+ protected volatile boolean stop = false;
+
+ /**
+ * All positive examples are covered
+ */
+ protected volatile boolean done = false;
+
+ /**
+ * Learner get timeout
+ */
+ protected volatile boolean timeout = false;
+
+ // ------------------------------------------------
+ // variables for statistical purpose
+ // ------------------------------------------------
+ protected long miliStarttime = Long.MIN_VALUE;
+ protected long miliLearningTime = Long.MIN_VALUE;
+
+ // some properties for statistical purpose
+ protected int currentMaxHorizExp = 0;
+ protected int bestDescriptionLength = 0;
+ protected double maxAccuracy = 0.0d;
+
+ // will be used in MBean for debugging purpose
+ protected int noOfCompactedPartialDefinition;
+ protected int noOfUncoveredPositiveExamples;
+
+ // number of task created (for debugging purpose only)
+ protected int noOfTask = 0;
+
+ protected long trainingTime = 0;
+
+ public ParCELAbstract() {
+ super();
+ this.reducer = new ParCELImprovedCoverageGreedyReducer();
+ }
+
+ /**
+ *
+ * Constructor for the learning algorithm
+ *
+ * @param learningProblem
+ * Learning problem, must be a PDLLPosNegLP
+ * @param reasoningService
+ * Reasoner
+ */
+ public ParCELAbstract(ParCELPosNegLP learningProblem, AbstractReasonerComponent reasoningService) {
+ super(learningProblem, reasoningService);
+
+ // default compactor used by this algorithm
+ this.reducer = new ParCELImprovedCoverageGreedyReducer();
+ //this.reducer = new ParCELPredScoreReducer();
+ }
+
+ protected void initOperatorIfAny() {
+ if (operator == null) {
+ return;
+ }
+
+ if (operator instanceof CustomHierarchyRefinementOperator) {
+ ClassHierarchy classHierarchy = initClassHierarchy();
+ ObjectPropertyHierarchy objectPropertyHierarchy = initObjectPropertyHierarchy();
+ DatatypePropertyHierarchy datatypePropertyHierarchy = initDataPropertyHierarchy();
+
+ ((CustomHierarchyRefinementOperator) operator).setClassHierarchy(classHierarchy);
+ ((CustomHierarchyRefinementOperator) operator).setObjectPropertyHierarchy(objectPropertyHierarchy);
+ ((CustomHierarchyRefinementOperator) operator).setDataPropertyHierarchy(datatypePropertyHierarchy);
+ }
+
+ if (operator instanceof RhoDRDown) {
+ ((RhoDRDown) operator).setUseDisjunction(false);
+ ((RhoDRDown) operator).setUseRestrictedDisjunction(useRestrictedDisjunction);
+ }
+ }
+
+ protected void initSearchTree() {
+ // TODO: only ParCELPosNegLP supported
+
+ ParCELNode.enableCompactCoverageRepresentation(learningProblem);
+
+ // create a start node in the search tree
+ allDescriptions.add(startClass);
+
+ ParCELEvaluationResult accAndCorr = getAccuracyAndCorrectness(null, startClass);
+
+ ParCELNode startNode = new ParCELNode(
+ null, startClass,
+ accAndCorr.accuracy, accAndCorr.correctness, accAndCorr.completeness
+ );
+
+ startNode.setCoveredPositiveExamples(accAndCorr.coveredPositiveExamples);
+ startNode.setCoveredNegativeExamples(accAndCorr.coveredNegativeExamples);
+
+ searchTree.add(startNode);
+ }
+
+ /**
+ * ============================================================================================
+ * Callback method for worker when partial definitions found (callback for an evaluation request
+ * from reducer)
+ * If a definition (partial) found, do the following tasks:
+ * 1. Add the definition into the partial definition set
+ * 2. Update: uncovered positive examples, max accuracy, best description length
+ * 3. Check for the completeness of the learning. If yes, stop the learning
+ *
+ * @param definitions
+ * New partial definitions
+ */
+ public void newPartialDefinitionsFound(Set definitions) {
+
+ for (ParCELExtraNode def : definitions) {
+ // NOTE: in the previous version, this node was added back into the search tree.
+ // That is not necessary here: in DL-Learner a definition may be revised to
+ // obtain a better one, but in this approach we do not refine partial
+ // definitions, so the node is not re-inserted.
+
+ // remove uncovered positive examples by the positive examples covered by the new
+ // partial definition
+ int uncoveredPositiveExamplesRemoved;
+ int uncoveredPositiveExamplesSize;
+
+ //re-calculate the generation time of pdef
+ def.setGenerationTime(def.getGenerationTime() - miliStarttime);
+
+ synchronized (uncoveredPositiveExamples) {
+ uncoveredPositiveExamplesRemoved = this.uncoveredPositiveExamples.size();
+ this.uncoveredPositiveExamples.removeAll(def.getCoveredPositiveExamples());
+ uncoveredPositiveExamplesSize = this.uncoveredPositiveExamples.size();
+ } // end of uncoveredPositiveExamples synchronisation
+
+ uncoveredPositiveExamplesRemoved -= uncoveredPositiveExamplesSize;
+
+ if (uncoveredPositiveExamplesRemoved > 0) {
+
+ // set the generation time for the new partial definition
+ //def.setGenerationTime(System.currentTimeMillis() - miliStarttime); //this is set by workers
+ synchronized (partialDefinitions) {
+ partialDefinitions.add(def);
+ }
+
+ synchronized (coveredNegativeExamples) {
+ coveredNegativeExamples.addAll(def.getCoveredNegativeExamples());
+ }
+
+ // for used in bean (for tracing purpose)
+ this.noOfUncoveredPositiveExamples -= uncoveredPositiveExamplesRemoved;
+
+ if (logger.isTraceEnabled() || logger.isDebugEnabled()) {
+ logger.trace("PARTIAL definition found: "
+ + OWLAPIRenderers.toManchesterOWLSyntax(def.getDescription())
+ + "\n\t - covered positive examples ("
+ + def.getNumberOfCoveredPositiveExamples() + "): "
+ + def.getCoveredPositiveExamples()
+ + "\n\t - uncovered positive examples left: "
+ + uncoveredPositiveExamplesSize + "/" + positiveExamples.size());
+ } else if (logger.isInfoEnabled()) {
+ logger.info("PARTIAL definition found. Uncovered positive examples left: "
+ + uncoveredPositiveExamplesSize + "/" + positiveExamples.size()
+ + "\n" + OWLAPIRenderers.toManchesterOWLSyntax(def.getDescription()));
+ double actualTrainingTime = getCurrentCpuMillis() / 1000.0;
+
+ OWLClassExpression bestDescription = getUnionCurrentlyBestDescription();
+ double acc = computeAccuracy(bestDescription);
+ double testAcc = computeTestAccuracy(bestDescription);
+
+ logger.info("Training time: " + actualTrainingTime + "s Accuracy: " + acc + " Test accuracy: " + testAcc);
+
+ recordBestConceptTimeAndAccuracy(actualTrainingTime, acc, testAcc);
+ }
+
+ }
+
+ // update the max accuracy and max description length
+ if (def.getAccuracy() > this.maxAccuracy) {
+ this.maxAccuracy = def.getAccuracy();
+ this.bestDescriptionLength = OWLClassExpressionUtils.getLength(def.getDescription());
+ }
+
+ // check if the complete definition found
+ if (uncoveredPositiveExamplesSize <= uncoveredPositiveExampleAllowed) {
+ this.done = true;
+ // stop();
+ }
+ }
+ }
+
+ protected double computeAccuracy(OWLClassExpression description) {
+ if (learningProblem instanceof ParCELPosNegLP) {
+ return ((ParCELPosNegLP) learningProblem).getAccuracy(description);
+ }
+
+ return 0.0;
+ }
+
+
+ @Override
+ protected double computeTestAccuracy(OWLClassExpression description) {
+ if (learningProblem instanceof ParCELPosNegLP) {
+ return ((ParCELPosNegLP) learningProblem).getTestAccuracy(description);
+ }
+
+ return 0.0;
+ }
+
+ protected void createRefinementOperatorPool() throws ComponentInitException {
+ if (operator == null || !(operator instanceof RhoDRDown)) {
+ // -----------------------------------------
+ // prepare for refinement operator creation
+ // -----------------------------------------
+ Set usedConcepts = new TreeSet<>(reasoner.getClasses());
+
+ // remove the ignored concepts out of the list of concepts will be used by refinement
+ // operator
+ if (this.ignoredConcepts != null) {
+ usedConcepts.removeAll(ignoredConcepts);
+ } //set ignored concept is applicable
+
+ ClassHierarchy classHierarchy = (ClassHierarchy) reasoner.getClassHierarchy().cloneAndRestrict(usedConcepts);
+
+ // create a splitter and refinement operator pool
+ // there are two options: i) using object pool, ii) using set of objects (removed from
+ // this revision)
+ if (this.splitter != null) {
+ splitter.setReasoner(reasoner);
+ splitter.setPositiveExamples(positiveExamples);
+ splitter.setNegativeExamples(negativeExamples);
+ splitter.init();
+
+ Map> splits = splitter.computeSplits();
+
+ // i) option 1: create an object pool
+ refinementOperatorPool = new ParCELRefinementOperatorPool(reasoner, classHierarchy,
+ startClass, splits, numberOfWorkers + 1);
+ }
+ else { // no splitter provided create an object pool
+ refinementOperatorPool = new ParCELRefinementOperatorPool(reasoner, classHierarchy,
+ startClass, numberOfWorkers + 1, maxNoOfSplits);
+ }
+
+ refinementOperatorPool.getFactory().setUseNegation(useNegation);
+ refinementOperatorPool.getFactory().setUseHasValue(useHasValue);
+ refinementOperatorPool.getFactory().setUseHasData(useHasData);
+ refinementOperatorPool.getFactory().setCardinalityLimit(cardinalityLimit);
+ refinementOperatorPool.getFactory().setUseRestrictedDisjunction(useRestrictedDisjunction);
+ refinementOperatorPool.getFactory().setUseCardinalityRestrictions(useCardinalityRestrictions);
+ } else {
+ ParCELRefinementOperatorFactory opFactory;
+
+ // create a splitter and refinement operator pool
+ // there are two options: i) using object pool, ii) using set of objects (removed from
+ // this revision)
+ if (this.splitter != null) {
+ splitter.setReasoner(reasoner);
+ splitter.setPositiveExamples(positiveExamples);
+ splitter.setNegativeExamples(negativeExamples);
+ splitter.init();
+
+ Map> splits = splitter.computeSplits();
+
+ opFactory = new ParCELRefinementOperatorFactory((RhoDRDown) operator, splits);
+ } else { // no splitter provided create an object pool
+ opFactory = new ParCELRefinementOperatorFactory((RhoDRDown) operator);
+ }
+
+ refinementOperatorPool = new ParCELRefinementOperatorPool(opFactory);
+ refinementOperatorPool.setMaxIdle(numberOfWorkers + 1);
+ }
+ }
+
+ protected void createWorkerPool() {
+ taskQueue = new LinkedBlockingQueue<>(maxTaskQueueLength);
+
+ workerPool = new ThreadPoolExecutor(minNumberOfWorker, maxNumberOfWorker, keepAliveTime,
+ TimeUnit.MILLISECONDS, taskQueue, new ParCELWorkerThreadFactory());
+
+ if (logger.isInfoEnabled())
+ logger.info("Worker pool created, core pool size: " + workerPool.getCorePoolSize() +
+ ", max pool size: " + workerPool.getMaximumPoolSize());
+ }
+
+ public ParCELEvaluationResult getAccuracyAndCorrectness(OENode parent, OWLClassExpression refinement) {
+ if (parent == null) {
+ return getAccuracyAndCorrectnessRoot(refinement);
+ }
+
+ if (refinementOperatorPool.getFactory().getOperatorPrototype() instanceof DownwardRefinementOperator) {
+ return getAccuracyAndCorrectnessDownward(parent, refinement);
+ }
+
+ return getAccuracyAndCorrectnessUpward(parent, refinement);
+ }
+
+ protected ParCELEvaluationResult getAccuracyAndCorrectnessRoot(OWLClassExpression refinement) {
+ // TODO: only ParCELPosNegLP supported
+
+ ParCELPosNegLP posNegLP = (ParCELPosNegLP) learningProblem;
+
+ Set potentiallyCoveredPositives = posNegLP.getPositiveExamples();
+ Set potentiallyCoveredNegatives = posNegLP.getNegativeExamples();
+
+ return posNegLP.getAccuracyAndCorrectness5(
+ refinement, potentiallyCoveredPositives, potentiallyCoveredNegatives
+ );
+ }
+
+ protected ParCELEvaluationResult getAccuracyAndCorrectnessDownward(OENode parent, OWLClassExpression refinement) {
+ // TODO: only ParCELPosNegLP supported
+
+ Set potentiallyCoveredPositives = parent.getCoveredPositiveExamples();
+ Set potentiallyCoveredNegatives = parent.getCoveredNegativeExamples();
+
+ return ((ParCELPosNegLP) learningProblem).getAccuracyAndCorrectness5(
+ refinement, potentiallyCoveredPositives, potentiallyCoveredNegatives
+ );
+ }
+
+ protected ParCELEvaluationResult getAccuracyAndCorrectnessUpward(OENode parent, OWLClassExpression refinement) {
+ // TODO: only ParCELPosNegLP supported
+
+ ParCELPosNegLP posNegLP = (ParCELPosNegLP) learningProblem;
+
+ Set coveredPositives = getCoveredPositiveExamplesUpward(parent, refinement);
+ Set coveredNegatives = getCoveredNegativeExamplesUpward(parent, refinement);
+
+ return posNegLP.getAccuracyAndCorrectness4(coveredPositives, coveredNegatives);
+ }
+
+ protected Set getCoveredPositiveExamplesUpward(OENode parent, OWLClassExpression refinement) {
+ // TODO: only ParCELPosNegLP supported
+
+ ParCELPosNegLP posNegLP = (ParCELPosNegLP) learningProblem;
+
+ Set uncoveredPositives = new TreeSet<>(posNegLP.getPositiveExamples());
+ uncoveredPositives.removeAll(parent.getCoveredPositiveExamples());
+
+ Set coveredPositives = reasoner.hasType(refinement, uncoveredPositives);
+ coveredPositives.addAll(parent.getCoveredPositiveExamples());
+
+ return coveredPositives;
+ }
+
+ protected Set getCoveredNegativeExamplesUpward(OENode parent, OWLClassExpression refinement) {
+ // TODO: only ParCELPosNegLP supported
+
+ ParCELPosNegLP posNegLP = (ParCELPosNegLP) learningProblem;
+
+ Set uncoveredNegatives = new TreeSet<>(posNegLP.getNegativeExamples());
+ uncoveredNegatives.removeAll(parent.getCoveredNegativeExamples());
+
+ Set coveredNegatives = reasoner.hasType(refinement, uncoveredNegatives);
+ coveredNegatives.addAll(parent.getCoveredNegativeExamples());
+
+ return coveredNegatives;
+ }
+
+ /**
+ * Get the union of all the best (reduced) partial definitions
+ *
+ * @return An union of all reduced partial definitions
+ */
+ public OWLClassExpression getUnionCurrentlyBestDescription() {
+ List compactedDescriptions = getReducedPartialDefinition().stream()
+ .map(OENode::getDescription)
+ .collect(Collectors.toList());
+
+ return new OWLObjectUnionOfImplExt(compactedDescriptions);
+ }
+
+
+ /**
+ * Get the union of all the best (reduced) partial definitions using a given reducer
+ *
+ * @return An union of all reduced partial definitions
+ */
+ public OWLClassExpression getUnionCurrentlyBestDescription(ParCELReducer reducer) {
+ List compactedDescriptions = getReducedPartialDefinition(reducer).stream()
+ .map(OENode::getDescription)
+ .collect(Collectors.toList());
+
+ return new OWLObjectUnionOfImplExt(compactedDescriptions);
+ }
+
+
+ /**
+ * Get the max overall completeness so far
+ *
+ * @return max overall completeness
+ */
+ public abstract double getCurrentlyOveralMaxCompleteness();
+
+
+ /**
+ * Get the set of reduced partial definitions using default reducer
+ *
+ * @return set of reduced partial definitions
+ */
+ public abstract SortedSet getReducedPartialDefinition();
+
+
+ /**
+ * Get the number of reduced partial definitions
+ *
+ * @return number of reduced partial definitions
+ */
+ public int getNoOfReducedPartialDefinition() {
+ return noOfCompactedPartialDefinition;
+ }
+
+
+ /**
+ * Get the reduced partial definitions using the given reducer
+ *
+ * @param reducer Reducer which will be used to reduce the partial definitions
+ *
+ * @return reduced partial definitions
+ */
+ public abstract SortedSet getReducedPartialDefinition(ParCELReducer reducer);
+
+
+ //===========================================
+ // call-back methods for workers
+ //===========================================
+ /**
+ * Update the max horizontal expansion
+ *
+ * @param newHozExp New horizontal expansion
+ */
+ public synchronized void updateMaxHorizontalExpansion(int newHozExp) {
+ if (maxHorizExp < newHozExp) {
+ maxHorizExp = newHozExp;
+ }
+ }
+
+ public int getMaximumHorizontalExpansion() {
+ return maxHorizExp;
+ }
+
+
+ /**
+ * ============================================================================================
+ * Callback method for worker when the evaluated node is not a partial definition and weak node
+ * either
+ *
+ * NOTE: there is not need for using synchronisation for this method since the thread safe data
+ * structure is currently using
+ *
+ * @param newNodes
+ * New nodes to add to the search tree
+ */
+ public void newRefinementDescriptions(Set newNodes) {
+ searchTree.addAll(newNodes);
+ }
+
+
+ /*
+ *
+ * Get the learning time in milisecond. Learning time does not include the reduction time
+ */
+ public long getLearningTime() {
+ return miliLearningTime;
+ }
+
+ /**
+ * Get total number of partial definitions found so far
+ *
+ * @return Number of partial definitions
+ */
+ public long getNumberOfPartialDefinitions() {
+ return this.partialDefinitions.size();
+ }
+
+ /**
+ * Add a description into search tree. No synchronization is needed since safe-thread is using
+ *
+ * @param des
+ * Description to be added
+ *
+ * @return True is the description can be added (has not been in the search tree/all
+ * descriptions set
+ */
+ public boolean addDescription(OWLClassExpression des) {
+ return this.allDescriptions.add(des);
+ }
+
+ // -------------------------------------------------------
+ // setters and getters for learner configuration options
+ // -------------------------------------------------------
+
+ //number of workers
+ public void setNumberOfWorkers(int numberOfWorkers) {
+ this.numberOfWorkers = numberOfWorkers;
+ }
+
+ public int getNumberOfWorkers() {
+ return numberOfWorkers;
+ }
+
+ //time out (max execution time)
+ public void setMaxExecutionTimeInSeconds(int maxExecutionTime) {
+ this.maxExecutionTimeInSeconds = maxExecutionTime;
+ }
+
+ public long getMaxExecutionTimeInSeconds() {
+ return maxExecutionTimeInSeconds;
+ }
+
+ //noise allowed
+ public void setNoisePercentage(double noise) {
+ this.noisePercentage = noise;
+ }
+
+ public double getNoisePercentage() {
+ return this.noisePercentage;
+ }
+
+ //max no of splits
+ public int getMaxNoOfSplits() {
+ return maxNoOfSplits;
+ }
+
+ public void setMaxNoOfSplits(int maxNoOfSplits) {
+ this.maxNoOfSplits = maxNoOfSplits;
+ }
+
+ //ignored concepts
+ public Set getIgnoredConcepts() {
+ return ignoredConcepts;
+ }
+
+ public void setIgnoredConcepts(Set ignoredConcepts) {
+ this.ignoredConcepts = ignoredConcepts;
+ }
+
+ //minimal covered of the partial definitions
+ public double getMinimalCoverage() {
+ return minimalCoverage;
+ }
+
+ public void setMinimalCoverage(double minimalCoverage) {
+ this.minimalCoverage = minimalCoverage;
+ }
+
+ public ParCELReducer getReducer() {
+ return this.reducer;
+ }
+
+ public String getBaseURI() {
+ return reasoner.getBaseURI();
+ }
+
+ public Map getPrefix() {
+ return reasoner.getPrefixes();
+ }
+
+ public long getTotalNumberOfDescriptionsGenerated() {
+ return this.allDescriptions.size();
+ }
+
+ public boolean getUseHasValue() {
+ return this.useHasValue;
+ }
+
+ public void setUseHasValue(boolean useHasValue) {
+ this.useHasValue = useHasValue;
+ }
+
+ public boolean getUseHasData() {
+ return this.useHasData;
+ }
+
+ public void setUseNegation(boolean useNegation) {
+ this.useNegation = useNegation;
+ }
+
+ public boolean getUseNegation() {
+ return this.useNegation;
+ }
+
+ public void setUseHasData(boolean useHasData) {
+ this.useHasData = useHasData;
+ }
+
+ public void setUseCardinalityRestrictions(boolean useCardinalityRestrictions) {
+ this.useCardinalityRestrictions = useCardinalityRestrictions;
+ }
+
+ public boolean getUseCardinalityRestrictions() {
+ return this.useCardinalityRestrictions;
+ }
+
+ public void setCardinalityLimit(int cardinalityLimit) {
+ this.cardinalityLimit = cardinalityLimit;
+ }
+
+ public int getCardinalityLimit() {
+ return this.cardinalityLimit;
+ }
+
+ public boolean isUseRestrictedDisjunction() {
+ return useRestrictedDisjunction;
+ }
+
+ public void setUseRestrictedDisjunction(boolean useRestrictedDisjunction) {
+ this.useRestrictedDisjunction = useRestrictedDisjunction;
+ }
+
+ public void setOperator(RefinementOperator refinementOp) {
+ this.operator = refinementOp;
+ }
+
+ public RefinementOperator getOperator() {
+ return this.operator;
+ }
+
+ public void setSplitter(ParCELDoubleSplitterAbstract splitter) {
+ this.splitter = splitter;
+ }
+
+ public void setStartClass(OWLClassExpression startClass) {
+ this.startClass = startClass;
+ }
+
+ @Override
+ public int getWorkerPoolSize() {
+ return this.workerPool.getQueue().size();
+ }
+
+ /**
+ * ============================================================================================
+ * Stop the learning algorithm: Stop the workers and set the "stop" flag to true
+ */
+ @Override
+ public void stop() {
+
+ if (!stop) {
+ stop = true;
+ workerPool.shutdownNow();
+
+ //wait until all workers are terminated
+ try {
+ //System.out.println("-------------Waiting for worker pool----------------");
+ workerPool.awaitTermination(10, TimeUnit.SECONDS);
+ }
+ catch (InterruptedException ie) {
+ logger.error(ie);
+ }
+ }
+ }
+
+ /**=========================================================================================================
+ * Set heuristic will be used
+ *
+ * @param newHeuristic
+ */
+ public void setHeuristic(ParCELHeuristic newHeuristic) {
+ this.heuristic = newHeuristic;
+
+ if (logger.isInfoEnabled())
+ logger.info("Changing heuristic to " + newHeuristic.getClass().getName());
+ }
+
+ public boolean isTimeout() {
+ timeout = (this.maxExecutionTimeInSeconds > 0 && (getCurrentCpuMillis()) > this.maxExecutionTimeInSeconds * 1000);
+ return timeout;
+ }
+
+ public boolean isDone() {
+ return done;
+ }
+
+ @Override
+ public boolean isRunning() {
+ return !stop && !done && !timeout;
+ }
+
+ /**
+ * ============================================================================================
+ * Check if the learner can be terminated
+ *
+ * @return True if termination condition is true (manual stop inquiry, complete definition
+ * found, or timeout), false otherwise
+ */
+ protected boolean isTerminateCriteriaSatisfied() {
+ return stop || done || isTimeout();
+ // (Runtime.getRuntime().totalMemory() >= this.maxHeapSize
+ // && Runtime.getRuntime().freeMemory() < this.outOfMemory);
+ }
+
+ /**
+ * Check whether the learner is terminated by the partial definitions
+ *
+ * @return True if the learner is terminated by the partial definitions, false otherwise
+ */
+ public boolean terminatedByPartialDefinitions() {
+ return this.done;
+ }
+
+ protected double getNoiseAllowed() {
+ return noiseAllowed;
+ }
+
+ /**
+ * ============================================================================================
+ * Get the currently best description in the set of partial definition
+ */
+ @Override
+ public OWLClassExpression getCurrentlyBestDescription() {
+ if (!partialDefinitions.isEmpty()) {
+ return partialDefinitions.iterator().next().getDescription();
+ } else
+ return null;
+ }
+
+ /**
+ * ============================================================================================
+ * Get all partial definition without any associated information such as accuracy, correctness,
+ * etc.
+ */
+ @Override
+ public List getCurrentlyBestDescriptions() {
+ return PLOENodesToDescriptions(partialDefinitions);
+ }
+
+ /**
+ * ============================================================================================
+ * Convert a set of PLOENode into a list of descriptions
+ *
+ * @param nodes
+ * Set of PLOENode need to be converted
+ *
+ * @return Set of descriptions corresponding to the given set of PLOENode
+ */
+ private List PLOENodesToDescriptions(Set nodes) {
+ List result = new LinkedList<>();
+ for (ParCELExtraNode node : nodes)
+ result.add(node.getDescription());
+ return result;
+ }
+
+ /**
+ * ============================================================================================
+ * The same as getCurrentBestDescription. An evaluated description is a description with its
+ * evaluated properties including accuracy and correctness
+ */
+ @Override
+ public EvaluatedDescription extends Score> getCurrentlyBestEvaluatedDescription() {
+ if (!partialDefinitions.isEmpty()) {
+ ParCELNode firstNode = partialDefinitions.first();
+ return new EvaluatedDescription<>(firstNode.getDescription(), new ParCELScore(firstNode));
+ } else
+ return null;
+ }
+
+ /**
+ * ============================================================================================
+ * Get all partial definitions found so far
+ */
+ @Override
+ public NavigableSet extends EvaluatedDescription extends Score>> getCurrentlyBestEvaluatedDescriptions() {
+ return extraPLOENodesToEvaluatedDescriptions(partialDefinitions);
+ }
+
+ /**
+ * ============================================================================================
+ * Method for PLOENode - EvaluatedDescription conversion
+ *
+ * @param partialDefs
+ * Set of ExtraPLOENode nodes which will be converted into EvaluatedDescription
+ *
+ * @return Set of corresponding EvaluatedDescription
+ */
+ private NavigableSet extends EvaluatedDescription extends Score>> extraPLOENodesToEvaluatedDescriptions(
+ Set partialDefs) {
+ TreeSet> result = new TreeSet<>(
+ new EvaluatedDescriptionComparator());
+ for (ParCELExtraNode node : partialDefs) {
+ result.add(new EvaluatedDescription<>(node.getDescription(), new ParCELScore(node)));
+ }
+ return result;
+ }
+ /**
+ * Get all unreduced partial definitions
+ *
+ * @return unreduced partial definitions
+ */
+ public Set getPartialDefinitions() {
+ return partialDefinitions;
+ }
+
+
+ public Collection getSearchTree() {
+ return searchTree;
+ }
+
+ public ParCELHeuristic getHeuristic() {
+ return heuristic;
+ }
+
+ public int getSearchTreeSize() {
+ return (searchTree != null ? searchTree.size() : -1);
+ }
+
+ public long getMiliStarttime() {
+ return this.miliStarttime;
+ }
+
+ public long getMiliLearningTime() {
+ return miliLearningTime;
+ }
+
+ public double getMaxAccuracy() {
+ return maxAccuracy;
+ }
+
+ public int getCurrentlyBestDescriptionLength() {
+ return bestDescriptionLength;
+ }
+
+
+ @Override
+ public long getTotalDescriptions() {
+ return allDescriptions.size();
+ }
+
+ @Override
+ public double getCurrentlyBestAccuracy() {
+ return ((positiveExamples.size() - uncoveredPositiveExamples.size()) + negativeExamples.size()) /
+ (double)(positiveExamples.size() + negativeExamples.size());
+ }
+
+ @Override
+ public int getCurrentlyMaxExpansion() {
+ return this.currentMaxHorizExp;
+ }
+
+ protected void printSearchTree(ParCELExtraNode node) {
+ List processingNodes = new LinkedList<>();
+
+ processingNodes.add(node);
+
+ processingNodes.addAll(node.getCompositeNodes());
+
+ for (OENode n : processingNodes) {
+ OENode parent = n.getParent();
+ while (parent != null) {
+ logger.debug(" <-- " + OWLAPIRenderers.toManchesterOWLSyntax(parent.getDescription()));
+ //" [acc:" + df.format(parent.getAccuracy()) +
+ //", correctness:" + df.format(parent.getCorrectness()) + ", completeness:" + df.format(parent.getCompleteness()) +
+ //", score:" + df.format(this.heuristic.getScore(parent)) + "]");
+
+ //print out the children nodes
+ Collection children = parent.getChildren();
+ for (OENode child : children) {
+ OENode tmp = child;
+ logger.debug(" --> " + OWLAPIRenderers.toManchesterOWLSyntax(tmp.getDescription()));
+ //" [acc:" + df.format(tmp.getAccuracy()) +
+ //", correctness:" + df.format(tmp.getCorrectness()) + ", completeness:" + df.format(tmp.getCompleteness()) +
+ //", score:" + df.format(this.heuristic.getScore(tmp)) + "]");
+ }
+ parent = parent.getParent();
+ } //while parent is not null
+
+ logger.debug("===============");
+
+ }
+ }
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELCompletenessComparator.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELCompletenessComparator.java
new file mode 100644
index 0000000000..1bc44cdb18
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELCompletenessComparator.java
@@ -0,0 +1,37 @@
package org.dllearner.algorithms.parcel;

import java.util.Comparator;

import org.dllearner.utilities.owl.OWLClassExpressionLengthCalculator;

/**
 * Compares two ParCELNode nodes by their completeness (number of covered positive
 * examples, descending: higher coverage first). Ties are broken by description
 * length (ascending) and finally by the descriptions' natural order.
 *
 * @author An C. Tran
 */
public class ParCELCompletenessComparator implements Comparator<ParCELNode> {

	@Override
	public int compare(ParCELNode node1, ParCELNode node2) {
		// higher positive coverage sorts first (descending)
		int result = Integer.compare(node2.getNumberOfCoveredPositiveExamples(),
				node1.getNumberOfCoveredPositiveExamples());
		if (result != 0) {
			return result;
		}

		// shorter description sorts first; one calculator instance suffices
		OWLClassExpressionLengthCalculator calculator = new OWLClassExpressionLengthCalculator();
		result = Integer.compare(calculator.getLength(node1.getDescription()),
				calculator.getLength(node2.getDescription()));
		if (result != 0) {
			return result;
		}

		// final tie-breaker: natural order of the descriptions
		return node1.getDescription().compareTo(node2.getDescription());
	}
}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELCorrectnessComparator.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELCorrectnessComparator.java
new file mode 100644
index 0000000000..a822b29bd9
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELCorrectnessComparator.java
@@ -0,0 +1,36 @@
package org.dllearner.algorithms.parcel;

import java.util.Comparator;

import org.dllearner.utilities.owl.OWLClassExpressionLengthCalculator;

/**
 * Compares two ParCELExtraNode nodes by their correctness (descending: higher
 * correctness first). Ties are broken by description length (ascending) and
 * finally by the descriptions' natural order.
 *
 * @author An C. Tran
 */
public class ParCELCorrectnessComparator implements Comparator<ParCELExtraNode> {

	@Override
	public int compare(ParCELExtraNode node1, ParCELExtraNode node2) {
		// higher correctness sorts first; Double.compare also gives NaN a
		// consistent position, keeping the comparator contract intact
		int result = Double.compare(node2.getCorrectness(), node1.getCorrectness());
		if (result != 0) {
			return result;
		}

		// shorter description sorts first; one calculator instance suffices
		OWLClassExpressionLengthCalculator calculator = new OWLClassExpressionLengthCalculator();
		result = Integer.compare(calculator.getLength(node1.getDescription()),
				calculator.getLength(node2.getDescription()));
		if (result != 0) {
			return result;
		}

		// final tie-breaker: natural order of the descriptions
		return node1.getDescription().compareTo(node2.getDescription());
	}
}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELCoveredNegativeExampleComparator.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELCoveredNegativeExampleComparator.java
new file mode 100644
index 0000000000..0a981c8d8d
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELCoveredNegativeExampleComparator.java
@@ -0,0 +1,37 @@
package org.dllearner.algorithms.parcel;

import java.util.Comparator;

import org.dllearner.utilities.owl.OWLClassExpressionLengthCalculator;

/**
 * Compares two ParCELExtraNode nodes by the number of covered negative examples
 * (descending: more covered negatives first). Ties are broken by description
 * length (ascending) and finally by the descriptions' natural order.
 *
 * @author An C. Tran
 */
public class ParCELCoveredNegativeExampleComparator implements Comparator<ParCELExtraNode> {

	@Override
	public int compare(ParCELExtraNode node1, ParCELExtraNode node2) {
		// more covered negatives sorts first (descending)
		int result = Integer.compare(node2.getNumberOfCoveredNegativeExamples(),
				node1.getNumberOfCoveredNegativeExamples());
		if (result != 0) {
			return result;
		}

		// shorter description sorts first; one calculator instance suffices
		OWLClassExpressionLengthCalculator calculator = new OWLClassExpressionLengthCalculator();
		result = Integer.compare(calculator.getLength(node1.getDescription()),
				calculator.getLength(node2.getDescription()));
		if (result != 0) {
			return result;
		}

		// final tie-breaker: natural order of the descriptions
		return node1.getDescription().compareTo(node2.getDescription());
	}
}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELDefaultHeuristic.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELDefaultHeuristic.java
new file mode 100644
index 0000000000..18b33f85b5
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELDefaultHeuristic.java
@@ -0,0 +1,137 @@
+package org.dllearner.algorithms.parcel;
+
+
+/**
+ * Implements the heuristic used to expand the search tree. Dimensions used:
+ * + correctness: main value
+ * + horizontal expansion: penalty
+ * + accuracy gained from the parent node: bonus
+ * + refinement nodes: penalty
+ * + concept type + name (org.dllearner.utilities.owl.ConceptComparator)
+ *
+ * @author An C. Tran
+ */
+public class ParCELDefaultHeuristic implements ParCELHeuristic {
+
+ //correct
+ protected double correctnessFactor = 1.0;
+
+ // penalty for long descriptions
+ protected double expansionPenaltyFactor = 0.05; //0.01, 0.05
+
+ // bonus for gained accuracy
+ protected double gainBonusFactor = 0.2; //0.1, 0.2
+
+ // penalty if a node description has very many refinements since exploring
+ // such a node is computationally very expensive
+ protected double nodeRefinementPenalty = 0.0001;
+
+ // award for node with high accuracy
+ protected double accuracyAwardFactor = 0.01; //0.01
+
+
+ /**
+ * Compare two node
+ *
+ * @param node1 Node to compare
+ * @param node2 Node to compare
+ * @return 1 if node1 "greater" than node2 and vice versa
+ */
+ @Override
+ public int compare(ParCELNode node1, ParCELNode node2) {
+ double diff = getNodeScore(node1) - getNodeScore(node2);
+
+ if (diff > 0) { // node1 has better score than node2
+ return 1;
+ } else if (diff < 0) {
+ return -1;
+ } else {
+ // syntactic comparison as final comparison criterion
+ int comp = node1.getDescription().compareTo(node2.getDescription());
+// return comp;
+
+ // this allows duplicate descriptions exists in the set (with dif. horz. value)
+ if (comp != 0)
+ return comp;
+ else
+ return -1;
+ }
+ }
+
+ /**
+ * Calculate score for a node which is used as the searching heuristic
+ *
+ * @param node Node to be scored
+ * @return Score of the node
+ */
+ protected double getNodeScore(ParCELNode node) {
+
+ // the scoring mainly bases on correctness
+ double score = node.getCorrectness();// * correctnessFactor;
+
+ // bonus for the accuracy gained
+ if (!node.isRoot()) {
+ double parentAccuracy = ((ParCELNode) (node.getParent())).getAccuracy();
+ score += (parentAccuracy - node.getAccuracy()) * gainBonusFactor;
+ }
+
+ // award node with high accuracy
+ score += node.getAccuracy() * accuracyAwardFactor;
+
+ // penalty for horizontal expansion
+ score -= node.getHorizontalExpansion() * expansionPenaltyFactor;
+ //score -= node.getDescription().getLength() * expansionPenaltyFactor;
+
+ score -= node.getRefinementCount() * nodeRefinementPenalty;
+
+ return score;
+ }
+
+ @Override
+ public double getScore(ParCELNode node) {
+ return this.getNodeScore(node);
+ }
+
+ public double getCorrectnessFactor() {
+ return correctnessFactor;
+ }
+
+ public void setCorrectnessFactor(double correctnessFactor) {
+ this.correctnessFactor = correctnessFactor;
+ }
+
+ public double getExpansionPenaltyFactor() {
+ return expansionPenaltyFactor;
+ }
+
+ public void setExpansionPenaltyFactor(double expansionPenaltyFactor) {
+ this.expansionPenaltyFactor = expansionPenaltyFactor;
+ //System.out.println("ExpansionPenaltyFactor changed: " + expansionPenaltyFactor);
+ }
+
+ public double getGainBonusFactor() {
+ return gainBonusFactor;
+ }
+
+ public void setGainBonusFactor(double gainBonusFactor) {
+ this.gainBonusFactor = gainBonusFactor;
+ }
+
+ public double getNodeRefinementPenalty() {
+ return nodeRefinementPenalty;
+ }
+
+ public void setNodeRefinementPenalty(double nodeRefinementPenalty) {
+ this.nodeRefinementPenalty = nodeRefinementPenalty;
+ }
+
+ public double getAccuracyAwardFactor() {
+ return accuracyAwardFactor;
+ }
+
+ public void setAccuracyAwardFactor(double accuracyAwardFactor) {
+ this.accuracyAwardFactor = accuracyAwardFactor;
+ }
+
+
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELDefinitionGenerationTimeComparator.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELDefinitionGenerationTimeComparator.java
new file mode 100644
index 0000000000..fc812d6a7c
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELDefinitionGenerationTimeComparator.java
@@ -0,0 +1,40 @@
package org.dllearner.algorithms.parcel;

import java.util.Comparator;

import org.dllearner.utilities.owl.OWLClassExpressionUtils;

/**
 * Compares two nodes by their generation time (ascending). Ties are broken by
 * description length (ascending) and finally by the descriptions' natural order.
 * Used by the Generation Time Greedy Compactness (GOLR) strategy.
 *
 * @author An C. Tran
 */
public class ParCELDefinitionGenerationTimeComparator implements
		Comparator<ParCELExtraNode> {

	@Override
	public int compare(ParCELExtraNode node1, ParCELExtraNode node2) {
		// earlier generation time sorts first; Double.compare also orders NaN
		// consistently, keeping the comparator contract intact
		int result = Double.compare(node1.getGenerationTime(), node2.getGenerationTime());
		if (result != 0) {
			return result;
		}

		// compute each description length once (previously up to twice per node)
		int len1 = OWLClassExpressionUtils.getLength(node1.getDescription());
		int len2 = OWLClassExpressionUtils.getLength(node2.getDescription());
		result = Integer.compare(len1, len2);
		if (result != 0) {
			return result;
		}

		// final tie-breaker: natural order of the descriptions
		return node1.getDescription().compareTo(node2.getDescription());
	}

}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELDefinitionLengthComparator.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELDefinitionLengthComparator.java
new file mode 100644
index 0000000000..8c1339c61f
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELDefinitionLengthComparator.java
@@ -0,0 +1,35 @@
package org.dllearner.algorithms.parcel;

import java.util.Comparator;

import org.dllearner.utilities.owl.OWLClassExpressionLengthCalculator;

/**
 * Compares two nodes by their definition length (ascending); ties are broken by
 * the descriptions' natural order. Used by the Definition Length Greedy
 * Compactness strategy.
 *
 * NOTE: a previous revision also built a Guava ComparisonChain here and discarded
 * its result; that dead statement (and the now-unused imports) has been removed.
 *
 * @author An C. Tran
 */
public class ParCELDefinitionLengthComparator implements Comparator<ParCELExtraNode> {

	@Override
	public int compare(ParCELExtraNode node1, ParCELExtraNode node2) {
		OWLClassExpressionLengthCalculator calculator = new OWLClassExpressionLengthCalculator();

		// shorter definition sorts first
		int result = Integer.compare(calculator.getLength(node1.getDescription()),
				calculator.getLength(node2.getDescription()));
		if (result != 0) {
			return result;
		}

		// final tie-breaker: natural order of the descriptions
		return node1.getDescription().compareTo(node2.getDescription());
	}

}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELEvaluationResult.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELEvaluationResult.java
new file mode 100644
index 0000000000..902263e405
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELEvaluationResult.java
@@ -0,0 +1,146 @@
package org.dllearner.algorithms.parcel;

import java.util.Objects;
import java.util.Optional;
import java.util.Set;

import org.semanticweb.owlapi.model.OWLIndividual;

/**
 * Represents the result of an evaluation returned by the ParCEL learning problem
 * (ParCELPosNegLP). Information included:
 *
 *  - accuracy: double
 *  - correctness: double
 *  - completeness: double
 *  - covered positive examples: Set of OWLIndividual (may be null)
 *  - covered negative examples: Set of OWLIndividual (may be null)
 *
 * @author An C. Tran
 */
public class ParCELEvaluationResult {

	protected double accuracy = 0.0;
	protected double correctness = 0.0;
	protected double completeness = 0.0;
	// null means "not tracked" for the corresponding example set
	protected Set<OWLIndividual> coveredPositiveExamples = null;
	protected Set<OWLIndividual> coveredNegativeExamples = null;

	/**
	 * Default constructor; all properties keep their default values.
	 */
	public ParCELEvaluationResult() {

	}

	/**
	 * Creates an evaluation result that is not for a correct definition, so the set
	 * of covered positive examples does not need to be held.
	 *
	 * @param accuracy     accuracy of the evaluated description
	 * @param correctness  correctness of the evaluated description
	 * @param completeness completeness of the evaluated description
	 */
	public ParCELEvaluationResult(double accuracy, double correctness, double completeness) {
		this.accuracy = accuracy;
		this.correctness = correctness;
		this.completeness = completeness;
	}

	/**
	 * Creates an evaluation result for a correct definition. Covered positive
	 * examples are kept for use in the result compactness (reduction) later on.
	 * Usually used for partial definitions.
	 *
	 * @param accuracy                accuracy of the evaluated description
	 * @param correctness             correctness of the evaluated description
	 * @param completeness            completeness of the evaluated description
	 * @param coveredPositiveExamples positive examples covered by the description
	 */
	public ParCELEvaluationResult(double accuracy, double correctness, double completeness,
			Set<OWLIndividual> coveredPositiveExamples) {
		this.accuracy = accuracy;
		this.correctness = correctness;
		this.completeness = completeness;
		this.coveredPositiveExamples = coveredPositiveExamples;
	}

	/**
	 * Creates an evaluation result for a correct definition. Both covered positive
	 * and covered negative examples are kept for use in the result compactness
	 * (reduction) later on.
	 *
	 * @param accuracy                accuracy of the evaluated description
	 * @param correctness             correctness of the evaluated description
	 * @param completeness            completeness of the evaluated description
	 * @param coveredPositiveExamples positive examples covered by the description
	 * @param coveredNegativeExamples negative examples covered by the description
	 */
	public ParCELEvaluationResult(double accuracy, double correctness, double completeness,
			Set<OWLIndividual> coveredPositiveExamples, Set<OWLIndividual> coveredNegativeExamples) {

		this.accuracy = accuracy;
		this.correctness = correctness;
		this.completeness = completeness;
		this.coveredPositiveExamples = coveredPositiveExamples;
		this.coveredNegativeExamples = coveredNegativeExamples;
	}

	// ---------------------------------------
	// Getters and setters
	// ---------------------------------------
	public double getAccuracy() {
		return accuracy;
	}

	public void setAccuracy(double accuracy) {
		this.accuracy = accuracy;
	}

	public double getCorrectness() {
		return correctness;
	}

	public void setCorrectness(double correctness) {
		this.correctness = correctness;
	}

	public double getCompleteness() {
		return completeness;
	}

	public void setCompleteness(double completeness) {
		this.completeness = completeness;
	}

	public Set<OWLIndividual> getCoveredPositiveExamples() {
		return coveredPositiveExamples;
	}

	public void setCoveredPositiveExamples(Set<OWLIndividual> coveredPositiveExamples) {
		this.coveredPositiveExamples = coveredPositiveExamples;
	}

	public Set<OWLIndividual> getCoveredNegativeExamples() {
		return coveredNegativeExamples;
	}

	public void setCoveredNegativeExamples(Set<OWLIndividual> coveredNegativeExamples) {
		this.coveredNegativeExamples = coveredNegativeExamples;
	}

	@Override
	public String toString() {
		return "ParCELEvaluationResult{" +
				"accuracy=" + accuracy +
				", correctness=" + correctness +
				", completeness=" + completeness +
				", coveredPositiveExamples=" + sizeOrNA(coveredPositiveExamples) +
				", coveredNegativeExamples=" + sizeOrNA(coveredNegativeExamples) +
				'}';
	}

	/**
	 * Renders the size of a possibly-null example set, or "n/a". Replaces two
	 * duplicated (and previously inconsistent) Optional chains in toString.
	 */
	private static String sizeOrNA(Set<OWLIndividual> examples) {
		return Optional.ofNullable(examples)
				.map(Set::size)
				.map(Objects::toString)
				.orElse("n/a");
	}
}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELExtraNode.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELExtraNode.java
new file mode 100644
index 0000000000..23a873b78e
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELExtraNode.java
@@ -0,0 +1,166 @@
+package org.dllearner.algorithms.parcel;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.dllearner.algorithms.celoe.OENode;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+import org.semanticweb.owlapi.model.OWLIndividual;
+
+/**
+ * Generation time and node type properties are added. This information is needed by some
+ * reduction algorithms
+ *
+ * @author An C. Tran
+ *
+ */
+public class ParCELExtraNode extends ParCELNode {
+
+ protected double generationTime = Double.MIN_VALUE; // time in ms that the node was generated
+ protected double extraInfo = Double.MIN_VALUE;
+ protected int type = -1;
+
+ /**
+ * Nodes in the search tree that constitute this node (in case this node is a combination of a
+ * description with counter partial definitions) to form a partial definition
+ */
+ final Set compositeNodes = new HashSet<>();
+
+
+ /**
+ * Constructor
+ *
+ * @param parentNode
+ * @param description
+ * @param accuracy
+ */
+ /*
+ public ParCELExtraNode(OENode parentNode, Description description, double accuracy) {
+ super(parentNode, description, accuracy);
+ }
+ */
+
+ /**
+ * Constructor
+ *
+ * @param parentNode
+ * @param description
+ * @param accuracy
+ * @param correctness
+ */
+ /*
+ public ParCELExtraNode(OENode parentNode, Description description, double accuracy,
+ double correctness) {
+ super(parentNode, description, accuracy, correctness);
+ }
+ */
+
+
+ /**
+ * Create a ParCELExtraNode from an OENode
+ * @param node
+ */
+ public ParCELExtraNode(ParCELNode node) {
+ super(node.getParent(), node.getDescription(), node.getAccuracy(), node.getCorrectness(), node.getCompleteness());
+ setCoveredPositiveExamples(node.getCoveredPositiveExamples());
+ setCoveredNegativeExamples(node.getCoveredNegativeExamples());
+ }
+
+ /**
+ * Create a node with a set of positive examples covered by the description of the node
+ *
+ * @param node
+ * @param cp
+ */
+ public ParCELExtraNode(ParCELNode node, Set cp) {
+ super(node.getParent(), node.getDescription(), node.getAccuracy(), node.getCorrectness(), node.getCompleteness());
+ setCoveredPositiveExamples(cp);
+ }
+
+ /**
+ * Constructor with the correctness and the generation time of the description
+ *
+ * @param parentNode
+ * @param description
+ * @param accuracy
+ * @param correctness
+ */
+ /*
+ public ParCELExtraNode(ParCELNode parentNode, Description description, double accuracy,
+ double correctness) {
+ super(parentNode, description, accuracy, correctness);
+ this.coveredPositiveExamples = null;
+ }
+
+ */
+ /**
+ * Constructor with the set of positive examples covered by the description of the node
+ *
+ * @param parentNode
+ * @param description
+ * @param accuracy
+ * @param correctness
+ * @param cp Covered positive examples
+ */
+ public ParCELExtraNode(ParCELNode parentNode, OWLClassExpression description, double accuracy,
+ double correctness, double completeness, Set cp)
+ {
+ super(parentNode, description, accuracy, correctness, completeness);
+ setCoveredPositiveExamples(cp);
+ }
+
+
+ /**
+ * Constructor with the set of positive examples covered by the description of the node
+ *
+ * @param parentNode
+ * @param description
+ * @param accuracy
+ * @param correctness
+ * @param cn Covered negative examples
+ */
+ public ParCELExtraNode(ParCELNode parentNode, OWLClassExpression description, double accuracy,
+ double correctness, double completeness, Set cp, Set cn)
+ {
+ super(parentNode, description, accuracy, correctness, completeness);
+ setCoveredPositiveExamples(cp);
+ setCoveredNegativeExamples(cn);
+ }
+
+
+ // -------------------------
+ // getters and setters
+ // -------------------------
+
+ public double getGenerationTime() {
+ return generationTime;
+ }
+
+ public void setGenerationTime(double d) {
+ this.generationTime = d;
+ }
+
+ public double getExtraInfo() {
+ return extraInfo;
+ }
+
+ public void setExtraInfo(double d) {
+ this.extraInfo = d;
+ }
+
+ public void setType(int t) {
+ this.type = t;
+ }
+
+ public int getType() {
+ return this.type;
+ }
+
+ public void setCompositeList(Set compositeNodes) {
+ this.compositeNodes.addAll(compositeNodes);
+ }
+
+ public Set getCompositeNodes() {
+ return this.compositeNodes;
+ }
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELHeuristic.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELHeuristic.java
new file mode 100644
index 0000000000..ace47b3ae7
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELHeuristic.java
@@ -0,0 +1,16 @@
+package org.dllearner.algorithms.parcel;
+
+import java.util.Comparator;
+
+/**
+ * Interface for heuristics used in ParCEL
+ *
+ * @author An C. Tran
+ *
+ */
+public interface ParCELHeuristic extends Comparator {
+
+ int compare(ParCELNode node1, ParCELNode node2);
+
+ double getScore(ParCELNode node);
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELNode.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELNode.java
new file mode 100644
index 0000000000..a52b007f1a
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELNode.java
@@ -0,0 +1,92 @@
+package org.dllearner.algorithms.parcel;
+
+import java.text.DecimalFormat;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.dllearner.algorithms.celoe.OENode;
+import org.dllearner.core.LearningProblem;
+import org.dllearner.learningproblems.PosNegLP;
+import org.dllearner.utilities.owl.OWLAPIRenderers;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+import org.semanticweb.owlapi.model.OWLIndividual;
+
+/**
+ * Represents a node in the search tree used in the ParCEL
+ * A node includes description and its corresponding properties such as: correctness, accuracy,
+ * distance between the description and the learning problem, its parent node, etc.
+ *
+ * @author An C. Tran
+ *
+ */
+public class ParCELNode extends OENode {
+
+ private double correctness = -1.0;
+ private double completeness = -1.0;
+
+ private final DecimalFormat dfPercent = new DecimalFormat("0.00%");
+
+
+ public ParCELNode(OENode parentNode, OWLClassExpression description, double accuracy,
+ double correctness, double completeness) {
+ super(description, accuracy);
+ setParent(parentNode);
+ this.correctness = correctness;
+ this.completeness = completeness;
+ }
+
+
+ public ParCELNode(OENode parentNode, OWLClassExpression description,
+ Set coveredPositiveExamples, Set coveredNegativeExamples) {
+ super(description, 0);
+ setParent(parentNode);
+ setCoveredPositiveExamples(coveredPositiveExamples);
+ setCoveredNegativeExamples(coveredNegativeExamples);
+ }
+
+ public void setCorrectness(double cor) {
+ this.correctness = cor;
+ }
+
+ public double getCorrectness() {
+ return this.correctness;
+ }
+
+ public void setCompleteness(double comp) {
+ this.completeness = comp;
+ }
+
+ public double getCompleteness() {
+ return this.completeness;
+ }
+
+ public void setAccuracy(double acc) {
+ this.accuracy = acc;
+ }
+
+ @Override
+ public String toString() {
+ String ret = OWLAPIRenderers.toManchesterOWLSyntax(this.getDescription());
+ ret += " [acc:" + dfPercent.format(this.getAccuracy());
+ ret += ", cor:" + dfPercent.format(this.getCorrectness());
+ ret += ", comp:" + dfPercent.format(this.completeness);
+ ret += ", horz:" + this.horizontalExpansion + "]";
+ return ret;
+
+ }
+
+ public void setDescription(OWLClassExpression newDescription) {
+ this.description = newDescription;
+ }
+
+ public static void enableCompactCoverageRepresentation(LearningProblem learningProblem) {
+ if (!(learningProblem instanceof ParCELPosNegLP)) {
+ throw new UnsupportedOperationException("Compacted coverage representation is only supported for ParCELPosNegLP learning problems.");
+ }
+
+ Set positives = ((ParCELPosNegLP) learningProblem).getPositiveExamples();
+ Set negatives = ((ParCELPosNegLP) learningProblem).getNegativeExamples();
+
+ enableCompactCoverageRepresentation(positives, negatives);
+ }
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELOntologyUtil.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELOntologyUtil.java
new file mode 100644
index 0000000000..76774db1b6
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELOntologyUtil.java
@@ -0,0 +1,83 @@
+package org.dllearner.algorithms.parcel;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.model.IRI;
+import org.semanticweb.owlapi.model.OWLOntology;
+import org.semanticweb.owlapi.model.OWLOntologyCreationException;
+import org.semanticweb.owlapi.model.OWLOntologyManager;
+import org.semanticweb.owlapi.model.OWLOntologyStorageException;
+
+/**
+ * Implementation of some utility functions for ontology manipulation
+ *
+ * @author An C. Tran
+ *
+ */
+
+public class ParCELOntologyUtil {
+
+ /**
+ * Load ontology from file into memory given its path
+ *
+ * @param ontologyFilePath
+ *
+ * @return Opened ontology
+ * @throws OWLOntologyCreationException
+ */
+ public static OWLOntology loadOntology(String ontologyFilePath)
+ throws OWLOntologyCreationException {
+ OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
+ OWLOntology ontology;
+
+ String flash = (System.getProperty("os.name").contains("Windows")) ? "/" : "";
+
+ File f = new File(ontologyFilePath);
+
+ if (!ontologyFilePath.contains("file:"))
+ ontologyFilePath = "file:" + flash + f.getAbsolutePath();
+
+ ontologyFilePath = ontologyFilePath.replace('\\', '/');
+
+ ontology = manager.loadOntology(IRI.create(ontologyFilePath));
+
+ return ontology;
+ }
+
+ /**
+ * Persist the ontology
+ *
+ * @param ontology Ontology which need to be persisted
+ *
+ * @throws OWLOntologyStorageException
+ */
+ public static void persistOntology(OWLOntology ontology) throws OWLOntologyStorageException {
+ OWLOntologyManager manager = ontology.getOWLOntologyManager();
+ manager.saveOntology(ontology);
+ }
+
+ /**
+ * Save an ontology to another file
+ *
+ * @param ontology
+ * Ontology contains changes
+ * @param newFilePath
+ * Path to the new ontology file
+ *
+ * @throws OWLOntologyStorageException
+ * @throws IOException
+ */
+ public static void persistOntology(OWLOntology ontology, String newFilePath)
+ throws OWLOntologyStorageException, IOException {
+ OWLOntologyManager manager = ontology.getOWLOntologyManager();
+
+ File f = new File(newFilePath);
+ FileOutputStream fo = new FileOutputStream(f);
+
+ manager.saveOntology(ontology, fo);
+ fo.close();
+ }
+
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELPosNegLP.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELPosNegLP.java
new file mode 100644
index 0000000000..3b8159f0ea
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELPosNegLP.java
@@ -0,0 +1,720 @@
+package org.dllearner.algorithms.parcel;
+
+/**
+ * ParCEL Learning problem: provides correctness, completeness, and accuracy calculation.
+ * Predictive accuracy calculation is used.
+ *
+ * This learning problem uses a different scoring in comparison with DL-Learner PosNegLP
+ *
+ * @author An C. Tran
+ */
+
+import java.util.Set;
+import java.util.HashSet;
+import java.util.TreeSet;
+
+import org.apache.log4j.Logger;
+import org.dllearner.core.*;
+import org.dllearner.core.config.ConfigOption;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+import org.semanticweb.owlapi.model.OWLIndividual;
+
+@ComponentAnn(name = "ParCELPosNegLP", shortName = "parcelPosNegLP", version = 0.1, description = "ParCEL Positive&Negative Learning Problem")
+public class ParCELPosNegLP extends AbstractClassExpressionLearningProblem {
+
+ protected Set positiveExamples;
+ protected Set negativeExamples;
+
+ @ConfigOption(description = "list of positive testing examples")
+ protected Set positiveTestExamples;
+ @ConfigOption(description = "list of negative testing examples")
+ protected Set negativeTestExamples;
+
+ //currently uncovered positive examples
+ protected Set uncoveredPositiveExamples;
+
+ private final Logger logger = Logger.getLogger(ParCELPosNegLP.class);
+
+ // reasoner component is declared in AbstractLearningProblem class
+
+ /**
+ * Constructor, used in case that positive and negative examples are provided when this
+ * component is initialized
+ *
+ * @param reasoningService
+ * Reasoner, provides reasoning service. Used to check the instance type
+ * @param positiveExamples
+ * Set of positive examples
+ * @param negativeExamples
+ * Set of negative examples
+ */
+ public ParCELPosNegLP(AbstractReasonerComponent reasoningService,
+ Set positiveExamples, Set negativeExamples) {
+ super(reasoningService);
+ this.positiveExamples = positiveExamples;
+ this.negativeExamples = negativeExamples;
+ this.uncoveredPositiveExamples = this.positiveExamples;
+ }
+
+ /**
+ * This constructor is used when the learning configuration file is used
+ *
+ * @param reasoningService
+ */
+ public ParCELPosNegLP(AbstractReasonerComponent reasoningService) {
+ super(reasoningService);
+ }
+
+ /**
+ * This constructor can be used by SpringDefinition to create bean object Properties of new bean
+ * may be initialised later using setters
+ */
+ public ParCELPosNegLP() {
+ super();
+ }
+
+ /**
+ * Get list of positive examples covered by a description
+ *
+ * @param description
+ * Description
+ *
+ * @return Set of positive examples covered by the description
+ */
+ protected Set coveredPositiveExamples(OWLClassExpression description) {
+ Set coveredPositiveExamples = new HashSet<>();
+
+ for (OWLIndividual example : positiveExamples)
+ if (reasoner.hasType(description, example))
+ coveredPositiveExamples.add(example);
+
+ return coveredPositiveExamples;
+ }
+
+ /**
+ * Get list of uncovered positive examples covered by a description
+ *
+ * @param description
+ * Description
+ *
+ * @return Set of positive examples covered by the description
+ */
+ protected Set coveredUncoveredPositiveExamples(OWLClassExpression description) {
+ Set coveredPositiveExamples = new HashSet<>();
+
+ for (OWLIndividual example : uncoveredPositiveExamples)
+ if (reasoner.hasType(description, example))
+ coveredPositiveExamples.add(example);
+
+ return coveredPositiveExamples;
+ }
+
+ /**
+ * Get number of positive examples covered by a description
+ *
+ * @param description
+ * Description
+ * @return Number of positive examples covered by the description
+ */
+ public int getNumberOfCoveredPositiveExamples(OWLClassExpression description) {
+ return getNumberOfCoveredPositiveExamples(description, positiveExamples);
+ }
+
+ public int getNumberOfCoveredPositiveTestExamples(OWLClassExpression description) {
+ return getNumberOfCoveredPositiveExamples(description, positiveTestExamples);
+ }
+
+ protected int getNumberOfCoveredPositiveExamples(OWLClassExpression description, Set allPosExamples) {
+ int coveredPos = 0;
+
+ for (OWLIndividual example : allPosExamples) {
+ if (reasoner.hasType(description, example))
+ coveredPos++;
+ }
+
+ return coveredPos;
+ }
+
+ /**
+ * Get number of negative examples covered by a description
+ *
+ * @param description
+ * Description to test
+ *
+ * @return Number of negative examples covered by the description
+ */
+ public int getNumberOfCoveredNegativeExamples(OWLClassExpression description) {
+ return getNumberOfCoveredNegativeExamples(description, negativeExamples);
+ }
+
+ public int getNumberOfCoveredNegativeTestExamples(OWLClassExpression description) {
+ return getNumberOfCoveredNegativeExamples(description, negativeTestExamples);
+ }
+
+ protected int getNumberOfCoveredNegativeExamples(OWLClassExpression description, Set allNegExamples) {
+ int coveredNeg = 0;
+
+ for (OWLIndividual example : allNegExamples) {
+ if (reasoner.hasType(description, example)) {
+ coveredNeg++;
+ }
+ }
+
+ return coveredNeg;
+ }
+
+ /**
+ * Calculate predictive accuracy of a description pred-accuracy(D) =
+ * (covered-positive-examples(D) + uncovered-negative-examples(D)) / all-examples
+ *
+ * @param description
+ * Description whose accuracy will be calculated
+ *
+ * @return Predictive accuracy of a description
+ */
+ protected double accuracy_cal(OWLClassExpression description) {
+ int cp = this.getNumberOfCoveredPositiveExamples(description);
+ int un = this.negativeExamples.size()
+ - this.getNumberOfCoveredNegativeExamples(description);
+
+ return (cp + un) / (double) (positiveExamples.size() + negativeExamples.size());
+ }
+
+ protected double testAccuracy_cal(OWLClassExpression description) {
+ int cp = this.getNumberOfCoveredPositiveTestExamples(description);
+ int un = this.negativeTestExamples.size()
+ - this.getNumberOfCoveredNegativeTestExamples(description);
+
+ return (cp + un) / (double) (positiveTestExamples.size() + negativeTestExamples.size());
+ }
+
+ /**
+ * Calculate the correctness of a description
+ *
+ * @param description
+ * Description to calculate
+ *
+ * @return Correctness of the description
+ */
+ protected double correctness_cal(OWLClassExpression description) {
+ int un = this.negativeExamples.size()
+ - this.getNumberOfCoveredNegativeExamples(description);
+ return un / (double) this.negativeExamples.size();
+ }
+
+ /**
+ * Calculate the completeness of a description
+ *
+ * @param description
+ * Description to calculate
+ *
+ * @return Completeness of the description
+ */
+ protected double completeness_cal(OWLClassExpression description) {
+ int cp = this.getNumberOfCoveredPositiveExamples(description);
+ return cp / (double) this.positiveExamples.size();
+ }
+
+ /**
+ * Calculate accuracy, completeness and correctness:
+ * correctness(D) = not-covered-examples(D) / all-negative-examples
+ * completeness(D) = covered-positive-examples / all-positive-examples
+ * accuracy(D) = [covered-positive-examples(D) + not-covered-negative-examples(D)] /
+ * all-examples
+ * Noise has not been supported in the current version
+ *
+ *
+ * @param description
+ * Description to be calculated accuracy and correctness
+ *
+ * @return A ParCELEvaluationResult object. If the description is weak, its accuracy will be -1
+ *
+ * NOTE: do we need "weak" concept with the value of -1? How if we just simply assign 0
+ * for it?
+ */
+ public ParCELEvaluationResult getAccuracyAndCorrectness(OWLClassExpression description) {
+
+ int notCoveredPos = 0;
+ int notCoveredNeg = 0;
+ Set coveredPositiveExamples = new HashSet<>();
+
+ for (OWLIndividual example : positiveExamples) {
+ if (!reasoner.hasType(description, example))
+ notCoveredPos++;
+ else
+ coveredPositiveExamples.add(example);
+ }
+
+ if (coveredPositiveExamples.size() > 0) {
+
+ notCoveredNeg = negativeExamples.size()
+ - getNumberOfCoveredNegativeExamples(description);
+
+ double correctness = (double) notCoveredNeg / (double) negativeExamples.size();
+ double completeness = (double) coveredPositiveExamples.size() / positiveExamples.size();
+
+ // if the description is not a partial definition (correct), set of covered positive
+ // examples will not be used
+ if (correctness < 1.0d)
+ coveredPositiveExamples = null;
+
+ double accuracy = (positiveExamples.size() - notCoveredPos + notCoveredNeg)
+ / (double) (positiveExamples.size() + negativeExamples.size());
+
+ // accuracy = (covered positive examples + not covered negative examples) / all examples
+ // (completeness + correctness)
+ return new ParCELEvaluationResult(accuracy, correctness, completeness,
+ coveredPositiveExamples);
+
+ } else {
+ // a node will be considered as "weak" if it covers none of the positive example and
+ // the accuracy will be assigned -1
+ return new ParCELEvaluationResult(-1, 0, 0);
+ }
+
+ }
+
+ /**
+ * In this accuracy calculation, the accuracy value is based on the current uncovered positive
+ * examples but the covered positive examples returned still takes all positive examples into
+ * account
+ *
+ * @param description
+ * Description to be calculated
+ * @return
+ */
+ public ParCELEvaluationResult getAccuracyAndCorrectness21(OWLClassExpression description) {
+
+ int notCoveredNeg = 0;
+ Set coveredPositiveExamples = new HashSet<>();
+
+ // create a new set which contains all members of the uncovered positive examples
+ Set localUncoveredPositiveExamples = null;
+
+ if (this.uncoveredPositiveExamples != null) {
+ synchronized (this.uncoveredPositiveExamples) {
+ localUncoveredPositiveExamples = new HashSet<>(
+ this.uncoveredPositiveExamples);
+ }
+ } else
+ localUncoveredPositiveExamples = new HashSet<>(this.positiveExamples);
+
+ int originalNoOfUncoveredPositiveExamples = localUncoveredPositiveExamples.size();
+
+ // calculate the covered positive examples, we do
+ for (OWLIndividual example : positiveExamples) {
+ if (reasoner.hasType(description, example))
+ coveredPositiveExamples.add(example);
+ }
+
+ int noOfUpdatedCoveredPositiveExamples = localUncoveredPositiveExamples.size();
+ localUncoveredPositiveExamples.removeAll(coveredPositiveExamples);
+ noOfUpdatedCoveredPositiveExamples -= localUncoveredPositiveExamples.size();
+
+ if (noOfUpdatedCoveredPositiveExamples > 0) {
+ notCoveredNeg = negativeExamples.size()
+ - getNumberOfCoveredNegativeExamples(description);
+
+ double correctness = (double) notCoveredNeg / (double) negativeExamples.size();
+
+ double completeness = (double) coveredPositiveExamples.size() / positiveExamples.size();
+
+ // double accuracy = (positiveExamples.size() - notCoveredPos +
+ // notCoveredNeg)/(double)(positiveExamples.size() + negativeExamples.size());
+ double accuracy = (noOfUpdatedCoveredPositiveExamples + notCoveredNeg)
+ / (double) (originalNoOfUncoveredPositiveExamples + negativeExamples.size());
+ // accuracy = (covered positive examples + not covered negative examples) / all examples
+ // (completeness + correctness)
+
+ if (correctness < 1.0d)
+ coveredPositiveExamples = null;
+
+ return new ParCELEvaluationResult(accuracy, correctness, completeness,
+ coveredPositiveExamples);
+
+ } else {
+ // a node will be considered as "weak" if it covers none of the positive example and
+ // the accuracy will be assigned -1
+ return new ParCELEvaluationResult(-1, 0, 0);
+ }
+
+ }
+
+
+ /**
+ * In this accuracy calculation, the accuracy value is based on the current uncovered positive
+ * examples but the covered positive examples returned still takes all positive examples into
+ * account
+ *
+ * @param description
+ * Description to be calculated
+ * @return
+ */
+ public ParCELEvaluationResult getAccuracyAndCorrectness2(OWLClassExpression description, double noise) {
+
+ int notCoveredNeg = 0;
+ Set coveredPositiveExamples = new HashSet<>();
+
+ // create a new set which contains all members of the uncovered positive examples
+ Set localUncoveredPositiveExamples;
+
+ if (this.uncoveredPositiveExamples != null) {
+ synchronized (this.uncoveredPositiveExamples) {
+ localUncoveredPositiveExamples = new HashSet<>(
+ this.uncoveredPositiveExamples);
+ }
+ } else
+ localUncoveredPositiveExamples = new HashSet<>(this.positiveExamples);
+
+ int originalNoOfUncoveredPositiveExamples = localUncoveredPositiveExamples.size();
+
+ // calculate the covered positive examples, we do
+ for (OWLIndividual example : positiveExamples) {
+ if (reasoner.hasType(description, example))
+ coveredPositiveExamples.add(example);
+ }
+
+ int noOfUpdatedCoveredPositiveExamples = localUncoveredPositiveExamples.size();
+ localUncoveredPositiveExamples.removeAll(coveredPositiveExamples);
+ noOfUpdatedCoveredPositiveExamples -= localUncoveredPositiveExamples.size();
+
+ if (noOfUpdatedCoveredPositiveExamples > 0) {
+ notCoveredNeg = negativeExamples.size()
+ - getNumberOfCoveredNegativeExamples(description);
+
+ double correctness = (double) notCoveredNeg / (double) negativeExamples.size();
+
+ double completeness = (double) coveredPositiveExamples.size() / positiveExamples.size();
+
+ // double accuracy = (positiveExamples.size() - notCoveredPos +
+ // notCoveredNeg)/(double)(positiveExamples.size() + negativeExamples.size());
+ double accuracy = (noOfUpdatedCoveredPositiveExamples + notCoveredNeg)
+ / (double) (originalNoOfUncoveredPositiveExamples + negativeExamples.size());
+ // accuracy = (covered positive examples + not covered negative examples) / all examples
+ // (completeness + correctness)
+
+ if (correctness < 1.0d - noise)
+ coveredPositiveExamples = null;
+
+ return new ParCELEvaluationResult(accuracy, correctness, completeness,
+ coveredPositiveExamples);
+
+ } else {
+ // a node will be considered as "weak" if it covers none of the positive example and
+ // the accuracy will be assigned -1
+ return new ParCELEvaluationResult(-1, 0, 0);
+ }
+
+ }
+
+
+ /**
+ * In this accuracy calculation, positive examples covered by a new partial definition will be
+ * removed from all further calculations
+ *
+ * @param description
+ * Description to be calculated
+ * @return
+ */
+ public ParCELEvaluationResult getAccuracyAndCorrectness3(OWLClassExpression description) {
+
+ int notCoveredNeg = 0;
+ Set coveredPositiveExamples = new HashSet<>();
+
+ // create a new set which contains all members of the uncovered positive examples
+ Set localUncoveredPositiveExamples = null;
+
+ if (this.uncoveredPositiveExamples != null) {
+ synchronized (this.uncoveredPositiveExamples) {
+ localUncoveredPositiveExamples = new HashSet<>(
+ this.uncoveredPositiveExamples);
+ }
+ } else
+ localUncoveredPositiveExamples = new HashSet<>(this.positiveExamples);
+
+ // calculate the covered positive examples, we do
+ for (OWLIndividual example : localUncoveredPositiveExamples) {
+ if (reasoner.hasType(description, example))
+ coveredPositiveExamples.add(example);
+ }
+
+ if (coveredPositiveExamples.size() > 0) {
+ notCoveredNeg = negativeExamples.size()
+ - getNumberOfCoveredNegativeExamples(description);
+
+ double correctness = (double) notCoveredNeg / (double) negativeExamples.size();
+ double completeness = (double) coveredPositiveExamples.size()
+ / localUncoveredPositiveExamples.size();
+
+ // double accuracy = (positiveExamples.size() - notCoveredPos +
+ // notCoveredNeg)/(double)(positiveExamples.size() + negativeExamples.size());
+ double accuracy = (coveredPositiveExamples.size() + notCoveredNeg)
+ / (double) (localUncoveredPositiveExamples.size() + negativeExamples.size());
+ // accuracy = (covered positive examples + not covered negative examples) / all examples
+ // (completeness + correctness)
+
+ if (correctness < 1.0d)
+ coveredPositiveExamples = null;
+
+ return new ParCELEvaluationResult(accuracy, correctness, completeness,
+ coveredPositiveExamples);
+
+ } else {
+ // a node will be considered as "weak" if it covers none of the positive example and
+ // the accuracy will be assigned -1
+ return new ParCELEvaluationResult(-1, 0, 0);
+ }
+
+ }
+
+ public ParCELEvaluationResult getAccuracyAndCorrectness4(OWLClassExpression description) {
+ Set coveredPositiveExamples = reasoner.hasType(description, positiveExamples);
+ Set coveredNegativeExamples = reasoner.hasType(description, negativeExamples);
+
+ return getAccuracyAndCorrectness4(coveredPositiveExamples, coveredNegativeExamples);
+ }
+
+ public ParCELEvaluationResult getAccuracyAndCorrectness4(Set coveredPositiveExamples, Set coveredNegativeExamples) {
+ if (coveredPositiveExamples.isEmpty()) {
+ return new ParCELEvaluationResult(-1, 0, 0);
+ }
+
+ return getAccuracyAndCorrectnessNoChecks(coveredPositiveExamples, coveredNegativeExamples);
+ }
+
+ public ParCELEvaluationResult getAccuracyAndCorrectness5(
+ OWLClassExpression description,
+ Set potentiallyCoveredPositiveExamples, Set potentiallyCoveredNegativeExamples
+ ) {
+ Set uncoveredPositives;
+
+ if (uncoveredPositiveExamples != null) {
+ synchronized (uncoveredPositiveExamples) {
+ uncoveredPositives = new HashSet<>(uncoveredPositiveExamples);
+ }
+ } else {
+ uncoveredPositives = new HashSet<>(positiveExamples);
+ }
+
+ Set potentiallyCoveredUncoveredPositives = new HashSet<>();
+ Set potentiallyCoveredCoveredPositives = new HashSet<>();
+
+ for (OWLIndividual ex : potentiallyCoveredPositiveExamples) {
+ if (uncoveredPositives.contains(ex)) {
+ potentiallyCoveredUncoveredPositives.add(ex);
+ } else {
+ potentiallyCoveredCoveredPositives.add(ex);
+ }
+ }
+
+ Set coveredPositives = reasoner.hasType(description, potentiallyCoveredUncoveredPositives);
+
+ if (coveredPositives.isEmpty()) {
+ return new ParCELEvaluationResult(-1, 0, 0);
+ }
+
+ coveredPositives.addAll(reasoner.hasType(description, potentiallyCoveredCoveredPositives));
+ Set coveredNegatives = reasoner.hasType(description, potentiallyCoveredNegativeExamples);
+
+ return getAccuracyAndCorrectness4(coveredPositives, coveredNegatives);
+ }
+
+ public double getAccuracy(int tp, int tn) {
+ return (tp + tn) / (double) (positiveExamples.size() + negativeExamples.size());
+ }
+
+ /**
+ * Accuracy calculation for the exception learning which provide both covered positive and
+ * negative examples by the description
+ *
+ * - cp(D) = empty
+ *
+ * - cn(D) = empty: weak description ==> may be ignored
+ * - cn(D) != empty: counter partial definition, especially used in learning with exceptions
+ *
+ * - cp(D) != empty
+ *
+ * - cn(D) = empty: partial definition
+ * - cn(D) != empty: potential description
+ *
+ *
+ *
+ *
+ * @param description
+ * Description to be calculated
+ * @return
+ */
+ public ParCELEvaluationResult getAccuracyAndCorrectnessEx(OWLClassExpression description) {
+ Set coveredPositiveExamples = new HashSet<>();
+ Set coveredNegativeExamples = new HashSet<>();
+
+ // calculate the set of positive examples covered by the description
+ for (OWLIndividual example : positiveExamples) {
+ if (reasoner.hasType(description, example))
+ coveredPositiveExamples.add(example);
+ }
+
+ // calculate the set of negative examples covered by the description
+ for (OWLIndividual example : negativeExamples) {
+ if (reasoner.hasType(description, example))
+ coveredNegativeExamples.add(example);
+ }
+
+ return getAccuracyAndCorrectnessNoChecks(coveredPositiveExamples, coveredNegativeExamples);
+ }
+
+ public ParCELEvaluationResult getAccuracyAndCorrectnessNoChecks(Set coveredPositiveExamples, Set coveredNegativeExamples) {
+ ParCELEvaluationResult result = new ParCELEvaluationResult();
+
+ int cp = coveredPositiveExamples.size();
+ int un = negativeExamples.size() - coveredNegativeExamples.size();
+
+ result.accuracy = (cp + un) / (double) (positiveExamples.size() + negativeExamples.size());
+ result.correctness = un / (double) negativeExamples.size();
+ result.completeness = cp / (double) positiveExamples.size();
+
+ result.coveredPositiveExamples = coveredPositiveExamples;
+ result.coveredNegativeExamples = coveredNegativeExamples;
+
+ return result;
+ }
+
+ public static String getName() {
+ return "PDLL pos neg learning problem";
+ }
+
+ /**
+ * PDLLScore = {accuracy, correctness}
+ */
+ @Override
+ public ParCELScore computeScore(OWLClassExpression description) {
+ double correctness = this.correctness_cal(description);
+ double accuracy = this.accuracy_cal(description);
+
+ return new ParCELScore(accuracy, correctness);
+ }
+
+ @Override
+ public ParCELScore computeScore(OWLClassExpression description, double noise) {
+ double correctness = this.correctness_cal(description);
+ double accuracy = this.accuracy_cal(description);
+
+ return new ParCELScore(accuracy, correctness);
+ }
+
+
+ /**
+ * Create evaluated description
+ */
+ @Override
+ public EvaluatedDescription evaluate(OWLClassExpression description) {
+ ParCELScore score = this.computeScore(description);
+
+ return new EvaluatedDescription(description, score);
+ }
+
+ public double getAccuracy(OWLClassExpression description) {
+ return accuracy_cal(description);
+ }
+
+ public double getTestAccuracy(OWLClassExpression description) {
+ return testAccuracy_cal(description);
+ }
+
+ @Override
+ public double getAccuracyOrTooWeak(OWLClassExpression description, double noise) {
+ throw new RuntimeException("getAccuracyOrTooWeak() is not supported by PDLLPosNegLP");
+ }
+
+ @Override
+ public void init() throws ComponentInitException {
+ // super.init();
+ }
+
+ public Set getPositiveExamples() {
+ return this.positiveExamples;
+ }
+
+ public void setPositiveExamples(Set positiveExamples) {
+ this.positiveExamples = positiveExamples;
+ }
+
+ public Set getNegativeExamples() {
+ return this.negativeExamples;
+ }
+
+ public void setNegativeExamples(Set negativeExamples) {
+ this.negativeExamples = negativeExamples;
+ }
+
+ public void setUncoveredPositiveExamples(Set uncoveredPositiveExamples) {
+ if (this.uncoveredPositiveExamples == null) {
+ this.uncoveredPositiveExamples = uncoveredPositiveExamples;
+ }
+ }
+
+ public void setPositiveTestExamples(Set positiveTestExamples) {
+ this.positiveTestExamples = positiveTestExamples;
+ }
+
+ public void setNegativeTestExamples(Set negativeTestExamples) {
+ this.negativeTestExamples = negativeTestExamples;
+ }
+
+ public Set getPositiveTestExamples() {
+ return this.positiveTestExamples;
+ }
+
+ public Set getNegativeTestExamples() {
+ return this.negativeTestExamples;
+ }
+
+ public void printTestEvaluation(Set partialDefinitions) {
+ Set tp = new TreeSet<>();
+
+ for (OWLClassExpression def : partialDefinitions) {
+ tp.addAll(reasoner.hasType(def, positiveTestExamples));
+ }
+
+ Set fn = new TreeSet<>(positiveTestExamples);
+ fn.removeAll(tp);
+
+ Set fp = new TreeSet<>();
+
+ for (OWLClassExpression def : partialDefinitions) {
+ Set defFP = reasoner.hasType(def, negativeTestExamples);
+
+ for (OWLIndividual ex : defFP) {
+ logger.info("Partial definition: " + def);
+ logger.info("False positive: " + ex.toStringID());
+ }
+
+ fp.addAll(defFP);
+ }
+
+ Set tn = new TreeSet<>(negativeTestExamples);
+ tn.removeAll(fp);
+
+ double acc = (tp.size() + tn.size()) / (double) (positiveTestExamples.size() + negativeTestExamples.size());
+ double precision = tp.size() / (double) (tp.size() + fp.size());
+ double rec = tp.size() / (double) (tp.size() + fn.size());
+ double spec = tn.size() / (double) (fp.size() + tn.size());
+ double fpr = fp.size() / (double) (fp.size() + tn.size());
+ double fm = 2 / ((1 / precision) + (1 / rec));
+
+ logger.info("======================================================");
+ logger.info("Test evaluation.");
+ logger.info("True positives: " + tp.size());
+ logger.info("True negatives: " + tn.size());
+ logger.info("False positives: " + fp.size());
+ logger.info("False negatives: " + fn.size());
+
+ logger.info("Accuracy: " + acc);
+ logger.info("Precission: " + precision);
+ logger.info("Recall: " + rec);
+ logger.info("Specificity: " + spec);
+ logger.info("FP rate: " + fpr);
+ logger.info("F-measure: " + fm);
+ }
+}
+
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELRefinementOperatorFactory.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELRefinementOperatorFactory.java
new file mode 100644
index 0000000000..b1b1b8e4fc
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELRefinementOperatorFactory.java
@@ -0,0 +1,210 @@
+package org.dllearner.algorithms.parcel;
+
+/**
+ * Refinement operator factory (RhoDRDown2008)
+ *
+ * @author An C. Tran
+ */
+
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.commons.pool2.BasePooledObjectFactory;
+import org.apache.commons.pool2.PooledObject;
+import org.apache.commons.pool2.impl.DefaultPooledObject;
+import org.apache.log4j.Logger;
+import org.dllearner.algorithms.parcel.split.ParCELDoubleSplitterAbstract;
+import org.dllearner.core.AbstractReasonerComponent;
+import org.dllearner.core.owl.ClassHierarchy;
+import org.dllearner.refinementoperators.RefinementOperator;
+import org.dllearner.refinementoperators.RhoDRDown;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+import org.semanticweb.owlapi.model.OWLDataFactory;
+import org.semanticweb.owlapi.model.OWLDataProperty;
+import org.semanticweb.owlapi.model.OWLLiteral;
+import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
+
+public class ParCELRefinementOperatorFactory extends BasePooledObjectFactory {
+
+ private final AbstractReasonerComponent reasoner;
+ private final ClassHierarchy classHierarchy;
+ private final OWLClassExpression startClass;
+ private Map> splits;
+ private int maxNoOfSplits;
+
+ private int cardinalityLimit = 5;
+ private boolean useNegation = true;
+ private boolean useRestrictedDisjunction = true;
+ private boolean useHasValue = true;
+ private boolean useHasData = false;
+ private boolean useCardinalityRestrictions = true;
+
+ private RhoDRDown operatorPrototype = null;
+
+ final Logger logger = Logger.getLogger(this.getClass());
+
+ public ParCELRefinementOperatorFactory(AbstractReasonerComponent reasoner, ClassHierarchy classHierarchy,
+ OWLClassExpression startClass, int maxNoOfSplits) {
+ this.reasoner = reasoner;
+ this.classHierarchy = classHierarchy;
+ this.startClass = startClass;
+ this.splits = null;
+ this.maxNoOfSplits = maxNoOfSplits;
+ }
+
+
+ public ParCELRefinementOperatorFactory(AbstractReasonerComponent reasoner, ClassHierarchy classHierarchy,
+ OWLClassExpression startClass, ParCELDoubleSplitterAbstract splitter) {
+ this.reasoner = reasoner;
+ this.classHierarchy = classHierarchy;
+ this.startClass = startClass;
+
+ OWLDataFactory df = new OWLDataFactoryImpl();
+ this.splits = splitter.computeSplits().entrySet().stream().collect(
+ Collectors.toMap(
+ Map.Entry::getKey,
+ e -> e.getValue().stream().map(df::getOWLLiteral).collect(Collectors.toList()))
+ );
+
+ if (logger.isDebugEnabled())
+ logger.debug("Splits is calculated: " + splits);
+ }
+
+
+ public ParCELRefinementOperatorFactory(AbstractReasonerComponent reasoner, ClassHierarchy classHierarchy,
+ OWLClassExpression startClass, Map> splits) {
+ this.reasoner = reasoner;
+ this.classHierarchy = classHierarchy;
+ this.startClass = startClass;
+
+ OWLDataFactory df = new OWLDataFactoryImpl();
+ this.splits = splits.entrySet().stream().collect(
+ Collectors.toMap(
+ Map.Entry::getKey,
+ e -> e.getValue().stream().map(df::getOWLLiteral).collect(Collectors.toList()))
+ );
+ }
+
+ public ParCELRefinementOperatorFactory(RhoDRDown operatorPrototype) {
+ reasoner = null;
+ classHierarchy = null;
+ startClass = null;
+ this.operatorPrototype = operatorPrototype.clone();
+ }
+
+ public ParCELRefinementOperatorFactory(RhoDRDown operatorPrototype, Map> splits) {
+ reasoner = null;
+ classHierarchy = null;
+ startClass = null;
+
+ OWLDataFactory df = new OWLDataFactoryImpl();
+ this.operatorPrototype = operatorPrototype.clone();
+ this.operatorPrototype.setSplits(
+ splits.entrySet().stream().collect(
+ Collectors.toMap(
+ Map.Entry::getKey,
+ e -> e.getValue().stream().map(df::getOWLLiteral).collect(Collectors.toList()))
+ )
+ );
+ }
+
+ @Override
+ public RefinementOperator create() throws Exception {
+ if (operatorPrototype == null) {
+ if (logger.isDebugEnabled())
+ logger.info("A new refinement operator had been created");
+
+ //create a new RhoDRDown and return
+ operatorPrototype = new RhoDRDown();
+ operatorPrototype.setReasoner(reasoner);
+ operatorPrototype.setClassHierarchy(classHierarchy);
+ operatorPrototype.setStartClass(startClass);
+ operatorPrototype.setUseDisjunction(false);
+ operatorPrototype.setUseRestrictedDisjunction(useRestrictedDisjunction);
+ operatorPrototype.setUseNegation(useNegation);
+ operatorPrototype.setUseDataHasValueConstructor(useHasData);
+ operatorPrototype.setUseHasValueConstructor(useHasValue);
+ operatorPrototype.setCardinalityLimit(cardinalityLimit);
+ operatorPrototype.setMaxNrOfSplits(maxNoOfSplits);
+ operatorPrototype.setCardinalityLimit(cardinalityLimit);
+ operatorPrototype.setUseCardinalityRestrictions(useCardinalityRestrictions);
+
+ if (this.splits != null) {
+ operatorPrototype.setSplits(splits);
+ }
+ }
+
+ if (!operatorPrototype.isInitialized()) {
+ operatorPrototype.init();
+ }
+
+ RhoDRDown refinementOperator = operatorPrototype.clone();
+
+ // init the refinement operator;
+ refinementOperator.init();
+
+ return refinementOperator;
+ }
+
+ @Override
+ public PooledObject wrap(RefinementOperator refinementOperator) {
+ return new DefaultPooledObject<>(refinementOperator);
+ }
+
+
+ public boolean isUseNegation() {
+ return useNegation;
+ }
+
+
+ public void setUseNegation(boolean useNegation) {
+ this.useNegation = useNegation;
+ }
+
+
+ public boolean isUseRestrictedDisjunction() {
+ return useRestrictedDisjunction;
+ }
+
+
+ public void setUseRestrictedDisjunction(boolean useRestrictedDisjunction) {
+ this.useRestrictedDisjunction = useRestrictedDisjunction;
+ }
+
+ public void setUseHasValue(boolean useHasValue) {
+ this.useHasValue = useHasValue;
+ }
+
+ public boolean getUseHasValue() {
+ return this.useHasValue;
+ }
+
+ public void setUseHasData(boolean useData) {
+ this.useHasData = useData;
+ }
+
+ public boolean getUseHasData() {
+ return this.useHasData;
+ }
+
+ public int getCardinalityLimit() {
+ return this.cardinalityLimit;
+ }
+
+ public void setCardinalityLimit(int cardinality) {
+ this.cardinalityLimit = cardinality;
+ }
+
+ public void setUseCardinalityRestrictions(boolean useCardinalityRestrictions) {
+ this.useCardinalityRestrictions = useCardinalityRestrictions;
+ }
+
+ public boolean getUseCardinalityRestrictions() {
+ return this.useCardinalityRestrictions;
+ }
+
+ public RefinementOperator getOperatorPrototype() {
+ return operatorPrototype;
+ }
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELRefinementOperatorPool.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELRefinementOperatorPool.java
new file mode 100644
index 0000000000..278cca519e
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELRefinementOperatorPool.java
@@ -0,0 +1,112 @@
+package org.dllearner.algorithms.parcel;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.pool2.impl.GenericObjectPool;
+import org.dllearner.algorithms.parcel.split.ParCELDoubleSplitterAbstract;
+import org.dllearner.core.AbstractReasonerComponent;
+import org.dllearner.core.owl.ClassHierarchy;
+import org.dllearner.refinementoperators.RefinementOperator;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+import org.semanticweb.owlapi.model.OWLDataProperty;
+
+/**
+ * Refinement operator pool
+ *
+ * @author An C. Tran
+ */
+public class ParCELRefinementOperatorPool extends GenericObjectPool {
+
+ /**
+ * Create refinement operator pool given max number of idle object without splitter
+ *
+ * @param reasoner
+ * @param classHierarchy
+ * @param startclass
+ * @param maxIdle
+ */
+ public ParCELRefinementOperatorPool(AbstractReasonerComponent reasoner, ClassHierarchy classHierarchy,
+ OWLClassExpression startclass, int maxIdle, int maxNoOfSplits) {
+ super(new ParCELRefinementOperatorFactory(reasoner, classHierarchy, startclass, maxNoOfSplits));
+ setMaxIdle(maxIdle);
+ }
+
+ /**
+ * Create refinement operator pool given max number of idle object and splitter
+ *
+ * @param reasoner
+ * @param classHierarchy
+ * @param startclass
+ * @param splits Splitter used to calculate the splits
+ * @param maxIdle
+ */
+ public ParCELRefinementOperatorPool(AbstractReasonerComponent reasoner, ClassHierarchy classHierarchy, OWLClassExpression startclass,
+ Map> splits, int maxIdle) {
+ super(new ParCELRefinementOperatorFactory(reasoner, classHierarchy, startclass, splits));
+ setMaxIdle(maxIdle);
+ }
+
+
+ public ParCELRefinementOperatorPool(AbstractReasonerComponent reasoner, ClassHierarchy classHierarchy, OWLClassExpression startclass,
+ ParCELDoubleSplitterAbstract splitter, int maxIdle) {
+ super(new ParCELRefinementOperatorFactory(reasoner, classHierarchy, startclass, splitter));
+ setMaxIdle(maxIdle);
+ }
+
+
+ public ParCELRefinementOperatorPool(ParCELRefinementOperatorFactory parcelRefinementFactory) {
+ super(parcelRefinementFactory);
+ }
+
+ /**
+ * Create refinement operator pool given max number of idle object, max capacity without splitter
+ *
+ * @param reasoner
+ * @param classHierarchy
+ * @param startclass
+ * @param maxIdle
+ * @param maxIdleCapacity
+ */
+ public ParCELRefinementOperatorPool(AbstractReasonerComponent reasoner, ClassHierarchy classHierarchy,
+ OWLClassExpression startclass, int maxIdle, int maxIdleCapacity,
+ int maxNoOfSplits) {
+ super(new ParCELRefinementOperatorFactory(reasoner, classHierarchy, startclass, maxNoOfSplits));
+ setMaxIdle(maxIdle);
+ setMaxTotal(maxIdleCapacity);
+ }
+
+
+ /**
+ * Create refinement operator pool given max number of idle object, max capacity and splitter
+ *
+ * @param reasoner
+ * @param classHierarchy
+ * @param startclass
+ * @param splitter
+ * @param maxIdle
+ * @param maxIdleCapacity
+ */
+ public ParCELRefinementOperatorPool(AbstractReasonerComponent reasoner, ClassHierarchy classHierarchy,
+ OWLClassExpression startclass, ParCELDoubleSplitterAbstract splitter,
+ int maxIdle, int maxIdleCapacity) {
+ super(new ParCELRefinementOperatorFactory(reasoner, classHierarchy, startclass, splitter));
+ setMaxIdle(maxIdle);
+ setMaxTotal(maxIdleCapacity);
+ }
+
+
+ public ParCELRefinementOperatorPool(AbstractReasonerComponent reasoner, ClassHierarchy classHierarchy,
+ OWLClassExpression startclass, Map> splits,
+ int maxIdle, int maxIdleCapacity) {
+ super(new ParCELRefinementOperatorFactory(reasoner, classHierarchy, startclass, splits));
+ setMaxIdle(maxIdle);
+ setMaxTotal(maxIdleCapacity);
+ }
+
+
+ public ParCELRefinementOperatorFactory getFactory() {
+ return (ParCELRefinementOperatorFactory) super.getFactory();
+ }
+
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELScore.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELScore.java
new file mode 100644
index 0000000000..689ebbd2c1
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELScore.java
@@ -0,0 +1,38 @@
+package org.dllearner.algorithms.parcel;
+
+import org.dllearner.core.Score;
+
+/**
+ * Immutable score object used to build an EvaluatedDescription; the accuracy
+ * and correctness values it carries drive the ordering of nodes in the search
+ * tree.
+ *
+ * @author An C. Tran
+ *
+ */
+@SuppressWarnings("serial")
+public class ParCELScore extends Score {
+
+    final double accuracy;
+    final double correctness;
+
+    /** Snapshot constructor: copies the node's current accuracy/correctness. */
+    public ParCELScore(ParCELNode node) {
+        this(node.getAccuracy(), node.getCorrectness());
+    }
+
+    /** Constructs a score from explicit accuracy and correctness values. */
+    public ParCELScore(double accuracy, double correctness) {
+        this.accuracy = accuracy;
+        this.correctness = correctness;
+    }
+
+    @Override
+    public double getAccuracy() {
+        return accuracy;
+    }
+
+    public double getCorrectness() {
+        return correctness;
+    }
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELStringUtilities.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELStringUtilities.java
new file mode 100644
index 0000000000..1462b840c5
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELStringUtilities.java
@@ -0,0 +1,48 @@
+package org.dllearner.algorithms.parcel;
+
+import java.util.Map;
+import java.util.Map.Entry;
+
+/**
+ * Small string helpers used by ParCEL logging and reporting.
+ */
+public class ParCELStringUtilities {
+
+    // utility class: no instances
+    private ParCELStringUtilities() {
+    }
+
+    /**
+     * Left-pads a string with "0" characters until it reaches the expected
+     * length. Strings that are already long enough are returned unchanged.
+     *
+     * @param s   string that needs to be padded with leading zeros
+     * @param len desired minimum length of the result
+     * @return the zero-padded string
+     */
+    public static String zeroPad(String s, int len) {
+        // StringBuilder avoids the O(n^2) repeated String concatenation of the naive loop
+        StringBuilder padded = new StringBuilder(Math.max(len, s.length()));
+        for (int i = s.length(); i < len; i++)
+            padded.append('0');
+
+        return padded.append(s).toString();
+    }
+
+    /**
+     * Shortens a URI by removing the base URI or by substituting known prefixes.
+     *
+     * @param uri      string to be shortened
+     * @param baseURI  base URI; null if no base URI should be used
+     * @param prefixes map from prefix names to namespace URIs; may be null
+     * @return the shortened URI
+     */
+    public static String replaceString(String uri, String baseURI, Map<String, String> prefixes) {
+        if (baseURI != null && uri.startsWith(baseURI)) {
+            return uri.replace(baseURI, "");
+        }
+        if (prefixes != null) {
+            // replace each namespace occurrence with its short prefix name
+            for (Entry<String, String> prefix : prefixes.entrySet())
+                uri = uri.replace(prefix.getValue(), prefix.getKey());
+        }
+        return uri;
+    }
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELWorker.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELWorker.java
new file mode 100644
index 0000000000..fd499635f3
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELWorker.java
@@ -0,0 +1,209 @@
+package org.dllearner.algorithms.parcel;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.google.common.collect.Sets;
+import org.dllearner.refinementoperators.RefinementOperator;
+import org.dllearner.utilities.owl.OWLClassExpressionLengthCalculator;
+import org.mindswap.pellet.exceptions.InternalReasonerException;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+import org.semanticweb.owlapi.model.OWLIndividual;
+
+/**
+ * ParCEL worker which finds and evaluates the refinements for a given node.
+ * It returns partial definitions and/or new descriptions to the learner if any.
+ *
+ * @author An C. Tran
+ */
+public class ParCELWorker extends ParCELWorkerAbstract<ParCELearner> {
+
+    /**
+     * Constructor for Worker class. A worker needs the following things: i) reducer (reference),
+     * ii) refinement operator, iii) start description, iv) worker name
+     *
+     * @param learner A reference to reducer which will be used to make a callback to return the result
+     *                to
+     * @param refinementOperatorPool Refinement operator pool used to refine the given node
+     * @param learningProblem A learning problem used to calculate description accuracy, correctness, etc.
+     * @param nodeToProcess Node will being processed
+     * @param name Name of the worker, assigned by reduce (for tracing purpose only)
+     */
+    public ParCELWorker(ParCELearner learner, ParCELRefinementOperatorPool refinementOperatorPool,
+                        ParCELPosNegLP learningProblem, ParCELNode nodeToProcess, String name) {
+        super(learner, refinementOperatorPool, learningProblem, nodeToProcess, name);
+    }
+
+
+    /**
+     * Constructor for Worker class. A worker needs the following things: i) reducer (reference),
+     * ii) refinement operator, iii) start description, iv) worker name
+     *
+     * @param learner A reference to reducer which will be used to make a callback to return the result
+     *                to
+     * @param refinementOperator Refinement operator used to refine the given node
+     * @param learningProblem A learning problem used to calculate description accuracy, correctness, etc.
+     * @param nodeToProcess Node will being processed
+     * @param name Name of the worker, assigned by reduce (for tracing purpose only)
+     */
+    public ParCELWorker(ParCELearner learner, RefinementOperator refinementOperator,
+                        ParCELPosNegLP learningProblem, ParCELNode nodeToProcess, String name) {
+        super(learner, refinementOperator, learningProblem, nodeToProcess, name);
+    }
+
+    /**
+     * Start the worker: refines the node given by the reducer, evaluates each
+     * refinement, and reports partial definitions and new descriptions back to
+     * the learner.
+     */
+    @Override
+    public void run() {
+
+        if (logger.isTraceEnabled())
+            logger.trace("[ParCEL-Worker] Processing node ("
+                    + ParCELStringUtilities.replaceString(nodeToProcess.toString(), this.baseURI,
+                    this.prefix));
+
+        if (!learner.canIncreaseCoverage(nodeToProcess)) {
+            // we can safely remove the node from the tree, since it is no longer valuable
+            // TODO: check that it is sufficient to remove it from the parent's children
+            nodeToProcess.getParent().getChildren().remove(nodeToProcess);
+            return;
+        }
+
+        // partial definitions found during this refinement step, if any
+        HashSet<ParCELExtraNode> definitionsFound = new HashSet<>();
+        // refinements that are not partial definitions (ordinary descriptions)
+        HashSet<ParCELNode> newNodes = new HashSet<>();
+
+        int horizExp = nodeToProcess.getHorizontalExpansion();
+
+        // 1. refine node
+        TreeSet<OWLClassExpression> refinements = refineNode(nodeToProcess);
+
+        if (refinements != null) {
+            if (logger.isTraceEnabled())
+                logger.trace("Refinement result ("
+                        + refinements.size()
+                        + "): "
+                        + ParCELStringUtilities.replaceString(refinements.toString(), this.baseURI,
+                        this.prefix));
+        }
+
+        // 2. process the refinement result: calculate the accuracy and completeness and add the new
+        // expression into the search tree
+        while (refinements != null && refinements.size() > 0) {
+            OWLClassExpression refinement = refinements.pollFirst();
+
+            int refinementLength = new OWLClassExpressionLengthCalculator().getLength(refinement);
+
+            // we ignore all refinements with lower length (may it happen?)
+            // (this also avoids duplicate children)
+            if (refinementLength >= horizExp) {
+
+                // calculate accuracy, correctness, positive examples covered by the description,
+                // resulted in a node
+                ParCELExtraNode addedNode = null;
+
+                try {
+                    addedNode = checkAndCreateNewNode(refinement, nodeToProcess);
+                } catch (InternalReasonerException | NullPointerException e) {
+                    // best effort: the reasoner may fail on exotic expressions;
+                    // skip this refinement, but leave a trace for debugging
+                    logger.trace("Evaluation of refinement failed: " + refinement, e);
+                }
+
+                // make decision on the new node (new search tree node or new partial definition)
+                if (addedNode != null) {
+
+                    // PARTIAL DEFINITION (correct and not necessary to be complete)
+                    if (addedNode.getCorrectness() >= 1.0d - learner.getNoiseAllowed() && addsMorePositives(addedNode)) {
+                        addedNode.setGenerationTime(System.currentTimeMillis());
+                        addedNode.setExtraInfo(learner.getTotalDescriptions());
+                        definitionsFound.add(addedNode);
+                    }
+                    // DESCRIPTION
+                    else
+                        newNodes.add((ParCELNode) addedNode);
+                } // if (node != null), i.e. weak description
+            }
+        } // while (refinements.size > 0)
+
+        horizExp = nodeToProcess.getHorizontalExpansion();
+
+        learner.updateMaxHorizontalExpansion(horizExp);
+
+        newNodes.add(nodeToProcess);
+
+        learner.newRefinementDescriptions(newNodes);
+
+        if (definitionsFound.size() > 0)
+            learner.newPartialDefinitionsFound(definitionsFound);
+    }
+
+    /**
+     * Calculate accuracy, correctness of a description and examples that are covered by this
+     * description
+     *
+     * @param description Description which is being calculated
+     * @param parentNode The node which contains the description which is used in the refinement that
+     *                   result the input description
+     * @return Null if the description is processed before, or a node which contains the description
+     */
+    private ParCELExtraNode checkAndCreateNewNode(OWLClassExpression description, ParCELNode parentNode) {
+
+        // redundancy check
+        boolean nonRedundant = learner.addDescription(description);
+        if (!nonRedundant)
+            return null; // false, node cannot be added
+
+        // currently, noise is not processed. it should be processed later
+        ParCELEvaluationResult accuracyAndCorrectness = learner.getAccuracyAndCorrectness(parentNode, description);
+
+        // description is too weak, i.e. covered no positive example
+        if (accuracyAndCorrectness.accuracy == -1.0d)
+            return null;
+
+        ParCELExtraNode newNode = new ParCELExtraNode(
+                parentNode, description,
+                accuracyAndCorrectness.accuracy, accuracyAndCorrectness.correctness,
+                accuracyAndCorrectness.completeness,
+                accuracyAndCorrectness.coveredPositiveExamples,
+                accuracyAndCorrectness.coveredNegativeExamples
+        );
+
+        if (parentNode != null)
+            parentNode.addChild(newNode);
+
+        return newNode;
+
+    } // addNode()
+
+    /**
+     * Decides whether accepting this node as a partial definition gains more
+     * new true positives than it adds new false positives, relative to the
+     * learner's current coverage.
+     */
+    private boolean addsMorePositives(ParCELNode node) {
+        if (node.getCorrectness() == 1) {
+            return true;
+        }
+
+        int tp, fp;
+
+        // guard shared coverage sets against concurrent worker updates
+        synchronized (learner.uncoveredPositiveExamples) {
+            tp = Sets.intersection(node.getCoveredPositiveExamples(), learner.uncoveredPositiveExamples).size();
+        }
+
+        synchronized (learner.coveredNegativeExamples) {
+            fp = Sets.difference(node.getCoveredNegativeExamples(), learner.coveredNegativeExamples).size();
+        }
+
+        return tp > fp;
+    }
+
+    // NOTE(review): the former getProcessingNode() override duplicated the
+    // identical method inherited from ParCELWorkerAbstract and was removed.
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELWorkerAbstract.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELWorkerAbstract.java
new file mode 100644
index 0000000000..6d1685383a
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELWorkerAbstract.java
@@ -0,0 +1,162 @@
+package org.dllearner.algorithms.parcel;
+
+import org.apache.log4j.Logger;
+import org.dllearner.refinementoperators.LengthLimitedRefinementOperator;
+import org.dllearner.refinementoperators.RefinementOperator;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+
+import java.util.Map;
+import java.util.TreeSet;
+
+public abstract class ParCELWorkerAbstract implements Runnable {
+
+ // name of worker (for debugging purpose)
+ protected String name;
+
+ // refinement operator used in refinement
+ protected final ParCELRefinementOperatorPool refinementOperatorPool;
+ protected RefinementOperator refinementOperator;
+
+ // reducer, used to make the callback to pass the result and get the next description for
+ // processing
+ protected final L learner;
+
+ // learning proble, provides accuracy & correctness calculation
+ protected final ParCELPosNegLP learningProblem;
+
+ // the node to be processed
+ protected final ParCELNode nodeToProcess;
+
+ protected final Logger logger = Logger.getLogger(this.getClass());
+
+ // these properties can be referred in Reducer. However, we put it here for faster access
+ protected final String baseURI;
+ protected final Map prefix;
+
+ /**
+ * Constructor for Worker class. A worker needs the following things: i) reducer (reference),
+ * ii) refinement operator, iii) start description, iv) worker name
+ *
+ * @param learner A reference to reducer which will be used to make a callback to return the result
+ * to
+ * @param refinementOperatorPool Refinement operator pool used to refine the given node
+ * @param learningProblem A learning problem used to calculate description accuracy, correctness, etc.
+ * @param nodeToProcess Node will being processed
+ * @param name Name of the worker, assigned by reduce (for tracing purpose only)
+ */
+ public ParCELWorkerAbstract(L learner, ParCELRefinementOperatorPool refinementOperatorPool,
+ ParCELPosNegLP learningProblem, ParCELNode nodeToProcess, String name) {
+ this.learner = learner;
+ this.refinementOperatorPool = refinementOperatorPool;
+ this.refinementOperator = null;
+
+ this.learningProblem = learningProblem;
+
+ this.nodeToProcess = nodeToProcess;
+ this.name = name;
+
+ this.baseURI = learner.getBaseURI();
+ this.prefix = learner.getPrefix();
+ }
+
+ /**
+ * Constructor for Worker class. A worker needs the following things: i) reducer (reference),
+ * ii) refinement operator, iii) start description, iv) worker name
+ *
+ * @param learner A reference to reducer which will be used to make a callback to return the result
+ * to
+ * @param refinementOperator Refinement operator used to refine the given node
+ * @param learningProblem A learning problem used to calculate description accuracy, correctness, etc.
+ * @param nodeToProcess Node will being processed
+ * @param name Name of the worker, assigned by reduce (for tracing purpose only)
+ */
+ public ParCELWorkerAbstract(L learner, RefinementOperator refinementOperator,
+ ParCELPosNegLP learningProblem, ParCELNode nodeToProcess, String name) {
+ this.learner = learner;
+ this.refinementOperator = refinementOperator;
+ this.refinementOperatorPool = null;
+
+ this.learningProblem = learningProblem;
+
+ this.nodeToProcess = nodeToProcess;
+ this.name = name;
+
+ this.baseURI = learner.getBaseURI();
+ this.prefix = learner.getPrefix();
+ }
+
+ /**
+ * Refine a node using RhoDRDown. The refined node will be increased the max horizontal
+ * expansion value by 1
+ *
+ * @param node Node to be refined
+ * @return Set of descriptions that are the results of refinement
+ */
+ protected TreeSet refineNode(ParCELNode node) {
+ int horizExp = node.getHorizontalExpansion();
+
+ if (logger.isTraceEnabled())
+ logger.trace("[" + this.name + "] Refining: "
+ + ParCELStringUtilities.replaceString(node.toString(), baseURI, prefix));
+
+ boolean refirementOperatorBorrowed = false;
+
+ // borrow refinement operator if necessary
+ if (this.refinementOperator == null) {
+ if (this.refinementOperatorPool == null) {
+ logger.error("Neither refinement operator nor refinement operator pool provided");
+ return null;
+ } else {
+ try {
+ // logger.info("borrowing a refinement operator (" +
+ // refinementOperatorPool.getNumIdle() + ")");
+ this.refinementOperator = this.refinementOperatorPool.borrowObject();
+ refirementOperatorBorrowed = true;
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ TreeSet refinements = null;
+ try {
+ // TODO that's odd, we should just restrict the whole code to LengthLimitedRefinementOperator
+ if (refinementOperator instanceof LengthLimitedRefinementOperator) {
+ refinements = (TreeSet) ((LengthLimitedRefinementOperator) refinementOperator).refine(node.getDescription(), horizExp);
+ } else {
+ refinements = (TreeSet) refinementOperator.refine(node.getDescription());
+ }
+
+ node.incHorizontalExpansion();
+ node.setRefinementCount(refinements.size());
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ // return the refinement operator
+ if (refirementOperatorBorrowed) {
+ try {
+ if (refinementOperator != null)
+ refinementOperatorPool.returnObject(refinementOperator);
+ else
+ logger.error("Cannot return the borrowed refinement operator");
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ return refinements;
+ }
+
+
+
+ /**
+ * Get the node which is currently being processed
+ *
+ * @return The node currently being processed
+ */
+ public ParCELNode getProcessingNode() {
+ return this.nodeToProcess;
+ }
+
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELWorkerThreadFactory.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELWorkerThreadFactory.java
new file mode 100644
index 0000000000..89c93f19de
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELWorkerThreadFactory.java
@@ -0,0 +1,20 @@
+package org.dllearner.algorithms.parcel;
+
+import java.util.concurrent.ThreadFactory;
+
+/**
+ * ParCEL worker factory
+ *
+ * @author An C. Tran
+ *
+ */
+
+public class ParCELWorkerThreadFactory implements ThreadFactory {
+    // thread-safe counter: a ThreadFactory may be invoked from several threads
+    // (e.g. by a thread pool), so plain int++ would risk duplicate names
+    private final java.util.concurrent.atomic.AtomicInteger count =
+            new java.util.concurrent.atomic.AtomicInteger(0);
+
+    static final String idPrefix = "ParCELWorker-";
+
+    /** Creates a worker thread named ParCELWorker-0, ParCELWorker-1, ... */
+    @Override
+    public Thread newThread(Runnable r) {
+        return new Thread(r, idPrefix + count.getAndIncrement());
+    }
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELearner.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELearner.java
new file mode 100644
index 0000000000..b826ecce81
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELearner.java
@@ -0,0 +1,393 @@
+package org.dllearner.algorithms.parcel;
+
+/**
+ * This class implements a Parallel Description Logic Learner (PDLL) using Worker/Reducer model.
+ * Basic configuration for a PDLL including:
+ *
+ * - numberOfWorkers: The number of workers. The default value for this parameter is 2.
+ * Basically, the number of workers should be 2 x number of cores
+ * - maxExecutionTimeInSecond: Timeout in seconds. By default, there is no timeout for the learning
+ * - maxNoOfSplits: Maximal number of splits used for numerical data properties. SHABDDoubleSplitter may be used.
+ *
+ *
+ * @author An C. Tran
+ */
+
+import org.dllearner.algorithms.celoe.OENode;
+import org.dllearner.algorithms.parcel.reducer.ParCELReducer;
+import org.dllearner.core.AbstractReasonerComponent;
+import org.dllearner.core.ComponentAnn;
+import org.dllearner.core.ComponentInitException;
+import org.dllearner.refinementoperators.DownwardRefinementOperator;
+import org.dllearner.utilities.owl.OWLAPIRenderers;
+import org.dllearner.utilities.owl.OWLClassExpressionLengthCalculator;
+import org.semanticweb.owlapi.model.OWLClassExpression;
+
+import javax.management.MBeanServer;
+import javax.management.ObjectName;
+import java.lang.management.ManagementFactory;
+import java.text.SimpleDateFormat;
+import java.util.*;
+import java.util.concurrent.ConcurrentSkipListSet;
+import java.util.concurrent.RejectedExecutionException;
+import java.util.stream.Collectors;
+
+@ComponentAnn(name = "ParCEL", shortName = "parcel", version = 0.1, description = "PARallel Class Expression Learning")
+public class ParCELearner extends ParCELAbstract implements ParCELearnerMBean {
+
+ /**
+ * ============================================================================================
+ * Constructor for PDLL learning algorithm
+ *
+ * @param learningProblem
+ * Must be a ParCELPosNegLP
+ * @param reasoningService
+ * A reasoner
+ */
+ public ParCELearner(ParCELPosNegLP learningProblem, AbstractReasonerComponent reasoningService) {
+ super(learningProblem, reasoningService);
+ }
+
+ /**
+ * This constructor can be used by SpringDefinition to create bean object Properties of new bean
+ * may be initialised later using setters
+ */
+ public ParCELearner() {
+ super();
+ }
+
+ /**
+ * ============================================================================================
+ * Get the name of this learning algorithm
+ *
+ * @return Name of this learning algorithm: ParCELearner
+ */
+ public static String getName() {
+ return "ParCELearner";
+ }
+
+ /**
+ * ============================================================================================
+ * Initialize the learning algorithm:
+ */
+ @Override
+ public void init() throws ComponentInitException {
+
+ // check the learning problem, this learning algorithm support ParCELPosNegLP only
+ if (!(learningProblem instanceof ParCELPosNegLP))
+ throw new ComponentInitException(learningProblem.getClass() + " is not supported by '"
+ + getName() + "' learning algorithm. Only ParCELPosNegLP is supported.");
+
+ // get the positive and negative examples from the learning problem
+ positiveExamples = ((ParCELPosNegLP) learningProblem).getPositiveExamples();
+ negativeExamples = ((ParCELPosNegLP) learningProblem).getNegativeExamples();
+ positiveTestExamples = ((ParCELPosNegLP) learningProblem).getPositiveTestExamples();
+ negativeTestExamples = ((ParCELPosNegLP) learningProblem).getNegativeTestExamples();
+
+ // clone the positive examples for this set to avoid affecting the Learning Problem
+ // this will be used to check the coverage of the partial definition (completeness)
+ this.uncoveredPositiveExamples = new HashSet<>(this.positiveExamples);
+
+ this.coveredNegativeExamples = new HashSet<>();
+
+ // initialize the heuristic which will be used by the reducer to sort the search tree
+ // the heuristic needs to get some constants from the configurator for scoring the descriptions
+ if (this.heuristic == null)
+ heuristic = new ParCELDefaultHeuristic();
+
+ // this will be revised later using least common super class of all observations
+ if (startClass == null) {
+ startClass = dataFactory.getOWLThing();
+ }
+
+ //TODO check this - what is noise? for positive or negative examples?
+ //----------------------
+ //this.uncoveredPositiveExampleAllowed = (int) Math.ceil(getNoisePercentage() * positiveExamples.size());
+ this.uncoveredPositiveExampleAllowed = 0;
+ noiseAllowed = this.noisePercentage/100d;
+ //----------------------
+
+ // initialize the set of currently uncovered positive examples
+ ((ParCELPosNegLP) this.learningProblem).setUncoveredPositiveExamples(uncoveredPositiveExamples);
+
+ // ----------------------------------
+ // create refinement operator pool
+ // ----------------------------------
+ initOperatorIfAny();
+ createRefinementOperatorPool();
+
+ baseURI = reasoner.getBaseURI();
+ prefix = reasoner.getPrefixes();
+
+ // logging the information (will use slf4j)
+ if (logger.isInfoEnabled()) {
+ logger.info("Heuristic used: " + heuristic.getClass());
+ logger.info("Training -> Positive examples: " + positiveExamples.size()
+ + ", negative examples: " + negativeExamples.size());
+ logger.info("Testing -> Positive examples: " + positiveTestExamples.size()
+ + ", negative examples: " + negativeTestExamples.size());
+ }
+
+ minNumberOfWorker = maxNumberOfWorker = numberOfWorkers;
+
+ }
+
+ protected void reset() {
+ // register a MBean for debugging purpose
+ try {
+ ObjectName parCELearnerBean = new ObjectName(
+ "org.dllearner.algorithms.parcel.ParCELearnerMBean:type=ParCELearnerBean");
+ MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
+ if (!mbs.isRegistered(parCELearnerBean))
+ mbs.registerMBean(this, parCELearnerBean);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ stop = false;
+ done = false;
+ timeout = false;
+
+ trainingTime = getCurrentCpuMillis();
+
+ //allDescriptions = new TreeSet(new ConceptComparator());
+ allDescriptions = new ConcurrentSkipListSet<>();
+
+ searchTree = new ConcurrentSkipListSet<>(heuristic.reversed());
+
+ partialDefinitions = new TreeSet<>(new ParCELCorrectnessComparator());
+
+ maxAccuracy = 0;
+ noOfCompactedPartialDefinition = 0;
+ noOfUncoveredPositiveExamples = this.positiveExamples.size();
+ }
+
+ /**
+ * ============================================================================================
+ * Start reducer:
+ * 1. Reset the status of reducer (stop, isRunning, done, timeout)
+ * 2. Reset the data structure (using reset() method)
+ * 3. Create a set of workers and add them into the worker pool
+ * NOTE: Each worker will have its own refinement operator
+ * 4. Prepare some data: pos/neg examples, uncovered positive examples, etc.
+ * 5. Start the learning progress:
+ * i) refine nodes in the (tree set)
+ * ii) evaluate nodes in unevaluated nodes (hash set)
+ *
+ */
+ /*
+ * (non-Javadoc)
+ * @see org.dllearner.core.LearningAlgorithm#start()
+ */
+ @Override
+ public void start() {
+ reset();
+
+ initSearchTree();
+
+ createWorkerPool();
+
+ // start time of the learner
+ miliStarttime = System.currentTimeMillis();
+
+ String timeStamp = new SimpleDateFormat("HH.mm.ss").format(new Date());
+ logger.info("Time " + getCurrentCpuMillis() / 1000.0 + "s; " + timeStamp);
+
+ // ----------------------------------------------------------
+ // perform the learning process until the conditions for
+ // termination meets
+ // ----------------------------------------------------------
+ while (!isTerminateCriteriaSatisfied()) {
+
+ ParCELNode nodeToProcess = searchTree.pollFirst();
+
+ // TODO: why this? why "blocking" concept does not help in this case?
+ // removing this check would exhaust the heap memory and no definition would be found
+ // NOTE: i) instead of using sleep, can we use semaphore here?
+ // ii) if using semaphore or blocking, timeout checking may not be performed on time?
+ while ((workerPool.getQueue().size() >= maxTaskQueueLength) && !done) {
+ try {
+ Thread.sleep(20);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+
+ //NOTE: the above WHILE loop and the following IF statement require checking "done"
+ // condition to prevent waiting for the full+finish job or the full+terminating workerpool
+
+ if ((nodeToProcess != null) && !done && !workerPool.isShutdown() && !workerPool.isTerminating()) {
+ try {
+ this.createNewTask(nodeToProcess);
+ } catch (RejectedExecutionException re) {
+ logger.error(re);
+
+ //if cannot submit the new task, return the node back to the search tree
+ this.searchTree.add(nodeToProcess);
+ }
+ }
+ } // while the algorithm is not finished
+
+ this.miliLearningTime = System.currentTimeMillis() - miliStarttime;
+
+ stop();
+
+ // -------------------------------
+ // post-learning processing
+ // -------------------------------
+ if (logger.isInfoEnabled()) {
+ synchronized (partialDefinitions) {
+ if (this.getCurrentlyOveralMaxCompleteness() == 1)
+ logger.info("Learning finishes in: " + this.miliLearningTime + "ms, with: "
+ + partialDefinitions.size() + " definitions");
+ else if (this.isTimeout()) {
+ logger.info("Learning timeout in " + this.maxExecutionTimeInSeconds
+ + "s. Overall completeness: "
+ + df.format(this.getCurrentlyOveralMaxCompleteness()));
+
+ logger.info("Uncovered positive examples left "
+ + this.uncoveredPositiveExamples.size()
+ + " - "
+ + ParCELStringUtilities.replaceString(
+ this.uncoveredPositiveExamples.toString(), this.baseURI,
+ this.prefix));
+ } else {
+ logger.info("Learning is manually terminated at " + this.miliLearningTime
+ + "ms. Overall completeness: "
+ + df.format(this.getCurrentlyOveralMaxCompleteness()));
+ logger.info("Uncovered positive examples left "
+ + this.uncoveredPositiveExamples.size()
+ + " - "
+ + ParCELStringUtilities.replaceString(
+ this.uncoveredPositiveExamples.toString(), this.baseURI,
+ this.prefix));
+ }
+
+ timeStamp = new SimpleDateFormat("HH.mm.ss").format(new Date());
+ logger.info("Time " + getCurrentCpuMillis() / 1000.0 + "s; " + timeStamp);
+
+ OWLClassExpression bestDescription = getUnionCurrentlyBestDescription();
+ double acc = computeAccuracy(bestDescription);
+ logger.info("Accuracy: " + acc);
+
+ logger.info("Total descriptions generated: " + allDescriptions.size()
+ + ", best description length: " + getCurrentlyBestDescriptionLength()
+ + ", max expansion length: " + getMaximumHorizontalExpansion());
+
+ logger.info("Compacted partial definitions:");
+ TreeSet compactedDefinitions = (TreeSet) this
+ .getReducedPartialDefinition();
+ this.noOfCompactedPartialDefinition = compactedDefinitions.size();
+ int count = 1;
+ for (ParCELExtraNode def : compactedDefinitions) {
+ int tpTest = learningProblem instanceof ParCELPosNegLP
+ ? ((ParCELPosNegLP) learningProblem).getNumberOfCoveredPositiveTestExamples(def.getDescription())
+ : 0;
+
+ logger.info(count++ + ". "
+ + OWLAPIRenderers.toManchesterOWLSyntax(def.getDescription())
+ + " (length:" + new OWLClassExpressionLengthCalculator().getLength(def.getDescription())
+ + ", accuracy: " + df.format(def.getAccuracy()) + " / " + computeTestAccuracy(def.getDescription())
+ + ", coverage: " + def.getNumberOfCoveredPositiveExamples() + " / " + tpTest + ")");
+
+ // print out the learning tree
+ /*
+ * if (logger.isDebugEnabled()) { OENode parent = def.getParent(); while (parent
+ * != null) { logger.debug(" <-- " +
+ * parent.getDescription().toManchesterSyntaxString(baseURI, prefix)); // print
+ * out the children nodes List children = parent.getChildren(); for
+ * (OENode child : children) logger.debug(" --> " +
+ * child.getDescription().toManchesterSyntaxString(baseURI, prefix)); parent =
+ * parent.getParent(); } }
+ */
+
+ }
+
+ if (learningProblem instanceof ParCELPosNegLP) {
+ Set partialDefs = getReducedPartialDefinition()
+ .stream().map(OENode::getDescription).collect(Collectors.toSet());
+
+ ((ParCELPosNegLP) learningProblem).printTestEvaluation(partialDefs);
+ }
+
+ printBestConceptsTimesAndAccuracies();
+ }
+ }
+
+ }
+
+ //create a new task given a PDLLNode
+ private void createNewTask(ParCELNode nodeToProcess) {
+ workerPool.execute(new ParCELWorker(this, this.refinementOperatorPool,
+ (ParCELPosNegLP) learningProblem, nodeToProcess, "ParCELTask-" + (noOfTask++)));
+ }
+
+ /**
+ * Determines whether the refinements of the description represented by a given node can cover any uncovered positive
+ * examples.
+ *
+ * @param nodeToProcess the node to investigate
+ * @return true if an increase is possible, false otherwise
+ */
+ protected boolean canIncreaseCoverage(ParCELNode nodeToProcess) {
+ if (refinementOperatorPool.getFactory().getOperatorPrototype() instanceof DownwardRefinementOperator) {
+ synchronized (uncoveredPositiveExamples) {
+ return nodeToProcess.getCoveredPositiveExamples().stream().anyMatch(uncoveredPositiveExamples::contains);
+ }
+ } else {
+ return true;
+ }
+ }
+
+ /**
+ * ============================================================================================
+ * Get the overall completeness of all partial definitions found
+ *
+ * @return Overall completeness so far
+ */
+ public double getCurrentlyOveralMaxCompleteness() {
+ return 1 - (uncoveredPositiveExamples.size() / (double) positiveExamples.size());
+ }
+
+ // methods related to the compactness: get compact definition, set compactor
+ public SortedSet getReducedPartialDefinition(ParCELReducer reducer) {
+ return reducer.reduce(partialDefinitions, positiveExamples,
+ uncoveredPositiveExamples.size());
+ }
+
+ public SortedSet getReducedPartialDefinition() {
+ return this.getReducedPartialDefinition(this.reducer);
+ }
+
+ public void setCompactor(ParCELReducer newCompactor) {
+ this.reducer = newCompactor;
+ }
+
+
+ // =============== MBean section =====================
+ /*
+ public int getActiveCount() {
+ return this.workerPool.getActiveCount();
+ }
+
+ public long getCompleteTaskCount() {
+ return this.workerPool.getCompletedTaskCount();
+ }
+
+ public long getTaskCount() {
+ return this.workerPool.getTaskCount();
+ }
+
+ public boolean isTerminated() {
+ return this.workerPool.isTerminated();
+ }
+
+ public boolean isShutdown() {
+ return this.workerPool.isShutdown();
+ }
+
+ public int getUncoveredPositiveExamples() {
+ return this.noOfUncoveredPositiveExamples;
+ }
+ */
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELearnerMBean.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELearnerMBean.java
new file mode 100644
index 0000000000..c3adb7772a
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/ParCELearnerMBean.java
@@ -0,0 +1,18 @@
+package org.dllearner.algorithms.parcel;
+
+
+/**
+ * Interface for a ParCELearner Bean
+ *
+ * @author An C. Tran
+ *
+ */
+public interface ParCELearnerMBean {
+
+ long getTotalDescriptions();
+ int getCurrentlyBestDescriptionLength();
+ double getCurrentlyBestAccuracy();
+ int getWorkerPoolSize();
+ int getSearchTreeSize();
+ int getCurrentlyMaxExpansion();
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/celoe/CELOEPartial.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/celoe/CELOEPartial.java
new file mode 100644
index 0000000000..7ada3d7ae2
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/celoe/CELOEPartial.java
@@ -0,0 +1,1290 @@
+/**
+ * Copyright (C) 2007 - 2016, Jens Lehmann
+ *
+ * This file is part of DL-Learner.
+ *
+ * DL-Learner is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * DL-Learner is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package org.dllearner.algorithms.parcel.celoe;
+
+import java.io.File;
+import java.util.*;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.collect.Sets;
+import com.jamonapi.Monitor;
+import com.jamonapi.MonitorFactory;
+import org.dllearner.algorithms.celoe.OEHeuristicRuntime;
+import org.dllearner.algorithms.celoe.OENode;
+import org.dllearner.core.*;
+import org.dllearner.core.config.ConfigOption;
+import org.dllearner.core.owl.ClassHierarchy;
+import org.dllearner.core.owl.DatatypePropertyHierarchy;
+import org.dllearner.core.owl.ObjectPropertyHierarchy;
+import org.dllearner.kb.OWLAPIOntology;
+import org.dllearner.learningproblems.ClassAsInstanceLearningProblem;
+import org.dllearner.learningproblems.ClassLearningProblem;
+import org.dllearner.learningproblems.PosNegLP;
+import org.dllearner.learningproblems.PosOnlyLP;
+import org.dllearner.reasoning.ClosedWorldReasoner;
+import org.dllearner.reasoning.OWLAPIReasoner;
+import org.dllearner.reasoning.ReasonerImplementation;
+import org.dllearner.reasoning.SPARQLReasoner;
+import org.dllearner.refinementoperators.*;
+import org.dllearner.utilities.*;
+import org.dllearner.utilities.datastructures.SearchTree;
+import org.dllearner.utilities.owl.*;
+import org.semanticweb.owlapi.apibinding.OWLManager;
+import org.semanticweb.owlapi.model.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.Marker;
+import org.slf4j.MarkerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import uk.ac.manchester.cs.owl.owlapi.OWLClassImpl;
+import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
+
+/**
+ * The CELOE (Class Expression Learner for Ontology Engineering) algorithm.
+ * It adapts and extends the standard supervised learning algorithm for the
+ * ontology engineering use case.
+ *
+ * @author Jens Lehmann
+ *
+ */
+@SuppressWarnings("CloneDoesntCallSuperClone")
+@ComponentAnn(name="CELOEP", shortName="celoep", version=1.0, description="CELOE is an adapted and extended version of the OCEL algorithm applied for the ontology engineering use case. See http://jens-lehmann.org/files/2011/celoe.pdf for reference.")
+public class CELOEPartial extends AbstractCELA implements Cloneable{
+
+ private static final Logger logger = LoggerFactory.getLogger(CELOEPartial.class);
+ private static final Marker sparql_debug = MarkerFactory.getMarker("SD");
+
+ private boolean isRunning = false;
+ private boolean stop = false;
+
+// private OEHeuristicStable heuristicStable = new OEHeuristicStable();
+// private OEHeuristicRuntime heuristicRuntime = new OEHeuristicRuntime();
+
+ @ConfigOption(description = "the refinement operator instance to use")
+ private LengthLimitedRefinementOperator operator;
+
+ private SearchTree searchTree;
+ @ConfigOption(defaultValue="celoe_heuristic")
+ private AbstractHeuristic heuristic; // = new OEHeuristicRuntime();
+ // the class with which we start the refinement process
+ @ConfigOption(defaultValue = "owl:Thing",
+ description = "You can specify a start class for the algorithm. To do this, you have to use Manchester OWL syntax either with full IRIs or prefixed IRIs.",
+ exampleValue = "ex:Male or http://example.org/ontology/Female")
+ private OWLClassExpression startClass;
+
+ // all descriptions in the search tree plus those which were too weak (for fast redundancy check)
+ private TreeSet descriptions;
+
+
+ // if true, then each solution is evaluated exactly instead of approximately
+ // private boolean exactBestDescriptionEvaluation = false;
+ @ConfigOption(defaultValue="false", description="Use this if you are interested in only one suggestion and your learning problem has many (more than 1000) examples.")
+ private boolean singleSuggestionMode;
+ private OWLClassExpression bestDescription;
+ private double bestAccuracy = Double.MIN_VALUE;
+
+ private OWLClass classToDescribe;
+ // examples are either 1.) instances of the class to describe 2.) positive examples
+ // 3.) union of pos.+neg. examples depending on the learning problem at hand
+ private Set examples;
+
+ // CELOE was originally created for learning classes in ontologies, but also
+ // works for other learning problem types
+ private boolean isClassLearningProblem;
+ private boolean isEquivalenceProblem;
+
+ // important parameters (non-config options but internal)
+ private double noise;
+
+ private boolean filterFollowsFromKB = false;
+
+ // less important parameters
+ // forces that one solution cannot be subexpression of another expression; this option is useful to get diversity
+ // but it can also suppress quite useful expressions
+ private boolean forceMutualDifference = false;
+
+ // utility variables
+
+ // statistical variables
+ private int expressionTests = 0;
+ private int minHorizExp = 1;
+ private int maxHorizExp = 0;
+ private long totalRuntimeNs = 0;
+
+ // TODO: turn those into config options
+
+
+ // important: do not initialise those with empty sets
+ // null = no settings for allowance / ignorance
+ // empty set = allow / ignore nothing (it is often not desired to allow no class!)
+ @ConfigOption(defaultValue="false", description="specifies whether to write a search tree")
+ private boolean writeSearchTree = false;
+
+ @ConfigOption(defaultValue="log/searchTree.txt", description="file to use for the search tree")
+ private String searchTreeFile = "log/searchTree.txt";
+
+ @ConfigOption(defaultValue="false", description="specifies whether to replace the search tree in the log file after each run or append the new search tree")
+ private boolean replaceSearchTree = false;
+
+ @ConfigOption(defaultValue="10", description="Sets the maximum number of results one is interested in. (Setting this to a lower value may increase performance as the learning algorithm has to store/evaluate/beautify less descriptions).")
+ private int maxNrOfResults = 10;
+
+ @ConfigOption(defaultValue="0.0", description="the (approximated) percentage of noise within the examples")
+ private double noisePercentage = 0.0;
+
+ @ConfigOption(defaultValue="false", description="If true, then the results will not contain suggestions, which already follow logically from the knowledge base. Be careful, since this requires a potentially expensive consistency check for candidate solutions.")
+ private boolean filterDescriptionsFollowingFromKB = false;
+
+ @ConfigOption(defaultValue="false", description="If true, the algorithm tries to find a good starting point close to an existing definition/super class of the given class in the knowledge base.")
+ private boolean reuseExistingDescription = false;
+
+ @ConfigOption(defaultValue="0", description="The maximum number of candidate hypothesis the algorithm is allowed to test (0 = no limit). The algorithm will stop afterwards. (The real number of tests can be slightly higher, because this criterion usually won't be checked after each single test.)")
+ private int maxClassExpressionTests = 0;
+
+ @ConfigOption(defaultValue="0", description = "The maximum number of candidate hypothesis the algorithm is allowed after an improvement in accuracy (0 = no limit). The algorithm will stop afterwards. (The real number of tests can be slightly higher, because this criterion usually won't be checked after each single test.)")
+ private int maxClassExpressionTestsAfterImprovement = 0;
+
+ @ConfigOption(defaultValue = "0", description = "maximum execution of the algorithm in seconds after last improvement")
+ private int maxExecutionTimeInSecondsAfterImprovement = 0;
+
+ @ConfigOption(defaultValue="false", description="specifies whether to terminate when noise criterion is met")
+ private boolean terminateOnNoiseReached = false;
+
+ @ConfigOption(defaultValue="7", description="maximum depth of description")
+ private double maxDepth = 7;
+
+ @ConfigOption(defaultValue="false", description="algorithm will terminate immediately when a correct definition is found")
+ private boolean stopOnFirstDefinition = false;
+
+ private int expressionTestCountLastImprovement;
+
+
+ @SuppressWarnings("unused")
+ private long timeLastImprovement = 0;
+ @ConfigOption(defaultValue = "false", description = "whether to try and refine solutions which already have accuracy value of 1")
+ private boolean expandAccuracy100Nodes = false;
+ private double currentHighestAccuracy;
+
+ // option to keep track of best score during algorithm run
+ private boolean keepTrackOfBestScore = false;
+ private SortedMap runtimeVsBestScore = new TreeMap<>();
+
+
+ // PARCEL stuff
+ private int posSize, negSize;
+ private HashSet partialDefinitions;
+ private HashSet unCoveredPos;
+ private boolean allPosCovered = false;
+ private long searchtreeSizeForBestDescription = 0;
+ private long learningtimeForBestDescription = 0;
+
+ public CELOEPartial() {}
+
+ public CELOEPartial(CELOEPartial celoe){
+ setReasoner(celoe.reasoner);
+ setLearningProblem(celoe.learningProblem);
+
+ setAllowedConcepts(celoe.getAllowedConcepts());
+ setAllowedObjectProperties(celoe.getAllowedObjectProperties());
+ setAllowedDataProperties(celoe.getAllowedDataProperties());
+
+ setIgnoredConcepts(celoe.ignoredConcepts);
+ setIgnoredObjectProperties(celoe.getIgnoredObjectProperties());
+ setIgnoredDataProperties(celoe.getIgnoredDataProperties());
+
+ setExpandAccuracy100Nodes(celoe.expandAccuracy100Nodes);
+ setFilterDescriptionsFollowingFromKB(celoe.filterDescriptionsFollowingFromKB);
+ setHeuristic(celoe.heuristic);
+
+ setMaxClassExpressionTests(celoe.maxClassExpressionTests);
+ setMaxClassExpressionTestsAfterImprovement(celoe.maxClassExpressionTestsAfterImprovement);
+ setMaxDepth(celoe.maxDepth);
+ setMaxExecutionTimeInSeconds(celoe.getMaxExecutionTimeInSeconds());
+ setMaxExecutionTimeInSecondsAfterImprovement(celoe.maxExecutionTimeInSecondsAfterImprovement);
+ setMaxNrOfResults(celoe.maxNrOfResults);
+ setNoisePercentage(celoe.noisePercentage);
+
+ LengthLimitedRefinementOperator op = new RhoDRDown((RhoDRDown)celoe.operator);
+ try {
+ op.init();
+ } catch (ComponentInitException e) {
+ e.printStackTrace();
+ }
+ setOperator(op);
+
+
+ setReuseExistingDescription(celoe.reuseExistingDescription);
+ setSingleSuggestionMode(celoe.singleSuggestionMode);
+ setStartClass(celoe.startClass);
+ setStopOnFirstDefinition(celoe.stopOnFirstDefinition);
+ setTerminateOnNoiseReached(celoe.terminateOnNoiseReached);
+ setUseMinimizer(celoe.isUseMinimizer());
+
+ setWriteSearchTree(celoe.writeSearchTree);
+ setReplaceSearchTree(celoe.replaceSearchTree);
+ }
+
+ public CELOEPartial(AbstractClassExpressionLearningProblem problem, AbstractReasonerComponent reasoner) {
+ super(problem, reasoner);
+ }
+
+ public static Collection> supportedLearningProblems() {
+ Collection> problems = new LinkedList<>();
+ problems.add(AbstractClassExpressionLearningProblem.class);
+ return problems;
+ }
+
+ @Override
+ public void init() throws ComponentInitException {
+ baseURI = reasoner.getBaseURI();
+ prefixes = reasoner.getPrefixes();
+
+ if(maxExecutionTimeInSeconds != 0 && maxExecutionTimeInSecondsAfterImprovement != 0) {
+ maxExecutionTimeInSeconds = Math.min(maxExecutionTimeInSeconds, maxExecutionTimeInSecondsAfterImprovement);
+ }
+
+ // TODO add comment
+ ClassHierarchy classHierarchy = initClassHierarchy();
+ ObjectPropertyHierarchy objectPropertyHierarchy = initObjectPropertyHierarchy();
+ DatatypePropertyHierarchy datatypePropertyHierarchy = initDataPropertyHierarchy();
+
+ // if no one injected a heuristic, we use a default one
+ if(heuristic == null) {
+ heuristic = new OEHeuristicRuntime();
+ heuristic.init();
+ }
+
+ minimizer = new OWLClassExpressionMinimizer(dataFactory, reasoner);
+
+ if (writeSearchTree) {
+ File f = new File(searchTreeFile);
+ if (f.getParentFile() != null) {
+ f.getParentFile().mkdirs();
+ }
+ Files.clearFile(f);
+ }
+
+ // start at owl:Thing by default
+ startClass = OWLAPIUtils.classExpressionPropertyExpanderChecked(this.startClass, reasoner, dataFactory, this::computeStartClass, logger);
+
+ bestEvaluatedDescriptions = new EvaluatedDescriptionSet(maxNrOfResults);
+
+ isClassLearningProblem = (learningProblem instanceof ClassLearningProblem);
+
+ // we put important parameters in class variables
+ noise = noisePercentage/100d;
+
+ // (filterFollowsFromKB is automatically set to false if the problem
+ // is not a class learning problem
+ filterFollowsFromKB = filterDescriptionsFollowingFromKB && isClassLearningProblem;
+
+ // actions specific to ontology engineering
+ if(isClassLearningProblem) {
+ ClassLearningProblem problem = (ClassLearningProblem) learningProblem;
+ classToDescribe = problem.getClassToDescribe();
+ isEquivalenceProblem = problem.isEquivalenceProblem();
+
+ examples = reasoner.getIndividuals(classToDescribe);
+ } else if(learningProblem instanceof PosOnlyLP) {
+ examples = ((PosOnlyLP)learningProblem).getPositiveExamples();
+ } else if(learningProblem instanceof PosNegLP) {
+ examples = Sets.union(((PosNegLP)learningProblem).getPositiveExamples(),((PosNegLP)learningProblem).getNegativeExamples());
+ posSize = (((PosNegLP)learningProblem).getPositiveExamples()).size();
+ negSize = (((PosNegLP)learningProblem).getNegativeExamples()).size();
+ }
+
+ // create a refinement operator and pass all configuration
+ // variables to it
+ if (operator == null) {
+ // we use a default operator and inject the class hierarchy for now
+ operator = new RhoDRDown();
+ ((CustomStartRefinementOperator) operator).setStartClass(startClass);
+ ((ReasoningBasedRefinementOperator) operator).setReasoner(reasoner);
+ }
+ if (operator instanceof CustomHierarchyRefinementOperator) {
+ ((CustomHierarchyRefinementOperator) operator).setClassHierarchy(classHierarchy);
+ ((CustomHierarchyRefinementOperator) operator).setObjectPropertyHierarchy(objectPropertyHierarchy);
+ ((CustomHierarchyRefinementOperator) operator).setDataPropertyHierarchy(datatypePropertyHierarchy);
+ }
+
+ if (!((AbstractRefinementOperator) operator).isInitialized())
+ operator.init();
+
+ initialized = true;
+ }
+
+ @Override
+ public void start() {
+ partialDefinitions = new HashSet<>();
+ unCoveredPos = new HashSet<>();
+
+ stop = false;
+ isRunning = true;
+ reset();
+ nanoStartTime = System.nanoTime();
+
+ unCoveredPos.addAll(((PosNegLP)learningProblem).getPositiveExamples());
+ allPosCovered = (unCoveredPos.size() == 0);
+
+ currentHighestAccuracy = 0.0;
+ OENode nextNode;
+
+ logger.info("start class:" + startClass);
+ addNode(startClass, null);
+
+ while (!terminationCriteriaSatisfied()) {
+ showIfBetterSolutionsFound();
+
+ // chose best node according to heuristics
+ nextNode = getNextNodeToExpand();
+ int horizExp = nextNode.getHorizontalExpansion();
+
+ // apply refinement operator
+ TreeSet refinements = refineNode(nextNode);
+
+ while(!refinements.isEmpty() && !terminationCriteriaSatisfied()) {
+ // pick element from set
+ OWLClassExpression refinement = refinements.pollFirst();
+
+ // get length of class expression
+ int length = OWLClassExpressionUtils.getLength(refinement);
+
+ // we ignore all refinements with lower length and too high depth
+ // (this also avoids duplicate node children)
+ if(length >= horizExp && OWLClassExpressionUtils.getDepth(refinement) <= maxDepth) {
+ // add node to search tree
+ addNode(refinement, nextNode);
+ }
+ }
+
+ showIfBetterSolutionsFound();
+
+ // update the global min and max horizontal expansion values
+ updateMinMaxHorizExp(nextNode);
+
+ // write the search tree (if configured)
+ if (writeSearchTree) {
+ writeSearchTree(refinements);
+ }
+ }
+
+ if(singleSuggestionMode) {
+ bestEvaluatedDescriptions.add(bestDescription, bestAccuracy, learningProblem);
+ }
+
+ // print some stats
+ printAlgorithmRunStats();
+
+ // print solution(s)
+ logger.info("solutions:\n" + getSolutionString());
+
+ isRunning = false;
+ }
+
+	/*
+	 * Compute the start class in the search space from which the refinement will start.
+	 * We use the intersection of super classes for definitions (since it needs to
+	 * capture all instances), but owl:Thing for learning subclasses (since it is
+	 * superfluous to add super classes in this case)
+	 */
+	private OWLClassExpression computeStartClass() {
+		OWLClassExpression startClass = dataFactory.getOWLThing();
+
+		if(isClassLearningProblem) {
+			if(isEquivalenceProblem) {
+				Set existingDefinitions = reasoner.getAssertedDefinitions(classToDescribe);
+				if(reuseExistingDescription && (existingDefinitions.size() > 0)) {
+					// the existing definition is reused, which in the simplest case means to
+					// use it as a start class or, if it is already too specific, generalise it
+
+					// pick the longest existing definition as candidate
+					OWLClassExpression existingDefinition = null;
+					int highestLength = 0;
+					for(OWLClassExpression exDef : existingDefinitions) {
+						if(OWLClassExpressionUtils.getLength(exDef) > highestLength) {
+							existingDefinition = exDef;
+							highestLength = OWLClassExpressionUtils.getLength(exDef);
+						}
+					}
+
+					LinkedList startClassCandidates = new LinkedList<>();
+					startClassCandidates.add(existingDefinition);
+					// hack for RhoDRDown: temporarily drop disjunctions while the
+					// operator is inverted for upward refinement
+					if(operator instanceof RhoDRDown) {
+						((RhoDRDown)operator).setDropDisjuncts(true);
+					}
+					LengthLimitedRefinementOperator upwardOperator = new OperatorInverter(operator);
+
+					// use upward refinement until we find an appropriate start class
+					boolean startClassFound = false;
+					OWLClassExpression candidate;
+					do {
+						candidate = startClassCandidates.pollFirst();
+						// recall < 1 means the candidate misses positives, so generalise further
+						if(((ClassLearningProblem)learningProblem).getRecall(candidate)<1.0) {
+							// add upward refinements to list
+							Set refinements = upwardOperator.refine(candidate, OWLClassExpressionUtils.getLength(candidate));
+//							System.out.println("ref: " + refinements);
+							LinkedList refinementList = new LinkedList<>(refinements);
+//							Collections.reverse(refinementList);
+//							System.out.println("list: " + refinementList);
+							startClassCandidates.addAll(refinementList);
+//							System.out.println("candidates: " + startClassCandidates);
+						} else {
+							startClassFound = true;
+						}
+					} while(!startClassFound);
+					startClass = candidate;
+
+					if(startClass.equals(existingDefinition)) {
+						logger.info("Reusing existing class expression " + OWLAPIRenderers.toManchesterOWLSyntax(startClass) + " as start class for learning algorithm.");
+					} else {
+						logger.info("Generalised existing class expression " + OWLAPIRenderers.toManchesterOWLSyntax(existingDefinition) + " to " + OWLAPIRenderers.toManchesterOWLSyntax(startClass) + ", which is used as start class for the learning algorithm.");
+					}
+
+					// restore the operator's normal behaviour
+					if(operator instanceof RhoDRDown) {
+						((RhoDRDown)operator).setDropDisjuncts(false);
+					}
+
+				} else {
+					Set superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe, true);
+					if(superClasses.size() > 1) {
+						startClass = dataFactory.getOWLObjectIntersectionOf(superClasses);
+					} else if(superClasses.size() == 1){
+						startClass = (OWLClassExpression) superClasses.toArray()[0];
+					} else {
+						startClass = dataFactory.getOWLThing();
+						logger.warn(classToDescribe + " is equivalent to owl:Thing. Usually, it is not " +
+								"sensible to learn a class expression in this case.");
+					}
+				}
+			}
+		}
+		return startClass;
+	}
+
+	/**
+	 * Picks the next search-tree node to expand, traversing nodes in descending
+	 * heuristic order.
+	 */
+	private OENode getNextNodeToExpand() {
+		// we expand the best node of those, which have not achieved 100% accuracy
+		// already and have a horizontal expansion equal to their length
+		// (rationale: further extension is likely to add irrelevant syntactical constructs)
+		Iterator it = searchTree.descendingIterator();
+		if (logger.isTraceEnabled()) {
+			for (OENode N:searchTree.getNodeSet()) {
+				logger.trace(sparql_debug,"`getnext:"+N);
+			}
+		}
+
+		while(it.hasNext()) {
+			OENode node = it.next();
+			logger.trace(sparql_debug,"``"+node+node.getAccuracy());
+			// if configured, even 100%-accuracy nodes are expanded as long as their
+			// horizontal expansion has not yet reached their length
+			if (isExpandAccuracy100Nodes() && node.getHorizontalExpansion() < OWLClassExpressionUtils.getLength(node.getDescription())) {
+				return node;
+			} else {
+				if(node.getAccuracy() < 1.0 || node.getHorizontalExpansion() < OWLClassExpressionUtils.getLength(node.getDescription())) {
+					return node;
+				}
+			}
+		}
+
+		// this should practically never be called, since for any reasonable learning
+		// task, we will always have at least one node with less than 100% accuracy
+		throw new RuntimeException("CELOE could not find any node with lesser accuracy.");
+	}
+
+	/**
+	 * Expands a node horizontally: applies the refinement operator up to the node's
+	 * current horizontal expansion and then increments that expansion.
+	 *
+	 * @return the set of refinements produced by the operator
+	 */
+	private TreeSet refineNode(OENode node) {
+		logger.trace(sparql_debug,"REFINE NODE " + node);
+		MonitorFactory.getTimeMonitor("refineNode").start();
+		// we have to remove and add the node since its heuristic evaluation changes through the expansion
+		// (you *must not* include any criteria in the heuristic which are modified outside of this method,
+		// otherwise you may see rarely occurring but critical false ordering in the nodes set)
+		searchTree.updatePrepare(node);
+		int horizExp = node.getHorizontalExpansion();
+		TreeSet refinements = (TreeSet) operator.refine(node.getDescription(), horizExp);
+		node.incHorizontalExpansion();
+		node.setRefinementCount(refinements.size());
+		// re-insert the node so the tree's ordering reflects the new expansion state
+		searchTree.updateDone(node);
+		MonitorFactory.getTimeMonitor("refineNode").stop();
+		return refinements;
+	}
+
+	/**
+	 * Add node to search tree if it is not too weak.
+	 *
+	 * @param description the candidate class expression
+	 * @param parentNode its parent in the search tree ({@code null} for the start class)
+	 * @return TRUE if node was added and FALSE otherwise
+	 */
+	private boolean addNode(OWLClassExpression description, OENode parentNode) {
+		String sparql_debug_out = "";
+		if (logger.isTraceEnabled()) sparql_debug_out = "DESC: " + description;
+		MonitorFactory.getTimeMonitor("addNode").start();
+
+		// redundancy check (return if redundant)
+		boolean nonRedundant = descriptions.add(description);
+		if(!nonRedundant) {
+			logger.trace(sparql_debug, sparql_debug_out + "REDUNDANT");
+			return false;
+		}
+
+		// check whether the class expression is allowed
+		if(!isDescriptionAllowed(description, parentNode)) {
+			logger.trace(sparql_debug, sparql_debug_out + "NOT ALLOWED");
+			return false;
+		}
+
+		// quality of class expression (return if too weak)
+		Monitor mon = MonitorFactory.start("lp");
+		logger.trace(sparql_debug, sparql_debug_out);
+		double accuracy = learningProblem.getAccuracyOrTooWeak(description, noise);
+		logger.trace(sparql_debug, "`acc:"+accuracy);
+		mon.stop();
+
+		// issue a warning if accuracy is not between 0 and 1 or -1 (too weak)
+		if(accuracy > 1.0 || (accuracy < 0.0 && accuracy != -1)) {
+			throw new RuntimeException("Invalid accuracy value " + accuracy + " for class expression " + description +
+					". This could be caused by a bug in the heuristic measure and should be reported to the DL-Learner bug tracker.");
+		}
+
+		// if the noise is enabled and the new description is correct (i.e. correct + noise description),
+		// record it as a partial definition and shrink the uncovered-positives set
+		if (accuracy >= 1) {
+			// NOTE(review): the guard above already throws for accuracy > 1.0, so this
+			// decrement appears unreachable — confirm the intended "correct + noise"
+			// accuracy encoding
+			if (accuracy > 1)
+				accuracy -= 1;
+
+			double coverage = (accuracy * (posSize + negSize) - negSize)/posSize;
+			partialDefinitions.add(new PartialDefinition(description, coverage));
+
+			// update the uncovered positive examples list
+			Set coveredPosTmp = new HashSet<>();
+			for (OWLIndividual ind : unCoveredPos) {
+				if (reasoner.hasType(description, ind))
+					coveredPosTmp.add(ind);
+			}
+
+			if (coveredPosTmp.size() > 0) {
+				unCoveredPos.removeAll(coveredPosTmp);
+				allPosCovered = (unCoveredPos.size() == 0);
+			}
+		}
+
+		expressionTests++;
+
+		// return FALSE if 'too weak'
+		if(accuracy == -1) {
+			return false;
+		}
+
+		OENode node = new OENode(description, accuracy);
+		searchTree.addNode(parentNode, node);
+
+		// in some cases (e.g. mutation) fully evaluating even a single class expression is too expensive
+		// due to the high number of examples -- so we just stick to the approximate accuracy
+		if(singleSuggestionMode) {
+			if(accuracy > bestAccuracy) {
+				bestAccuracy = accuracy;
+				bestDescription = description;
+				logger.info("more accurate (" + dfPercent.format(bestAccuracy) + ") class expression found: " + descriptionToString(bestDescription)); // + getTemporaryString(bestDescription));
+			}
+			return true;
+		}
+
+		// maybe add to best descriptions (method keeps set size fixed);
+		// we need to make sure that this does not get called more often than
+		// necessary since rewriting is expensive
+		boolean isCandidate = !bestEvaluatedDescriptions.isFull();
+		if(!isCandidate) {
+			EvaluatedDescription<? extends Score> worst = bestEvaluatedDescriptions.getWorst();
+			double accThreshold = worst.getAccuracy();
+			isCandidate =
+				(accuracy > accThreshold ||
+				(accuracy >= accThreshold && OWLClassExpressionUtils.getLength(description) < worst.getDescriptionLength()));
+		}
+
+		if(isCandidate) {
+			OWLClassExpression niceDescription = rewrite(node.getExpression());
+
+			if(niceDescription.equals(classToDescribe)) {
+				return false;
+			}
+
+			if(!isDescriptionAllowed(niceDescription, node)) {
+				return false;
+			}
+
+			// another test: none of the other suggested descriptions should be
+			// a subdescription of this one unless accuracy is different
+			// => comment: on the one hand, this appears to be too strict, because once A is a solution then everything containing
+			// A is not a candidate; on the other hand this suppresses many meaningless extensions of A
+			boolean shorterDescriptionExists = false;
+			if(forceMutualDifference) {
+				for(EvaluatedDescription<? extends Score> ed : bestEvaluatedDescriptions.getSet()) {
+					if(Math.abs(ed.getAccuracy()-accuracy) <= 0.00001 && ConceptTransformation.isSubdescription(niceDescription, ed.getDescription())) {
+						shorterDescriptionExists = true;
+						break;
+					}
+				}
+			}
+
+			if(!shorterDescriptionExists) {
+				if(!filterFollowsFromKB || !((ClassLearningProblem)learningProblem).followsFromKB(niceDescription)) {
+					bestEvaluatedDescriptions.add(niceDescription, accuracy, learningProblem);
+				}
+			}
+		}
+
+		return true;
+	}
+
+	/**
+	 * Checks whether the class expression is allowed as a candidate, i.e. it does
+	 * not trivially mention the class to describe (or its super classes) and any
+	 * newly introduced \forall restriction is meaningful for the examples.
+	 */
+	private boolean isDescriptionAllowed(OWLClassExpression description, OENode parentNode) {
+		if(isClassLearningProblem) {
+			if(isEquivalenceProblem) {
+				// the class to learn must not appear on the outermost property level
+				if(occursOnFirstLevel(description, classToDescribe)) {
+					return false;
+				}
+				if(occursOnSecondLevel(description, classToDescribe)) {
+					return false;
+				}
+			} else {
+				// none of the superclasses of the class to learn must appear on the
+				// outermost property level
+				TreeSet toTest = new TreeSet<>();
+				toTest.add(classToDescribe);
+				while(!toTest.isEmpty()) {
+					OWLClassExpression d = toTest.pollFirst();
+					if(occursOnFirstLevel(description, d)) {
+						return false;
+					}
+					toTest.addAll(reasoner.getClassHierarchy().getSuperClasses(d));
+				}
+			}
+		} else if (learningProblem instanceof ClassAsInstanceLearningProblem) {
+			return true;
+		}
+
+		// perform forall sanity tests
+		if (parentNode != null &&
+				(ConceptTransformation.getForallOccurences(description) > ConceptTransformation.getForallOccurences(parentNode.getDescription()))) {
+			// we have an additional \forall construct, so we now fetch the contexts
+			// in which it occurs
+			SortedSet contexts = ConceptTransformation.getForallContexts(description);
+			SortedSet parentContexts = ConceptTransformation.getForallContexts(parentNode.getDescription());
+			contexts.removeAll(parentContexts);
+			// we now have to perform sanity checks: if \forall is used, then there
+			// should be at least on class instance which has a filler at the given context
+			for(PropertyContext context : contexts) {
+				// transform [r,s] to \exists r.\exists s.\top
+				OWLClassExpression existentialContext = context.toExistentialContext();
+				boolean fillerFound = false;
+				if(reasoner instanceof SPARQLReasoner) {
+					// SPARQL case: intersect the retrieved individuals with the examples
+					SortedSet individuals = reasoner.getIndividuals(existentialContext);
+					fillerFound = !Sets.intersection(individuals, examples).isEmpty();
+				} else {
+					for(OWLIndividual instance : examples) {
+						if(reasoner.hasType(existentialContext, instance)) {
+							fillerFound = true;
+							break;
+						}
+					}
+				}
+
+				// if we do not find a filler, this means that putting \forall at
+				// that position is not meaningful
+				if(!fillerFound) {
+					return false;
+				}
+			}
+		}
+
+		// we do not want to have negations of sibling classes on the outermost level
+		// (they are expressed more naturally by saying that the siblings are disjoint,
+		// so it is reasonable not to include them in solutions)
+//		Set siblingClasses = reasoner.getClassHierarchy().getSiblingClasses(classToDescribe);
+//		for now, we just disable negation
+
+		return true;
+	}
+
+	/**
+	 * Determines whether the named class {@code cls} occurs as a direct operand of
+	 * the given class expression, i.e. at property depth 0. (It can still occur at
+	 * higher depth, e.g. if intersections are nested in unions.)
+	 */
+	private boolean occursOnFirstLevel(OWLClassExpression description, OWLClassExpression cls) {
+		if (cls.isOWLThing()) {
+			return false;
+		}
+		if (!(description instanceof OWLNaryBooleanClassExpression)) {
+			return false;
+		}
+		return ((OWLNaryBooleanClassExpression) description).getOperands().contains(cls);
+	}
+
+	// determine whether a named class occurs on the outermost level, i.e. property depth 0
+	// (it can still be at higher depth, e.g. if intersections are nested in unions)
+	// NOTE: the actual check below is currently disabled (commented out) and the
+	// method deliberately always returns false
+	private boolean occursOnSecondLevel(OWLClassExpression description, OWLClassExpression cls) {
+//		SortedSet superClasses = reasoner.getSuperClasses(cls);
+//		if(description instanceof OWLObjectIntersectionOf) {
+//			List operands = ((OWLObjectIntersectionOf) description).getOperandsAsList();
+//
+//			for (OWLClassExpression op : operands) {
+//				if(superClasses.contains(op) ||
+//						(op instanceof OWLObjectUnionOf && !Sets.intersection(((OWLObjectUnionOf)op).getOperands(),superClasses).isEmpty())) {
+//					for (OWLClassExpression op2 : operands) {
+//						if((op2 instanceof OWLObjectUnionOf && ((OWLObjectUnionOf)op2).getOperands().contains(cls))) {
+//							return true;
+//						}
+//					}
+//				}
+//			}
+//
+//			for (OWLClassExpression op1 : operands) {
+//				for (OWLClassExpression op2 : operands) {
+//					if(!op1.isAnonymous() && op2 instanceof OWLObjectUnionOf) {
+//						for (OWLClassExpression op3 : ((OWLObjectUnionOf)op2).getOperands()) {
+//							if(!op3.isAnonymous()) {// A AND B with Disj(A,B)
+//								if(reasoner.isDisjoint(op1.asOWLClass(), op3.asOWLClass())) {
+//									return true;
+//								}
+//							} else {// A AND NOT A
+//								if(op3 instanceof OWLObjectComplementOf && ((OWLObjectComplementOf)op3).getOperand().equals(op1)) {
+//									return true;
+//								}
+//							}
+//						}
+//					}
+//				}
+//			}
+//		}
+
+		return false;
+	}
+
+	/**
+	 * Returns true when any configured stopping condition holds: explicit stop,
+	 * expression-test budgets, wall-clock budgets, noise threshold reached, or
+	 * first definition found (a budget value of 0 disables that criterion).
+	 */
+	private boolean terminationCriteriaSatisfied() {
+		return
+				stop ||
+				(maxClassExpressionTestsAfterImprovement != 0 && (expressionTests - expressionTestCountLastImprovement >= maxClassExpressionTestsAfterImprovement)) ||
+				(maxClassExpressionTests != 0 && (expressionTests >= maxClassExpressionTests)) ||
+				(maxExecutionTimeInSecondsAfterImprovement != 0 && ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSecondsAfterImprovement* 1000000000L))) ||
+				(maxExecutionTimeInSeconds != 0 && ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds* 1000000000L))) ||
+				(terminateOnNoiseReached && (100*getCurrentlyBestAccuracy()>=100-noisePercentage)) ||
+				(stopOnFirstDefinition && (getCurrentlyBestAccuracy() >= 1));
+	}
+
+	private void reset() {
+		// set all values back to their default values (used for running
+		// the algorithm more than once)
+		searchTree = new SearchTree<>(heuristic);
+		descriptions = new TreeSet<>();
+		bestEvaluatedDescriptions.getSet().clear();
+		expressionTests = 0;
+		runtimeVsBestScore.clear();
+	}
+
+	/** Logs run statistics; also records the total runtime on normal termination. */
+	private void printAlgorithmRunStats() {
+		if (stop) {
+			logger.info("Algorithm stopped ("+expressionTests+" descriptions tested). " + searchTree.size() + " nodes in the search tree.\n");
+		} else {
+			totalRuntimeNs = System.nanoTime()-nanoStartTime;
+			logger.info("Algorithm terminated successfully (time: " + Helper.prettyPrintNanoSeconds(totalRuntimeNs) + ", "+expressionTests+" descriptions tested, " + searchTree.size() + " nodes in the search tree).\n");
+			logger.info(reasoner.toString());
+		}
+	}
+
+	/**
+	 * Logs (and optionally records) a newly found best accuracy; also resets the
+	 * improvement counters used by the *AfterImprovement termination criteria.
+	 */
+	private void showIfBetterSolutionsFound() {
+		if(!singleSuggestionMode && bestEvaluatedDescriptions.getBestAccuracy() > currentHighestAccuracy) {
+			currentHighestAccuracy = bestEvaluatedDescriptions.getBestAccuracy();
+			expressionTestCountLastImprovement = expressionTests;
+			timeLastImprovement = System.nanoTime();
+			long durationInMillis = getCurrentRuntimeInMilliSeconds();
+			String durationStr = getDurationAsString(durationInMillis);
+
+			// track new best accuracy if enabled
+			if(keepTrackOfBestScore) {
+				runtimeVsBestScore.put(getCurrentRuntimeInMilliSeconds(), currentHighestAccuracy);
+			}
+			logger.info("more accurate (" + dfPercent.format(currentHighestAccuracy) + ") class expression found after " + durationStr + ": " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription()));
+		}
+	}
+
+	/**
+	 * Writes the current search tree to the configured file, replacing or appending
+	 * depending on the replaceSearchTree flag. Note that the passed refinement set
+	 * only contains the refinements not yet consumed by the main loop.
+	 */
+	private void writeSearchTree(TreeSet refinements) {
+		StringBuilder treeString = new StringBuilder("best node: ").append(bestEvaluatedDescriptions.getBest()).append("\n");
+		if (refinements.size() > 1) {
+			treeString.append("all expanded nodes:\n");
+			for (OWLClassExpression ref : refinements) {
+				treeString.append("   ").append(ref).append("\n");
+			}
+		}
+		treeString.append(TreeUtils.toTreeString(searchTree)).append("\n");
+
+		// replace or append
+		if (replaceSearchTree) {
+			Files.createFile(new File(searchTreeFile), treeString.toString());
+		} else {
+			Files.appendToFile(new File(searchTreeFile), treeString.toString());
+		}
+	}
+
+	/**
+	 * Updates the global minimum and maximum horizontal expansion after the given
+	 * node has been expanded.
+	 */
+	private void updateMinMaxHorizExp(OENode node) {
+		int newHorizExp = node.getHorizontalExpansion();
+
+		// update maximum value
+		maxHorizExp = Math.max(maxHorizExp, newHorizExp);
+
+		// we just expanded a node with minimum horizontal expansion;
+		// we need to check whether it was the last one
+		if(minHorizExp == newHorizExp - 1) {
+
+			// the best accuracy that a node can achieve
+			double scoreThreshold = heuristic.getNodeScore(node) + 1 - node.getAccuracy();
+
+			for(OENode n : searchTree.descendingSet()) {
+				if(n != node) {
+					if(n.getHorizontalExpansion() == minHorizExp) {
+						// we can stop instantly when another node with min. horiz. exp. exists
+						return;
+					}
+					if(heuristic.getNodeScore(n) < scoreThreshold) {
+						// we can stop traversing nodes when their score is too low
+						break;
+					}
+				}
+			}
+
+			// inc. minimum since we found no other node which also has min. horiz. exp.
+			minHorizExp++;
+		}
+	}
+
+	@Override
+	public OWLClassExpression getCurrentlyBestDescription() {
+		EvaluatedDescription<? extends Score> ed = getCurrentlyBestEvaluatedDescription();
+		return ed == null ? null : ed.getDescription();
+	}
+
+	@Override
+	public List<OWLClassExpression> getCurrentlyBestDescriptions() {
+		return bestEvaluatedDescriptions.toDescriptionList();
+	}
+
+	@Override
+	public EvaluatedDescription<? extends Score> getCurrentlyBestEvaluatedDescription() {
+		return bestEvaluatedDescriptions.getBest();
+	}
+
+	@Override
+	public NavigableSet<? extends EvaluatedDescription<? extends Score>> getCurrentlyBestEvaluatedDescriptions() {
+		return bestEvaluatedDescriptions.getSet();
+	}
+
+	/** @return the accuracy of the currently best evaluated description */
+	public double getCurrentlyBestAccuracy() {
+		return bestEvaluatedDescriptions.getBest().getAccuracy();
+	}
+
+	@Override
+	public boolean isRunning() {
+		return isRunning;
+	}
+
+	// ---- configuration accessors ----
+
+	@Override
+	public void stop() {
+		stop = true;
+	}
+
+	public int getMaximumHorizontalExpansion() {
+		return maxHorizExp;
+	}
+
+	public int getMinimumHorizontalExpansion() {
+		return minHorizExp;
+	}
+
+	/**
+	 * @return the expressionTests
+	 */
+	public int getClassExpressionTests() {
+		return expressionTests;
+	}
+
+	public LengthLimitedRefinementOperator getOperator() {
+		return operator;
+	}
+
+	@Autowired(required=false)
+	public void setOperator(LengthLimitedRefinementOperator operator) {
+		this.operator = operator;
+	}
+
+	public OWLClassExpression getStartClass() {
+		return startClass;
+	}
+
+	public void setStartClass(OWLClassExpression startClass) {
+		this.startClass = startClass;
+	}
+
+	public boolean isWriteSearchTree() {
+		return writeSearchTree;
+	}
+
+	public void setWriteSearchTree(boolean writeSearchTree) {
+		this.writeSearchTree = writeSearchTree;
+	}
+
+	public String getSearchTreeFile() {
+		return searchTreeFile;
+	}
+
+	public void setSearchTreeFile(String searchTreeFile) {
+		this.searchTreeFile = searchTreeFile;
+	}
+
+	public int getMaxNrOfResults() {
+		return maxNrOfResults;
+	}
+
+	public void setMaxNrOfResults(int maxNrOfResults) {
+		this.maxNrOfResults = maxNrOfResults;
+	}
+
+	public double getNoisePercentage() {
+		return noisePercentage;
+	}
+
+	public void setNoisePercentage(double noisePercentage) {
+		this.noisePercentage = noisePercentage;
+	}
+
+	public boolean isFilterDescriptionsFollowingFromKB() {
+		return filterDescriptionsFollowingFromKB;
+	}
+
+	public void setFilterDescriptionsFollowingFromKB(boolean filterDescriptionsFollowingFromKB) {
+		this.filterDescriptionsFollowingFromKB = filterDescriptionsFollowingFromKB;
+	}
+
+	public boolean isReplaceSearchTree() {
+		return replaceSearchTree;
+	}
+
+	public void setReplaceSearchTree(boolean replaceSearchTree) {
+		this.replaceSearchTree = replaceSearchTree;
+	}
+
+	public boolean isTerminateOnNoiseReached() {
+		return terminateOnNoiseReached;
+	}
+
+	public void setTerminateOnNoiseReached(boolean terminateOnNoiseReached) {
+		this.terminateOnNoiseReached = terminateOnNoiseReached;
+	}
+
+	public boolean isReuseExistingDescription() {
+		return reuseExistingDescription;
+	}
+
+	public void setReuseExistingDescription(boolean reuseExistingDescription) {
+		this.reuseExistingDescription = reuseExistingDescription;
+	}
+
+	public AbstractHeuristic getHeuristic() {
+		return heuristic;
+	}
+
+	@Autowired(required=false)
+	public void setHeuristic(AbstractHeuristic heuristic) {
+		this.heuristic = heuristic;
+	}
+
+	// ---- budget / mode accessors ----
+
+	public int getMaxExecutionTimeInSecondsAfterImprovement() {
+		return maxExecutionTimeInSecondsAfterImprovement;
+	}
+
+	public void setMaxExecutionTimeInSecondsAfterImprovement(
+			int maxExecutionTimeInSecondsAfterImprovement) {
+		this.maxExecutionTimeInSecondsAfterImprovement = maxExecutionTimeInSecondsAfterImprovement;
+	}
+
+	public boolean isSingleSuggestionMode() {
+		return singleSuggestionMode;
+	}
+
+	public void setSingleSuggestionMode(boolean singleSuggestionMode) {
+		this.singleSuggestionMode = singleSuggestionMode;
+	}
+
+	public int getMaxClassExpressionTests() {
+		return maxClassExpressionTests;
+	}
+
+	public void setMaxClassExpressionTests(int maxClassExpressionTests) {
+		this.maxClassExpressionTests = maxClassExpressionTests;
+	}
+
+	public int getMaxClassExpressionTestsAfterImprovement() {
+		return maxClassExpressionTestsAfterImprovement;
+	}
+
+	public void setMaxClassExpressionTestsAfterImprovement(
+			int maxClassExpressionTestsAfterImprovement) {
+		this.maxClassExpressionTestsAfterImprovement = maxClassExpressionTestsAfterImprovement;
+	}
+
+	public double getMaxDepth() {
+		return maxDepth;
+	}
+
+	public void setMaxDepth(double maxDepth) {
+		this.maxDepth = maxDepth;
+	}
+
+	public boolean isStopOnFirstDefinition() {
+		return stopOnFirstDefinition;
+	}
+
+	public void setStopOnFirstDefinition(boolean stopOnFirstDefinition) {
+		this.stopOnFirstDefinition = stopOnFirstDefinition;
+	}
+
+	public long getTotalRuntimeNs() {
+		return totalRuntimeNs;
+	}
+
+	/** @return the partial definitions collected during the run */
+	public HashSet getPartialDefinitions() {
+		return partialDefinitions;
+	}
+
+	/**
+	 * @return the expandAccuracy100Nodes
+	 */
+	public boolean isExpandAccuracy100Nodes() {
+		return expandAccuracy100Nodes;
+	}
+
+	/**
+	 * @param expandAccuracy100Nodes the expandAccuracy100Nodes to set
+	 */
+	public void setExpandAccuracy100Nodes(boolean expandAccuracy100Nodes) {
+		this.expandAccuracy100Nodes = expandAccuracy100Nodes;
+	}
+
+	/**
+	 * Whether to keep track of the best score during the algorithm run.
+	 *
+	 * @param keepTrackOfBestScore
+	 */
+	public void setKeepTrackOfBestScore(boolean keepTrackOfBestScore) {
+		this.keepTrackOfBestScore = keepTrackOfBestScore;
+	}
+
+	/**
+	 * @return a map containing time points at which a hypothesis with a better score than before has been found
+	 */
+	public SortedMap getRuntimeVsBestScore() {
+		return runtimeVsBestScore;
+	}
+
+	/**
+	 * Return a map that contains
+	 * <ol>
+	 * <li>entries with time points at which a hypothesis with a better score than before has been found</li>
+	 * <li>entries with the current best score for each defined interval time point</li>
+	 * </ol>
+	 *
+	 * @param ticksIntervalTimeValue at which time point the current best score is tracked periodically
+	 * @param ticksIntervalTimeUnit the time unit of the periodic time point values
+	 *
+	 * @return the map
+	 */
+	public SortedMap<Long, Double> getRuntimeVsBestScore(long ticksIntervalTimeValue, TimeUnit ticksIntervalTimeUnit) {
+		TreeMap<Long, Double> map = new TreeMap<>(runtimeVsBestScore);
+
+		// add entries for fixed time points if enabled
+		if(ticksIntervalTimeValue > 0) {
+			long ticksIntervalInMs = TimeUnit.MILLISECONDS.convert(ticksIntervalTimeValue, ticksIntervalTimeUnit);
+
+			// add t = 0 -> 0 so that every tick has a preceding entry to fall back to
+			map.put(0L, 0d);
+
+			for(long t = ticksIntervalInMs; t <= TimeUnit.SECONDS.toMillis(maxExecutionTimeInSeconds); t += ticksIntervalInMs) {
+				// add value of last entry at or before this time point
+				// (looking the key up in runtimeVsBestScore, as done previously, threw
+				// NoSuchElementException when no improvement happened before the first tick)
+				map.put(t, map.floorEntry(t).getValue());
+			}
+
+			// add entry for t = totalRuntime
+			long totalRuntimeMs = Math.min(TimeUnit.SECONDS.toMillis(maxExecutionTimeInSeconds), TimeUnit.NANOSECONDS.toMillis(totalRuntimeNs));
+			map.put(totalRuntimeMs, map.get(map.lastKey()));
+		}
+
+		return map;
+	}
+
+	/* (non-Javadoc)
+	 * @see java.lang.Object#clone()
+	 */
+	@Override
+	public Object clone() throws CloneNotSupportedException {
+		// copy-constructor based clone instead of Object.clone()
+		return new CELOEPartial(this);
+	}
+
+	/**
+	 * A partial definition: a class expression that (noise-adjusted) correctly
+	 * covers a fraction of the positive examples, together with that coverage.
+	 */
+	public static class PartialDefinition {
+		private String id = "";
+		private OWLClassExpression description;
+
+		double coverage = 0.0;
+		List<Double> additionValue = new LinkedList<>();
+
+		private int maxAdditionalValue = 15;
+
+		public PartialDefinition (OWLClassExpression des, double cov) {
+			this.description = des;
+			this.coverage = cov;
+			this.initialAdditionalValue();
+		}
+
+		public OWLClassExpression getDescription() {
+			return this.description;
+		}
+
+		// pre-fill the addition-value slots with zeros
+		// (reconstructed: the original source was garbled at this point — the loop
+		// header and the commented-out getter had been fused together)
+		private void initialAdditionalValue() {
+			for (int i = 0; i < maxAdditionalValue; i++)
+				this.additionValue.add(0d);
+		}
+
+		/*
+		public List<Double> getAdditionValue() {
+			return this.additionValue;
+		}
+		*/
+
+		public double getAdditionValue(int index) {
+			return this.additionValue.get(index);
+		}
+
+	}
+
+	/** Small manual smoke test: learns a description for father#male on the father example ontology. */
+	public static void main(String[] args) throws Exception{
+//		File file = new File("../examples/swore/swore.rdf");
+//		OWLClass classToDescribe = new OWLClassImpl(IRI.create("http://ns.softwiki.de/req/CustomerRequirement"));
+		File file = new File("../examples/father.owl");
+		OWLClass classToDescribe = new OWLClassImpl(IRI.create("http://example.com/father#male"));
+
+		OWLOntology ontology = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(file);
+
+		AbstractKnowledgeSource ks = new OWLAPIOntology(ontology);
+		ks.init();
+
+		// closed-world reasoner on top of a HermiT base reasoner
+		OWLAPIReasoner baseReasoner = new OWLAPIReasoner(ks);
+		baseReasoner.setReasonerImplementation(ReasonerImplementation.HERMIT);
+		baseReasoner.init();
+		ClosedWorldReasoner rc = new ClosedWorldReasoner(ks);
+		rc.setReasonerComponent(baseReasoner);
+		rc.init();
+
+		ClassLearningProblem lp = new ClassLearningProblem(rc);
+//		lp.setEquivalence(false);
+		lp.setClassToDescribe(classToDescribe);
+		lp.init();
+
+		RhoDRDown op = new RhoDRDown();
+		op.setReasoner(rc);
+		op.setUseNegation(false);
+		op.setUseHasValueConstructor(false);
+		op.setUseCardinalityRestrictions(true);
+		op.setUseExistsConstructor(true);
+		op.setUseAllConstructor(true);
+		op.init();
+
+		// demo of the expression minimizer on a hand-built expression:
+		// (male AND (ALL hasChild.TOP)) OR (EXISTS hasChild.(EXISTS hasChild.male))
+		OWLDataFactory df = new OWLDataFactoryImpl();
+		OWLClass male = df.getOWLClass(IRI.create("http://example.com/father#male"));
+		OWLClassExpression ce = df.getOWLObjectIntersectionOf(
+				df.getOWLObjectUnionOf(
+						male,
+						df.getOWLObjectIntersectionOf(
+								male, male),
+						df.getOWLObjectSomeValuesFrom(
+								df.getOWLObjectProperty(IRI.create("http://example.com/father#hasChild")),
+								df.getOWLThing())
+				),
+				df.getOWLObjectAllValuesFrom(
+						df.getOWLObjectProperty(IRI.create("http://example.com/father#hasChild")),
+						df.getOWLThing()
+				)
+		);
+		System.out.println(ce);
+		OWLClassExpressionMinimizer min = new OWLClassExpressionMinimizer(df, rc);
+		ce = min.minimizeClone(ce);
+		System.out.println(ce);
+
+		CELOEPartial alg = new CELOEPartial(lp, rc);
+		alg.setMaxExecutionTimeInSeconds(10);
+		alg.setOperator(op);
+		alg.setWriteSearchTree(true);
+		alg.setSearchTreeFile("log/search-tree.log");
+		alg.setReplaceSearchTree(true);
+		alg.init();
+		alg.setKeepTrackOfBestScore(true);
+
+		alg.start();
+
+		// print runtime vs. best score as TSV
+		SortedMap map = alg.getRuntimeVsBestScore(1, TimeUnit.SECONDS);
+		System.out.println(MapUtils.asTSV(map, "runtime", "best_score"));
+
+	}
+
+}
\ No newline at end of file
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELCorrectnessGreedyReducer.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELCorrectnessGreedyReducer.java
new file mode 100644
index 0000000000..f5a3d26f9e
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELCorrectnessGreedyReducer.java
@@ -0,0 +1,94 @@
+package org.dllearner.algorithms.parcel.reducer;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.log4j.Logger;
+import org.dllearner.algorithms.parcel.ParCELCompletenessComparator;
+import org.dllearner.algorithms.parcel.ParCELExtraNode;
+import org.semanticweb.owlapi.model.OWLIndividual;
+
+/**
+ * This class implements "wise" coverage greedy strategy for compacting the partial definitions In
+ * this strategy, the partial definitions will be chosen based on their coverage. When a partial
+ * definition has been chosen, coverage of other partial definition will be recalculated
+ *
+ * @author An C. Tran
+ *
+ */
+
+public class ParCELCorrectnessGreedyReducer {
+
+ Logger logger = Logger.getLogger(this.getClass());
+
+
+ /**
+ * Compact partial definition with noise allowed
+ *
+ * @param counterPartialDefinitions
+ * Set of partial definitions
+ * @param negativeExamples
+ * Set of positive examples (used to check whether partial definition is useful
+ *
+ * @return Subset of partial definitions that cover (positive examples \ uncovered positive
+ * examples)
+ */
+ public SortedSet reduce(SortedSet counterPartialDefinitions,
+ Set negativeExamples)
+ {
+
+ Set positiveExamplesTmp = new HashSet<>(negativeExamples);
+
+ TreeSet reducedPartialDefinition = new TreeSet<>(
+ new ParCELCompletenessComparator());
+
+ if (counterPartialDefinitions.size() == 0)
+ return reducedPartialDefinition;
+
+ synchronized (counterPartialDefinitions) {
+ Object[] partialDefs = counterPartialDefinitions.toArray();
+
+ // the highest accurate partial definition
+ // reducedPartialDefinition.add((PDLLExtraNode)partialDefs[0]);
+ // positiveExamplesTmp.removeAll(((PDLLExtraNode)partialDefs[0]).getCoveredPositiveExamples());
+
+ for (int i = 0; (positiveExamplesTmp.size() > 0)
+ && (i < counterPartialDefinitions.size()); i++) {
+
+ // count the number of different positive examples covered
+ int counti = 0;
+ for (OWLIndividual indi : ((ParCELExtraNode) partialDefs[i]).getCoveredPositiveExamples()) {
+ if (positiveExamplesTmp.contains(indi))
+ counti++;
+ } // count the number of different covered positive examples by i
+
+
+ for (int j = i + 1; j < counterPartialDefinitions.size(); j++) {
+ int countj = 0;
+
+ for (OWLIndividual indj : ((ParCELExtraNode) partialDefs[j]).getCoveredPositiveExamples())
+ if (positiveExamplesTmp.contains(indj))
+ countj++;
+
+ // swap the partial definition so that the "best" partial
+ // definition will be in the top
+ if (countj > counti) {
+ ParCELExtraNode tmp = (ParCELExtraNode) partialDefs[j];
+ partialDefs[j] = partialDefs[i];
+ partialDefs[i] = tmp;
+ counti = countj;
+ }
+ }
+
+ reducedPartialDefinition.add((ParCELExtraNode) partialDefs[i]);
+ positiveExamplesTmp.removeAll(((ParCELExtraNode) partialDefs[i])
+ .getCoveredPositiveExamples());
+ }
+ }
+
+ return reducedPartialDefinition;
+ }
+
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELCoverageGreedyReducer.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELCoverageGreedyReducer.java
new file mode 100644
index 0000000000..5f251d27e5
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELCoverageGreedyReducer.java
@@ -0,0 +1,77 @@
+package org.dllearner.algorithms.parcel.reducer;
+
+import java.util.*;
+
+import org.dllearner.algorithms.parcel.ParCELCorrectnessComparator;
+import org.dllearner.algorithms.parcel.ParCELExtraNode;
+import org.semanticweb.owlapi.model.OWLIndividual;
+
+/**
+ * This class implements a simple strategy for compacting the partial definition set. In this
+ * strategy, the partial definitions are chosen based on their accuracy. The partial definition
+ * with the best accuracy is chosen first, and the rest are not re-calculated before the
+ * next reduction.
+ *
+ * @author An C. Tran
+ *
+ */
+public class ParCELCoverageGreedyReducer implements ParCELReducer {
+
+ /**
+ * Compact partial definitions
+ *
+ * @param partialDefinitions
+ * Set of partial definitions
+ * @param positiveExamples
+ * Set of positive examples (used to check whether partial definition is useful
+ *
+ * @return Subset of partial definitions that cover all positive examples
+ */
+ @Override
+ public SortedSet reduce(SortedSet partialDefinitions,
+ Set positiveExamples) {
+ return reduce(partialDefinitions, positiveExamples, 0);
+ }
+
+ /**
+ * Compact partial definition with noise allowed
+ *
+ * @param partialDefinitions
+ * Set of partial definitions
+ * @param positiveExamples
+ * Set of positive examples (used to check whether partial definition is useful
+ * @param uncoveredPositiveExamples
+ * Number of uncovered positive examples allowed
+ *
+ * @return Subset of partial definitions that cover (positive examples \ uncovered positive
+ * examples)
+ */
+ @Override
+ public SortedSet reduce(SortedSet partialDefinitions,
+ Set positiveExamples, int uncoveredPositiveExamples) {
+
+ Set positiveExamplesTmp = new HashSet<>(positiveExamples);
+
+ TreeSet minimisedPartialDefinition = new TreeSet<>(
+ new ParCELCorrectnessComparator());
+
+ Iterator partialDefinitionIterator = partialDefinitions.iterator();
+ while ((positiveExamplesTmp.size() > uncoveredPositiveExamples)
+ && (partialDefinitionIterator.hasNext())) {
+ ParCELExtraNode node = partialDefinitionIterator.next();
+
+ int positiveExamplesRemoved = positiveExamplesTmp.size();
+ positiveExamplesTmp.removeAll(node.getCoveredPositiveExamples());
+
+ positiveExamplesRemoved -= positiveExamplesTmp.size();
+
+ if (positiveExamplesRemoved > 0) {
+ node.setCorrectness(positiveExamplesRemoved);
+ minimisedPartialDefinition.add(node);
+ }
+ }
+
+ return minimisedPartialDefinition;
+ }
+
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELDefinitionLengthReducer.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELDefinitionLengthReducer.java
new file mode 100644
index 0000000000..6ac993e651
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELDefinitionLengthReducer.java
@@ -0,0 +1,57 @@
+package org.dllearner.algorithms.parcel.reducer;
+
+import java.util.*;
+
+import org.dllearner.algorithms.parcel.ParCELDefinitionGenerationTimeComparator;
+import org.dllearner.algorithms.parcel.ParCELDefinitionLengthComparator;
+import org.dllearner.algorithms.parcel.ParCELExtraNode;
+import org.semanticweb.owlapi.model.OWLIndividual;
+
+/**
+ * Compact a set of partial definitions using the Definition Length Greedy Reduction strategy
+ *
+ * @author An C. Tran
+ *
+ */
+public class ParCELDefinitionLengthReducer implements ParCELReducer {
+
+ @Override
+ public SortedSet reduce(SortedSet partialDefinitions,
+ Set positiveExamples) {
+ return reduce(partialDefinitions, positiveExamples, 0);
+ }
+
+ @Override
+ public SortedSet reduce(SortedSet partialDefinitions,
+ Set positiveExamples, int uncoveredPositiveExamples) {
+ Set positiveExamplesTmp = new HashSet<>(positiveExamples);
+
+ TreeSet newSortedPartialDefinitions = new TreeSet<>(
+ new ParCELDefinitionLengthComparator());
+ synchronized (partialDefinitions) {
+ newSortedPartialDefinitions.addAll(partialDefinitions);
+ }
+
+ TreeSet minimisedPartialDefinition = new TreeSet<>(
+ new ParCELDefinitionGenerationTimeComparator());
+
+ Iterator partialDefinitionIterator = newSortedPartialDefinitions.iterator();
+ while ((positiveExamplesTmp.size() > uncoveredPositiveExamples)
+ && (partialDefinitionIterator.hasNext())) {
+ ParCELExtraNode node = partialDefinitionIterator.next();
+
+ int positiveExamplesRemoved = positiveExamplesTmp.size();
+ positiveExamplesTmp.removeAll(node.getCoveredPositiveExamples());
+
+ positiveExamplesRemoved -= positiveExamplesTmp.size();
+
+ if (positiveExamplesRemoved > 0) {
+ node.setCorrectness(positiveExamplesRemoved);
+ minimisedPartialDefinition.add(node);
+ }
+ }
+
+ return minimisedPartialDefinition;
+ }
+
+}
diff --git a/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELGenerationTimeReducer.java b/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELGenerationTimeReducer.java
new file mode 100644
index 0000000000..cee2dc6dc9
--- /dev/null
+++ b/components-core/src/main/java/org/dllearner/algorithms/parcel/reducer/ParCELGenerationTimeReducer.java
@@ -0,0 +1,60 @@
+package org.dllearner.algorithms.parcel.reducer;
+
+import java.util.*;
+
+import org.dllearner.algorithms.parcel.ParCELDefinitionGenerationTimeComparator;
+import org.dllearner.algorithms.parcel.ParCELExtraNode;
+import org.dllearner.algorithms.parcel.reducer.ParCELReducer;
+import org.semanticweb.owlapi.model.OWLIndividual;
+
+/**
+ * Compact a set of partial definitions using the Generation Time Greedy strategy
+ *
+ * @author An C. Tran
+ *
+ */
+
+public class ParCELGenerationTimeReducer implements ParCELReducer {
+
+	/**
+	 * Compact a set of partial definitions; no uncovered positive examples are allowed.
+	 * Delegates to the noise-tolerant overload with zero allowed uncovered examples.
+	 *
+	 * @param partialDefinitions
+	 *            Set of partial definitions
+	 * @param positiveExamples
+	 *            Set of positive examples (used to check whether a partial definition is useful)
+	 *
+	 * @return Subset of the partial definitions that covers all positive examples
+	 */
+	@Override
+	public SortedSet<ParCELExtraNode> reduce(SortedSet<ParCELExtraNode> partialDefinitions,
+			Set<OWLIndividual> positiveExamples) {
+		return reduce(partialDefinitions, positiveExamples, 0);
+	}
+
+ @Override
+ public SortedSet reduce(SortedSet