diff --git a/.config/dotnet-tools.json b/.config/dotnet-tools.json
index ae432524b..ac5ebfd1f 100644
--- a/.config/dotnet-tools.json
+++ b/.config/dotnet-tools.json
@@ -2,17 +2,19 @@
"version": 1,
"isRoot": true,
"tools": {
- "fsdocs-tool": {
- "version": "16.1.1",
- "commands": [
- "fsdocs"
- ]
- },
"fantomas": {
"version": "6.1.1",
"commands": [
"fantomas"
- ]
+ ],
+ "rollForward": false
+ },
+ "fsdocs-tool": {
+ "version": "20.0.1",
+ "commands": [
+ "fsdocs"
+ ],
+ "rollForward": false
}
}
}
\ No newline at end of file
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index ce45285cf..6a8a6dee0 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -13,10 +13,10 @@ jobs:
steps:
- uses: actions/checkout@v2
- - name: Setup .NET
- uses: actions/setup-dotnet@v1
+ - name: Setup .NET 8
+ uses: actions/setup-dotnet@v4
with:
- dotnet-version: '9.x.x'
+ dotnet-version: '8.x.x'
- name: Restore local tools
run: dotnet tool restore
- name: make script executable
@@ -30,11 +30,11 @@ jobs:
runs-on: windows-latest
steps:
- - uses: actions/checkout@v2
- - name: Setup .NET
- uses: actions/setup-dotnet@v1
+ - uses: actions/checkout@v4
+ - name: Setup .NET 8
+ uses: actions/setup-dotnet@v4
with:
- dotnet-version: '9.x.x'
+ dotnet-version: '8.x.x'
- name: Restore local tools
run: dotnet tool restore
- name: Build and test
diff --git a/build/BasicTasks.fs b/build/BasicTasks.fs
index 65cb69bf2..c1d6c05f0 100644
--- a/build/BasicTasks.fs
+++ b/build/BasicTasks.fs
@@ -28,7 +28,17 @@ let clean = BuildTask.create "Clean" [] {
let build = BuildTask.create "Build" [clean] {
solutionFile
|> DotNet.build (fun p ->
- { p with MSBuildParams = { p.MSBuildParams with DisableInternalBinLog = true }}
+ let msBuildParams =
+ {p.MSBuildParams with
+ Properties = ([
+ "warnon", "3390"
+ ])
+ DisableInternalBinLog = true
+ }
+ {
+ p with
+ MSBuildParams = msBuildParams
+ }
|> DotNet.Options.withCustomParams (Some "-tl")
)
}
\ No newline at end of file
diff --git a/build/PackageTasks.fs b/build/PackageTasks.fs
index af6517842..db16ad2c6 100644
--- a/build/PackageTasks.fs
+++ b/build/PackageTasks.fs
@@ -8,6 +8,7 @@ open TestTasks
open BlackFox.Fake
open Fake.Core
+open Fake.DotNet
open Fake.IO.Globbing.Operators
let pack = BuildTask.create "Pack" [clean; build; runTests] {
@@ -18,17 +19,18 @@ let pack = BuildTask.create "Pack" [clean; build; runTests] {
|> Seq.iter (Fake.DotNet.DotNet.pack (fun p ->
let msBuildParams =
{p.MSBuildParams with
- DisableInternalBinLog = true
Properties = ([
"Version",stableVersionTag
"PackageReleaseNotes", (release.Notes |> String.concat "\r\n")
] @ p.MSBuildParams.Properties)
+ DisableInternalBinLog = true
}
{
p with
MSBuildParams = msBuildParams
OutputPath = Some pkgDir
}
+ |> DotNet.Options.withCustomParams (Some "--no-dependencies -tl")
))
else failwith "aborted"
}
@@ -41,11 +43,11 @@ let packPrerelease = BuildTask.create "PackPrerelease" [setPrereleaseTag; clean;
|> Seq.iter (Fake.DotNet.DotNet.pack (fun p ->
let msBuildParams =
{p.MSBuildParams with
- DisableInternalBinLog = true
Properties = ([
"Version", prereleaseTag
"PackageReleaseNotes", (release.Notes |> String.toLines )
] @ p.MSBuildParams.Properties)
+ DisableInternalBinLog = true
}
{
p with
@@ -53,6 +55,7 @@ let packPrerelease = BuildTask.create "PackPrerelease" [setPrereleaseTag; clean;
OutputPath = Some pkgDir
MSBuildParams = msBuildParams
}
+ |> DotNet.Options.withCustomParams (Some "--no-dependencies -tl")
))
else
failwith "aborted"
diff --git a/build/TestTasks.fs b/build/TestTasks.fs
index d79c6d935..4ddb58cd4 100644
--- a/build/TestTasks.fs
+++ b/build/TestTasks.fs
@@ -7,16 +7,16 @@ open ProjectInfo
open BasicTasks
let runTests = BuildTask.create "RunTests" [clean; build] {
- testProject
- |> Fake.DotNet.DotNet.test (fun testParams ->
- { testParams with
- Logger = Some "console;verbosity=detailed"
- Configuration = DotNet.BuildConfiguration.fromString configuration
- NoBuild = true
- MSBuildParams = { testParams.MSBuildParams with DisableInternalBinLog = true }
+ Fake.DotNet.DotNet.test(fun testParams ->
+ {
+ testParams with
+ Logger = Some "console;verbosity=detailed"
+ Configuration = DotNet.BuildConfiguration.fromString configuration
+ NoBuild = true
+ MSBuildParams = { testParams.MSBuildParams with DisableInternalBinLog = true }
}
|> DotNet.Options.withCustomParams (Some "-tl")
- )
+ ) testProject
}
// to do: use this once we have actual tests
@@ -27,12 +27,12 @@ let runTestsWithCodeCov = BuildTask.create "RunTestsWithCodeCov" [clean; build]
testParams with
MSBuildParams = {
standardParams with
- DisableInternalBinLog = true
Properties = [
"AltCover","true"
"AltCoverCobertura","../../codeCov.xml"
"AltCoverForce","true"
]
+ DisableInternalBinLog = true
};
Logger = Some "console;verbosity=detailed"
}
diff --git a/build/build.fsproj b/build/build.fsproj
index f978756ba..b3f0ebf49 100644
--- a/build/build.fsproj
+++ b/build/build.fsproj
@@ -31,8 +31,4 @@
-
-
-
-
diff --git a/docs/BasicStats.fsx b/docs/BasicStats.fsx
index e03c4106f..fe1a11be8 100644
--- a/docs/BasicStats.fsx
+++ b/docs/BasicStats.fsx
@@ -10,9 +10,13 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#r "../src/FSharp.Stats/bin/Release/netstandard2.0/FSharp.Stats.dll"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
#r "nuget: FsMath, 0.0.1"
-open FsMath
+#I "../src/FSharp.Stats/bin/Release/net8.0/"
+#r "FSharp.Stats.dll"
(*** condition: ipynb ***)
#if IPYNB
#r "nuget: FSharp.Stats"
@@ -27,19 +31,6 @@ open FsMath
_Summary:_ this tutorial gives an overview over how to do some of the basic statistical measurements with FSharp.Stats.
-### Table of contents
-
- - [Central tendency](#Central-tendency)
- - [Mean](#Mean)
- - [Truncated mean](#Truncated-mean)
- - [Median](#Median)
- - [Harmonic mean](#Harmonic-mean)
- - [Geometric mean](#Geometric-mean)
- - [Dispersion](#Dispersion)
- - [Range](#Range)
- - [Variance and Standard Deviation](#Variance-and-standard-deviation)
- - [Coefficient of variation](#Coefficient-of-variation)
-
## Central tendency
A [central tendency](https://en.wikipedia.org/wiki/Central_tendency) (or measure of central tendency) is a central or typical value for a probability distribution.
diff --git a/docs/Clustering.fsx b/docs/Clustering.fsx
index 4491a7d73..d846b6c65 100644
--- a/docs/Clustering.fsx
+++ b/docs/Clustering.fsx
@@ -10,25 +10,24 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FSharpAux, 1.0.0"
#r "nuget: Cyjs.NET"
-#r "nuget: OptimizedPriorityQueue, 5.1.0"
-#r "nuget: FsMath, 0.0.1"
-open FsMath
+
Plotly.NET.Defaults.DefaultDisplayOptions <-
Plotly.NET.DisplayOptions.init (PlotlyJSReference = Plotly.NET.PlotlyJSReference.NoReference)
(*** condition: ipynb ***)
#if IPYNB
-#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: Plotly.NET.Interactive, 4.0.0"
-#r "nuget: OptimizedPriorityQueue, 5.1.0"
-#r "nuget: FSharpAux, 1.0.0"
#r "nuget: FSharp.Stats"
+#r "nuget: Plotly.NET, 4.0.0"
#r "nuget: Cyjs.NET"
#endif // IPYNB
@@ -40,22 +39,6 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
_Summary:_ this tutorial demonstrates several clustering methods in FSharp.Stats and how to visualize the results with Plotly.NET.
-### Table of contents
-
- - [Iterative Clustering](#Iterative-Clustering)
- - [k-means clustering](#k-means-clustering)
- - [Density based clustering](#Density-based-clustering)
- - [DBSCAN](#DBSCAN)
- - [Hierarchical clustering](#Hierarchical-Clustering)
- - [Distance measures](#Distance-measures)
- - [Linkages](#Linkages)
- - [Determining the optimal number of clusters](#Determining-the-optimal-number-of-clusters)
- - [Rule of thumb](#Rule-of-thumb)
- - [Elbow criterion](#Elbow-criterion)
- - [AIC](#AIC)
- - [Silhouette coefficient](#Silhouette-coefficient)
- - [GapStatistics](#GapStatistics)
-
Clustering methods can be used to group elements of a huge data set based on their similarity. Elements sharing similar properties cluster together and can be reported as coherent group.
**Column wise standardization**
diff --git a/docs/ComparisonMetrics.fsx b/docs/ComparisonMetrics.fsx
index 9920c443f..b4a141aeb 100644
--- a/docs/ComparisonMetrics.fsx
+++ b/docs/ComparisonMetrics.fsx
@@ -10,11 +10,15 @@ categoryindex: 0
(***hide***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
-open FsMath
+
Plotly.NET.Defaults.DefaultDisplayOptions <-
Plotly.NET.DisplayOptions.init (PlotlyJSReference = Plotly.NET.PlotlyJSReference.NoReference)
@@ -23,6 +27,7 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
#if IPYNB
#r "nuget: Plotly.NET, 4.0.0"
#r "nuget: Plotly.NET.Interactive, 4.0.0"
+#r "nuget: FsMath, 0.0.1"
#r "nuget: FSharp.Stats"
#endif // IPYNB
@@ -32,20 +37,6 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
[](https://mybinder.org/v2/gh/fslaborg/FSharp.Stats/gh-pages?urlpath=/tree/home/jovyan/Integration.ipynb)
[]({{root}}{{fsdocs-source-basename}}.ipynb)
-#### Table of contents
-- [Confusion matrices](#Confusion-matrices)
- - [Binary confusion matrix](#Binary-confusion-matrix)
- - [Multi-label confusion matrix](#Multi-label-confusion-matrix)
-- [Comparison-Metric](#Comparison-Metrics)
- - [ComparisonMetrics for binary comparisons](#ComparisonMetrics-for-binary-comparisons)
- - [ComparisonMetrics for multi-label comparisons](#ComparisonMetrics-for-multi-label-comparisons)
- - [Macro-averaging metrics](#Macro-averaging-metrics)
- - [Micro-averaging metrics](#Micro-averaging-metrics)
- - [Creating threshold-dependent metric maps](#Creating-threshold-dependent-metric-maps)
- - [For binary predictions](#For-binary-predictions)
- - [For multi-label predictions](#For-multi-label-predictions)
- - [ROC curve example](#ROC-curve-example)
-
FSharp.Stats contains a collection for assessing both binary and multi-label comparisons, for example the results of a binary/multi-label classification or the results of a statistical test.
Usually, using the functions provided by the `ComparisonMetrics` module should be enough, but for clarity this documentation also introduces the `BinaryConfusionMatrix` and `MultiLabelConfusionMatrix` types that are used to derive the `ComparisonMetrics.`
@@ -72,11 +63,11 @@ $$predicted = (1,1,1,0,1,0,0)$$
a binary confusion matrix can be filled by comparing actual and predicted values at their respective indices:
-| | | predicted | |
-| --- | --- | --- | --- |
-| | | True | False |
-|actual | True | 3 | 1 |
-| | False| 1 | 2 |
+| | | **Predicted**| |
+| --- | --- | --- | --- |
+| | | True | False |
+|**Actual** | True | 3 | 1 |
+| | False| 1 | 2 |
A whole array of prediction/test evaluation metrics can be derived from binary confusion matrices, which are all based on the 4 values of the confusion matrix:
@@ -85,11 +76,12 @@ A whole array of prediction/test evaluation metrics can be derived from binary c
- FP (False Positives, the actual false labels incorrectly predicted as true)
- TP (False Negatives, the actual true labels incorrectly predicted as false)
-| | | Predicted | |
-| --- | --- | --- | --- |
-| | | True | False |
-|Actual | True | TP | FN |
-| | False| FP | TN |
+| | | **Predicted**| |
+| --- | --- | --- | --- |
+| | | True | False |
+|**Actual** | True | TP | FN |
+| | False| FP | TN |
+
These 4 base metrics are in principle what comprises the record type `BinaryConfusionMatrix`.
@@ -100,6 +92,7 @@ A BinaryConfusionMatrix can be created in various ways :
let actual = [1;1;1;1;0;0;0]
let predicted = [1;1;1;0;1;0;0]
+open FsMath
open FSharp.Stats.Testing
BinaryConfusionMatrix.ofPredictions(1,actual,predicted)
@@ -138,12 +131,12 @@ $$predicted = (A,A,A,B,C,B,B,A,C,C,C,C,A,A)$$
a multi-label confusion matrix can be filled by comparing actual and predicted values at their respective indices:
-| | | Predicted | | |
-| --- | --- | --- | --- | --- |
-| | | Label A | Label B | Label C |
-| Actual | Label A | 3 | 1 | 1 |
-| | Label B | 1 | 2 | 0 |
-| | Label C | 2 | 0 | 4 |
+| | | **Predicted** | | |
+| --- | --- | --- | --- | --- |
+| | | Label A | Label B | Label C |
+| **Actual** | Label A | 3 | 1 | 1 |
+| | Label B | 1 | 2 | 0 |
+| | Label C | 2 | 0 | 4 |
A `MultiLabelConfusionMatrix` can be created either
@@ -186,11 +179,11 @@ This is done by taking all occurences of the label in the actual labels as posit
As an example, the derived binary confusion matrix for `Label A` in above example would be:
-| | | Predicted | |
-| --- |--- | --- | --- |
-| | | is A | is not A |
-|Actual | is A | 3 | 2 |
-| | is not A | 3 | 6 |
+| | | **Predicted** | |
+| --- |--- | --- | --- |
+| | | is A | is not A |
+|**Actual** | is A | 3 | 2 |
+| | is not A | 3 | 6 |
Programmatically, this can be done via `MultiLabelConfusionMatrix.oneVsRest`
*)
@@ -214,29 +207,29 @@ mlcm
It also provides static methods to perform calculation of individual metrics derived from a BinaryConfusionMatrix via the `ComparisonMetrics.calculate` functions:
-| Metric | Formula | API reference |
-| --- |--- | --- |
-|Sensitivity (TPR) | $TPR = \frac{TP}{TP+TN}$ | [ComparisonMetrics.calculateSensitivity ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateSensitivity ) |
-|Specificity (TNR) | $TNR = \frac{TN}{TN+TP}$ | [ComparisonMetrics.calculateSpecificity ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateSpecificity ) |
-|Precision (PPV) | $PPV = \frac{TP}{TP+FP}$ | [ComparisonMetrics.calculatePrecision ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculatePrecision ) |
-|NegativePredictiveValue (NPV) | $NPV = \frac{TN}{TN+FN}$ | [ComparisonMetrics.calculateNegativePredictiveValue](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateNegativePredictiveValue) |
-|Missrate (FNR) | $FNR = \frac{FN}{FN+TP}$ | [ComparisonMetrics.calculateMissrate ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateMissrate ) |
-|FallOut (FPR) | $FPR = \frac{FP}{FP+TN}$ | [ComparisonMetrics.calculateFallOut ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateFallOut ) |
-|FalseDiscoveryRate (FDR) | $FDR = \frac{FP}{FP+TP}$ | [ComparisonMetrics.calculateFalseDiscoveryRate ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateFalseDiscoveryRate ) |
-|FalseOmissionRate (FOR) | $FOR = \frac{FN}{FN+TN}$ | [ComparisonMetrics.calculateFalseOmissionRate ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateFalseOmissionRate ) |
-|PositiveLikelihoodRatio (LR+) | $LR+ = \frac{TPR}{FPR}$ | [ComparisonMetrics.calculatePositiveLikelihoodRatio](/reference/fsharp-stats-testing-comparisonmetrics.html#calculatePositiveLikelihoodRatio) |
-|NegativeLikelihoodRatio (LR-) | $LR- = \frac{FNR}{TNR}$ | [ComparisonMetrics.calculateNegativeLikelihoodRatio](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateNegativeLikelihoodRatio) |
-|PrevalenceThreshold (PT) | $PT = \frac{\sqrt{FPR}}{\sqrt{TPR}+\sqrt{FPR}}$ | [ComparisonMetrics.calculatePrevalenceThreshold ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculatePrevalenceThreshold ) |
-|ThreatScore (TS) | $TS = \frac{TP}{TP+FN+FP}$ | [ComparisonMetrics.calculateThreatScore ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateThreatScore ) |
-|Prevalence | $Prevalence = \frac{P}{P+N}$ | [ComparisonMetrics.calculatePrevalence ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculatePrevalence ) |
-|Accuracy (ACC) | $ACC = \frac{TP+TN}{TP+TN+FP+FN}$ | [ComparisonMetrics.calculateAccuracy ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateAccuracy ) |
-|BalancedAccuracy (BA) | $BA = \frac{TPR+TNR}{2}$ | [ComparisonMetrics.calculateBalancedAccuracy ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateBalancedAccuracy ) |
-|F1 Score | $F1 = \frac{2TP}{2TP+FP+FN}$ | [ComparisonMetrics.calculateF1 ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateF1 ) |
-|PhiCoefficient (MCC) | $MCC = \frac{TP*TN-FP*FN}{\sqrt{(TP+FP)(TP+FN)(TN+FP)(TN+FN)}}$ | [ComparisonMetrics.calculatePhiCoefficient ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculatePhiCoefficient ) |
-|FowlkesMallowsIndex (FM) | $FM = \frac{}{}$ | [ComparisonMetrics.calculateFowlkesMallowsIndex ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateFowlkesMallowsIndex ) |
-|Informedness (BM) | $BM = \frac{}{}$ | [ComparisonMetrics.calculateInformedness ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateInformedness ) |
-|Markedness (MK) | $MK = \frac{}{}$ | [ComparisonMetrics.calculateMarkedness ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateMarkedness ) |
-|DiagnosticOddsRatio (DOR) | $DOR = \frac{}{}$ | [ComparisonMetrics.calculateDiagnosticOddsRatio ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateDiagnosticOddsRatio ) |
+| Metric | Formula | API reference |
+| --- |--- | --- |
+|Sensitivity (**TPR**) | $TPR = \frac{TP}{TP+FN}$ | [ComparisonMetrics.calculateSensitivity ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateSensitivity ) |
+|Specificity (**TNR**) | $TNR = \frac{TN}{TN+FP}$ | [ComparisonMetrics.calculateSpecificity ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateSpecificity ) |
+|Precision (**PPV**) | $PPV = \frac{TP}{TP+FP}$ | [ComparisonMetrics.calculatePrecision ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculatePrecision ) |
+|NegativePredictiveValue (**NPV**) | $NPV = \frac{TN}{TN+FN}$ | [ComparisonMetrics.calculateNegativePredictiveValue](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateNegativePredictiveValue) |
+|Missrate (**FNR**) | $FNR = \frac{FN}{FN+TP}$ | [ComparisonMetrics.calculateMissrate ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateMissrate ) |
+|FallOut (**FPR**) | $FPR = \frac{FP}{FP+TN}$ | [ComparisonMetrics.calculateFallOut ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateFallOut ) |
+|FalseDiscoveryRate (**FDR**) | $FDR = \frac{FP}{FP+TP}$ | [ComparisonMetrics.calculateFalseDiscoveryRate ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateFalseDiscoveryRate ) |
+|FalseOmissionRate (**FOR**) | $FOR = \frac{FN}{FN+TN}$ | [ComparisonMetrics.calculateFalseOmissionRate ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateFalseOmissionRate ) |
+|PositiveLikelihoodRatio (**LR+**) | $LR+ = \frac{TPR}{FPR}$ | [ComparisonMetrics.calculatePositiveLikelihoodRatio](/reference/fsharp-stats-testing-comparisonmetrics.html#calculatePositiveLikelihoodRatio) |
+|NegativeLikelihoodRatio (**LR-**) | $LR- = \frac{FNR}{TNR}$ | [ComparisonMetrics.calculateNegativeLikelihoodRatio](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateNegativeLikelihoodRatio) |
+|PrevalenceThreshold (**PT**) | $PT = \frac{\sqrt{FPR}}{\sqrt{TPR}+\sqrt{FPR}}$ | [ComparisonMetrics.calculatePrevalenceThreshold ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculatePrevalenceThreshold ) |
+|ThreatScore (**TS**) | $TS = \frac{TP}{TP+FN+FP}$ | [ComparisonMetrics.calculateThreatScore ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateThreatScore ) |
+|Prevalence | $Prevalence = \frac{P}{P+N}$ | [ComparisonMetrics.calculatePrevalence ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculatePrevalence ) |
+|Accuracy (**ACC**) | $ACC = \frac{TP+TN}{TP+TN+FP+FN}$ | [ComparisonMetrics.calculateAccuracy ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateAccuracy ) |
+|BalancedAccuracy (**BA**) | $BA = \frac{TPR+TNR}{2}$ | [ComparisonMetrics.calculateBalancedAccuracy ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateBalancedAccuracy ) |
+|F1 Score | $F1 = \frac{2TP}{2TP+FP+FN}$ | [ComparisonMetrics.calculateF1 ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateF1 ) |
+|PhiCoefficient (**MCC**) | $MCC = \frac{TP*TN-FP*FN}{\sqrt{(TP+FP)(TP+FN)(TN+FP)(TN+FN)}}$ | [ComparisonMetrics.calculatePhiCoefficient ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculatePhiCoefficient ) |
+|FowlkesMallowsIndex (**FM**) | $FM = \sqrt{PPV \times TPR}$ | [ComparisonMetrics.calculateFowlkesMallowsIndex ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateFowlkesMallowsIndex ) |
+|Informedness (**BM**) | $BM = TPR + TNR - 1$ | [ComparisonMetrics.calculateInformedness ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateInformedness ) |
+|Markedness (**MK**) | $MK = PPV + NPV - 1$ | [ComparisonMetrics.calculateMarkedness ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateMarkedness ) |
+|DiagnosticOddsRatio (**DOR**) | $DOR = \frac{LR+}{LR-}$ | [ComparisonMetrics.calculateDiagnosticOddsRatio ](/reference/fsharp-stats-testing-comparisonmetrics.html#calculateDiagnosticOddsRatio ) |
### ComparisonMetrics for binary comparisons
diff --git a/docs/Correlation.fsx b/docs/Correlation.fsx
index cbaddadd8..22442c855 100644
--- a/docs/Correlation.fsx
+++ b/docs/Correlation.fsx
@@ -10,11 +10,16 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
#r "nuget: FsMath, 0.0.1"
-open FsMath
+
Plotly.NET.Defaults.DefaultDisplayOptions <-
Plotly.NET.DisplayOptions.init (PlotlyJSReference = Plotly.NET.PlotlyJSReference.NoReference)
@@ -24,7 +29,7 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
#r "nuget: Plotly.NET, 4.0.0"
#r "nuget: Plotly.NET.Interactive, 4.0.0"
#r "nuget: FSharp.Stats"
-
+#r "nuget: FsMath, 0.0.1"
#endif // IPYNB
(**
@@ -36,16 +41,11 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
_Summary_: This tutorial demonstrates how to calculate correlation coefficients in FSharp.Stats
-### Table of contents
-
- - [Sequence correlations](#Sequence correlations)
- - [Matrix correlations](#Matrix correlations)
- - [Autocorrelation](#Autocorrelation)
-
## Sequence correlations
*)
open Plotly.NET
+open FsMath
open FSharp.Stats
open FSharp.Stats.Correlation
@@ -176,7 +176,7 @@ let table2 =
let proportion = int (255. * (value - min) / (max - min))
Color.fromARGB 1 (255 - proportion) 255 proportion
pearsonCorrelationMatrix
- |> Matrix.toJaggedArray
+ |> fun m -> m.toJaggedArray()
|> JaggedArray.map (mapColor -1. 1.)
|> JaggedArray.transpose
|> Array.map Color.fromColors
@@ -184,7 +184,7 @@ let table2 =
let values =
pearsonCorrelationMatrix
- |> Matrix.toJaggedArray
+ |> fun m -> m.toJaggedArray()
|> JaggedArray.map (sprintf "%.3f")
Chart.Table(["colindex 0";"colindex 1";"colindex 2";"colindex 3"],values,CellsFillColor=cellcolors)
diff --git a/docs/Covariance.fsx b/docs/Covariance.fsx
index 41b35d0d6..7d14e30c8 100644
--- a/docs/Covariance.fsx
+++ b/docs/Covariance.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -47,7 +51,7 @@ let error() = rnd.Next(11)
let sampleA = Vector.init 50 (fun x -> float x)
let sampleB = Vector.init 50 (fun x -> float (x + error()))
-let sampleBHigh = sampleB |> Vector.map (fun x -> 200. + x)
+let sampleBHigh = sampleB |> Array.map (fun x -> 200. + x)
let sampleC = Vector.init 50 (fun x -> 100. - float (x + 3 * error()))
let sampleD = Vector.init 50 (fun x -> 100. + float (10 * error()))
diff --git a/docs/CrossValidation.fsx b/docs/CrossValidation.fsx
index f78da9e08..c97a5e2b5 100644
--- a/docs/CrossValidation.fsx
+++ b/docs/CrossValidation.fsx
@@ -11,10 +11,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -35,15 +39,6 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
_Summary:_ this tutorial demonstrates how to perform several types of cross validation with FSharp.Stats.
-### Table of contents
-
- - [Leave-one-out cross validation (LOOCV)](#Leave-one-out-cross-validation-LOOCV)
- - [Procedure](#Procedure)
- - [Polynomial loocv](#Polynomial-loocv)
- - [Smoothing spline loocv](#Smoothing-spline-loocv)
- - [k fold cross validation](#k-fold-cross-validation)
- - [Shuffle and split cross validation](#Shuffle-and-split-cross-validation)
-
## Leave-one-out cross validation (LOOCV)
When fitting a data set it often comes down to the selection of the optimal fitting parameter(s).
@@ -71,8 +66,8 @@ let xV = vector [1. .. 10.]
let yV = vector [1.;20.;51.;40.;37.;6.;-10.;-5.;0.;10.]
// the fitting function fits a polynomial of order 'order' to the training data set (xTrain and yTrain) and applies it to xTest
-let getFitFuncPolynomial xTrain yTrain (xTest:RowVector) order =
- let xDat = xTrain |> Matrix.toVector
+let getFitFuncPolynomial (xTrain:Matrix) yTrain (xTest:Vector) order =
+ let xDat = xTrain.Data // |> Matrix.toVector
let coeffs = Polynomial.fit order xDat yTrain
let predictFunction = Polynomial.predict coeffs (xTest.[0])
predictFunction
@@ -118,8 +113,8 @@ let error (f1:float) f2 = pown (f1 - f2) 2
/// Leave-one-out cross validation. Returns the mean squared error of each leave-out at the
/// specific polynomial order. Minimize for model selection.
let loocvPolynomial (xData:Vector) (yData:Vector) order =
- let xDataMat = Matrix.ofVector xData
- let getFitFuncPol xTrain yTrain (xTest:RowVector) =
+ let xDataMat = Matrix.ofCols [|xData|]
+ let getFitFuncPol xTrain yTrain (xTest:Vector) =
getFitFuncPolynomial xTrain yTrain xTest order
let meanSquaredError = CrossValidation.loocv xDataMat yData getFitFuncPol error
@@ -163,11 +158,11 @@ let's first create some smoothing splines to cross validate:
*)
// the fitting function fits a smoothing spline with smoothing factor lambda to the training data set (xTrain and yTrain) and applies it to xTest
-let getFitFuncSpline xDat yDat (xDatTrain: RowVector) lambda =
- let xDatVec = xDat |> Matrix.toVector
+let getFitFuncSpline (xDat:Matrix) (yDat:Vector) (xDatTrain: Vector) lambda =
+ let xDatVec = xDat.Data // |> Matrix.toVector
let zippedData = Seq.zip xDatVec yDat |> Array.ofSeq
let xValTest = xDatTrain.[0]
- Spline.smoothingSpline zippedData (xDat |> Array.ofSeq) lambda xValTest
+ Spline.smoothingSpline zippedData (xDat.Data) lambda xValTest
/// in loocv the border points are chosen so that the support range of the training data set does not cover the test point.
/// if splines are used, that are not defined outside the border points use the following:
@@ -215,8 +210,8 @@ let errorSpl (f1:float) f2 =
/// Leave-one-out cross validation. Returns the mean squared error of each leave-out at the
/// specific regularization parameter (lambda). Minimize the (MSE) for model selection.
let loocvSmoothingSpline (xData:Vector) (yData:Vector) lambda =
- let xDataMat = Matrix.ofVector xData
- let getFitFuncSpl xDat yDat (xDatTrain: RowVector) =
+ let xDataMat = Matrix.ofCols [|xData|]
+ let getFitFuncSpl xDat yDat (xDatTrain: Vector) =
getFitFuncSpline xDat yDat xDatTrain lambda
CrossValidation.loocv xDataMat yData getFitFuncSpl errorSpl
@@ -265,9 +260,9 @@ The output contains the average error together with the standardDeviation comput
//repeated k fold cross validation for polynomials
let repeatedKFoldPolynomial k (xData: Vector) (yData: Vector) order =
- let xDataMat = xData |> Matrix.Generic.ofVector
+ let xDataMat = [|xData|] |> Matrix.ofCols
- let getFitFuncPol xTrain yTrain (xTest:RowVector) =
+ let getFitFuncPol xTrain yTrain (xTest:Vector) =
getFitFuncPolynomial xTrain yTrain xTest order
CrossValidation.repeatedKFold k 10 xDataMat yData getFitFuncPol error Seq.stDev
@@ -277,9 +272,9 @@ let kfPolynomial order = repeatedKFoldPolynomial 5 xV yV order
//repeated k fold cross validation for smoothing splines
let repeatedKFoldSpline k (xData: Vector) (yData: Vector) lambda =
- let xDataMat = xData |> Matrix.ofVector
+ let xDataMat = [|xData|] |> Matrix.ofCols
- let getFitFuncSpl xDat yDat (xDatTrain: RowVector) =
+ let getFitFuncSpl xDat yDat (xDatTrain: Vector) =
getFitFuncSpline xDat yDat xDatTrain lambda
CrossValidation.repeatedKFold k 10 xDataMat yData getFitFuncSpl errorSpl Seq.stDev
@@ -357,9 +352,9 @@ The output contains the average error together with the standardDeviation comput
*)
let shuffleAndSplitPolynomial p iterations (xData: Vector) (yData: Vector) order =
- let xDataMat = xData |> Matrix.ofVector
+ let xDataMat = [|xData|] |> Matrix.ofCols
- let getFitFuncPol xTrain yTrain (xTest:RowVector) =
+ let getFitFuncPol xTrain yTrain (xTest:Vector) =
getFitFuncPolynomial xTrain yTrain xTest order
CrossValidation.shuffelAndSplit p iterations xDataMat yData getFitFuncPol error Seq.stDev
@@ -368,9 +363,9 @@ let shuffleAndSplitPolynomial p iterations (xData: Vector) (yData: Vector
let sasPolynomial order = shuffleAndSplitPolynomial 0.2 5 xV yV order
let shuffleAndSplitSpline p iterations (xData: Vector) (yData: Vector) lambda =
- let xDataMat = xData |> Matrix.ofVector
+ let xDataMat = [|xData|] |> Matrix.ofCols
- let getFitFuncSpl xDat yDat (xDatTrain: RowVector) =
+ let getFitFuncSpl xDat yDat (xDatTrain: Vector) =
getFitFuncSpline xDat yDat xDatTrain lambda
CrossValidation.shuffelAndSplit p iterations xDataMat yData getFitFuncSpl errorSpl Seq.stDev
diff --git a/docs/Differentiation.fsx b/docs/Differentiation.fsx
index 25eda0fb4..6484e9a45 100644
--- a/docs/Differentiation.fsx
+++ b/docs/Differentiation.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
diff --git a/docs/Distributions.fsx b/docs/Distributions.fsx
index 5a631acd1..3f769378a 100644
--- a/docs/Distributions.fsx
+++ b/docs/Distributions.fsx
@@ -11,10 +11,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
-#r "FSharp.Stats.dll"
-#r "nuget: Plotly.NET, 4.0.0"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
#r "nuget: FsMath, 0.0.1"
+#r "nuget: Plotly.NET, 4.0.0"
+#I "../src/FSharp.Stats/bin/Release/net8.0/"
+#r "FSharp.Stats.dll"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -36,24 +40,6 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
_Summary:_ this tutorial shows how to use the various types of probability distributions in FSharp.Stats.
-### Table of contents
-
-- [Continuous](#Continuous)
- - [Normal distribution](#Normal-distribution)
- - [Multivariate normal distribution](#Multivariate-normal-distribution)
- - [F distribution](#F-distribution)
-- [Discrete](#Discrete)
- - [Bernoulli distribution](#Bernoulli-distribution)
- - [Binomial distribution](#Binomial-distribution)
- - [Multinomial distribution](#Multinomial-distribution)
- - [Hypergerometric distribution](#Hypergerometric-distribution)
- - [Poisson distribution](#Poisson-distribution)
- - [Gamma distribution](#Gamma-distribution)
- - [Negative binomial distribution](#Negative-binomial-distribution)
-- [Empirical](#Empirical)
-- [Density estimation](#Density-estimation)
-- [Distance](#Distance)
-
FSharp.Stats provides a wide range of probability distributions. Given the
distribution parameters they can be used to investigate their statistical properties
or to sample non-uniform random numbers.
@@ -725,7 +711,7 @@ _1 from town B_, and _3 from town C_? The individual success probabilities can b
let multiNomProb = vector [(3./30.); (7./30.); (20./30.)]
// the success combination that is of interest
-let multiNomKs = Vector.Generic.ofList [1; 1; 3]
+let multiNomKs = vector [1; 1; 3]
// gives the probability of obtaining exactly the pattern 1,1,3
let mNom = Discrete.Multinomial.PMF multiNomProb multiNomKs
@@ -745,7 +731,7 @@ multinomial distribution you have to give the corresponding anti-probability:
*)
let mNom_bin_A = (Discrete.Binomial.PMF 0.123 200 20)
-let mNom_bin_B = Discrete.Multinomial.PMF (vector [|0.123; 0.877|]) (Vector.Generic.ofArray [|20; 180|])
+let mNom_bin_B = Discrete.Multinomial.PMF ([|0.123; 0.877|]) ([|20; 180|])
mNom_bin_A //0.0556956956889893
mNom_bin_B //0.0556956956889898
diff --git a/docs/Fitting.fsx b/docs/Fitting.fsx
index 666aaf7e2..fcf5c6bb2 100644
--- a/docs/Fitting.fsx
+++ b/docs/Fitting.fsx
@@ -10,12 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#r "../src/FSharp.Stats/bin/Release/netstandard2.0/FSharp.Stats.dll"
-#r "nuget: Newtonsoft.JSON, 13.0.1"
-#r "nuget: DynamicObj, 2.0.0"
-#r "nuget: Giraffe.ViewEngine, 1.4.0"
-#r "nuget: Plotly.NET, 4.0.0"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
+#r "FSharp.Stats.dll"
+#r "nuget: Plotly.NET, 4.0.0"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -40,17 +42,6 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
_Summary:_ this tutorial will walk through several ways of fitting data with FSharp.Stats.
-### Table of contents
- - [Linear Regression](#Linear-Regression)
- - [Summary](#Summary)
- - [Simple Linear Regression](#Simple-Linear-Regression)
- - [Univariable](#Univariable)
- - [Multivariable](#Multivariable)
- - [Polynomial Regression](#Polynomial-Regression)
- - [Nonlinear Regression](#Nonlinear-Regression)
- - [LevenbergMarquardtConstrained](#LevenbergMarquardtConstrained)
- - [Smoothing spline](#Smoothing-spline)
-
## Linear Regression
In Linear Regression a linear system of equations is generated. The coefficients obtained by the solution to this equation
@@ -212,12 +203,13 @@ let xVectorMulti =
[5.; 4. ;18. ]
[6.; 3. ;22. ]
]
- |> Matrix.ofJaggedSeq
+ |> matrix
let yVectorMulti =
let transformX (x:Matrix) =
x
- |> Matrix.mapiRows (fun _ v -> 100. + (v.[0] * 2.5) + (v.[1] * 4.) + (v.[2] * 0.5))
+ |> Matrix.getRows
+ |> Array.map (fun v -> 100. + (v.[0] * 2.5) + (v.[1] * 4.) + (v.[2] * 0.5))
xVectorMulti
|> transformX
|> vector
@@ -258,7 +250,7 @@ let predictionFunctionPol x =
let orderP = 3
//define the weighting vector
-let weights = yDataP |> Vector.map (fun y -> 1. / y)
+let weights = yDataP |> Array.map (fun y -> 1. / y)
let coefficientsPolW =
OLS.Polynomial.fitWithWeighting orderP weights xDataP yDataP
let predictionFunctionPolW x =
diff --git a/docs/GeneralisedLinearModels.fsx b/docs/GeneralisedLinearModels.fsx
deleted file mode 100644
index b8955ceb5..000000000
--- a/docs/GeneralisedLinearModels.fsx
+++ /dev/null
@@ -1,312 +0,0 @@
-
-(**
----
-title: GLM Documentation
-index: 24
-category: Documentation
-categoryindex: 0
----
-*)
-
-(*** hide ***)
-
-
-(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
-#r "nuget: Deedle"
-#r "FSharp.Stats.dll"
-#r "nuget: Plotly.NET, 4.0.0"
-
-Plotly.NET.Defaults.DefaultDisplayOptions <-
- Plotly.NET.DisplayOptions.init (PlotlyJSReference = Plotly.NET.PlotlyJSReference.NoReference)
-
-(*** condition: ipynb ***)
-#if IPYNB
-#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: Plotly.NET.Interactive, 4.0.0"
-#r "nuget: FSharp.Stats"
-
-open Plotly.NET
-open FSharp.Stats
-open Deedle
-#endif // IPYNB
-
-
-(**
-# General linear models (GLMs)
-
-_Summary:_ This document provides an overview of fitting a Generalized Linear Model (GLM) using FSharp.Stats.
-
-General linear models (GLMs) are a broad class of statistical models that are used to analyze the relationship between a dependent variable and one or more independent variables. GLMs are a flexible framework that encompasses various statistical techniques, including ANOVA (Analysis of Variance).
-
-Like ANOVA, GLMs are used to examine the effects of different factors or variables on an outcome of interest. They allow us to determine if there are significant differences between groups or if there is a relationship between the independent variables and the dependent variable.
-
-GLMs extend the concept of ANOVA by allowing for more complex modeling scenarios. While ANOVA is primarily used for comparing the means of different groups, GLMs can handle a wider range of data types and relationships. For example, GLMs can handle continuous, categorical, and count data, as well as non-linear relationships between variables.
-
-GLMs also provide a flexible framework for incorporating multiple independent variables, interactions between variables, and controlling for confounding factors. This allows for modeling more nuanced relationships and a better understanding of the factors that influence the outcome variable.
-
-In terms of similarities, both ANOVA and GLMs involve partitioning the total variation in the data into different components. ANOVA partitions the variation into between-group and within-group components, while GLMs partition the variation into systematic (explained) and residual (unexplained) components. Both ANOVA and GLMs also calculate statistics (such as F-statistic in ANOVA and t-statistic in GLMs) to assess the significance of the relationships or differences.
-
-Overall, GLMs provide a more flexible and powerful framework for analyzing data compared to ANOVA. They allow for more complex modeling scenarios and can handle a wider range of data types. However, ANOVA remains a useful and widely used technique, particularly when comparing the means of multiple groups.
-
-In this notebook we will discuss how to design your GLMs and how to use them in F#.
-
-# Designing a GLM
-To design a General Linear Model (GLM), you need to consider the following components:
-
-1. Dependent Variable: This is the variable you want to predict or explain. It should be continuous or categorical.
-
-2. Independent Variables: These are the variables that you believe have an impact on the dependent variable. They can be continuous or categorical.
-
-3. Link Function: The link function relates the linear predictor to the expected value of the dependent variable. It transforms the linear combination of the independent variables into the appropriate scale for the dependent variable. The choice of link function depends on the distribution of the dependent variable.
-
-4. Distribution: The distribution of the dependent variable determines the appropriate probability distribution to model the data. The choice of distribution depends on the nature of the dependent variable (continuous, binary, count, etc.) and the assumptions about the data.
-
-The formula for a GLM is typically written as:
-
-```
-Y = β₀ + β₁X₁ + β₂X₂ + ... + βₚXₚ
-```
-This model is used in statistics to predict the outcome of a dependent variable (Y) based on the values of multiple independent variables (X₁, X₂, ..., Xₚ).
-
-Let's break down the equation:
-
-- `Y` is the dependent variable, also known as the response or outcome variable. This is what we're trying to predict or estimate.
-- `β₀` is the y-intercept of the model. It represents the predicted value of Y when all the independent variables (X's) are 0.
-- `β₁, β₂, ..., βₚ` are the coefficients of the independent variables (X₁, X₂, ..., Xₚ). These values quantify the impact of each corresponding independent variable on the dependent variable. For example, `β₁` is the change in the predicted value of Y for a one-unit change in X₁, assuming all other variables are held constant.
-- `X₁, X₂, ..., Xₚ` are the independent variables, also known as predictors or explanatory variables. These are the variables that we use to predict Y.
-
-In the context of programming, this equation could be implemented in a variety of ways depending on the language and libraries used. For instance, in Python, you might use the `statsmodels` or `scikit-learn` libraries to create a GLM, but in F# we can utilise `FSharp.Stats`.
-
-
-## Loading the Dataset
-First, let's read some data to learn how to utilize Generalized Linear Models (GLMs). Below is the code to read the cheeseDataset, which is sourced from David S. Moore and George P. McCabe's "Introduction to the Practice of Statistics" (1993), second edition, published by W. H. Freeman and Company, available on the [Statlib database](https://dasl.datadescription.com). It contains information on the taste and concentration of various chemical components in 30 matured cheddar cheeses from the LaTrobe Valley in Victoria, Australia. The final Taste score is an aggregate of the scores given by several tasters.
-*)
-
-open Deedle
-open Plotly.NET
-open FSharp.Stats
-
-let cheeseDataset :Frame=
- Frame.ReadCsv $"{__SOURCE_DIRECTORY__}/data/cheese.csv"
- |> Frame.indexRows "Column1"
-
-(***include-value:cheeseDataset***)
-
-(**
-## Creating Histograms
-
-Step two involves visualizing the data using histograms. Histograms are an effective way to understand the distribution and frequency of the data by dividing it into bins and displaying the count of data points in each bin. This visual representation can help identify patterns, trends, and potential outliers in the dataset
-*)
-
-let histograms =
- let histogramTaste =
- Chart.Histogram(cheeseDataset?Taste |> Series.values)
- |> Chart.withXAxisStyle("Taste")
- |> Chart.withYAxisStyle("Frequency")
- |> Chart.withTitle "Histogram of Taste"
- |> Chart.withTraceInfo("Taste")
- let histogramAcetic =
- Chart.Histogram(cheeseDataset?Acetic |> Series.values)
- |> Chart.withXAxisStyle("Acetic")
- |> Chart.withYAxisStyle("Frequency")
- |> Chart.withTitle "Histogram of Acetic"
- |> Chart.withTraceInfo("Acetic")
- let histogramH2S =
- Chart.Histogram(cheeseDataset?H2S |> Series.values)
- |> Chart.withXAxisStyle("H2S")
- |> Chart.withYAxisStyle("Frequency")
- |> Chart.withTitle "Histogram of H2S"
- |> Chart.withTraceInfo("H2S")
- let histogramLactic =
- Chart.Histogram(cheeseDataset?Lactic |> Series.values)
- |> Chart.withXAxisStyle("Lactic")
- |> Chart.withYAxisStyle("Frequency")
- |> Chart.withTitle "Histogram of Lactic"
- |> Chart.withTraceInfo("Lactic")
- Chart.Grid(2,2) [histogramTaste; histogramAcetic; histogramH2S; histogramLactic]
-
-(***include-value:histograms***)
-
-
-(**
-## Preparing Data for GLM
-Now we can try to predict the taste of a cheese by its Aciticity, its H2S content and its Lactic acid content: For this we utilise a GLM. To use this we need to get the dependent variable, the given taste from our dataframe, as a vector and the independent variables, Acetic, H2S and Lactic, into a Matrix.
-*)
-
-let dependentVector =
- cheeseDataset?Taste
- |> Series.values
- |> Vector.ofSeq
-
-let independentMatrix =
- cheeseDataset
- |> Frame.dropCol "Taste"
- |> Frame.toJaggedArray
- |> Matrix.ofJaggedArray
-
-(**
-To include the y-intercept (also known as the intercept term) in the GLM, we must add a column of ones to our matrix of independent variables. This column represents the constant term in the model and allows the estimation of the y-intercept when fitting the model.
-*)
-
-let updatedIndependentMatrix =
- independentMatrix
- |> Matrix.toJaggedArray
- |> Array.map (fun row -> Array.append [|1.0|] row)
- |> Matrix.ofJaggedArray
-
-(**
-## Fitting the GLM
-The next step we need to take is to determine which linker functions to use in our Model.
-Generalized Linear Models extend linear models to allow for response variables that have error distribution models other than a normal distribution. The choice of distribution family in a GLM depends on the nature of the response variable (dependent variable). Here is a summary of when to use each GLM distribution family:
-
-**Normal (Gaussian) Distribution**:
- - **Use when**: The response variable is continuous and normally distributed.
- - **Common applications**: Linear regression, ANOVA, ANCOVA.
- - **Examples**: Heights, weights, test scores.
-
-**Binomial Distribution**:
- - **Use when**: The response variable is binary (0 or 1) or proportion data.
- - **Common applications**: Logistic regression, probit regression.
- - **Examples**: Yes/No outcomes, success/failure data.
-
-**Poisson Distribution**:
- - **Use when**: The response variable represents count data, especially counts of rare events.
- - **Common applications**: Poisson regression.
- - **Examples**: Number of customer complaints, number of accidents.
-
-**Negative Binomial Distribution**:
- - **Use when**: The response variable is count data with overdispersion (variance greater than the mean).
- - **Common applications**: Negative binomial regression.
- - **Examples**: Number of insurance claims, number of hospital visits.
-
-**Gamma Distribution**:
- - **Use when**: The response variable is continuous and positive, often for skewed distributions.
- - **Common applications**: Gamma regression.
- - **Examples**: Insurance claims costs, time until an event occurs.
-
-**Inverse Gaussian Distribution**:
- - **Use when**: The response variable is continuous and positive, and particularly when the data has a long right tail.
- - **Common applications**: Inverse Gaussian regression.
- - **Examples**: Reaction times, survival times.
-
-
-**Multinomial Distribution**:
- - **Use when**: The response variable represents categorical data with more than two categories.
- - **Common applications**: Multinomial logistic regression.
- - **Examples**: Survey responses with multiple choices, type of disease diagnosis.
-
-Each distribution family has a corresponding link function that relates the linear predictor to the mean of the distribution. The choice of link function can also be tailored to better fit the specific characteristics of the data. Common link functions include the identity link, log link, logit link, and inverse link, among others.
-
-Understanding the characteristics of your data and the nature of the response variable is crucial in selecting the appropriate distribution family for a GLM.
-*)
-
-// Matrix of independent variables
-let A = updatedIndependentMatrix
-
-// Vector of dependent variable
-let b = dependentVector
-
-// Maximum number of iterations
-let maxIter = 100
-
-// Distribution family of the dependent variable
-let distributionFamily = FSharp.Stats.Fitting.GLM.GlmDistributionFamily.Poisson
-
-// Tolerance for the convergence of the algorithm, usually 1e-11 or 1e-6
-let mTol = 1e-6
-
-// Fit the model
-let glm =
- FSharp.Stats.Fitting.GLM.SolveGLM.solveQR A b maxIter distributionFamily mTol
-
-glm
-(***include-value:glm***)
-
-(**
-## Getting GLM Predictions
-
-The results of the GLM are in the GLMReturn format, containing the coefficient vector *mX* and the mean response vector *mu*. The coefficients in the *mX* vector are in the same order as the matrix of independent variables we gave the model. In our case this order is:
-1. intercept term
-2. Acetic
-3. H2S
-4. Lactic
-
-This means we can build a predictor function using the result of the GLM that can predict Taste based on Acetic, H2S and Lactic.
-Let's turn the predictions into a Map for easy access. For this we use the 'GLMParameterStatistics' for easy access to each parameter of the predictions.
-Using this map we can also access the zScore and Pearson scores of each of the predictors, which tell us how important they are to explain our model.
-*)
-
-let glmPredictions =
- FSharp.Stats.Fitting.GLM.GLMStatistics.getGLMParameterStatistics A b glm ["Intercept"; "Acetic"; "H2S"; "Lactic"]
- |> Map.ofSeq
-
-(***include-value:glmPredictions***)
-
-
-(**
-## Cheese Taste Predictor Function
-
-This function returned a map of the name of the value we assigned to it and their coefficient, standard error, z score and pvalue.
-
-### Coefficient
-The estimated effect size of the predictor variable. It indicates the expected change in the dependent variable for a one-unit change in the predictor variable, holding all other variables constant.
-
-### Standard Error
-Measures the accuracy of the coefficient's estimate. It is the standard deviation of the sampling distribution of the coefficient. A smaller standard error indicates a more precise estimate.
-
-### Z Score
-Calculated as the coefficient divided by its standard error. It tests the null hypothesis that the coefficient is zero. A larger absolute value indicates stronger evidence against the null hypothesis.
-
-### p-value
-Indicates the probability of observing a test statistic as extreme as the observed value under the null hypothesis. A smaller p-value suggests stronger evidence against the null hypothesis. Typically, a p-value less than 0.05 is considered statistically significant.
-
-Lets use these values to create a function to predict the taste based of the coefficients.
-
-*)
-
-/// Predicts the taste of cheese based on the given input variables.
-///
-/// Parameters:
-/// acetic - The acetic acid level in the cheese.
-/// h2s - The hydrogen sulfide level in the cheese.
-/// lactic - The lactic acid level in the cheese.
-///
-/// Returns:
-/// The predicted taste of the cheese.
-let cheeseTastePredictor acetic h2s lactic =
- // Extract the intercept term from the GLM coefficients
- let intercept = glmPredictions.Item "Intercept" |> fun x -> x.Coefficient
-
- // Extract the coefficient for the acetic acid predictor from the GLM coefficients
- let aceticCoefficient = glmPredictions.Item "Acetic" |> fun x -> x.Coefficient
-
- // Extract the coefficient for the hydrogen sulfide (H2S) predictor from the GLM coefficients
- let H2SCoefficient = glmPredictions.Item "H2S" |> fun x -> x.Coefficient
-
- // Extract the coefficient for the lactic acid predictor from the GLM coefficients
- let LacticCoefficient = glmPredictions.Item "Lactic" |> fun x -> x.Coefficient
-
- // Calculate and return the predicted cheese taste
- // The prediction is the sum of the intercept and the products of each coefficient with its corresponding predictor value
- intercept + aceticCoefficient * acetic + H2SCoefficient * h2s + LacticCoefficient * lactic
-
-(**
-## Getting GLM Model Statistics
-
-Lastly, let's examine how well our model fits the data overall. For this, we use the 'GLMModelStatistics', which provide key metrics such as LogLikelihood, Deviance, and PearsonChi2.
-
-### LogLikelihood
-LogLikelihood measures the goodness of fit of the model. It is the logarithm of the likelihood function, which evaluates how likely it is that the observed data would occur under the model parameters. Higher values indicate a better fit of the model to the data.
-
-### Deviance
-Deviance is a measure of the discrepancy between the observed data and the values predicted by the model. It compares the likelihood of the model to the likelihood of a perfect model that predicts the data exactly. Lower deviance indicates a better fit.
-
-### Pearson Chi-Square (PearsonChi2)
-Pearson Chi-Square is another measure of goodness of fit. It assesses how well the observed data match the expected data predicted by the model. Lower values suggest a better fit. It is particularly useful for identifying overdispersion or underdispersion in the model.
-
-These statistics together give us a comprehensive view of the model's performance and its ability to explain the variability in the data.
-*)
-
-let glmStats = FSharp.Stats.Fitting.GLM.GLMStatistics.getGLMStatisticsModel b glm distributionFamily
-(***include-value:glmStats***)
diff --git a/docs/GoodnessOfFit.fsx b/docs/GoodnessOfFit.fsx
index 307e2e733..2668cbdf9 100644
--- a/docs/GoodnessOfFit.fsx
+++ b/docs/GoodnessOfFit.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -39,13 +43,6 @@ open Plotly.NET
_Summary:_ this tutorial shows how to assess fit quality with FSharp.Stats
-### Table of contents
-
- - [Linear regression report](#Linear-regression-report)
- - [Confidence bands](#Confidence-bands)
- - [Prediction bands](#Prediction-bands)
- - [Cook's distance](#Cook-s-distance)
-
## Linear regression report
Consider this simple linear regression:
@@ -199,12 +196,11 @@ let fitValues = xData |> Seq.map (fun xi -> xi,(predictionFunction xi))
///calculate confidence band errors for every x value
let confidence =
xData
- |> Vector.map (calculateConfidenceBandError xData yData 0.95)
+ |> Array.map (calculateConfidenceBandError xData yData 0.95)
///lower and upper bounds of the 95% confidence band sorted according to x values
let (lower,upper) =
xData
- |> Vector.toArray
|> Array.mapi (fun i xi -> (predictionFunction xi) - confidence.[i],(predictionFunction xi) + confidence.[i])
|> Array.unzip
@@ -244,12 +240,11 @@ let newXValues =
///calculate confidence band errors for every x value
let newConfidence =
newXValues
- |> Vector.map (calculateConfidenceBandError xData yData 0.95)
+ |> Array.map (calculateConfidenceBandError xData yData 0.95)
///lower and upper bounds of the 95% confidence band sorted according to x values
let (newLower,newUpper) =
newXValues
- |> Vector.toArray
|> Array.mapi (fun i xi -> (predictionFunction xi) - newConfidence.[i],(predictionFunction xi) + newConfidence.[i])
|> Array.unzip
@@ -283,12 +278,11 @@ let predictionXValues = vector [|1. .. 0.5 .. 15.|]
///calculate preditcion band errors for every x value
let prediction =
predictionXValues
- |> Vector.map (calculatePredictionBandError xData yData 0.95)
+ |> Array.map (calculatePredictionBandError xData yData 0.95)
///lower and upper bounds of the 95% prediction band sorted according to x values
let (pLower,pUpper) =
predictionXValues
- |> Vector.toArray
|> Array.mapi (fun i xi -> (predictionFunction xi) - prediction.[i],(predictionFunction xi) + prediction.[i])
|> Array.unzip
diff --git a/docs/GrowthCurve.fsx b/docs/GrowthCurve.fsx
index 520fa5dfe..e5ad0513a 100644
--- a/docs/GrowthCurve.fsx
+++ b/docs/GrowthCurve.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -37,25 +41,6 @@ open Plotly.NET
_Summary:_ this tutorial demonstrates various ways to model growth curves, a common task in any (micro)biological lab
-### Table of contents
-
- - [Modelling](#Modelling)
- - [Manual phase selection](#Manual-phase-selection)
- - [Gompertz model](#Gompertz-model)
- - [Generation time calculation](#Generation-time-calculation)
- - [Other models](#Other-models)
- - [Richards curve](#Richards-curve)
- - [Weibull](#Weibull)
- - [Janoschek](#Janoschek)
- - [Exponential](#Exponential)
- - [Verhulst](#Verhulst)
- - [Morgan-Mercer-Flodin](#Morgan-Mercer-Flodin)
- - [von Bertalanffy](#von-Bertalanffy)
- - [Comparison between all models](Comparison-between-all-models)
- - [Fit function](#Fit-function)
- - [Generation time](#Generation-time)
- - [Model examples](#Model-examples)
-
## Modelling
Growth and other physiological parameters like size/weight/length can be modeled as function of time.
@@ -280,10 +265,10 @@ The four parameter Gompertz model allows the determination of generation times f
*)
-let generationtime (parametervector:vector) (logTransform:float -> float) =
+let generationtime (parametervector:Vector) (logTransform:float -> float) =
logTransform 2. * Math.E / (parametervector.[1] * parametervector.[2])
-let lag (parametervector:vector) =
+let lag (parametervector:Vector) =
(parametervector.[3] - 1.) / parametervector.[1]
let g = sprintf "The generation time (Gompertz) is: %.1f min" (60. * (generationtime gompertzParams log))
@@ -351,7 +336,7 @@ let fittingFunctionRichards =
(**Here is a pre-evaluated version (to save time during the build process, as the solver takes quite some time.)*)
-let generationtimeRichards (richardParameters:vector) =
+let generationtimeRichards (richardParameters:Vector) =
let l = richardParameters.[0]
let k = richardParameters.[1]
let y = richardParameters.[2] //x value of inflection point
@@ -434,7 +419,7 @@ let fittingFunctionWeibull =
(**Here is a pre-evaluated version (to save time during the build process, as the solver takes quite some time.)*)
-let generationtimeWeibull (weibullParameters:vector) =
+let generationtimeWeibull (weibullParameters:Vector) =
let b = weibullParameters.[0]
let l = weibullParameters.[1]
let k = weibullParameters.[2]
@@ -520,7 +505,7 @@ let fittingFunctionJanoschek =
(**Here is a pre-evaluated version (to save time during the build process, as the solver takes quite some time.)*)
-let generationtimeJanoschek (janoschekParameters:vector) =
+let generationtimeJanoschek (janoschekParameters:Vector) =
let b = janoschekParameters.[0]
let l = janoschekParameters.[1]
let k = janoschekParameters.[2]
@@ -608,7 +593,7 @@ let fittingFunctionExponential =
(**Here is a pre-evaluated version (to save time during the build process, as the solver takes quite some time.)*)
-let generationtimeExponential (expParameters:vector) =
+let generationtimeExponential (expParameters:Vector) =
let b = expParameters.[0]
let l = expParameters.[1]
let k = expParameters.[2]
@@ -699,7 +684,7 @@ let fittingFunctionVerhulst() =
(**Here is a pre-evaluated version (to save time during the build process, as the solver takes quite some time.)*)
-let generationtimeVerhulst (verhulstParameters:vector) =
+let generationtimeVerhulst (verhulstParameters:Vector) =
let lmax = verhulstParameters.[0]
let k = verhulstParameters.[1]
let d = verhulstParameters.[2]
@@ -783,7 +768,7 @@ let fittingFunctionMMF() =
(**Here is a pre-evaluated version (to save time during the build process, as the solver takes quite some time.)*)
-let generationtimeMmf (mmfParameters:vector) =
+let generationtimeMmf (mmfParameters:Vector) =
let b = mmfParameters.[0]
let l = mmfParameters.[1]
let k = mmfParameters.[2]
diff --git a/docs/Imputation.fsx b/docs/Imputation.fsx
index 92b646902..528f065e8 100644
--- a/docs/Imputation.fsx
+++ b/docs/Imputation.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
diff --git a/docs/Integration.fsx b/docs/Integration.fsx
index 2e201f0f0..78523e311 100644
--- a/docs/Integration.fsx
+++ b/docs/Integration.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
diff --git a/docs/Interpolation.fsx b/docs/Interpolation.fsx
index ddc4eb368..54bedc997 100644
--- a/docs/Interpolation.fsx
+++ b/docs/Interpolation.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -37,17 +41,6 @@ open Plotly.NET
_Summary:_ This tutorial demonstrates several ways of interpolating with FSharp.Stats
-### Table of contents
-
-- [Summary](#Summary)
-- [Polynomial interpolation](#Polynomial-interpolation)
-- [Cubic interpolating spline](#Cubic-spline-interpolation)
-- [Akima interpolating subspline](#Akima-subspline-interpolation)
-- [Hermite interpolation](#Hermite-interpolation)
-- [Bezier interpolation](#Bezier-interpolation)
-- [Chebyshev function approximation](#Chebyshev-function-approximation)
-
-
## Summary
With the `FSharp.Stats.Interpolation` module you can apply various interpolation methods. While interpolating functions always go through the input points (knots), methods to predict function values
@@ -395,7 +388,7 @@ let bezierInterpolation =
let c2 = vector [|6.5;-1.5|] //control point 2
let c3 = vector [|13.5;4.|] //control point 3
let p1 = vector [|10.;5.|] //point 1 that should be traversed
- let toPoint (v : vector) = v[0],v[1]
+ let toPoint (v : Vector) = v[0],v[1]
let interpolate = Bezier.interpolate [|p0;c0;c1;c2;c3;p1|] >> toPoint
[
@@ -431,7 +424,7 @@ let bezierInterpolation3d =
let c0 = vector [|1.5;2.1;2.|] //control point 0
let c1 = vector [|5.8;1.6;1.4|] //control point 1
let p1 = vector [|3.;2.;0.|] //point 1 that should be traversed
- let to3Dpoint (v : vector) = v[0],v[1],v[2]
+ let to3Dpoint (v : Vector) = v[0],v[1],v[2]
let interpolate = Bezier.interpolate [|p0;c0;c1;p1|] >> to3Dpoint
[
@@ -542,7 +535,7 @@ let xs_cheby =
// to get the corresponding y values to the xs_cheby a linear spline is generated that approximates the new y values
let ys_cheby =
let ls = Interpolation.LinearSpline.interpolate xs ys
- xs_cheby |> Vector.map (Interpolation.LinearSpline.predict ls)
+ xs_cheby |> Array.map (Interpolation.LinearSpline.predict ls)
// again polynomial interpolation coefficients are determined, but here with the x and y data that correspond to the chebyshev spacing
let coeffs_cheby = Interpolation.Polynomial.interpolate xs_cheby ys_cheby
diff --git a/docs/Intervals.fsx b/docs/Intervals.fsx
index 13e90c87f..51c2b1ca5 100644
--- a/docs/Intervals.fsx
+++ b/docs/Intervals.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -50,7 +54,7 @@ The interval module enables working with closed intervals. A closed interval inc
open FSharp.Stats
open Plotly.NET
-let myInterval = Interval.CreateLeftOpen (-3.,2.)
+let myInterval = Interval.CreateLeftOpen (-3.,2.)
let loi = sprintf "myInterval is: %s" (myInterval.ToString())
diff --git a/docs/Normalization.fsx b/docs/Normalization.fsx
index 3bd51bfd4..4a2229d1d 100644
--- a/docs/Normalization.fsx
+++ b/docs/Normalization.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -43,11 +47,6 @@ open Plotly.NET.LayoutObjects
_Summary:_ this tutorial demonstrates multiple ways of data normalization accross several samples
-### Table of contents
- - [Introduction](#Introduction)
- - [Median of Ratios](#Median-of-ratios)
- - [Quantile normalization](#Quantile-normalization)
-
## Introduction
When you want to compare e.g. intensity measurements of elements between samples, you often have to normalize the samples in order
@@ -163,8 +162,9 @@ let rawData =
// visualization of the raw data
let rawDataChart =
- rawData.Transpose
- |> Matrix.toJaggedArray
+ rawData
+ |> Matrix.transpose
+ |> (fun m -> m.toJaggedArray())
|> Array.mapi (fun sampleID sample ->
let sampleIntensities =
sample
@@ -221,8 +221,9 @@ corrFactors
// visualization of the normed data
let normedDataChart =
- morNormedData.Transpose
- |> Matrix.toJaggedArray
+ morNormedData
+ |> Matrix.transpose
+ |> (fun m -> m.toJaggedArray())
|> Array.mapi (fun sampleID sample ->
let sampleIntensities =
sample
@@ -315,8 +316,9 @@ let quantileNorm =
// visualization of the normed data
let normedDataQuantileChart =
- quantileNorm.Transpose
- |> Matrix.toJaggedArray
+ quantileNorm
+ |> Matrix.transpose
+ |> (fun m -> m.toJaggedArray())
|> Array.mapi (fun sampleID sample ->
let sampleIntensities =
sample
diff --git a/docs/NuGet.config b/docs/NuGet.config
deleted file mode 100644
index cf1ace51a..000000000
--- a/docs/NuGet.config
+++ /dev/null
@@ -1,14 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/Optimization.fsx b/docs/Optimization.fsx
index c175a8f07..4d6589a29 100644
--- a/docs/Optimization.fsx
+++ b/docs/Optimization.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -39,13 +43,8 @@ open Plotly.NET
_Summary:_ This tutorial teaches how to use optimization methods within FSharp.Stats
-### Table of contents
-
- - [Nelder-Mead](#Nelder-Mead)
-
## Nelder-Mead
-
The Nelder-Mead method (also downhill simplex method) can be used to find the minimum or maximum of an objective function.
Please check out Mathias' blog post about the [nelder mead algorithm](https://brandewinder.com/2022/03/31/breaking-down-Nelder-Mead/).
@@ -66,7 +65,7 @@ open Plotly.NET
open Plotly.NET.TraceObjects
-let myFunction (xs: vector) =
+let myFunction (xs: Vector) =
let x = xs.[0]
x**2. + 0.32*x + 0.13
@@ -122,7 +121,7 @@ Lets define the function, and a starting coordinate for the optimization task.
// Rosenbrock's valley or Rosenbrock's banana function
-let rosenbrock (xs: vector) =
+let rosenbrock (xs: Vector) =
let x, y = xs.[0], xs.[1]
pown (1.0 - x) 2 + 100.0 * pown (y - pown x 2) 2
@@ -233,7 +232,7 @@ The Auckley function has many valleys, with one center and global minimum at $(0
// Auckley function
-let auckley (xs: vector) =
+let auckley (xs: Vector) =
let x, y = xs.[0], xs.[1]
-20.*exp(-0.2*sqrt(0.5*(x**2. + y**2))) -
exp(0.5*(cos(2. * Math.PI * x) + cos(2. * Math.PI * y))) +
@@ -300,7 +299,7 @@ The Nelder-Mead method is able to identiy a local minimum, but misses the global
// Beale function function
-let beale (xs: vector) =
+let beale (xs: Vector) =
let x, y = xs.[0], xs.[1]
(1.5 - x + x*y)**2. +
(2.25 - x + x*y**2)**2. +
diff --git a/docs/Quantiles.fsx b/docs/Quantiles.fsx
index 9e2102e77..a175be6d1 100644
--- a/docs/Quantiles.fsx
+++ b/docs/Quantiles.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -37,16 +41,6 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
_Summary:_ this tutorial demonstrates how to handle quantiles and QQ-Plots
-### Table of contents
-
- - [Quantiles](#Quantiles)
- - [QQ plot](#QQ-plot)
- - [Comparing two sample distributions](#Comparing-two-sample-distributions)
- - [Comparing a sample against a distribution](#Comparing-a-sample-against-a-distribution)
- - [Normal distribution](#Normal-distribution)
- - [Uniform Distribution](#Uniform-Distribution)
-- [Quantile normalization](#Quantile-normalization)
-
## Quantiles
Quantiles are values that divide data into equally spaced groups. Percentiles are just quantiles that divide the data in 100 equally sized groups.
diff --git a/docs/Rank.fsx b/docs/Rank.fsx
index 7b7af6534..63ee06197 100644
--- a/docs/Rank.fsx
+++ b/docs/Rank.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
diff --git a/docs/Signal.fsx b/docs/Signal.fsx
index cdf566238..ce3e7f2d5 100644
--- a/docs/Signal.fsx
+++ b/docs/Signal.fsx
@@ -10,10 +10,14 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -43,16 +47,6 @@ open Plotly.NET.LayoutObjects
_Summary:_ this tutorial demonstrates multiple ways of signal processing with FSharp.Stats.
-### Table of contents
- - [Outliers](#Outliers)
- - [Tukey's fences](#Tukey-s-fences)
- - [Filtering](#Filtering)
- - [Padding](#Padding)
- - [Wavelet](#Wavelet)
- - [Continuous Wavelet](#Continuous-Wavelet)
- - [Continuous Wavelet 3D](#Continuous-Wavelet-3D)
- - [Fast Fourier transform](#Fast-Fourier-transform)
-
## Outliers
### Tukey's fences
diff --git a/docs/Testing.fsx b/docs/Testing.fsx
index ba709a690..2e33df7d0 100644
--- a/docs/Testing.fsx
+++ b/docs/Testing.fsx
@@ -10,12 +10,15 @@ categoryindex: 0
(*** hide ***)
(*** condition: prepare ***)
-#I "../src/FSharp.Stats/bin/Release/netstandard2.0/"
+#r "nuget: FSharpAux.Core, 2.0.0"
+#r "nuget: FSharpAux, 2.0.0"
+#r "nuget: FSharpAux.IO, 2.0.0"
+#r "nuget: OptimizedPriorityQueue, 5.1.0"
+#r "nuget: FsMath, 0.0.1"
+#I "../src/FSharp.Stats/bin/Release/.net8.0/"
#r "FSharp.Stats.dll"
#r "nuget: Plotly.NET, 4.0.0"
-#r "nuget: FSharpAux, 1.1.0"
#r "nuget: Deedle, 3.0.0"
-#r "nuget: FsMath, 0.0.1"
open FsMath
Plotly.NET.Defaults.DefaultDisplayOptions <-
@@ -26,7 +29,6 @@ Plotly.NET.Defaults.DefaultDisplayOptions <-
#r "nuget: Plotly.NET, 4.0.0"
#r "nuget: Plotly.NET.Interactive, 4.0.0"
#r "nuget: FSharp.Stats"
-#r "nuget: FSharpAux, 1.1.0"
#r "nuget: Deedle, 3.0.0"
open Plotly.NET
@@ -47,28 +49,6 @@ open Deedle
_Summary:_ this tutorial explains how to perform various statistical tests with FSharp.Stats.
-### Table of contents
-
- - [Test Statistics](#Test-Statistics)
- - [T-Test](#T-Test)
- - [Anova](#Anova)
- - [F-Test](#F-Test)
- - [H-Test](#H-Test)
- - [Friedman-Test](#Friedman-Test)
- - [Wilcoxon signed-rank Test](#Wilcoxon-Test)
- - [Chi-Squared Test](#Chi-Squared-Test)
- - [Bartlett](#Bartlett)
- - [PostHoc](#PostHoc)
- - [Fisher's LSD](#Fisher-s-LSD)
- - [Hays](#Hays)
- - [Tukey HSD](#Tukey-HSD)
- - [Dunnetts test](#Dunnetts-test)
- - [Fisher Hotelling](#Fisher-Hotelling)
-- [Multiple testing](#Multiple-testing)
- - [Benjamini-Hochberg](#Benjamini-Hochberg)
- - [Q Value](#Q-Value)
- - [SAM](#SAM)
-
FSharp.Stats provides hypothesis tests for different applications.
A hypothesis test is a statistical test that is used to determine whether there is enough evidence
in a sample of data to infer that a certain condition is true for the entire population.
diff --git a/docs/_head.html b/docs/_head.html
new file mode 100644
index 000000000..b4c501468
--- /dev/null
+++ b/docs/_head.html
@@ -0,0 +1,25 @@
+
+{{fsdocs-page-title}}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/_template.html b/docs/_template.html
deleted file mode 100644
index 9fd0e8331..000000000
--- a/docs/_template.html
+++ /dev/null
@@ -1,83 +0,0 @@
-
-
-
-
-
- {{fsdocs-page-title}}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- {{fsdocs-watch-script}}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/global.json b/global.json
index f15a95928..501e79a87 100644
--- a/global.json
+++ b/global.json
@@ -1,6 +1,6 @@
{
"sdk": {
- "version": "9.0.100",
- "rollForward": "latestMinor"
+ "version": "8.0.100",
+ "rollForward": "latestFeature"
}
}
\ No newline at end of file
diff --git a/src/FSharp.Stats/Array.fs b/src/FSharp.Stats/Array.fs
index ebaed80db..4221def69 100644
--- a/src/FSharp.Stats/Array.fs
+++ b/src/FSharp.Stats/Array.fs
@@ -645,7 +645,7 @@ module ArrayExtension =
///
/// start value (is included)
/// end value (by default is included )
- /// sets the number of elements in the array. If not set, stepsize = 1.
+ /// sets the number of elements in the array. If not set, stepsize = 1.
/// If false, the array does not contain the stop value
static member linspace(start:float,stop:float,num:int,?IncludeEndpoint:bool) : float [] =
@@ -658,7 +658,7 @@ module ArrayExtension =
///
/// start value (is included)
/// end value (by default is included)
- /// sets the number of elements in the array. Defaults to 50.
+ /// sets the number of elements in the array. Defaults to 50.
/// If false, the array does not contain the stop value. Defaults to true.
static member geomspace(start:float,stop:float,num:int,?IncludeEndpoint:bool) : float array =
let includeEndpoint = defaultArg IncludeEndpoint true
diff --git a/src/FSharp.Stats/Correlation.fs b/src/FSharp.Stats/Correlation.fs
index 805000d38..5182b213c 100644
--- a/src/FSharp.Stats/Correlation.fs
+++ b/src/FSharp.Stats/Correlation.fs
@@ -298,19 +298,19 @@ module Correlation =
///
/// Tau A - Make no adjustments for ties
///
- /// The first array of observations.
- /// The second array of observations.
+ /// The first array of observations.
+ /// The second array of observations.
/// Number of concordant minues the number of discordant pairs.
/// n(n-1)/2 or (n choose 2), where n is the number of observations.
- /// sum_i(t_i(t_i-1)/2) where t_is is t_i he number of pairs of observations with the same x value.
- /// sum_i(u_i(u_i-1)/2) where u_is is u_i he number of pairs of observations with the same y value.
+ /// sum_i(t_i(t_i-1)/2) where t_is is t_i he number of pairs of observations with the same x value.
+ /// sum_i(u_i(u_i-1)/2) where u_is is u_i he number of pairs of observations with the same y value.
/// The Kendall tau A statistic.
let tauA _x _y pq n0 _n1 _n2 = pq / float n0
///
/// Tau B - Adjust for ties. tau_b = pq / sqrt((n0 - n1)(n0 - n2))
///
- /// The first array of observations.
- /// The second array of observations.
+ /// The first array of observations.
+ /// The second array of observations.
/// Number of concordant minues the number of discordant pairs.
/// n(n-1)/2 or (n choose 2), where n is the number of observations.
/// sum_i(t_i(t_i-1)/2) where t_is is t_i he number of pairs of observations with the same x value.
@@ -325,9 +325,9 @@ module Correlation =
/// The first array of observations.
/// The second array of observations.
/// Number of concordant minues the number of discordant pairs.
- /// n(n-1)/2 or (n choose 2), where n is the number of observations.
- /// sum_i(t_i(t_i-1)/2) where t_is is t_i he number of pairs of observations with the same x value.
- /// sum_i(u_i(u_i-1)/2) where u_is is u_i he number of pairs of observations with the same y value.
+ /// n(n-1)/2 or (n choose 2), where n is the number of observations.
+ /// sum_i(t_i(t_i-1)/2) where t_is is t_i he number of pairs of observations with the same x value.
+ /// sum_i(u_i(u_i-1)/2) where u_is is u_i he number of pairs of observations with the same y value.
/// The Kendall tau C statistic.
let tauC (x : _[]) y pq _n0 _n1 _n2 =
let n = x.Length
diff --git a/src/FSharp.Stats/DistanceMetrics.fs b/src/FSharp.Stats/DistanceMetrics.fs
index aa2f0ff20..5c173c529 100644
--- a/src/FSharp.Stats/DistanceMetrics.fs
+++ b/src/FSharp.Stats/DistanceMetrics.fs
@@ -124,8 +124,8 @@ module DistanceMetrics =
/// The [Minkowski distance](https://en.wikipedia.org/wiki/Minkowski_distance) between two vectors of order `p`.
/// The two vectors need not have equal lengths: when one vectors is exhausted any remaining elements in the other vectors are ignored.
- /// first vector
- /// second vector
+ /// first vector
+ /// second vector
/// float constrained to `p > 0`
/// Minkowski distance between elements of given vectors. Returns NaN if vectors contain NaN.
///
@@ -163,8 +163,8 @@ module DistanceMetrics =
/// The [Minkowski distance](https://en.wikipedia.org/wiki/Minkowski_distance) between two vectors (ignores NaN) of order `p`.
/// Non-regular differences between the sequences are ignored.
/// The two vectors need not have equal lengths: when one vectors is exhausted any remaining elements in the other vectors are ignored.
- /// first vector
- /// second vector
+ /// first vector
+ /// second vector
/// float constrained to `p > 0`
/// Minkowski distance between elements of given vectors.
///
@@ -319,8 +319,8 @@ module DistanceMetrics =
/// The [Minkowski distance](https://en.wikipedia.org/wiki/Minkowski_distance) between two arrays of order `p`.
/// The two arrays need not have equal lengths: when one array is exhausted any remaining elements in the other array are ignored.
- /// first array
- /// second array
+ /// first array
+ /// second array
/// float constrained to `p > 0`
/// Minkowski distance between elements of given arrays. Returns NaN if arrays contain NaN.
///
@@ -358,8 +358,8 @@ module DistanceMetrics =
/// The [Minkowski distance](https://en.wikipedia.org/wiki/Minkowski_distance) between two arrays (ignores NaN) of order `p`.
/// Non-regular differences between the sequences are ignored.
/// The two arrays need not have equal lengths: when one array is exhausted any remaining elements in the other array are ignored.
- /// first array
- /// second array
+ /// first array
+ /// second array
/// float constrained to `p > 0`
/// Minkowski distance between elements of given arrays.
///
diff --git a/src/FSharp.Stats/Distributions/Continuous/ChiSquared.fs b/src/FSharp.Stats/Distributions/Continuous/ChiSquared.fs
index 393c70c5d..44aa14ac4 100644
--- a/src/FSharp.Stats/Distributions/Continuous/ChiSquared.fs
+++ b/src/FSharp.Stats/Distributions/Continuous/ChiSquared.fs
@@ -67,7 +67,7 @@ type ChiSquared =
ChiSquared.CheckParam dof
sqrt (dof * 2.)
- /// Degrees of freedom (must be positive).
+ /// Degrees of freedom (must be positive).
/// A sample from χ²(dof), interpreted as Gamma(dof/2, 2).
static member SampleUnchecked (dof: float) : float =
let alpha = dof / 2.0
diff --git a/src/FSharp.Stats/Distributions/Discrete/Binomial.fs b/src/FSharp.Stats/Distributions/Discrete/Binomial.fs
index b4a292d08..a4f654c40 100644
--- a/src/FSharp.Stats/Distributions/Discrete/Binomial.fs
+++ b/src/FSharp.Stats/Distributions/Discrete/Binomial.fs
@@ -82,7 +82,6 @@ type Binomial =
/// Produces a random sample using the current random number generator (from GetSampleGenerator()). No parameter checking!
///
- ///
///
///
///
diff --git a/src/FSharp.Stats/Distributions/Discrete/Hypergeometric.fs b/src/FSharp.Stats/Distributions/Discrete/Hypergeometric.fs
index e71974105..c64319715 100644
--- a/src/FSharp.Stats/Distributions/Discrete/Hypergeometric.fs
+++ b/src/FSharp.Stats/Distributions/Discrete/Hypergeometric.fs
@@ -100,7 +100,6 @@ type Hypergeometric =
/// Produces a random sample using the current random number generator (from GetSampleGenerator()). No parameter checking!
///
- ///
///
///
///
@@ -253,11 +252,17 @@ type Hypergeometric =
/// Initializes a hypergeometric distribution.
///
- /// The hypergeometric distribution is a discrete probability distribution that describes the probability of `k` successes (random draws for which the object drawn has a specified feature) in `n` draws, without replacement, from a finite population of size `N` that contains exactly `K` objects with that feature, wherein each draw is either a success (`1.0`) or a failure (`0.0`). The population size The number of success states in the population The number of draws
+ ///
+ /// The hypergeometric distribution is a discrete probability distribution
+ /// that describes the probability of `k` successes (random draws for which the object
+ /// drawn has a specified feature) in `n` draws, without replacement, from a finite
+ /// population of size `N` that contains exactly `K` objects with that feature,
+ /// wherein each draw is either a success (`1.0`) or a failure (`0.0`).
+ ///
+ /// The population size
+ /// The number of success states in the population
+ /// The number of draws
///
- ///
- ///
- ///
///
///
///
diff --git a/src/FSharp.Stats/Distributions/Discrete/NegativeBinomial.fs b/src/FSharp.Stats/Distributions/Discrete/NegativeBinomial.fs
index 44d1f0a3e..429f6217a 100644
--- a/src/FSharp.Stats/Distributions/Discrete/NegativeBinomial.fs
+++ b/src/FSharp.Stats/Distributions/Discrete/NegativeBinomial.fs
@@ -1,4 +1,4 @@
-namespace FSharp.Stats.Distributions.Discrete
+namespace FSharp.Stats.Distributions.Discrete
open System
open FSharp.Stats
@@ -154,10 +154,13 @@ type NegativeBinomial_trials =
sprintf "NegativeBinomial_trials(r = %i, p = %f)" r p
/// Initializes a negative binomial distribution.
- /// The negative binomial distribution is a discrete probability distribution that models the number of trials needed x to get the rth success in repeated independent Bernoulli trials with probability p. The number of success states The probability of each independent bernoulli trial The number of trials until the rth success
+ /// The negative binomial distribution is a discrete probability distribution
+ /// that models the number of trials needed x to get the rth success in repeated
+ /// independent Bernoulli trials with probability p.
+ ///
+ /// The number of success states
+ /// The probability of each independent bernoulli trial
///
- ///
- ///
///
///
///
diff --git a/src/FSharp.Stats/Distributions/Discrete/Poisson.fs b/src/FSharp.Stats/Distributions/Discrete/Poisson.fs
index 7ffd9d580..82c64a1cc 100644
--- a/src/FSharp.Stats/Distributions/Discrete/Poisson.fs
+++ b/src/FSharp.Stats/Distributions/Discrete/Poisson.fs
@@ -86,7 +86,6 @@ type Poisson =
/// Produces a random sample using the current random number generator (from GetSampleGenerator()). No parameter checking!
///
- ///
///
///
///
diff --git a/src/FSharp.Stats/Distributions/Distance.fs b/src/FSharp.Stats/Distributions/Distance.fs
index acc97b092..1d1c14f10 100644
--- a/src/FSharp.Stats/Distributions/Distance.fs
+++ b/src/FSharp.Stats/Distributions/Distance.fs
@@ -201,13 +201,13 @@ module Distance =
///
///
///
- ///
+ ///
///
///
///
///
///
- let cdfDistanceWeighted p (xs : float []) (ys : float []) xWeights yWeights =
+ let cdfDistanceWeighted p (xs : float []) (ys : float []) xWeights yWeights =
if xs.Length = 0 then failwith "Given distribtuion xs is empty"
if ys.Length = 0 then failwith "Given distribtuion ys is empty"
diff --git a/src/FSharp.Stats/Distributions/Empirical.fs b/src/FSharp.Stats/Distributions/Empirical.fs
index a37c6af6f..15f40b64e 100644
--- a/src/FSharp.Stats/Distributions/Empirical.fs
+++ b/src/FSharp.Stats/Distributions/Empirical.fs
@@ -327,8 +327,8 @@ module Empirical =
/// Merges two maps into a single map. If a key exists in both maps, the value is determined by f with the first value being from mapA and the second originating from mapB.
/// Is the binwidth equal for both distributions? For nominal data set to true.
/// Function to transform values if key is present in both histograms. `histA-value → histB-value → newValue`
- /// Empirical distribution A
- /// Empirical distribution B
+ /// Empirical distribution A
+ /// Empirical distribution B
/// When applied to continuous data the bandwidths must be equal!
/// This function is not commutative! (mergeBy f a b) is not equal to (mergeBy f b a)
/// New frequency map that results from merged maps mapA and mapB. Values from keys that are present in both maps are handled by f
diff --git a/src/FSharp.Stats/Distributions/Frequency.fs b/src/FSharp.Stats/Distributions/Frequency.fs
index 942c63939..086e81e68 100644
--- a/src/FSharp.Stats/Distributions/Frequency.fs
+++ b/src/FSharp.Stats/Distributions/Frequency.fs
@@ -141,8 +141,8 @@ module Frequency =
/// Merges two histograms into a single histogram. If a key exists in both maps, the value is determined by f with the first value being from mapA and the second originating from mapB.
/// Is the binwidth equal for both frequencies? For nominal data set to true.
/// Function to transform values if key is present in both histograms. `mapA-value → mapB-value → newValue`
- /// Frequency map A
- /// Frequency map B
+ /// Frequency map A
+ /// Frequency map B
/// When applied to continuous data the bandwidths must be equal!
/// This function is not commutative! (mergeBy f a b) is not equal to (mergeBy f b a)
/// New frequency map that results from merged maps mapA and mapB. Values from keys that are present in both maps are handled by f
diff --git a/src/FSharp.Stats/FSharp.Stats.fsproj b/src/FSharp.Stats/FSharp.Stats.fsproj
index bc175d2b9..c7dc2ee1f 100644
--- a/src/FSharp.Stats/FSharp.Stats.fsproj
+++ b/src/FSharp.Stats/FSharp.Stats.fsproj
@@ -40,6 +40,7 @@
+
diff --git a/src/FSharp.Stats/Fitting/CrossValidation.fs b/src/FSharp.Stats/Fitting/CrossValidation.fs
index 6e724ac76..b7c6a59a9 100644
--- a/src/FSharp.Stats/Fitting/CrossValidation.fs
+++ b/src/FSharp.Stats/Fitting/CrossValidation.fs
@@ -19,6 +19,7 @@ module CrossValidation =
/// Computes sum of squared residuals (SSR)
///
///
+ ///
///
///
///
@@ -31,6 +32,7 @@ module CrossValidation =
/// Computes root mean square error (RMSE)
///
///
+ ///
///
///
///
@@ -50,7 +52,13 @@ module CrossValidation =
/// Computes a repeated k fold cross-validation, k: training set size (and number of iterations), iterations: number of random subset creation, xData: rowwise x-coordinate matrix, yData: yData vector fit: x and y data lead to function that maps a xData row vector to a y-coordinate, error: defines the error of the fitted y-coordinate and the actual y-coordinate, getStDev: function that calculates the standard deviation from a seq<^T>. (Seq.stDev)
///
- ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
///
///
///
@@ -113,7 +121,10 @@ module CrossValidation =
/// Computes a leave one out cross-validation xData: rowwise x-coordinate matrix, yData: yData vector fit: x and y data lead to function that maps an xData row vector to a y-coordinate, error: defines the error of the fitted y-coordinate and the actual y-coordinate
///
- ///
+ ///
+ ///
+ ///
+ ///
///
///
///
@@ -141,13 +152,18 @@ module CrossValidation =
/// Computes a repeated shuffel-and-split cross validation p: percentage of training set size from original size, iterations: number of random subset creation, xData: rowwise x-coordinate matrix, yData: yData vector fit: x and y data lead to function that maps a xData row vector to a y-coordinate, error: defines the error of the fitted y-coordinate and the actual y-coordinate, getStDev: function that calculates the standard deviation from a seq<^T>. (Seq.stDev)
///
- ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
///
///
///
///
///
- let inline shuffelAndSplit
+ let inline shuffleAndSplit
p (iterations: int) (xData:Matrix< ^T >) (yData:Vector< ^T >)
(fit: Matrix< ^T > -> Vector< ^T > -> Vector< ^T > -> ^T)
(error: ^T -> ^T -> ^T)
diff --git a/src/FSharp.Stats/Fitting/LinearRegression.fs b/src/FSharp.Stats/Fitting/LinearRegression.fs
index 0d89702c5..27be4c5cc 100644
--- a/src/FSharp.Stats/Fitting/LinearRegression.fs
+++ b/src/FSharp.Stats/Fitting/LinearRegression.fs
@@ -161,7 +161,7 @@ module LinearRegression =
///
/// let mySlope = 17.8
///
- /// // get the f�tting function that fits through the origin
+ /// // get the fitting function that fits through the origin
/// let myF =
/// LinearRegression.OLS.Linear.RTO.predictFunc mySlope
///
@@ -451,7 +451,7 @@ module LinearRegression =
/// Takes linear coefficients and x vector to predict the corresponding y value.
///
/// Coefficients from linear regression.
- /// x vector for which the y value should be predicted
+ /// x vector for which the y value should be predicted
/// predicted y value with given coefficients at X=x
///
///
@@ -620,7 +620,6 @@ module LinearRegression =
///
/// Takes polynomial coefficients and x value to predict the corresponding y value.
///
- /// order of the polynomial (1 = linear, 2 = quadratic, ... )
/// vector of polynomial coefficients (e.g. determined by Polynomial.coefficients), sorted as [intercept;constant;quadratic;...]
/// x value of which the corresponding y value should be predicted
/// predicted y value with given polynomial coefficients at X=x
@@ -858,14 +857,12 @@ module LinearRegression =
///
/// Defines if regression function should pass any specific point.
///
-/// float*float coordinate
type Constraint<'a> =
/// No constraints are given.
| Unconstrained
/// The regression line must go through the origin (0,0)
| RegressionThroughOrigin
/// The regression line must go through a specified point, defined as float*float tuple ('xCorrdinate*'yCoordinate)
- /// float*float coordinate
| RegressionThroughXY of 'a
///
@@ -1022,7 +1019,7 @@ type LinearRegression() =
/// Creates prediction function for linear regression.
///
/// Linear regression coefficients (e.g. from LinearRegression.fit())
- /// x value of which the corresponding y value should be predicted
+ /// x value of which the corresponding y value should be predicted
/// Prediction function that takes an x value and predicts its corresponding y value.
///
///
@@ -1046,7 +1043,7 @@ type LinearRegression() =
/// Creates prediction function for multivariate linear regression.
///
/// Multivariate linear regression coefficients (e.g. from LinearRegression.fit())
- /// x value of which the corresponding y value should be predicted
+ /// x value of which the corresponding y value should be predicted
/// Prediction function that takes an x vector and predicts its corresponding y value.
///
///
diff --git a/src/FSharp.Stats/Fitting/LogisticRegression.fs b/src/FSharp.Stats/Fitting/LogisticRegression.fs
index 873c1a639..95bf061fc 100644
--- a/src/FSharp.Stats/Fitting/LogisticRegression.fs
+++ b/src/FSharp.Stats/Fitting/LogisticRegression.fs
@@ -127,6 +127,7 @@ module LogisticRegression =
/// Returns the regression function
///
///
+ ///
///
///
///
@@ -204,6 +205,7 @@ module LogisticRegression =
/// Returns the regression function
///
///
+ ///
///
///
///
diff --git a/src/FSharp.Stats/Fitting/NonLinearRegression.fs b/src/FSharp.Stats/Fitting/NonLinearRegression.fs
index 54c5234ca..68197e269 100644
--- a/src/FSharp.Stats/Fitting/NonLinearRegression.fs
+++ b/src/FSharp.Stats/Fitting/NonLinearRegression.fs
@@ -376,6 +376,8 @@ module NonLinearRegression =
/// Returns an estimate for an initial parameter for the linear least square estimator for a given dataset (xData, yData). The initial estimation is intended for a logistic function. The returned parameters are the max y value, the steepness of the curve and the x value in the middle of the slope.
///
///
+ ///
+ ///
///
///
///
@@ -535,6 +537,7 @@ module NonLinearRegression =
/// Takes the result of the linearization as initialGuessParams
///
///
+ ///
///
///
///
diff --git a/src/FSharp.Stats/Fitting/Spline.fs b/src/FSharp.Stats/Fitting/Spline.fs
index 80eaa661c..d26ff3fca 100644
--- a/src/FSharp.Stats/Fitting/Spline.fs
+++ b/src/FSharp.Stats/Fitting/Spline.fs
@@ -9,7 +9,6 @@ module Spline =
/// Some preprocessing of the input data
///
- ///
///
///
///
@@ -32,6 +31,7 @@ module Spline =
/// Creates a smoothing spline through some data. Takes as spline points the x-values given by basispts. The resulting function takes lambda (regularization parameter) and a x_Value as input.
///
///
+ ///
///
///
///
diff --git a/src/FSharp.Stats/Interpolation.fs b/src/FSharp.Stats/Interpolation.fs
index dfbe59bbe..29f58812b 100644
--- a/src/FSharp.Stats/Interpolation.fs
+++ b/src/FSharp.Stats/Interpolation.fs
@@ -31,7 +31,6 @@ module Interpolation =
///
/// takes x value to predict the corresponding interpolating y value
///
- /// x value of which the corresponding y value should be predicted
/// predicted y value with given polynomial coefficients at X=x
///
///
@@ -355,7 +354,6 @@ module Interpolation =
///
/// Predicts the y value at point x. A straight line is fitted between the neighboring x values given.
///
- /// X value at which the corresponding y value should be predicted
/// Y value corresponding to the given x value.
///
///
@@ -598,7 +596,6 @@ module Interpolation =
///
/// Predicts the y value at point x. A straight line is fitted between the neighboring x values given.
///
- /// X value at which the corresponding y value should be predicted
/// Y value corresponding to the given x value.
///
///
@@ -772,7 +769,6 @@ module Interpolation =
///
/// Returns function that takes x value and predicts the corresponding interpolating y value.
///
- /// X value of which the y value should be predicted.
/// Function that takes an x value and returns function value.
///
///
@@ -1015,7 +1011,7 @@ module Interpolation =
///
/// Returns integral from interpolating function from x=xVal1 to x=xVal2.
///
- /// Interpolation functions coefficients.
+ /// Integration function.
/// X value from where the integral should be calculated.
/// X value up to which the integral should be calculated.
/// Integral (area under the curve) from x=xVal1 to x=xVal2
@@ -1061,7 +1057,6 @@ module Interpolation =
///
/// Returns function that takes x value (that lies within the range of input x values) and predicts the corresponding interpolating y value.
///
- /// X value of which the y value should be predicted.
/// Function that takes an x value and returns function value.
///
///
@@ -1106,7 +1101,6 @@ module Interpolation =
///
/// Returns function that takes x value and predicts the corresponding interpolating y value.
///
- /// X value of which the y value should be predicted.
/// Function that takes an x value and returns function value.
///
///
@@ -1767,7 +1761,6 @@ module Interpolation =
///
/// Returns function that takes x value and predicts the corresponding interpolating y value.
///
- /// X value of which the y value should be predicted.
/// Function that takes an x value and returns function value.
///
///
@@ -1944,6 +1937,7 @@ module Interpolation =
/// If the knots are monotone in/decreasing, the spline also is monotone (CJC Kruger method)
/// The x data has to be sorted ascending
///
+ /// x values
/// function value at x values
/// Coefficients that define the interpolating function.
///
@@ -2003,7 +1997,7 @@ module Interpolation =
///
/// Returns function that takes x value and predicts the corresponding interpolating y value.
///
- /// Interpolation functions coefficients.
+ /// Interpolation functions coefficients.
/// X value of which the y value should be predicted.
/// Function that takes an x value and returns function value.
///
@@ -2366,7 +2360,6 @@ type InterpolationMethod =
///
/// Creates a spline as piecewise cubic polynomials with continuous first and second derivative at the knots.
///
- /// One of four conditions to manipulate the curvatures at the outer knots.
| CubicSpline of CubicSpline.BoundaryCondition
///
/// Creates a subspline as piecewise cubic polynomials with continuous first derivative but DIScontinuous second derivative at the knots.
@@ -2375,7 +2368,6 @@ type InterpolationMethod =
///
/// Creates a spline as piecewise cubic polynomials with given slope.
///
- /// choose between cSpline, given slopes or monotonicity when appropriate
| HermiteSpline of HermiteMethod
diff --git a/src/FSharp.Stats/List.fs b/src/FSharp.Stats/List.fs
index 88c4c02cb..43911e177 100644
--- a/src/FSharp.Stats/List.fs
+++ b/src/FSharp.Stats/List.fs
@@ -39,9 +39,9 @@ module List =
///
let inline median (xs: 'T list) =
let one = LanguagePrimitives.GenericOne<'T>
+
/// Partition list into three piles; less-than, equal and greater-than x: Current pivot xs: Sublist to partition cont: Continuation function
///
- ///
///
///
///
@@ -70,9 +70,9 @@ module List =
// place item in greater-than pile
partition x ys (fun lts n1 eqs n2 gts n3 ->
cont lts n1 eqs n2 (y::gts) (n3+1))
+
/// Partition input and recurse into the part than contains the median before: Number of elements before this sublist. xs: Current sublist. after: Number of elements after this sublist.
///
- ///
///
///
///
@@ -228,7 +228,7 @@ module ListExtension =
///
/// start value (is included)
/// end value (by default is included )
- /// sets the number of elements in the list. If not set, stepsize = 1.
+ /// sets the number of elements in the list. If not set, stepsize = 1.
/// If false, the list does not contain the stop value
static member linspace(start:float,stop:float,num:int,?IncludeEndpoint:bool) : float list =
@@ -241,7 +241,7 @@ module ListExtension =
///
/// start value (is included)
/// end value (by default is included)
- /// sets the number of elements in the list. Defaults to 50.
+ /// sets the number of elements in the list. Defaults to 50.
/// If false, the list does not contain the stop value. Defaults to true.
static member geomspace(start:float,stop:float,num:int,?IncludeEndpoint:bool) : float list =
let includeEndpoint = defaultArg IncludeEndpoint true
diff --git a/src/FSharp.Stats/Ops.fs b/src/FSharp.Stats/Ops.fs
index c7ba5b903..4a0516712 100644
--- a/src/FSharp.Stats/Ops.fs
+++ b/src/FSharp.Stats/Ops.fs
@@ -59,7 +59,7 @@ module Ops =
/// Returs true if x is infinity (generics)
///
- ///
+ ///
///
///
///
@@ -71,7 +71,7 @@ module Ops =
/// Returs true if x is positive infinity (generics)
///
- ///
+ ///
///
///
///
@@ -83,7 +83,7 @@ module Ops =
/// Returs true if x is positive infinity (generics)
///
- ///
+ ///
///
///
///
diff --git a/src/FSharp.Stats/Quantile.fs b/src/FSharp.Stats/Quantile.fs
index f1a0bfc07..87abe0e86 100644
--- a/src/FSharp.Stats/Quantile.fs
+++ b/src/FSharp.Stats/Quantile.fs
@@ -542,7 +542,7 @@ module Quantile =
/// Computes percentiles percentiles: Each percentile must be between 0.0 and 1.0 (inclusive) CalcMethod should be ofSorted array
///
- ///
+ ///
///
///
///
diff --git a/src/FSharp.Stats/Rank.fs b/src/FSharp.Stats/Rank.fs
index faae6fbb0..97cbf13ae 100644
--- a/src/FSharp.Stats/Rank.fs
+++ b/src/FSharp.Stats/Rank.fs
@@ -4,7 +4,6 @@ module Rank =
/// Comparer that sorts nan at the end of a collection
///
- ///
///
///
///
@@ -25,7 +24,6 @@ module Rank =
/// Comparer that sorts nan at the start of a collection
///
- ///
///
///
///
@@ -36,7 +34,6 @@ module Rank =
/// Ranks each entry of the given unsorted data array. Use 'breakTies function to break ties
///
- ///
///
///
///
diff --git a/src/FSharp.Stats/Signal/Normalization.fs b/src/FSharp.Stats/Signal/Normalization.fs
index de46e6568..c01ff54fa 100644
--- a/src/FSharp.Stats/Signal/Normalization.fs
+++ b/src/FSharp.Stats/Signal/Normalization.fs
@@ -17,10 +17,10 @@ module Normalization =
///
///
/// Bortz J., Schuster C., Statistik für Human- und Sozialwissenschaftler, 7 (2010), p. 35
- let zScoreTransformPopulation (yVal:Vector) : Vector =
- let yMean = Seq.mean yVal
- let std = Seq.stDevPopulation yVal
- yVal |> Array.map (fun x -> (x - yMean) / std)
+ let zScoreTransformPopulation (yData:Vector) : Vector =
+ let yMean = Seq.mean yData
+ let std = Seq.stDevPopulation yData
+ yData |> Array.map (fun x -> (x - yMean) / std)
///
/// z score normalization/transformation using the sample standard deviation. Rarely used since variance is not equal to 1.
@@ -34,10 +34,10 @@ module Normalization =
///
///
/// Bortz J., Schuster C., Statistik für Human- und Sozialwissenschaftler, 7 (2010), p. 35
- let zScoreTransform (yVal:Vector) : Vector =
- let yMean = Seq.mean yVal
- let std = Seq.stDev yVal
- yVal |> Array.map (fun x -> (x - yMean) / std)
+ let zScoreTransform (yData:Vector) : Vector =
+ let yMean = Seq.mean yData
+ let std = Seq.stDev yData
+ yData |> Array.map (fun x -> (x - yMean) / std)
/// Summary of the median of ratios (mor) normalization with normed data, determined correctionfactors, and transformation function.
type MorResult = {
@@ -46,122 +46,135 @@ module Normalization =
NormFunction : Matrix -> Matrix
} with static member Create cf nd f = {CorrFactors=cf;NormedData=nd;NormFunction=f}
- /////
- ///// Median of ratios normalization As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
- ///// Rows are genes, columns are samples
- /////
- ///// The transformation function is applied on all values of the matrix before calculating the normalization factors.
- ///// data matrix with columns as features (samples,time points) and rows as measured entities (genes,proteins).
- ///// Normalized data matrix with correction factors and normalization function.
- /////
- /////
- ///// // raw data with proteins as rows and samples as columns
- ///// let myData = Matrix.init 500 5 (fun _ _ -> rnd.NextDouble())
- ///// let normedData = Normalization.medianOfRatiosBy (fun x -> ln (x+1)) myData
- /////
- /////
- //let medianOfRatiosBy (f: float -> float) (data: Matrix) =
- // let sampleWiseCorrectionFactors =
- // data
- // |> Matrix.mapiRows (fun _ v ->
- // let v = Array.map f v
- // let geometricMean = Seq.meanGeometric v
- // Array.map (fun s -> s / geometricMean) v
- // )
- // |> Matrix.mapiCols (fun _ (v:Vector) -> Seq.median v)
- // let normData m =
- // m
- // |> Matrix.mapi (fun r c v ->
- // v / sampleWiseCorrectionFactors.[c]
- // )
- // MorResult.Create sampleWiseCorrectionFactors (normData data) normData
+ ///
+ /// Median of ratios normalization As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
+ /// Rows are genes, columns are samples
+ ///
+ /// The transformation function is applied on all values of the matrix before calculating the normalization factors.
+ /// data matrix with columns as features (samples,time points) and rows as measured entities (genes,proteins).
+ /// Normalized data matrix with correction factors and normalization function.
+ ///
+ ///
+ /// // raw data with proteins as rows and samples as columns
+ /// let myData = Matrix.init 500 5 (fun _ _ -> rnd.NextDouble())
+ /// let normedData = Normalization.medianOfRatiosBy (fun x -> ln (x+1)) myData
+ ///
+ ///
+ let medianOfRatiosBy (f: float -> float) (data: Matrix) =
+ let sampleWiseCorrectionFactors =
+ data
+ |> Matrix.mapiRows (fun _ v ->
+ let v = Array.map f v
+ let geometricMean = Seq.meanGeometric v
+ Array.map (fun s -> s / geometricMean) v
+ )
+ |> Matrix.getCols
+ |> Array.map (fun (v:Vector) -> Vector.median v)
+
+ let normData m =
+ m
+ |> Matrix.mapi (fun r c v ->
+ v / sampleWiseCorrectionFactors.[c]
+ )
+ MorResult.Create sampleWiseCorrectionFactors (normData data) normData
- /////
- ///// Median of ratios normalization As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
- ///// Rows are genes, columns are samples
- /////
- ///// data matrix with columns as features (samples,time points) and rows as measured entities (genes,proteins).
- ///// Normalized data matrix with correction factors and normalization function.
- /////
- /////
- ///// // raw data with proteins as rows and samples as columns
- ///// let myData = Matrix.init 500 5 (fun _ _ -> rnd.NextDouble())
- ///// let normedData = Normalization.medianOfRatios myData
- /////
- /////
- //let medianOfRatios (data:Matrix) =
- // medianOfRatiosBy id data
+ ///
+ /// Median of ratios normalization As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
+ /// Rows are genes, columns are samples
+ ///
+ /// data matrix with columns as features (samples,time points) and rows as measured entities (genes,proteins).
+ /// Normalized data matrix with correction factors and normalization function.
+ ///
+ ///
+ /// // raw data with proteins as rows and samples as columns
+ /// let myData = Matrix.init 500 5 (fun _ _ -> rnd.NextDouble())
+ /// let normedData = Normalization.medianOfRatios myData
+ ///
+ ///
+ let medianOfRatios (data:Matrix) =
+ medianOfRatiosBy id data
- /////
- ///// Median of ratios normalization As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
- ///// Columns are genes, rows are samples
- /////
- ///// The transformation function is applied on all values of the matrix before calculating the normalization factors.
- ///// data matrix with columns as measured entities and rows as features (samples,time points) (genes,proteins).
- ///// Normalized data matrix with correction factors and normalization function.
- /////
- /////
- ///// // raw data with proteins as columns and samples as rows
- ///// let myData = Matrix.init 5 500 (fun _ _ -> rnd.NextDouble())
- ///// let normedData = Normalization.medianOfRatiosWideBy (fun x -> ln (x+1)) myData
- /////
- /////
- //let medianOfRatiosWideBy (f: float -> float) (data:Matrix) =
- // let sampleWiseCorrectionFactors =
- // data
- // |> Matrix.mapiCols (fun _ v ->
- // let v = Vector.map f v
- // let geometricMean = Seq.meanGeometric v
- // Vector.map (fun s -> s / geometricMean) v
- // )
- // |> Matrix.ofCols
- // |> Matrix.mapiRows (fun _ v -> Seq.median v)
- // let normData m =
- // m
- // |> Matrix.mapi (fun r c v ->
- // v / sampleWiseCorrectionFactors.[r]
- // )
- // MorResult.Create sampleWiseCorrectionFactors (normData data) normData
+ ///
+ /// Median of ratios normalization As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
+ /// Columns are genes, rows are samples
+ ///
+ /// The transformation function is applied on all values of the matrix before calculating the normalization factors.
+ /// data matrix with columns as measured entities and rows as features (samples,time points) (genes,proteins).
+ /// Normalized data matrix with correction factors and normalization function.
+ ///
+ ///
+ /// // raw data with proteins as columns and samples as rows
+ /// let myData = Matrix.init 5 500 (fun _ _ -> rnd.NextDouble())
+ /// let normedData = Normalization.medianOfRatiosWideBy (fun x -> ln (x+1)) myData
+ ///
+ ///
+ let medianOfRatiosWideBy (f: float -> float) (data:Matrix) =
+ let sampleWiseCorrectionFactors =
+ data
+ |> Matrix.mapiCols (fun _ v ->
+ let v = Array.map f v
+ let geometricMean = Seq.meanGeometric v
+ Array.map (fun s -> s / geometricMean) v
+ )
+ |> Matrix.getCols
+ |> Array.map (fun v -> Vector.median v)
+ let normData m =
+ m
+ |> Matrix.mapi (fun r c v ->
+ v / sampleWiseCorrectionFactors.[r]
+ )
+ MorResult.Create sampleWiseCorrectionFactors (normData data) normData
- /////
- ///// Median of ratios normalization As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
- ///// Columns are genes, rows are samples
- /////
- ///// data matrix with columns as measured entities and rows as features (samples,time points) (genes,proteins).
- ///// Normalized data matrix with correction factors and normalization function.
- /////
- /////
- ///// // raw data with proteins as columns and samples as rows
- ///// let myData = Matrix.init 5 500 (fun _ _ -> rnd.NextDouble())
- ///// let normedData = Normalization.medianOfRatiosWide myData
- /////
- /////
- //let medianOfRatiosWide (data:Matrix) =
- // medianOfRatiosWideBy id data
+ ///
+ /// Median of ratios normalization As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
+ /// Columns are genes, rows are samples
+ ///
+ /// data matrix with columns as measured entities and rows as features (samples,time points) (genes,proteins).
+ /// Normalized data matrix with correction factors and normalization function.
+ ///
+ ///
+ /// // raw data with proteins as columns and samples as rows
+ /// let myData = Matrix.init 5 500 (fun _ _ -> rnd.NextDouble())
+ /// let normedData = Normalization.medianOfRatiosWide myData
+ ///
+ ///
+ let medianOfRatiosWide (data:Matrix) =
+ medianOfRatiosWideBy id data
- /////
- ///// Quantile normalization with equal number of elements (rows) for each sample (column).
- ///// Column mean and column standard deviation are qual after normalization.
- ///// Rows are genes, columns are samples.
- /////
- ///// data matrix with columns as measured entities and rows as features (samples,time points) (genes,proteins).
- ///// Normalized data matrix.
- /////
- /////
- ///// // raw data with proteins as rows and samples as columns
- ///// let myData = Matrix.init 500 5 (fun _ _ -> rnd.NextDouble())
- ///// let normedData = Normalization.quantile myData
- /////
- /////
- //let quantile (data:Matrix) =
- // data
- // |> Matrix.mapCols (Seq.indexed >> Seq.sortBy snd)
- // |> Matrix.Generic.ofColSeq
- // |> Matrix.Generic.mapRows (fun row ->
- // let avg = Seq.meanBy snd row
- // row |> RowVector.Generic.map (fun (i,_) -> i,avg)
- // )
- // |> Matrix.Generic.ofSeq
- // |> Matrix.Generic.mapCols (Seq.sortBy fst >> Seq.map snd >> vector)
- // |> Matrix.ofCols
+ ///
+ /// Quantile normalization with equal number of elements (rows) for each sample (column).
+ /// Column mean and column standard deviation are equal after normalization.
+ /// Rows are genes, columns are samples.
+ ///
+ /// data matrix with columns as measured entities and rows as features (samples,time points) (genes,proteins).
+ /// Normalized data matrix.
+ ///
+ ///
+ /// // raw data with proteins as rows and samples as columns
+ /// let myData = Matrix.init 500 5 (fun _ _ -> rnd.NextDouble())
+ /// let normedData = Normalization.quantile myData
+ ///
+ ///
+ let quantile (data:Matrix) =
+ data
+ |> Matrix.getCols
+ |> Array.map (fun v ->
+ v |> Seq.indexed |> Seq.sortBy snd |> Array.ofSeq
+ )
+ |> JaggedArray.transpose
+ |> Array.map (fun row ->
+
+ let avg = Seq.meanBy snd row
+ row |> Array.map (fun (i,_) -> i,avg)
+ )
+ |> JaggedArray.transpose
+ |> Array.map (Seq.sortBy fst >> Seq.map snd >> Vector.ofSeq)
+ |> Matrix.ofCols
+ //|> Matrix.Generic.mapRows (fun row ->
+ // let avg = Seq.meanBy snd row
+ // row |> RowVector.Generic.map (fun (i,_) -> i,avg)
+ // )
+ //|> Matrix.Generic.ofSeq
+ //|> Matrix.Generic.mapCols (Seq.sortBy fst >> Seq.map snd >> vector)
+ //|> Matrix.ofCols
diff --git a/src/FSharp.Stats/Signal/Padding.fs b/src/FSharp.Stats/Signal/Padding.fs
index 0be333ca0..5ff4f6c16 100644
--- a/src/FSharp.Stats/Signal/Padding.fs
+++ b/src/FSharp.Stats/Signal/Padding.fs
@@ -30,6 +30,7 @@ module Padding =
/// median spacing of the data points
///
///
+ ///
///
///
///
diff --git a/src/FSharp.Stats/Signal/PeakDetection.fs b/src/FSharp.Stats/Signal/PeakDetection.fs
index a011d4865..4f15807bf 100644
--- a/src/FSharp.Stats/Signal/PeakDetection.fs
+++ b/src/FSharp.Stats/Signal/PeakDetection.fs
@@ -331,7 +331,6 @@ module PeakDetection =
0
/// Inspects the sourrounding of the peak. The function walks in the direction given by the step parameter. The function accumulates all lift offs till till the next peak or the end of the signal trace is reached. Returns the last index, the number of lift offs and a bool indicating if a flanking peak is present.
///
- ///
///
///
///
diff --git a/src/FSharp.Stats/SpecialFunctions/Erf.fs b/src/FSharp.Stats/SpecialFunctions/Erf.fs
index 6db65b96e..a545e2421 100644
--- a/src/FSharp.Stats/SpecialFunctions/Erf.fs
+++ b/src/FSharp.Stats/SpecialFunctions/Erf.fs
@@ -18,7 +18,6 @@ module Errorfunction =
/// Computes the error function. Note that this implementation has only been verified to have a relative error of around 1e-5.
///
- ///
///
///
///
@@ -41,7 +40,6 @@ module Errorfunction =
/// Computes the complement of the error function. Note that this implementation has only been verified to have a relative error of around 1e-4.
///
- ///
///
///
///
diff --git a/src/FSharp.Stats/SummaryStats.fs b/src/FSharp.Stats/SummaryStats.fs
index 315eb5389..e82183c08 100644
--- a/src/FSharp.Stats/SummaryStats.fs
+++ b/src/FSharp.Stats/SummaryStats.fs
@@ -31,11 +31,11 @@ module SummaryStats =
///
/// Creates a SummaryStats record from the given fields.
///
- /// The number of observed data points.
+ /// The number of observed data points.
/// The running mean of the data points.
- /// The sum of squared deviations fom the mean
- /// The minimum observed value.
- /// The maximum observed value.
+ /// The sum of squared deviations from the mean
+ /// The minimum observed value.
+ /// The maximum observed value.
/// A new SummaryStats record.
let createSummaryStats n mean sos min max =
{N=n;Mean=mean;SumSqrdDevations=sos;Min=min;Max=max}
@@ -110,6 +110,7 @@ module SummaryStats =
/// A sequence of numeric data.
///
/// A SummaryStats record containing the final count, mean, sum of squares, min, and max.
+ ///
let inline ofSeq (items: seq<'T>) =
use e = items.GetEnumerator()
let zero = LanguagePrimitives.GenericZero<'T>
diff --git a/src/FSharp.Stats/Testing/TestStatistics.fs b/src/FSharp.Stats/Testing/TestStatistics.fs
index e81a5279e..955c125ff 100644
--- a/src/FSharp.Stats/Testing/TestStatistics.fs
+++ b/src/FSharp.Stats/Testing/TestStatistics.fs
@@ -9,17 +9,16 @@ module TestStatistics =
/// Creates a new T-Test for a given statistic
/// with given degrees of freedom.
///
- ///
- /// The test statistic.
- /// The degrees of freedom for the numerator.
- /// One Tailed/Sided.
- /// One Tailed/Sided.
- /// Two Tailed/Sided.
type TTestStatistics = {
+ /// The test statistic.
Statistic : float
+ /// The degrees of freedom for the numerator.
DegreesOfFreedom : float
+ /// One Tailed/Sided.
PValueLeft : float
+ /// One Tailed/Sided.
PValueRight : float
+ /// Two Tailed/Sided.
PValue : float
}
@@ -33,13 +32,12 @@ module TestStatistics =
/// Creates a new F-Test for a given statistic
/// with given degrees of freedom.
///
- ///
- /// The test statistic.
- /// The degrees of freedom for the numerator.
- /// The degrees of freedom for the denominator.
type FTestStatistics = {
+ /// The test statistic.
Statistic : float
+ /// The degrees of freedom for the numerator.
DegreesOfFreedom1 : float
+ /// The degrees of freedom for the denominator.
DegreesOfFreedom2 : float
PValue : float
PValueTwoTailed : float
@@ -56,20 +54,16 @@ module TestStatistics =
/// Computes the Chi-Square test statistics for a given statistic
/// with given degrees of freedom.
///
- ///
- /// The test statistic.
- /// The degrees of freedom for the numerator.
- /// One Tailed/Sided.
- /// One Tailed/Sided.
- /// Two Tailed/Sided.
type ChiSquareStatistics = {
+ /// The test statistic.
Statistic : float
+ /// The degrees of freedom for the numerator.
DegreesOfFreedom : float
- /// one tailed/sided chiSquare pValue
+ /// One Tailed/Sided.
PValueLeft : float
- /// one tailed/sided chiSquare pValue (default)
+ /// One Tailed/Sided.
PValueRight : float
- /// two tailed/sided chiSquare pValue
+ /// Two Tailed/Sided.
PValue : float
}
@@ -83,13 +77,12 @@ module TestStatistics =
///
/// Computes the Wilcoxon test statistics for a given statistic.
///
- /// The test statistic.
- /// One Tailed/Sided.
- /// Two Tailed/Sided.
type WilcoxonTestStatistics = {
+ /// The test statistic.
Statistic : float
PValueLeft : float
PValueRight : float
+ /// Two Tailed/Sided.
PValueTwoTailed : float
}
let createWilcoxon statistic =
diff --git a/src/FSharp.Stats/Vector.fs b/src/FSharp.Stats/Vector.fs
new file mode 100644
index 000000000..8026d1acc
--- /dev/null
+++ b/src/FSharp.Stats/Vector.fs
@@ -0,0 +1,240 @@
+namespace FSharp.Stats
+
+open FsMath
+open System
+
+[]
+module Vector =
+
+
+ //----------------------------------------------------------------------------
+ // Stats
+ //----------------------------------------------------------------------------
+
+
+ /// Computes the closed interval (minimum to maximum) of the vector's values
+ let interval (items:Vector<'T>) =
+ let rec loop index (minimum) (maximum) =
+ if index < items.Length then
+ let current = items.[index]
+ loop (index+1) (min current minimum) (max current maximum)
+ else
+ Interval.CreateClosed<_> (minimum,maximum)
+ //Init by first value
+ if items.Length > 1 then
+ loop 1 items.[0] items.[0]
+ else
+ Interval.Empty
+
+ ///// Computes the population mean (Normalized by N)
+ /////
+ /////
+ /////
+ /////
+ /////
+ /////
+ /////
+ //let inline mean (items:Vector<'T>) =
+ // Vector.mean items
+
+
+ /// Computes the sample median
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ let inline median (items:Vector) =
+ items |> Array.median
+
+ /// Median absolute deviation (MAD)
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ let medianAbsoluteDev (items:Vector) =
+ items |> Array.medianAbsoluteDev
+
+
+ /// Returns SummaryStats of vector with N, mean, sum-of-squares, minimum and maximum
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ let inline stats (items:Vector<'T>) =
+ let zero = LanguagePrimitives.GenericZero< 'T >
+ let one = LanguagePrimitives.GenericOne< 'T >
+
+ let rec loop index n (minimum) (maximum) m1 m2 =
+ if index < items.Length then
+ let current = items.[index]
+ let delta = current - m1
+ let deltaN = (delta / n)
+ //let delta_n2 = deltaN * deltaN
+ let m1' = m1 + deltaN
+ let m2' = m2 + delta * deltaN * (n-one)
+ loop (index+1) (n + one) (min current minimum) (max current maximum) m1' m2'
+ else
+ SummaryStats.createSummaryStats (n-one) m1 m2 minimum maximum
+ //Init by first value
+ if items.Length > 1 then
+ loop 0 one items.[0] items.[0] zero zero
+ else
+ let uNan = zero / zero
+ SummaryStats.createSummaryStats zero uNan uNan uNan uNan
+
+
+ /// Returns an estimator of the population covariance of two random variables v1 and v2
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ let covPopulation (v1:Vector) (v2:Vector) =
+ Seq.covPopulation v1 v2
+
+ /// Returns the sample covariance of two random variables v1 and v2. (Bessel's correction by N-1)
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ let cov (v1:Vector) (v2:Vector) =
+ Seq.cov v1 v2
+
+ /// calculates the sample means with a given number of replicates present in the sequence
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ let getMeanOfReplicates rep (data:Vector) =
+ Seq.getMeanOfReplicates rep data
+ |> Vector.ofSeq
+
+ /// calculates the sample standard deviations with a given number of replicates present in the sequence
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ let getStDevOfReplicates rep (data:Vector) =
+ Seq.getStDevOfReplicates rep data
+ |> Vector.ofSeq
+
+ /// calculates the coefficient of variation based on the sample standard deviations with a given number of replicates present in the sequence
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ let getCvOfReplicates rep (data:Vector) =
+ Seq.getCvOfReplicates rep data
+ |> Vector.ofSeq
+
+ ///// Splits a vector according to given indices. Returns (vector including values according to indices, rest)
+ /////
+ /////
+ /////
+ /////
+ /////
+ /////
+ /////
+ /////
+ //let splitVector (indices:int[]) (v:Vector<_>) =
+ // let len = v.Length
+ // //let nv = Vector.Generic.zero (len-indices.Length)
+ // //let nvi = Vector.Generic.zero indices.Length
+ // let nv = VG.zeroCreate (len-indices.Length)
+ // let nvi = VG.zeroCreate indices.Length
+ // indices |> Array.sortInPlace
+ // let rec loop ni nii i =
+ // match i with
+ // | i when i < 0 -> nvi,nv
+ // | i when nii >= 0 && i = indices.[nii] ->
+ // nvi.[nii] <- v.[i]
+ // loop (ni) (nii-1) (i-1)
+ // | _ ->
+ // nv.[ni] <- v.[i]
+ // loop (ni-1) (nii) (i-1)
+
+ // loop (len-1-indices.Length) (indices.Length-1) (len-1)
+
+
+ /// Module to compute common statistical measures on vectors
+ module SummaryStats =
+
+ /// Returns SummaryStats of vector with N, mean, sum-of-squares, minimum and maximum
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ let ofVector (a:Vector<'a>) = stats a
+
+
+
+
+
+
+
+
+
+//[]
+//module VectorExtension =
+
+// type Vector<'T when 'T :> Numerics.INumber<'T>> with
+// member x.Norm = Vector.Generic.norm x
+// member x.Copy () = Vector.Generic.copy x
+
+// ///
+// /// Creates an vector with values between a given interval
+// ///
+// /// start value (is included)
+// /// end value (by default is included)
+// /// sets the number of elements in the vector. If not set, stepsize = 1.
+// /// If false, the vector does not contain the stop value
+// static member linspace(start:float,stop:float,num:int,?IncludeEndpoint:bool) : vector =
+
+// let includeEndpoint = defaultArg IncludeEndpoint true
+
+// Seq.linspace(start,stop,num,includeEndpoint) |> Vector.ofSeq
+
+// ///
+// /// Creates a geometric vector of floats with values between a given interval.
+// ///
+// /// start value (is included)
+// /// end value (by default is included)
+// /// sets the number of elements in the vector. Defaults to 50.
+// /// If false, the vector does not contain the stop value. Defaults to true.
+// static member geomspace(start:float,stop:float,num:int,?IncludeEndpoint:bool) : vector =
+// let includeEndpoint = defaultArg IncludeEndpoint true
+
+// Seq.geomspace (start, stop ,num, includeEndpoint)
+// |> Vector.ofSeq
+