diff --git a/CODEOWNERS b/CODEOWNERS
index bcd1ea095..f121172f7 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -87,3 +87,7 @@
 # C code tricks
 /src/chmatch.c @aitap
 /src/fread.c @aitap
+
+# ordering
+/src/topn.c @ben-schwen
+/man/topn.Rd @ben-schwen
diff --git a/NAMESPACE b/NAMESPACE
index 361b706d3..7c1155c1f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -217,6 +217,8 @@ S3method(format_list_item, data.frame)
 
 export(fdroplevels, setdroplevels)
 S3method(droplevels, data.table)
+
+export(topn)
 export(frev)
 export(.selfref.ok)
 
diff --git a/NEWS.md b/NEWS.md
index bec4ada86..03f739612 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -296,7 +296,26 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T
     #   user  system elapsed
     #  0.028   0.000   0.005
     ```
-    20. `fread()` now supports the `comment.char` argument to skip trailing comments or comment-only lines, consistent with `read.table()`, [#856](https://github.com/Rdatatable/data.table/issues/856). The default remains `comment.char = ""` (no comment parsing) for backward compatibility and performance, in contrast to `read.table(comment.char = "#")`. Thanks to @arunsrinivasan and many others for the suggestion and @ben-schwen for the implementation.
+
+20. `fread()` now supports the `comment.char` argument to skip trailing comments or comment-only lines, consistent with `read.table()`, [#856](https://github.com/Rdatatable/data.table/issues/856). The default remains `comment.char = ""` (no comment parsing) for backward compatibility and performance, in contrast to `read.table(comment.char = "#")`. Thanks to @arunsrinivasan and many others for the suggestion and @ben-schwen for the implementation.
+
+21. New function `topn(x,n)` [#3804](https://github.com/Rdatatable/data.table/issues/3804). It returns the indices of the `n` smallest/largest values of a vector `x`. Previously, one had to use `order(x)[1:n]` which produced a full sorting of `x`. Usage of `topn` is advised for large vectors where sorting takes long time. Thanks to Michael Chirico for requesting, and Benjamin Schwendinger for the PR.
+
+    ```R
+    set.seed(123)
+    x = rnorm(1e8)
+    bm = bench::mark(check=FALSE, min_iterations=10,
+       topn(x, 5L, sorted=TRUE),
+       topn(x, 5L, sorted=FALSE),
+       order(x)[1:5] 
+    )
+    setDT(bm)[, .(expression, min, median, lapply(time, max), mem_alloc)]
+    #                     expression          min       median     V4     mem_alloc
+    #                   <bench_expr> <bench_time> <bench_time> <list> <bench_bytes>
+    # 1:  topn(x, 5L, sorted = TRUE)     151.65ms     155.21ms  171ms            0B
+    # 2: topn(x, 5L, sorted = FALSE)     151.59ms     158.77ms  176ms            0B
+    # 3:               order(x)[1:5]        2.69s        2.77s  2.84s         381MB
+    ```
 
 ### BUG FIXES
 
diff --git a/R/wrappers.R b/R/wrappers.R
index 7683b434f..5effb7e50 100644
--- a/R/wrappers.R
+++ b/R/wrappers.R
@@ -22,5 +22,7 @@ fitsInInt64 = function(x) .Call(CfitsInInt64R, x)
 
 coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy)
 
+topn = function(x, n, na.last=TRUE, decreasing=FALSE, sorted=FALSE) .Call(Ctopn, x, as.integer(n), na.last, decreasing, sorted)
+quickn = function(x, n, na.last=TRUE, decreasing=FALSE) .Call(Cquickn, x, as.integer(n), na.last, decreasing)
 frev   = function(x) .Call(Cfrev, x, TRUE)
 setfrev = function(x) invisible(.Call(Cfrev, x, FALSE))
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 8b151609b..1cdc27fbf 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -76,6 +76,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
   .shallow = data.table:::.shallow
   stopf = data.table:::stopf
   test = data.table:::test
+  topn = data.table:::topn
   uniqlengths = data.table:::uniqlengths
   uniqlist = data.table:::uniqlist
   warningf = data.table:::warningf
@@ -17938,6 +17939,7 @@ DT = data.table(A=letters[1:3], B=4:6, key="A")
 test(2215.1, DT["b", B], 5L)  # has worked forever
 test(2215.2, DT[factor("b"), B], 5L)  # now works too, joining fact/fact, char/fact and fact/char have plenty of tests
 
+
 # segfault on merge keyed all-NA_character_ due to is.sorted, #5070
 DT1 = data.table(x1 = rep(letters[1:4], each=3), x2=NA_character_, key="x2")
 DT2 = data.table(x1 = letters[1:3])
@@ -21901,3 +21903,49 @@ rm(DT, strings)
 DT = unserialize(serialize(as.data.table(mtcars), NULL))
 test(2351, DT[,carb:=NULL], as.data.table(mtcars)[,carb:=NULL])
 rm(DT)
+
+# topn retrieving which.max/which.min for more than 1 element #3804
+set.seed(373L)
+x=list(sample(c(-1:1, NA), 10, TRUE),
+       sample(c(rnorm(3), NA), 10, TRUE),
+       sample(c(letters[1:3],NA), 10, TRUE),
+       c(NaN, Inf, -Inf),
+       c(-Inf, 0, Inf),
+       c(-Inf, Inf),
+       c(Inf, -Inf),
+       c(0, NaN),
+       c(NaN, 0),
+       c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i),
+       c(3, 10+5i, 8, 10+4i, NA, 3, 1+2i, 5+1i, 8+1i, 8, NA))
+testnum = 2352.0
+b = CJ(c(TRUE,FALSE), c(TRUE,FALSE))
+for (i in seq.int(x)) {
+  l = length(x[[i]])
+  s = sample(l-1L, 1L)
+  for (j in nrow(b)) {
+    test(testnum, topn(x[[i]], l, na.last=b[[j,1]], decreasing=b[[j,2]], sorted=TRUE),
+      order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]]))
+    testnum = testnum + 0.01
+    test(testnum, topn(x[[i]], s, na.last=b[[j,1]], decreasing=b[[j,2]], sorted=TRUE),
+      order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]])[1L:s])
+    testnum = testnum + 0.01
+    test(testnum, sort(topn(x[[i]], s, na.last=b[[j,1]], decreasing=b[[j,2]], sorted=FALSE)),
+      sort(order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]])[1L:s]))
+    testnum = testnum + 0.01
+  }
+}
+test(2352.79, topn(as.raw(1), 1), error="Type 'raw' not supported by topn.")
+test(2352.80, topn(1L, 2L), 1L, warning="n should be smaller or equal than length(x) but provided n=2 and length(x)=1.\n  Coercing n to length(x).")
+test(2352.81, topn(1L, -1L), error="topn(x,n) only implemented for n > 0.")
+if (test_bit64) {
+  x=as.integer64(c(-5, 5, 0, NA, -5, 5, 1e18, NA, 5, 1e-18))
+  test(2352.82, topn(x, 10L, na.last=TRUE,  decreasing=FALSE, sorted=TRUE), c(1L, 5L, 3L, 10L, 2L, 6L, 9L, 7L, 4L, 8L))
+  test(2352.83, topn(x, 10L, na.last=FALSE, decreasing=FALSE, sorted=TRUE), c(4L, 8L, 1L, 5L, 3L, 10L, 2L, 6L, 9L, 7L))
+  test(2352.84, topn(x, 10L, na.last=TRUE,  decreasing=TRUE,  sorted=TRUE), c(7L, 2L, 6L, 9L, 3L, 10L, 1L, 5L, 4L, 8L))
+  test(2352.85, topn(x, 10L, na.last=FALSE, decreasing=TRUE,  sorted=TRUE), c(4L, 8L, 7L, 2L, 6L, 9L, 3L, 10L, 1L, 5L))
+  # answers are sort(head(x, 2)) of answer before
+  test(2352.86, sort(topn(x, 2L, na.last=TRUE,  decreasing=FALSE, sorted=FALSE)), c(1L, 5L))
+  test(2352.87, sort(topn(x, 2L, na.last=FALSE, decreasing=FALSE, sorted=FALSE)), c(4L, 8L))
+  test(2352.88, sort(topn(x, 2L, na.last=TRUE,  decreasing=TRUE,  sorted=FALSE)), c(2L, 7L))
+  test(2352.89, sort(topn(x, 2L, na.last=FALSE, decreasing=TRUE,  sorted=FALSE)), c(4L, 8L))
+}
diff --git a/man/topn.Rd b/man/topn.Rd
new file mode 100644
index 000000000..442360ba2
--- /dev/null
+++ b/man/topn.Rd
@@ -0,0 +1,45 @@
+\name{topn}
+\alias{topn}
+\alias{which.max}
+\alias{which.min}
+\title{ Top n values index }
+\description{
+  \code{topn} returns the indices of the smallest (resp. largest) \code{n} values of x. This is an extension \code{\link{which.min}} (\code{\link{which.max}}) which only return the index of the minimum (resp. maximum).
+
+  Especially, for large vectors this method will be faster and less memory intensive than \code{order} since no full sort is performed.
+
+  \code{bit64::integer64} type is also supported.
+}
+
+\usage{
+topn(x, n, na.last=TRUE, decreasing=FALSE, sorted=FALSE)
+}
+\arguments{
+  \item{x}{ A numeric, complex, character or logical vector. }
+  \item{n}{ A numeric vector length 1. How many indices to select. }
+  \item{na.last}{ Control treatment of \code{NA}s. If \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first. }
+  \item{decreasing}{ Logical. Default is \code{FALSE}. Indicating whether the order should be increasing or decreasing. }
+  \item{sorted}{ Logical. Default is \code{FALSE}. Indicating whether order should be sorted with respect to decreasing. }
+}
+
+\value{
+  An integer vector giving the indicies of the \code{n} smallest (largest) for \code{decreasing=FALSE (TRUE)} elements of \code{x}.
+}
+
+\examples{
+x = c(1:4, 0:5, 11)
+# indices of smallest 3 values
+topn(x, 3)
+# indices of largest 3 values
+topn(x, 3, decreasing = TRUE)
+
+## NA's can be put to front or back
+x = c(NA, 1:4)
+topn(x, 5)
+topn(x, 5, na.last=FALSE)
+
+}
+\seealso{
+  \code{\link{data.table}}, \code{\link{order}}, \code{\link{which.max}}, \code{\link{which.min}}
+}
+\keyword{ data }
diff --git a/src/data.table.h b/src/data.table.h
index 0bfcb09d8..47b44ad89 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -363,6 +363,10 @@ SEXP substitute_call_arg_namesR(SEXP expr, SEXP env);
 //negate.c
 SEXP notchin(SEXP x, SEXP table);
 
+// topn.c
+SEXP topn(SEXP, SEXP, SEXP, SEXP, SEXP);
+SEXP quickn(SEXP, SEXP, SEXP, SEXP);
+
 // hash.c
 typedef struct {
   SEXP prot; // make sure to PROTECT() while the table is in use
diff --git a/src/init.c b/src/init.c
index ecdc64e39..d55afa209 100644
--- a/src/init.c
+++ b/src/init.c
@@ -147,6 +147,8 @@ R_CallMethodDef callMethods[] = {
 {"Cdt_has_zlib", (DL_FUNC)&dt_has_zlib, -1},
 {"Csubstitute_call_arg_namesR", (DL_FUNC) &substitute_call_arg_namesR, -1},
 {"CstartsWithAny", (DL_FUNC)&startsWithAny, -1},
+{"Ctopn", (DL_FUNC)&topn, -1},
+{"Cquickn", (DL_FUNC)&quickn, -1},
 {"CconvertDate", (DL_FUNC)&convertDate, -1},
 {"Cnotchin", (DL_FUNC)&notchin, -1},
 {"Ccbindlist", (DL_FUNC) &cbindlist, -1},
diff --git a/src/topn.c b/src/topn.c
new file mode 100644
index 000000000..a58c0bc41
--- /dev/null
+++ b/src/topn.c
@@ -0,0 +1,199 @@
+#include "data.table.h"
+
+static inline void swap(int *a, int *b) { int tmp=*a; *a=*b; *b=tmp; }
+
+static inline bool icmp(const int *x, int i, int j, bool min, bool nalast) {
+  if (x[i]==x[j]) return i > j;
+  if (x[i]==NA_INTEGER) return nalast;
+  if (x[j]==NA_INTEGER) return !nalast;
+  return min ? x[i] < x[j] : x[i] > x[j];
+}
+
+static inline bool dcmp(const double *x, int i, int j, bool min, bool nalast) {
+  if (x[i]==x[j] || (isnan(x[i]) && isnan(x[j]))) return i > j;
+  if (isnan(x[i])) return nalast;
+  if (isnan(x[j])) return !nalast;
+  return min ? x[i] < x[j] : x[i] > x[j];
+}
+
+static inline bool i64cmp(const int64_t *x, int i, int j, bool min, bool nalast) {
+  if (x[i]==x[j]) return i > j;
+  if (x[i]==NA_INTEGER64) return nalast;
+  if (x[j]==NA_INTEGER64) return !nalast;
+  return min ? x[i] < x[j] : x[i] > x[j];
+}
+
+static inline bool scmp(const SEXP *restrict x, int i, int j, bool min, bool nalast) {
+  if (strcmp(CHAR(x[i]), CHAR(x[j])) == 0) return i > j;
+  if (x[i]==NA_STRING) return nalast;
+  if (x[j]==NA_STRING) return !nalast;
+  return min ? strcmp(CHAR(x[i]),CHAR(x[j]))<0 : strcmp(CHAR(x[i]),CHAR(x[j]))>0;
+}
+
+static inline bool ccmp(const Rcomplex *x, int i, int j, bool min, bool nalast) {
+  if (ISNAN_COMPLEX(x[i]) && ISNAN_COMPLEX(x[j])) return i > j;
+  if (x[i].r==x[j].r) {
+    if (x[i].i==x[j].i) return i > j;
+    return min ? x[i].i < x[j].i : x[i].i > x[j].i;
+  }
+  if (ISNAN_COMPLEX(x[i])) return nalast;
+  if (ISNAN_COMPLEX(x[j])) return !nalast;
+  return min ? x[i].r < x[j].r : x[i].r > x[j].r;
+}
+
+// compare value of node with values of left/right child nodes and sift value down if value of child node is smaller than parent (for minheap)
+#undef SIFT
+#define SIFT(CMP) {                                                   \
+  int smallest, l, r;                                                 \
+  while(true) {                                                       \
+    smallest = k;                                                     \
+    l = (k << 1) + 1;                                                 \
+    r = l+1;                                                          \
+    if (l < len && CMP(VAL,INDEX[l],INDEX[smallest],min,nalast))      \
+      smallest = l;                                                   \
+    if (r < len && CMP(VAL,INDEX[r],INDEX[smallest],min,nalast))      \
+      smallest = r;                                                   \
+    if (smallest != k) {                                              \
+      swap(&INDEX[k], &INDEX[smallest]);                              \
+      k = smallest;                                                   \
+    } else {                                                          \
+      break;                                                          \
+    }                                                                 \
+  }                                                                   \
+}
+
+// for finding decreasing topn build minheap and add values if they exceed
+// minimum by overwriting minimum and following down sifting
+#undef HEAPN
+#define HEAPN(CTYPE, RTYPE, CMP, SORTED) {                \
+  const CTYPE *restrict VAL = (const CTYPE *)RTYPE(x);    \
+  for (int i=n/2; i>=0; --i) { k=i; len=n; SIFT(CMP); }   \
+  for (int i=n; i<xlen; ++i) {                            \
+    if (CMP(VAL,INDEX[0],i,min,nalast)) {                 \
+      INDEX[0] = i;                                       \
+      k=0; len=n; SIFT(CMP);                              \
+    }                                                     \
+  }                                                       \
+  if (SORTED) {                                           \
+    for (int i=0; i<n; ++i) {                             \
+      swap(&INDEX[0], &INDEX[n-1-i]);                     \
+      k=0; len=n-1-i; SIFT(CMP);                          \
+      ians[n-1-i] = INDEX[n-1-i]+1;                       \
+    }                                                     \
+  } else {                                                \
+    for (int i=0; i<n; ++i) {                             \
+      ians[i] = INDEX[i]+1;                               \
+    }                                                     \
+  }                                                       \
+  free(INDEX);                                            \
+}
+
+SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg, SEXP sortedArg) {
+  if (!isInteger(nArg) || LENGTH(nArg)!=1 || INTEGER(nArg)[0]<=0 || INTEGER(nArg)[0]==NA_INTEGER) error(_("topn(x,n) only implemented for n > 0."));
+  if (!IS_TRUE_OR_FALSE(ascArg)) error(_("%s must be TRUE or FALSE"), "decreasing");
+  if (!IS_TRUE_OR_FALSE(naArg)) error(_("%s must be TRUE or FALSE"), "na.last");
+  if (!IS_TRUE_OR_FALSE(sortedArg)) error(_("%s must be TRUE or FALSE"), "sorted");
+
+  const int xlen = LENGTH(x);
+  int n = INTEGER(nArg)[0];
+  if (n > xlen) {
+    warning(_("n should be smaller or equal than length(x) but provided n=%d and length(x)=%d.\n  Coercing n to length(x)."), n, xlen);
+    n = xlen;
+  }
+
+  const bool min = LOGICAL(ascArg)[0];
+  const bool nalast = LOGICAL(naArg)[0];
+  const bool sorted = LOGICAL(sortedArg)[0];
+
+  SEXP ans;
+  int k, len;
+  ans = PROTECT(allocVector(INTSXP, n));
+  int *restrict ians = INTEGER(ans);
+  int *restrict INDEX = malloc(n*sizeof(int));
+  if (!INDEX) error(_("Internal error: Couldn't allocate memory for heap indices.")); // # nocov
+  for (int i=0; i<n; ++i) INDEX[i] = i;
+  switch(TYPEOF(x)) {
+  case LGLSXP: case INTSXP: {          HEAPN(int,      INTEGER,    icmp,   sorted); } break;
+  case REALSXP: {
+    if (INHERITS(x, char_integer64)) { HEAPN(int64_t,  REAL,       i64cmp, sorted); }
+    else {                             HEAPN(double,   REAL,       dcmp,   sorted); } break; }
+  case CPLXSXP: {                      HEAPN(Rcomplex, COMPLEX,    ccmp,   sorted); } break;
+  case STRSXP: {                       HEAPN(SEXP,     STRING_PTR, scmp,   sorted); } break;
+  default:
+    free(INDEX); error(_("Type '%s' not supported by topn."), type2char(TYPEOF(x)));
+  }
+  UNPROTECT(1);
+  return(ans);
+}
+
+#undef QUICKN
+#define QUICKN(CTYPE, RTYPE, CMP, SWAP)             \
+  CTYPE *ix = (CTYPE *)RTYPE(x);                    \
+  CTYPE *ians = (CTYPE *)RTYPE(ans);                \
+  unsigned long l = 0, ir = xlen - 1;               \
+  for (;;) {                                        \
+    if (ir <= l + 1) {                              \
+      if (ir == l + 1 && CMP(ix,l,ir,min,nalast)) { \
+        SWAP(ix+l, ix+ir);                          \
+      }                                             \
+      break;                                        \
+    } else {                                        \
+      unsigned long mid = (l + ir) >> 1;            \
+      SWAP(ix+mid, ix+l + 1);                       \
+      if (CMP(ix,l,ir,min,nalast)) {                \
+        SWAP(ix+l, ix+ir);                          \
+      }                                             \
+      if (CMP(ix,l+1,ir,min,nalast)) {              \
+        SWAP(ix+l+1, ix+ir);                        \
+      }                                             \
+      if (CMP(ix,l,l+1,min,nalast)) {               \
+        SWAP(ix+l, ix+l+1);                         \
+      }                                             \
+      unsigned long i = l + 1, j = ir;              \
+      for (;;) {                                    \
+        do i++; while (CMP(ix,l+1,i,min,nalast));   \
+        do j--; while (CMP(ix,j,l+1,min,nalast));   \
+        if (j < i) break;                           \
+        SWAP(ix+i, ix+j);                           \
+      }                                             \
+      SWAP(ix+l+1, ix+j);                           \
+      if (j >= n) ir = j - 1;                       \
+      if (j <= n) l = i;                            \
+    }                                               \
+  }                                                 \
+  memcpy(ians, ix, n * sizeof(CTYPE))               
+
+static inline void iswap(int *a, int *b)           {int      tmp=*a; *a=*b; *b=tmp;}
+static inline void dswap(double *a, double *b)     {double   tmp=*a; *a=*b; *b=tmp;}
+static inline void i64swap(int64_t *a, int64_t *b) {int64_t  tmp=*a; *a=*b; *b=tmp;}
+static inline void cswap(Rcomplex *a, Rcomplex *b) {Rcomplex tmp=*a; *a=*b; *b=tmp;}
+static inline void sswap(SEXP *a, SEXP *b)         {SEXP     tmp=*a; *a=*b; *b=tmp;}
+
+SEXP quickn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg) {
+  if (!isInteger(nArg) || LENGTH(nArg)!=1 || INTEGER(nArg)[0]<=0 || INTEGER(nArg)[0]==NA_INTEGER) error(_("topn(x,n) only implemented for n > 0."));
+  if (!IS_TRUE_OR_FALSE(ascArg)) error(_("%s must be TRUE or FALSE"), "decreasing");
+  if (!IS_TRUE_OR_FALSE(naArg)) error(_("%s must be TRUE or FALSE"), "na.last");
+
+  const int xlen = LENGTH(x);
+  int n = INTEGER(nArg)[0];
+  x = PROTECT(duplicate(x));
+
+  const bool min = LOGICAL(ascArg)[0];
+  const bool nalast = LOGICAL(naArg)[0];
+
+  SEXP ans;
+  ans = PROTECT(allocVector(TYPEOF(x), n));
+  switch(TYPEOF(x)) {
+  case LGLSXP: case INTSXP: {          QUICKN(int,      INTEGER,    icmp, iswap); } break;
+  case REALSXP: {
+    if (INHERITS(x, char_integer64)) { QUICKN(int64_t,  REAL,       i64cmp, i64swap); }
+    else {                             QUICKN(double,   REAL,       dcmp,   dswap); } break; }
+  case CPLXSXP: {                      QUICKN(Rcomplex, COMPLEX,    ccmp,   cswap); } break;
+  case STRSXP: {                       QUICKN(SEXP,     STRING_PTR, scmp,   sswap); } break;
+  default:
+    error(_("Type '%s' not supported by quickn."), type2char(TYPEOF(x)));
+  }
+  copyMostAttrib(x, ans);
+  UNPROTECT(2);
+  return(ans);
+}