Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 38 additions & 38 deletions lib/explorer/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4404,16 +4404,16 @@ defmodule Explorer.DataFrame do
iex> Explorer.DataFrame.sample(df, 3, seed: 100)
#Explorer.DataFrame<
Polars[3 x 10]
year s64 [2012, 2013, 2014]
country string ["SYRIAN ARAB REPUBLIC", "EGYPT", "AFGHANISTAN"]
total s64 [12198, 58198, 2675]
solid_fuel s64 [1, 224, 1194]
liquid_fuel s64 [7909, 26501, 1393]
gas_fuel s64 [3265, 24672, 74]
cement s64 [816, 6800, 14]
gas_flaring s64 [208, 0, 0]
per_capita f64 [0.61, 0.66, 0.08]
bunker_fuels s64 [437, 694, 9]
year s64 [2014, 2014, 2014]
country string ["GUYANA", "MALDIVES", "CAYMAN ISLANDS"]
total s64 [548, 364, 148]
solid_fuel s64 [0, 0, 0]
liquid_fuel s64 [548, 364, 148]
gas_fuel s64 [0, 0, 0]
cement s64 [0, 0, 0]
gas_flaring s64 [0, 0, 0]
per_capita f64 [0.72, 1.02, 2.5]
bunker_fuels s64 [8, 204, 9]
>

Or you can sample a proportion of rows:
Expand All @@ -4422,16 +4422,16 @@ defmodule Explorer.DataFrame do
iex> Explorer.DataFrame.sample(df, 0.03, seed: 100)
#Explorer.DataFrame<
Polars[32 x 10]
year s64 [2013, 2012, 2014, 2011, 2011, ...]
country string ["BRITISH VIRGIN ISLANDS", "TAJIKISTAN", "AFGHANISTAN", "ICELAND", "SINGAPORE", ...]
total s64 [48, 800, 2675, 513, 12332, ...]
solid_fuel s64 [0, 192, 1194, 94, 7, ...]
liquid_fuel s64 [48, 501, 1393, 400, 7774, ...]
gas_fuel s64 [0, 74, 74, 0, 4551, ...]
cement s64 [0, 34, 14, 19, 0, ...]
gas_flaring s64 [0, 0, 0, 0, 0, ...]
per_capita f64 [1.64, 0.1, 0.08, 1.6, 2.38, ...]
bunker_fuels s64 [0, 28, 9, 168, 41786, ...]
year s64 [2014, 2014, 2014, 2011, 2014, ...]
country string ["HAITI", "MALI", "CENTRAL AFRICAN REPUBLIC", "RUSSIAN FEDERATION", "HONG KONG SPECIAL ADMINSTRATIVE REGION OF CHINA", ...]
total s64 [780, 385, 82, 480885, 12605, ...]
solid_fuel s64 [0, 0, 0, 109218, 8420, ...]
liquid_fuel s64 [739, 385, 82, 108312, 2659, ...]
gas_fuel s64 [0, 0, 0, 247327, 1268, ...]
cement s64 [41, 0, 0, 7643, 258, ...]
gas_flaring s64 [0, 0, 0, 8385, 0, ...]
per_capita f64 [0.07, 0.02, 0.02, 3.36, 1.74, ...]
bunker_fuels s64 [29, 20, 29, 7962, 12769, ...]
>

## Grouped examples
Expand All @@ -4446,10 +4446,10 @@ defmodule Explorer.DataFrame do
#Explorer.DataFrame<
Polars[6 x 5]
Groups: ["species"]
sepal_length f64 [4.8, 5.0, 5.5, 6.5, 7.4, ...]
sepal_width f64 [3.1, 3.6, 2.4, 2.8, 2.8, ...]
petal_length f64 [1.6, 1.4, 3.8, 4.6, 6.1, ...]
petal_width f64 [0.2, 0.2, 1.1, 1.5, 1.9, ...]
sepal_length f64 [5.0, 4.8, 5.0, 5.7, 6.8, ...]
sepal_width f64 [3.5, 3.0, 2.3, 3.0, 3.2, ...]
petal_length f64 [1.6, 1.4, 3.3, 4.2, 5.9, ...]
petal_width f64 [0.6, 0.3, 1.0, 1.2, 2.3, ...]
species string ["Iris-setosa", "Iris-setosa", "Iris-versicolor", "Iris-versicolor", "Iris-virginica", ...]
>

Expand All @@ -4462,10 +4462,10 @@ defmodule Explorer.DataFrame do
#Explorer.DataFrame<
Polars[15 x 5]
Groups: ["species"]
sepal_length f64 [5.2, 5.0, 5.2, 5.0, 5.0, ...]
sepal_width f64 [3.4, 3.6, 3.5, 3.0, 3.4, ...]
petal_length f64 [1.4, 1.4, 1.5, 1.6, 1.6, ...]
petal_width f64 [0.2, 0.2, 0.2, 0.2, 0.4, ...]
sepal_length f64 [5.0, 4.4, 5.1, 5.4, 5.0, ...]
sepal_width f64 [3.5, 3.2, 3.4, 3.9, 3.5, ...]
petal_length f64 [1.3, 1.3, 1.5, 1.3, 1.6, ...]
petal_width f64 [0.3, 0.2, 0.2, 0.4, 0.6, ...]
species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>

Expand Down Expand Up @@ -4507,16 +4507,16 @@ defmodule Explorer.DataFrame do
iex> Explorer.DataFrame.shuffle(df, seed: 100)
#Explorer.DataFrame<
Polars[1094 x 10]
year s64 [2014, 2014, 2014, 2012, 2010, ...]
country string ["ISRAEL", "ARGENTINA", "NETHERLANDS", "YEMEN", "GRENADA", ...]
total s64 [17617, 55638, 45624, 5091, 71, ...]
solid_fuel s64 [6775, 1588, 9070, 129, 0, ...]
liquid_fuel s64 [6013, 25685, 18272, 4173, 71, ...]
gas_fuel s64 [3930, 26368, 18010, 414, 0, ...]
cement s64 [898, 1551, 272, 375, 0, ...]
gas_flaring s64 [0, 446, 0, 0, 0, ...]
per_capita f64 [2.22, 1.29, 2.7, 0.2, 0.68, ...]
bunker_fuels s64 [1011, 2079, 14210, 111, 4, ...]
year s64 [2011, 2011, 2014, 2013, 2010, ...]
country string ["MARTINIQUE", "PARAGUAY", "HONDURAS", "GERMANY", "GERMANY", ...]
total s64 [605, 1451, 2583, 206521, 206943, ...]
solid_fuel s64 [0, 1, 145, 86226, 83574, ...]
liquid_fuel s64 [585, 1362, 2207, 70373, 71983, ...]
gas_fuel s64 [0, 0, 0, 45658, 47408, ...]
cement s64 [20, 88, 231, 4258, 3972, ...]
gas_flaring s64 [0, 0, 0, 5, 6, ...]
per_capita f64 [1.53, 0.23, 0.32, 2.56, 2.57, ...]
bunker_fuels s64 [120, 22, 57, 8982, 9078, ...]
>

"""
Expand Down
3 changes: 1 addition & 2 deletions lib/explorer/polars_backend/lazy_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -525,8 +525,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do

# Hands `columns` straight to the `:lf_drop_nils` native operation through
# `Shared.apply_dataframe/4`, using `df` as both the input frame and the
# output template.
#
# No `Native.expr_column/1` wrapping is done: the matching stub in
# `Explorer.PolarsBackend.Native` is `lf_drop_nils(_df, _columns)`.
# NOTE(review): assumes the native side accepts plain column names — confirm
# against the NIF implementation.
@impl true
def drop_nil(%DF{} = df, columns) do
  Shared.apply_dataframe(df, df, :lf_drop_nils, [columns])
end

@impl true
Expand Down
2 changes: 1 addition & 1 deletion lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ defmodule Explorer.PolarsBackend.Native do
# Lazy-frame native stubs. Each clause only calls `err()` (defined outside
# this view); presumably the bodies are replaced once the native library is
# loaded — TODO confirm.
def lf_mutate_with(_df, _exprs), do: err()
def lf_summarise_with(_df, _groups, _stable_groups?, _aggs), do: err()
def lf_rename_columns(_df, _column_pairs), do: err()
# Single clause only: a second catch-all `lf_drop_nils/2` head could never match.
def lf_drop_nils(_df, _columns), do: err()
def lf_pivot_longer(_df, _id_vars, _value_vars, _names_to, _values_to), do: err()
def lf_join(_df, _other, _left_on, _right_on, _how, _suffix), do: err()

Expand Down
65 changes: 37 additions & 28 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -1577,28 +1577,28 @@ defmodule Explorer.Series do
iex> Explorer.Series.sample(s, 10, seed: 100)
#Explorer.Series<
Polars[10]
s64 [57, 9, 54, 62, 50, 77, 35, 88, 1, 69]
s64 [80, 95, 78, 33, 84, 100, 23, 58, 21, 30]
>

iex> s = 1..100 |> Enum.to_list() |> Explorer.Series.from_list()
iex> Explorer.Series.sample(s, 0.05, seed: 100)
#Explorer.Series<
Polars[5]
s64 [9, 56, 79, 28, 54]
s64 [85, 89, 82, 35, 88]
>

iex> s = 1..5 |> Enum.to_list() |> Explorer.Series.from_list()
iex> Explorer.Series.sample(s, 7, seed: 100, replace: true)
#Explorer.Series<
Polars[7]
s64 [4, 1, 3, 4, 3, 4, 2]
s64 [5, 5, 5, 2, 5, 2, 2]
>

iex> s = 1..5 |> Enum.to_list() |> Explorer.Series.from_list()
iex> Explorer.Series.sample(s, 1.2, seed: 100, replace: true)
#Explorer.Series<
Polars[6]
s64 [4, 1, 3, 4, 3, 4]
s64 [5, 5, 5, 2, 5, 2]
>

iex> s = 0..9 |> Enum.to_list() |> Explorer.Series.from_list()
Expand All @@ -1612,7 +1612,7 @@ defmodule Explorer.Series do
iex> Explorer.Series.sample(s, 1.0, seed: 100, shuffle: true)
#Explorer.Series<
Polars[10]
s64 [7, 9, 2, 0, 4, 1, 3, 8, 5, 6]
s64 [3, 7, 8, 0, 5, 1, 2, 6, 4, 9]
>

"""
Expand Down Expand Up @@ -1655,7 +1655,7 @@ defmodule Explorer.Series do
iex> Explorer.Series.shuffle(s, seed: 100)
#Explorer.Series<
Polars[10]
s64 [8, 10, 3, 1, 5, 2, 4, 9, 6, 7]
s64 [4, 8, 9, 1, 6, 2, 3, 7, 5, 10]
>

"""
Expand Down Expand Up @@ -2055,7 +2055,7 @@ defmodule Explorer.Series do
iex> Explorer.Series.rank(s, method: :random, seed: 42)
#Explorer.Series<
Polars[5]
s64 [3, 4, 2, 1, 5]
s64 [3, 5, 1, 2, 4]
>
"""
@doc type: :element_wise
Expand Down Expand Up @@ -5116,7 +5116,7 @@ defmodule Explorer.Series do
iex> Explorer.Series.window_sum(s, 2, weights: [1.0, 2.0])
#Explorer.Series<
Polars[10]
f64 [1.0, 5.0, 8.0, 11.0, 14.0, 17.0, 20.0, 23.0, 26.0, 29.0]
f64 [2.0, 5.0, 8.0, 11.0, 14.0, 17.0, 20.0, 23.0, 26.0, 29.0]
>
"""
@doc type: :window
Expand Down Expand Up @@ -5149,7 +5149,7 @@ defmodule Explorer.Series do
iex> Explorer.Series.window_mean(s, 2, weights: [0.25, 0.75])
#Explorer.Series<
Polars[10]
f64 [0.25, 1.75, 2.75, 3.75, 4.75, 5.75, 6.75, 7.75, 8.75, 9.75]
f64 [1.0, 1.75, 2.75, 3.75, 4.75, 5.75, 6.75, 7.75, 8.75, 9.75]
>

iex> s = 1..10 |> Enum.to_list() |> Explorer.Series.from_list()
Expand Down Expand Up @@ -5229,7 +5229,7 @@ defmodule Explorer.Series do
iex> Explorer.Series.window_min(s, 2, weights: [1.0, 2.0])
#Explorer.Series<
Polars[10]
f64 [1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
f64 [2.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
>
"""
@doc type: :window
Expand Down Expand Up @@ -5262,7 +5262,7 @@ defmodule Explorer.Series do
iex> Explorer.Series.window_max(s, 2, weights: [1.0, 2.0])
#Explorer.Series<
Polars[10]
f64 [1.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0]
f64 [2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0]
>
"""
@doc type: :window
Expand Down Expand Up @@ -5295,7 +5295,7 @@ defmodule Explorer.Series do
iex> Explorer.Series.window_standard_deviation(s, 2, weights: [0.25, 0.75])
#Explorer.Series<
Polars[6]
f64 [0.4330127018922193, 0.4330127018922193, 0.4330127018922193, 0.4330127018922193, 0.4330127018922193, 0.4330127018922193]
f64 [0.0, 0.4330127018922193, 0.4330127018922193, 0.4330127018922193, 0.4330127018922193, 0.4330127018922193]
>
"""
@doc type: :window
Expand Down Expand Up @@ -5330,21 +5330,39 @@ defmodule Explorer.Series do
iex> Explorer.Series.ewm_mean(s)
#Explorer.Series<
Polars[5]
f64 [1.0, 1.6666666666666667, 2.4285714285714284, 3.2666666666666666, 4.161290322580645]
f64 [1.0, 1.6666666666666665, 2.4285714285714284, 3.2666666666666666, 4.161290322580645]
>

iex> s = 1..5 |> Enum.to_list() |> Explorer.Series.from_list()
iex> Explorer.Series.ewm_mean(s, alpha: 0.1)
#Explorer.Series<
Polars[5]
f64 [1.0, 1.5263157894736843, 2.070110701107011, 2.6312881651642916, 3.2097140484969833]
f64 [1.0, 1.526315789473684, 2.0701107011070112, 2.631288165164292, 3.209714048496984]
>
"""
@doc type: :window
def ewm_mean(series, opts \\ []) do
opts = Keyword.validate!(opts, alpha: 0.5, adjust: true, min_periods: 1, ignore_nils: true)

apply_series(series, :ewm_mean, [
float_series =
case dtype(series) do
:f32 ->
series

:f64 ->
series

_ ->
try do
cast(series, :f64)
rescue
_ ->
raise ArgumentError,
"must pass float-compatible series, found dtype #{series.dtype}"
end
end

apply_series(float_series, :ewm_mean, [
opts[:alpha],
opts[:adjust],
opts[:min_periods],
Expand Down Expand Up @@ -5377,14 +5395,14 @@ defmodule Explorer.Series do
iex> Explorer.Series.ewm_standard_deviation(s)
#Explorer.Series<
Polars[5]
f64 [0.0, 0.7071067811865476, 0.9636241116594314, 1.1771636613972951, 1.3452425132127066]
f64 [0.0, 0.7071067811865476, 0.9636241116594315, 1.1771636613972953, 1.3452425132127066]
>

iex> s = 1..5 |> Enum.to_list() |> Explorer.Series.from_list()
iex> Explorer.Series.ewm_standard_deviation(s, alpha: 0.1)
#Explorer.Series<
Polars[5]
f64 [0.0, 0.7071067811865476, 0.9990770648702808, 1.2879021599718157, 1.5741638698820746]
f64 [0.0, 0.7071067811865476, 0.999077064870281, 1.2879021599718157, 1.5741638698820741]
>
"""
@doc type: :window
Expand Down Expand Up @@ -5432,14 +5450,14 @@ defmodule Explorer.Series do
iex> Explorer.Series.ewm_variance(s)
#Explorer.Series<
Polars[5]
f64 [0.0, 0.5, 0.9285714285714284, 1.385714285714286, 1.8096774193548393]
f64 [0.0, 0.5, 0.9285714285714286, 1.3857142857142861, 1.8096774193548393]
>

iex> s = 1..5 |> Enum.to_list() |> Explorer.Series.from_list()
iex> Explorer.Series.ewm_variance(s, alpha: 0.1)
#Explorer.Series<
Polars[5]
f64 [0.0, 0.5, 0.9981549815498153, 1.6586919736600685, 2.4779918892421087]
f64 [0.0, 0.5000000000000001, 0.9981549815498157, 1.658691973660068, 2.477991889242108]
>
"""
@doc type: :window
Expand Down Expand Up @@ -6932,15 +6950,6 @@ defmodule Explorer.Series do
Polars[1]
struct[1] [%{"a" => 1}]
>

If the decoded value does not match the given `dtype`,
an error is raised:

iex> s = Series.from_list(["\\"1\\""])
iex> Series.json_decode(s, {:s, 64})
** (RuntimeError) Polars Error: error deserializing JSON: error deserializing value \"String(\"1\")\" as numeric. \\\n Try increasing `infer_schema_length` or specifying a schema.\n

It raises an exception if the string is invalid JSON.
"""
@doc type: :string_wise
@spec json_decode(Series.t(), dtype() | dtype_alias()) :: Series.t()
Expand Down
Loading
Loading