diff --git a/docs/api/data/data_index.md b/docs/api/data/data_index.md index bcf5a846..ec51498c 100644 --- a/docs/api/data/data_index.md +++ b/docs/api/data/data_index.md @@ -141,9 +141,6 @@ The rest of this page contains reference information for the components of the D | | | - [coordinates.StandardCoordinateNames](data_api.md#pyearthtools.data.transforms.coordinates.StandardCoordinateNames) | | | | - [coordinates.Select](data_api.md#pyearthtools.data.transforms.coordinates.Select) | | | | - [coordinates.Drop](data_api.md#pyearthtools.data.transforms.coordinates.Drop) | -| | | - [coordinates.Flatten](data_api.md#pyearthtools.data.transforms.coordinates.Flatten) | -| | | - [coordinates.Expand](data_api.md#pyearthtools.data.transforms.coordinates.Expand) | -| | | - [coordinates.SelectFlatten](data_api.md#pyearthtools.data.transforms.coordinates.SelectFlatten) | | | | - [coordinates.Assign](data_api.md#pyearthtools.data.transforms.coordinates.Assign) | | | | - [coordinates.Pad](data_api.md#pyearthtools.data.transforms.coordinates.Pad) | | | | - [default.get_default_transforms](data_api.md#pyearthtools.data.transforms.default.get_default_transforms) | diff --git a/docs/api/pipeline/pipeline_index.md b/docs/api/pipeline/pipeline_index.md index 21cb5511..5dc215d0 100644 --- a/docs/api/pipeline/pipeline_index.md +++ b/docs/api/pipeline/pipeline_index.md @@ -4,154 +4,155 @@ This is the Pipeline package which forms a part of the [PyEarthTools package](ht The rest of this page contains reference information for the components of the Pipeline package. The Pipeline API docs can be viewed at [Pipeline API Docs](pipeline_api.md). -| Module | Purpose | API Docs | -|----------------------|--------------------------------------|----------------| -| `pipeline` | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.Sampler) | -| | | - [Pipeline](pipeline_api.md#pyearthtools.pipeline.Pipeline) | -| | | - [Operation](pipeline_api.md#pyearthtools.pipeline.Operation) | -| | | - [PipelineException](pipeline_api.md#pyearthtools.pipeline.PipelineException) | -| | | - [PipelineFilterException](pipeline_api.md#pyearthtools.pipeline.PipelineFilterException) | -| | | - [PipelineRuntimeError](pipeline_api.md#pyearthtools.pipeline.PipelineRuntimeError) | -| | | - [PipelineTypeError](pipeline_api.md#pyearthtools.pipeline.PipelineTypeError) | -| `pipeline.branching` | | - [PipelineBranchPoint](pipeline_api.md#pyearthtools.pipeline.branching.PipelineBranchPoint) | -| | | - [Unifier](pipeline_api.md#pyearthtools.pipeline.branching.Unifier) | -| | | - [Joiner](pipeline_api.md#pyearthtools.pipeline.branching.Joiner) | -| | | - [Spliter](pipeline_api.md#pyearthtools.pipeline.branching.Spliter) | -| `pipeline.filters` | | - [Filter](pipeline_api.md#pyearthtools.pipeline.filters.Filter) | -| | | - [FilterCheck](pipeline_api.md#pyearthtools.pipeline.filters.FilterCheck) | -| | | - [FilterWarningContext](pipeline_api.md#pyearthtools.pipeline.filters.FilterWarningContext) | -| | | - [TypeFilter](pipeline_api.md#pyearthtools.pipeline.filters.TypeFilter) | -| `pipeline.iterators` | | - [Iterator](pipeline_api.md#pyearthtools.pipeline.iterators.Iterator) | -| | | - [Range](pipeline_api.md#pyearthtools.pipeline.iterators.Range) | -| | | - [Predefined](pipeline_api.md#pyearthtools.pipeline.iterators.Predefined) | -| | | - [File](pipeline_api.md#pyearthtools.pipeline.iterators.File) | -| | | - [DateRange](pipeline_api.md#pyearthtools.pipeline.iterators.DateRange) | -| | | - 
[DateRangeLimit](pipeline_api.md#pyearthtools.pipeline.iterators.DateRangeLimit) | -| | | - [Randomise](pipeline_api.md#pyearthtools.pipeline.iterators.Randomise) | -| | | - [SuperIterator](pipeline_api.md#pyearthtools.pipeline.iterators.SuperIterator) | -| | | - [IterateResults](pipeline_api.md#pyearthtools.pipeline.iterators.IterateResults) | -| `pipeline.modifications` | | - [Cache](pipeline_api.md#pyearthtools.pipeline.modifications.Cache) | -| | | - [StaticCache](pipeline_api.md#pyearthtools.pipeline.modifications.StaticCache) | -| | | - [MemCache](pipeline_api.md#pyearthtools.pipeline.modifications.MemCache) | -| | | - [IdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.IdxModifier) | -| | | - [IdxOverride](pipeline_api.md#pyearthtools.pipeline.modifications.IdxOverride) | -| | | - [TimeIdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.TimeIdxModifier) | -| | | - [SequenceRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.SequenceRetrieval) | -| | | - [TemporalRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.TemporalRetrieval) | -| | | - [idx_modification](pipeline_api.md#pyearthtools.pipeline.modifications.idx_modification) | -| `pipeline.operations` | | - [Transforms](pipeline_api.md#pyearthtools.pipeline.operations.Transforms) | -| `pipeline.operations.xarray` | | - [Compute](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Compute) | -| | | - [Merge](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Merge) | -| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Concatenate) | -| | | - [Sort](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Sort) | -| | | - [Chunk](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Chunk) | -| | | - [RecodeCalendar](pipeline_api.md#pyearthtools.pipeline.operations.xarray.RecodeCalendar) | -| | | - [AlignDates](pipeline_api.md#pyearthtools.pipeline.operations.xarray.AlignDates) | -| `pipeline.operations.xarray.conversion` | | - [ToNumpy](pipeline_api.md#pyearthtools.pipeline.operations.xarray.conversion.ToNumpy) | -| | | - [ToDask](pipeline_api.md#pyearthtools.pipeline.operations.xarray.conversion.ToDask) | -| `pipeline.operations.xarray.filters` | | - [XarrayFilter](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.XarrayFilter) | -| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropAnyNan) | -| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropAllNan) | -| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropValue) | -| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.Shape) | -| `pipeline.operations.xarray.reshape` | | - [Dimensions](pipeline_api.md#pyearthtools.pipeline.operations.xarray.reshape.Dimensions) | -| | | - [CoordinateFlatten](pipeline_api.md#pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten) | -| `pipeline.operations.xarray.select` | | - [SelectDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.SelectDataset) | -| | | - [DropDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.DropDataset) | -| | | - [SliceDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.SliceDataset) | -| `pipeline.operations.xarray.split` | | - [OnVariables](pipeline_api.md#pyearthtools.pipeline.operations.xarray.split.OnVariables) | -| | | - [OnCoordinate](pipeline_api.md#pyearthtools.pipeline.operations.xarray.split.OnCoordinate) | -| 
`pipeline.operations.xarray.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.FillNan) | -| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.MaskValue) | -| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.ForceNormalised) | -| | | - [Derive](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.Derive) | -| `pipeline.operations.xarray.metadata` | | - [Rename](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Rename) | -| | | - [Encoding](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Encoding) | -| | | - [MaintainEncoding](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.MaintainEncoding) | -| | | - [Attributes](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Attributes) | -| | | - [MaintainAttributes](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.MaintainAttributes) | -| `pipeline.operations.xarray.normalisation` | | - [xarrayNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.xarrayNormalisation) | -| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Anomaly) | -| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Deviation) | -| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Division) | -| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Evaluated) | -| `pipeline.operations.xarray.remapping` | | - [HEALPix](pipeline_api.md#pyearthtools.pipeline.operations.xarray.remapping.HEALPix) | -| `pipeline.samplers` | | - [EmptyObject](pipeline_api.md#pyearthtools.pipeline.samplers.EmptyObject) | -| | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.samplers.Sampler) | -| | | - [Default](pipeline_api.md#pyearthtools.pipeline.samplers.Default) | -| | | - [SuperSampler](pipeline_api.md#pyearthtools.pipeline.samplers.SuperSampler) | -| | | - [Random](pipeline_api.md#pyearthtools.pipeline.samplers.Random) | -| | | - [DropOut](pipeline_api.md#pyearthtools.pipeline.samplers.DropOut) | -| | | - [RandomDropOut](pipeline_api.md#pyearthtools.pipeline.samplers.RandomDropOut) | -| `pipeline.operations.dask` | | - [Stack](pipeline_api.md#pyearthtools.pipeline.operations.dask.Stack) | -| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.dask.Concatenate) | -| | | - [VStack](pipeline_api.md#pyearthtools.pipeline.operations.dask.VStack) | -| | | - [HStack](pipeline_api.md#pyearthtools.pipeline.operations.dask.HStack) | -| | | - [Compute](pipeline_api.md#pyearthtools.pipeline.operations.dask.Compute) | -| `pipeline.operations.dask.augment` | | - [Rotate](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Rotate) | -| | | - [Flip](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Flip) | -| | | - [Transform](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Transform) | -| `pipeline.operations.dask.filters` | | - [daskFilter](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.daskFilter) | -| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropAnyNan) | -| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropAllNan) | -| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropValue) | -| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.Shape) | -| 
`pipeline.operations.dask.reshape` | | - [Rearrange](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Rearrange) | -| | | - [Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Squeeze) | -| | | - [Flattener](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Flattener) | -| | | - [Flatten](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Flatten) | -| | | - [SwapAxis](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.SwapAxis) | -| `pipeline.operations.dask.select` | | - [Select](pipeline_api.md#pyearthtools.pipeline.operations.dask.select.Select) | -| | | - [Slice](pipeline_api.md#pyearthtools.pipeline.operations.dask.select.Slice) | -| `pipeline.operations.dask.split` | | - [OnAxis](pipeline_api.md#pyearthtools.pipeline.operations.dask.split.OnAxis) | -| | | - [OnSlice](pipeline_api.md#pyearthtools.pipeline.operations.dask.split.OnSlice) | -| `pipeline.operations.dask.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.FillNan) | -| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.MaskValue) | -| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.ForceNormalised) | -| `pipeline.operations.dask.normalisation` | | - [daskNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.daskNormalisation) | -| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Anomaly) | -| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Deviation) | -| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Division) | -| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Evaluated) | -| `pipeline.operations.dask.conversion` | | - [ToXarray](pipeline_api.md#pyearthtools.pipeline.operations.dask.conversion.ToXarray) | -| | | - [ToNumpy](pipeline_api.md#pyearthtools.pipeline.operations.dask.conversion.ToXarray) | -| `pipeline.operations.numpy` | | - [Stack](pipeline_api.md#pyearthtools.pipeline.operations.numpy.Stack) | -| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.numpy.Concatenate) | -| `pipeline.operations.numpy.augment` | | - [Rotate](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Rotate) | -| | | - [Flip](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Flip) | -| | | - [Transform](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Transform) | -| `pipeline.operations.numpy.filters` | | - [NumpyFilter](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.NumpyFilter) | -| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropAnyNan) | -| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropAllNan) | -| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropValue) | -| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.Shape) | -| `pipeline.operations.numpy.reshape` | | - [Rearrange](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Rearrange) | -| | | - [Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Squeeze) | -| | | - [Expand](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Expand) | -| | | - [Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Squeeze) | -| | | - 
[Flattener](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Flattener) | -| | | - [Flatten](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Flatten) | -| | | - [SwapAxis](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.SwapAxis) | -| `pipeline.operations.numpy.select` | | - [Select](pipeline_api.md#pyearthtools.pipeline.operations.numpy.select.Select) | -| | | - [Slice](pipeline_api.md#pyearthtools.pipeline.operations.numpy.select.Slice) | -| `pipeline.operations.numpy.split` | | - [OnAxis](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.OnAxis) | -| | | - [OnSlice](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.OnSlice) | -| | | - [VSplit](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.VSplit) | -| | | - [HSplit](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.HSplit) | -| `pipeline.operations.numpy.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.FillNan) | -| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.MaskValue) | -| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.ForceNormalised) | -| `pipeline.operations.numpy.normalisation` | | - [numpyNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.numpyNormalisation) | -| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Anomaly) | -| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Deviation) | -| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Division) | -| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Evaluated) | -| `pipeline.operations.transform` | | - [TimeOfYear](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.TimeOfYear) | -| | | - [AddCoordinates](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.AddCoordinates) | -| `pipeline.samplers` | | - [EmptyObject](pipeline_api.md#pyearthtools.pipeline.samplers.EmptyObject) | -| `pipeline.samplers` | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.samplers.Sampler) | -| `pipeline.samplers` | | - [Default](pipeline_api.md#pyearthtools.pipeline.samplers.Default) | -| `pipeline.samplers` | | - [SuperSampler](pipeline_api.md#pyearthtools.pipeline.samplers.SuperSampler) | -| `pipeline.samplers` | | - [Random](pipeline_api.md#pyearthtools.pipeline.samplers.Random) | -| `pipeline.samplers` | | - [DropOut](pipeline_api.md#pyearthtools.pipeline.samplers.DropOut) | -| `pipeline.samplers` | | - [RandomDropOut](pipeline_api.md#pyearthtools.pipeline.samplers.RandomDropOut) | +| Module | Purpose | API Docs | +|----------------------|--------------------------------------|--------------------------------------------------------------------------------------------------------------------| +| `pipeline` | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.Sampler) | +| | | - [Pipeline](pipeline_api.md#pyearthtools.pipeline.Pipeline) | +| | | - [Operation](pipeline_api.md#pyearthtools.pipeline.Operation) | +| | | - [PipelineException](pipeline_api.md#pyearthtools.pipeline.PipelineException) | +| | | - [PipelineFilterException](pipeline_api.md#pyearthtools.pipeline.PipelineFilterException) | +| | | - [PipelineRuntimeError](pipeline_api.md#pyearthtools.pipeline.PipelineRuntimeError) | +| | | - [PipelineTypeError](pipeline_api.md#pyearthtools.pipeline.PipelineTypeError) | +| 
`pipeline.branching` | | - [PipelineBranchPoint](pipeline_api.md#pyearthtools.pipeline.branching.PipelineBranchPoint) | +| | | - [Unifier](pipeline_api.md#pyearthtools.pipeline.branching.Unifier) | +| | | - [Joiner](pipeline_api.md#pyearthtools.pipeline.branching.Joiner) | +| | | - [Spliter](pipeline_api.md#pyearthtools.pipeline.branching.Spliter) | +| `pipeline.filters` | | - [Filter](pipeline_api.md#pyearthtools.pipeline.filters.Filter) | +| | | - [FilterCheck](pipeline_api.md#pyearthtools.pipeline.filters.FilterCheck) | +| | | - [FilterWarningContext](pipeline_api.md#pyearthtools.pipeline.filters.FilterWarningContext) | +| | | - [TypeFilter](pipeline_api.md#pyearthtools.pipeline.filters.TypeFilter) | +| `pipeline.iterators` | | - [Iterator](pipeline_api.md#pyearthtools.pipeline.iterators.Iterator) | +| | | - [Range](pipeline_api.md#pyearthtools.pipeline.iterators.Range) | +| | | - [Predefined](pipeline_api.md#pyearthtools.pipeline.iterators.Predefined) | +| | | - [File](pipeline_api.md#pyearthtools.pipeline.iterators.File) | +| | | - [DateRange](pipeline_api.md#pyearthtools.pipeline.iterators.DateRange) | +| | | - [DateRangeLimit](pipeline_api.md#pyearthtools.pipeline.iterators.DateRangeLimit) | +| | | - [Randomise](pipeline_api.md#pyearthtools.pipeline.iterators.Randomise) | +| | | - [SuperIterator](pipeline_api.md#pyearthtools.pipeline.iterators.SuperIterator) | +| | | - [IterateResults](pipeline_api.md#pyearthtools.pipeline.iterators.IterateResults) | +| `pipeline.modifications` | | - [Cache](pipeline_api.md#pyearthtools.pipeline.modifications.Cache) | +| | | - [StaticCache](pipeline_api.md#pyearthtools.pipeline.modifications.StaticCache) | +| | | - [MemCache](pipeline_api.md#pyearthtools.pipeline.modifications.MemCache) | +| | | - [IdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.IdxModifier) | +| | | - [IdxOverride](pipeline_api.md#pyearthtools.pipeline.modifications.IdxOverride) | +| | | - [TimeIdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.TimeIdxModifier) | +| | | - [SequenceRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.SequenceRetrieval) | +| | | - [TemporalRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.TemporalRetrieval) | +| | | - [idx_modification](pipeline_api.md#pyearthtools.pipeline.modifications.idx_modification) | +| `pipeline.operations` | | - [Transforms](pipeline_api.md#pyearthtools.pipeline.operations.Transforms) | +| `pipeline.operations.xarray` | | - [Compute](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Compute) | +| | | - [Merge](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Merge) | +| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Concatenate) | +| | | - [Sort](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Sort) | +| | | - [Chunk](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Chunk) | +| | | - [RecodeCalendar](pipeline_api.md#pyearthtools.pipeline.operations.xarray.RecodeCalendar) | +| | | - [AlignDates](pipeline_api.md#pyearthtools.pipeline.operations.xarray.AlignDates) | +| `pipeline.operations.xarray.conversion` | | - [ToNumpy](pipeline_api.md#pyearthtools.pipeline.operations.xarray.conversion.ToNumpy) | +| | | - [ToDask](pipeline_api.md#pyearthtools.pipeline.operations.xarray.conversion.ToDask) | +| `pipeline.operations.xarray.filters` | | - [XarrayFilter](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.XarrayFilter) | +| | | - 
[DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropAnyNan) | +| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropAllNan) | +| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropValue) | +| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.Shape) | +| `pipeline.operations.xarray.reshape` | | - [Dimensions](pipeline_api.md#pyearthtools.pipeline.operations.xarray.reshape.Dimensions) | +| | | - [CoordinateFlatten](pipeline_api.md#pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten) | +| | | - [CoordinateExpand](pipeline_api.md#pyearthtools.pipeline.operations.xarray.reshape.CoordinateExpand) | +| `pipeline.operations.xarray.select` | | - [SelectDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.SelectDataset) | +| | | - [DropDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.DropDataset) | +| | | - [SliceDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.SliceDataset) | +| `pipeline.operations.xarray.split` | | - [OnVariables](pipeline_api.md#pyearthtools.pipeline.operations.xarray.split.OnVariables) | +| | | - [OnCoordinate](pipeline_api.md#pyearthtools.pipeline.operations.xarray.split.OnCoordinate) | +| `pipeline.operations.xarray.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.FillNan) | +| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.MaskValue) | +| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.ForceNormalised) | +| | | - [Derive](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.Derive) | +| `pipeline.operations.xarray.metadata` | | - [Rename](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Rename) | +| | | - [Encoding](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Encoding) | +| | | - [MaintainEncoding](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.MaintainEncoding) | +| | | - [Attributes](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Attributes) | +| | | - [MaintainAttributes](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.MaintainAttributes) | +| `pipeline.operations.xarray.normalisation` | | - [xarrayNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.xarrayNormalisation) | +| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Anomaly) | +| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Deviation) | +| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Division) | +| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Evaluated) | +| `pipeline.operations.xarray.remapping` | | - [HEALPix](pipeline_api.md#pyearthtools.pipeline.operations.xarray.remapping.HEALPix) | +| `pipeline.samplers` | | - [EmptyObject](pipeline_api.md#pyearthtools.pipeline.samplers.EmptyObject) | +| | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.samplers.Sampler) | +| | | - [Default](pipeline_api.md#pyearthtools.pipeline.samplers.Default) | +| | | - [SuperSampler](pipeline_api.md#pyearthtools.pipeline.samplers.SuperSampler) | +| | | - [Random](pipeline_api.md#pyearthtools.pipeline.samplers.Random) | +| | | - [DropOut](pipeline_api.md#pyearthtools.pipeline.samplers.DropOut) | +| | | - 
[RandomDropOut](pipeline_api.md#pyearthtools.pipeline.samplers.RandomDropOut) |
+| `pipeline.operations.dask` | | - [Stack](pipeline_api.md#pyearthtools.pipeline.operations.dask.Stack) |
+| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.dask.Concatenate) |
+| | | - [VStack](pipeline_api.md#pyearthtools.pipeline.operations.dask.VStack) |
+| | | - [HStack](pipeline_api.md#pyearthtools.pipeline.operations.dask.HStack) |
+| | | - [Compute](pipeline_api.md#pyearthtools.pipeline.operations.dask.Compute) |
+| `pipeline.operations.dask.augment` | | - [Rotate](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Rotate) |
+| | | - [Flip](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Flip) |
+| | | - [Transform](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Transform) |
+| `pipeline.operations.dask.filters` | | - [daskFilter](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.daskFilter) |
+| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropAnyNan) |
+| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropAllNan) |
+| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropValue) |
+| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.Shape) |
+| `pipeline.operations.dask.reshape` | | - [Rearrange](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Rearrange) |
+| | | - [Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Squeeze) |
+| | | - [Flattener](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Flattener) |
+| | | - [Flatten](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Flatten) |
+| | | - [SwapAxis](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.SwapAxis) |
+| `pipeline.operations.dask.select` | | - [Select](pipeline_api.md#pyearthtools.pipeline.operations.dask.select.Select) |
+| | | - [Slice](pipeline_api.md#pyearthtools.pipeline.operations.dask.select.Slice) |
+| `pipeline.operations.dask.split` | | - [OnAxis](pipeline_api.md#pyearthtools.pipeline.operations.dask.split.OnAxis) |
+| | | - [OnSlice](pipeline_api.md#pyearthtools.pipeline.operations.dask.split.OnSlice) |
+| `pipeline.operations.dask.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.FillNan) |
+| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.MaskValue) |
+| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.ForceNormalised) |
+| `pipeline.operations.dask.normalisation` | | - [daskNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.daskNormalisation) |
+| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Anomaly) |
+| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Deviation) |
+| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Division) |
+| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Evaluated) |
+| `pipeline.operations.dask.conversion` | | - [ToXarray](pipeline_api.md#pyearthtools.pipeline.operations.dask.conversion.ToXarray) |
+| | | - [ToNumpy](pipeline_api.md#pyearthtools.pipeline.operations.dask.conversion.ToNumpy) |
+| `pipeline.operations.numpy` | | - [Stack](pipeline_api.md#pyearthtools.pipeline.operations.numpy.Stack) |
+| | | - 
[Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.numpy.Concatenate) |
+| `pipeline.operations.numpy.augment` | | - [Rotate](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Rotate) |
+| | | - [Flip](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Flip) |
+| | | - [Transform](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Transform) |
+| `pipeline.operations.numpy.filters` | | - [NumpyFilter](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.NumpyFilter) |
+| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropAnyNan) |
+| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropAllNan) |
+| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropValue) |
+| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.Shape) |
+| `pipeline.operations.numpy.reshape` | | - [Rearrange](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Rearrange) |
+| | | - [Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Squeeze) |
+| | | - [Expand](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Expand) |
+| | | - [Flattener](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Flattener) |
+| | | - [Flatten](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Flatten) |
+| | | - [SwapAxis](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.SwapAxis) |
+| `pipeline.operations.numpy.select` | | - [Select](pipeline_api.md#pyearthtools.pipeline.operations.numpy.select.Select) |
+| | | - [Slice](pipeline_api.md#pyearthtools.pipeline.operations.numpy.select.Slice) |
+| `pipeline.operations.numpy.split` | | - [OnAxis](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.OnAxis) |
+| | | - [OnSlice](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.OnSlice) |
+| | | - [VSplit](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.VSplit) |
+| | | - [HSplit](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.HSplit) |
+| `pipeline.operations.numpy.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.FillNan) |
+| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.MaskValue) |
+| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.ForceNormalised) |
+| `pipeline.operations.numpy.normalisation` | | - [numpyNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.numpyNormalisation) |
+| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Anomaly) |
+| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Deviation) |
+| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Division) |
+| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Evaluated) |
+| `pipeline.operations.transform` | | - [TimeOfYear](pipeline_api.md#pyearthtools.pipeline.operations.transform.TimeOfYear) |
+| | | - [AddCoordinates](pipeline_api.md#pyearthtools.pipeline.operations.transform.AddCoordinates) |
+| `pipeline.samplers` | | - [EmptyObject](pipeline_api.md#pyearthtools.pipeline.samplers.EmptyObject) |
+| `pipeline.samplers` | | - 
[Sampler](pipeline_api.md#pyearthtools.pipeline.samplers.Sampler) | +| `pipeline.samplers` | | - [Default](pipeline_api.md#pyearthtools.pipeline.samplers.Default) | +| `pipeline.samplers` | | - [SuperSampler](pipeline_api.md#pyearthtools.pipeline.samplers.SuperSampler) | +| `pipeline.samplers` | | - [Random](pipeline_api.md#pyearthtools.pipeline.samplers.Random) | +| `pipeline.samplers` | | - [DropOut](pipeline_api.md#pyearthtools.pipeline.samplers.DropOut) | +| `pipeline.samplers` | | - [RandomDropOut](pipeline_api.md#pyearthtools.pipeline.samplers.RandomDropOut) | diff --git a/notebooks/tutorial/CNN-Model-Training.ipynb b/notebooks/tutorial/CNN-Model-Training.ipynb index b279d56b..0dae655b 100644 --- a/notebooks/tutorial/CNN-Model-Training.ipynb +++ b/notebooks/tutorial/CNN-Model-Training.ipynb @@ -32,7 +32,80 @@ "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "A module that was compiled using NumPy 1.x cannot be run in\n", + "NumPy 2.0.1 as it may crash. To support both 1.x and 2.x\n", + "versions of NumPy, modules must be compiled with NumPy 2.0.\n", + "Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.\n", + "\n", + "If you are a user of the module, the easiest solution will be to\n", + "downgrade to 'numpy<2' or try to upgrade the affected module.\n", + "We expect that some modules will need time to support NumPy 2.\n", + "\n", + "Traceback (most recent call last): File \"\", line 198, in _run_module_as_main\n", + " File \"\", line 88, in _run_code\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel_launcher.py\", line 18, in \n", + " app.launch_new_instance()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/traitlets/config/application.py\", line 1075, in launch_instance\n", + " app.start()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/kernelapp.py\", line 739, in start\n", + " self.io_loop.start()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/tornado/platform/asyncio.py\", line 205, in start\n", + " self.asyncio_loop.run_forever()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/asyncio/base_events.py\", line 645, in run_forever\n", + " self._run_once()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/asyncio/base_events.py\", line 1999, in _run_once\n", + " handle._run()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/asyncio/events.py\", line 88, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 545, in dispatch_queue\n", + " await self.process_one()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 534, in process_one\n", + " await dispatch(*args)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 437, in dispatch_shell\n", + " await result\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 362, in execute_request\n", + " await super().execute_request(stream, ident, parent)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 778, in execute_request\n", + " reply_content = await reply_content\n", + " File 
\"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 449, in do_execute\n", + " res = shell.run_cell(\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/zmqshell.py\", line 549, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3098, in run_cell\n", + " result = self._run_cell(\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3153, in _run_cell\n", + " result = runner(coro)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/async_helpers.py\", line 128, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3365, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3610, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3670, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"/var/folders/1s/z56f8rw50755xx8fxp2969r477wmss/T/ipykernel_48096/575015278.py\", line 7, in \n", + " import torch\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/__init__.py\", line 1477, in \n", + " from .functional import * # noqa: F403\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/functional.py\", line 9, in \n", + " import torch.nn.functional as F\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/nn/__init__.py\", line 1, in \n", + " from .modules import * # noqa: F403\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/nn/modules/__init__.py\", line 35, in \n", + " from .transformer import TransformerEncoder, TransformerDecoder, \\\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/nn/modules/transformer.py\", line 20, in \n", + " device: torch.device = torch.device(torch._C._get_default_device()), # torch.device('cpu'),\n", + "/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/nn/modules/transformer.py:20: UserWarning: Failed to initialize NumPy: _ARRAY_API not found (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/utils/tensor_numpy.cpp:84.)\n", + " device: torch.device = torch.device(torch._C._get_default_device()), # torch.device('cpu'),\n" + ] + } + ], "source": [ "import sys\n", "from pathlib import Path\n", @@ -130,6 +203,7 @@ "metadata": {}, "outputs": [ { + "data": { "text/html": [ "
\n", @@ -625,6 +699,7 @@ }, "metadata": {}, "output_type": "display_data" + } ], "source": [ @@ -5976,7 +6051,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.12.9" }, "nbsphinx": { "orphan": true diff --git a/notebooks/tutorial/HimawariAllBands.ipynb b/notebooks/tutorial/HimawariAllBands.ipynb index 8d0961cc..c4c3c64b 100644 --- a/notebooks/tutorial/HimawariAllBands.ipynb +++ b/notebooks/tutorial/HimawariAllBands.ipynb @@ -916,7 +916,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.9" }, "nbsphinx": { "orphan": true diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index ad4b78e6..5e4fdf3d 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -24,8 +24,7 @@ import pandas as pd -from pyearthtools.data.transforms.transform import Transform, TransformCollection -from pyearthtools.data.transforms.attributes import SetType +from pyearthtools.data.transforms.transform import Transform from pyearthtools.data.warnings import pyearthtoolsDataWarning from pyearthtools.data.exceptions import DataNotFoundError @@ -396,199 +395,6 @@ def apply(self, dataset: xr.Dataset) -> xr.Dataset: return dataset -def weak_cast_to_int(value): - """ - Basically, turns integer floats to int types, otherwise - does nothing. - """ - try: - if int(value) == value: - value = int(value) - except Exception: - pass - return value - - -class Flatten(Transform): - """Operation to flatten a coordinate in a dataset, putting the data at each value of the coordinate into a separate - data variable.""" - - def __init__( - self, coordinate: Hashable | list[Hashable] | tuple[Hashable], *extra_coordinates, skip_missing: bool = False - ): - """ - - Flatten a coordinate in an xarray Dataset, putting the data at each value of the coordinate into a separate - data variable. - - The output data variables will be named "". For example, if the input - Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values - [100, 200, 500], then the output Dataset will have variables called t100, t200 and t500. - - If more than one coordinate is flattened, the output data variable names will concatenate the values of each - coordinate. - - Args: - coordinate (Hashable | list[Hashable] | tuple[Hashable] | None): - Coordinates to flatten, either str or list of candidates. - *extra_coordinates (optional): - Arguments form of `coordinate`. - skip_missing (bool, optional): - Whether to skip data that does not have any of the listed coordinates. If True, will return such data - unchanged. Defaults to False. 
- - Raises: - ValueError: - If invalid number of coordinates found - - """ - super().__init__() - self.record_initialisation() - - coordinate = coordinate if isinstance(coordinate, (list, tuple)) else [coordinate] - coordinate = [*coordinate, *extra_coordinates] - - self._coordinate = coordinate - self._skip_missing = skip_missing - - # @property - # def _info_(self): - # return dict(coordinate=self._coordinate, skip_missing=self._skip_missing) - - def apply(self, dataset: xr.Dataset) -> xr.Dataset: - discovered_coord = list(set(self._coordinate).intersection(set(dataset.coords))) - - if len(discovered_coord) == 0: - if self._skip_missing: - return dataset - - raise ValueError( - f"{self._coordinate} could not be found in dataset with coordinates {list(dataset.coords)}.\n" - "Set 'skip_missing' to True to skip this." - ) - - elif len(discovered_coord) > 1: - transforms = TransformCollection(*[Flatten(coord) for coord in discovered_coord]) - return transforms(dataset) - - discovered_coord = str(discovered_coord[0]) - - coords = dataset.coords - new_ds = xr.Dataset(coords={co: v for co, v in coords.items() if not co == discovered_coord}) - new_ds.attrs.update( - {f"{discovered_coord}-dtype": str(dataset[discovered_coord].encoding.get("dtype", "int32"))} - ) - - for var in dataset: - if discovered_coord not in dataset[var].coords: - new_ds[var] = dataset[var] - continue - - coord_size = dataset[var][discovered_coord].values - coord_size = coord_size if isinstance(coord_size, np.ndarray) else np.array(coord_size) - - if coord_size.size == 1 and False: - coord_val = weak_cast_to_int(dataset[var][discovered_coord].values) - new_ds[f"{var}{coord_val}"] = Drop(discovered_coord, ignore_missing=True)(dataset[var]) - - else: - for coord_val in dataset[discovered_coord]: - coord_val = weak_cast_to_int(coord_val.values.item()) - - selected = dataset[var].sel(**{discovered_coord: coord_val}) # type: ignore - selected = selected.drop_vars(discovered_coord) # type: ignore - selected.attrs.update(**{discovered_coord: coord_val}) - - new_ds[f"{var}{coord_val}"] = selected - return new_ds - - -class Expand(Transform): - """Inverse operation to `Flatten`""" - - def __init__(self, coordinate: Hashable | list[Hashable] | tuple[Hashable], *extra_coordinates): - """ - Inverse operation to [flatten][pyearthtools.data.transforms.coordinate.Flatten] - - Will find flattened variables and regroup them upon the extra coordinate - - Args: - coordinate (Hashable | list[Hashable] | tuple[Hashable]): - Coordinate to unflatten. - *extra_coordinates (optional): - Argument form of `coordinate`. 
- """ - super().__init__() - self.record_initialisation() - - if not isinstance(coordinate, (list, tuple)): - coordinate = (coordinate,) - - coordinate = (*coordinate, *extra_coordinates) - self._coordinate = coordinate - - # @property - # def _info_(self): - # return dict(coordinate=self._coordinate) - - def apply(self, dataset: xr.Dataset) -> xr.Dataset | xr.DataArray: - dataset = type(dataset)(dataset) - - for coord in self._coordinate: - dtype = dataset.attrs.get(f"{coord}-dtype", "int32") - components = [] - for var in list(dataset.data_vars): - var_data = dataset[var] - if coord in var_data.attrs: - value = var_data.attrs.pop(coord) - var_data = ( - var_data.to_dataset(name=var.replace(str(value), "")) - .assign_coords(**{coord: [value]}) - .set_coords(coord) - ) - components.append(var_data) - - dataset = xr.combine_by_coords(components) # type: ignore - dataset = SetType(**{str(coord): dtype})(dataset) - - ## Add stored encoding if there - if f"{coord}-dtype" in dataset.attrs: - dtype = dataset.attrs.pop(f"{coord}-dtype") - dataset[coord].encoding.update(dtype=dtype) - - return dataset - - -def SelectFlatten( - coordinates: dict[str, tuple[Any] | Any] | None = None, - tolerance: float = 0.01, - **extra_coordinates, -) -> TransformCollection: - """ - Select upon coordinates, and flatten said coordinate - - Args: - coordinates (dict[str, tuple[Any] | Any] | None, optional): - Coordinates and values to select. - Must be coordinate in data Defaults to None. - tolerance (float, optional): - tolerance of selection. Defaults to 0.01. - - Returns: - (TransformCollection): - TransformCollection to select and Flatten - """ - - if coordinates is None: - coordinates = {} - coordinates.update(extra_coordinates) - - select_trans = Select(coordinates, ignore_missing=True, tolerance=tolerance) - flatten_trans = Flatten(list(coordinates.keys())) - - return select_trans + flatten_trans - - class Assign(Transform): """Assign coordinates to object""" diff --git a/packages/data/tests/data/transforms/test_data_coordinates.py b/packages/data/tests/data/transforms/test_data_coordinates.py deleted file mode 100644 index ee01f7ff..00000000 --- a/packages/data/tests/data/transforms/test_data_coordinates.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright Commonwealth of Australia, Bureau of Meteorology 2025. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from pyearthtools.data.transforms import coordinates -import xarray as xr -import pytest - -SIMPLE_DA1 = xr.DataArray( - [ - [ - [0.9, 0.0, 5], - [0.7, 1.4, 2.8], - [0.4, 0.5, 2.3], - ], - [ - [1.9, 1.0, 1.5], - [1.7, 2.4, 1.1], - [1.4, 1.5, 3.3], - ], - ], - coords=[[10, 20], [0, 1, 2], [5, 6, 7]], - dims=["height", "lat", "lon"], -) - -SIMPLE_DA2 = xr.DataArray( - [ - [0.9, 0.0, 5], - [0.7, 1.4, 2.8], - [0.4, 0.5, 2.3], - ], - coords=[[0, 1, 2], [5, 6, 7]], - dims=["lat", "lon"], -) - -SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) -SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) - -COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) - - -def test_Flatten(): - f = coordinates.Flatten(["height"]) - output = f.apply(SIMPLE_DS2) - variables = list(output.keys()) - for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: - assert vbl in variables - - -def test_Flatten_2_coords(): - f = coordinates.Flatten(["height", "lon"]) - output = f.apply(SIMPLE_DS1) - variables = list(output.keys()) - # Note that it's hard to predict which coordinate will be processed first. - try: - for vbl in [ - "Temperature510", - "Temperature520", - "Temperature610", - "Temperature620", - "Temperature710", - "Temperature720", - ]: - assert vbl in variables - except AssertionError: - for vbl in [ - "Temperature105", - "Temperature205", - "Temperature106", - "Temperature206", - "Temperature107", - "Temperature207", - ]: - assert vbl in variables - - -def test_Flatten_complicated_dataset(): - """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" - f = coordinates.Flatten(["height"]) - output = f.apply(COMPLICATED_DS1) - variables = list(output.keys()) - for vbl in ["Temperature10", "Temperature20", "MSLP"]: - assert vbl in variables - - -def test_Flatten_skip_missing(): - f = coordinates.Flatten(["scrupulosity"]) - with pytest.raises(ValueError): - f.apply(SIMPLE_DS1) - f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) - output2 = f2.apply(SIMPLE_DS1) - assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." 
diff --git a/packages/data/tests/transform/test_transform_coordinates.py b/packages/data/tests/transform/test_transform_coordinates.py index 49a83d6a..56a3cfbd 100644 --- a/packages/data/tests/transform/test_transform_coordinates.py +++ b/packages/data/tests/transform/test_transform_coordinates.py @@ -83,37 +83,6 @@ def test_Drop(ds_vertical): assert "vertical" not in _result.variables -def test_Flatten(ds_vertical): - - tf_flatten = coordinates.Flatten("vertical") - _result = tf_flatten.apply(ds_vertical) - assert set(_result.variables.keys()) == {"longitude", "temperature0", "temperature1", "temperature2"} - assert _result.sel(longitude=0).temperature1.values == ds_vertical.sel(longitude=0, vertical=1).temperature.values - - -def test_Expand(ds_vertical): - tf_flatten = coordinates.Flatten("vertical") - flattened = tf_flatten.apply(ds_vertical) - - tf_expand = coordinates.Expand("vertical") - _result = tf_expand.apply(flattened) - assert set(_result.variables.keys()) == {"longitude", "temperature", "vertical"} - assert ( - _result.sel(longitude=0, vertical=1).temperature.values - == ds_vertical.sel(longitude=0, vertical=1).temperature.values - ) - - -def test_SelectFlatten(ds_vertical): - tf_selectflatten = coordinates.SelectFlatten({"longitude": slice(10, 120)}) - - tf_selectflatten = coordinates.SelectFlatten({"longitude": slice(2, 3)}) - - _result = tf_selectflatten.apply(ds_vertical) - assert set(_result.variables.keys()) == {"vertical", "temperature2", "temperature3"} - assert _result.sel(vertical=2).temperature3.values == ds_vertical.sel(longitude=3, vertical=2).temperature.values - - def test_Assign(ds_vertical): tf_assign = coordinates.Assign({"longitude": list(range(0, 4)), "vertical": list(range(3, 6))}) @@ -138,13 +107,3 @@ def test_Assign(ds_vertical): # # with pytest.raises(AssertionError): # assert not np.isnan(_result.sel(longitude=-1, vertical=1).temperature.values) - - -def test_weak_cast_to_int(): - - wcti = coordinates.weak_cast_to_int - - assert wcti(5.0) == 5 - assert isinstance(wcti(5.0), int) - - assert wcti("hello") == "hello" diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py index 93b53bde..bcab81a8 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py @@ -16,10 +16,15 @@ from typing import Hashable, TypeVar, Union import xarray as xr +import numpy as np import pyearthtools.data +from pyearthtools.data.transforms.coordinates import Drop from pyearthtools.pipeline.operation import Operation +from pyearthtools.data.transforms.attributes import SetType +from pyearthtools.utils.decorators import BackwardsCompatibility + T = TypeVar("T", xr.Dataset, xr.DataArray) @@ -44,7 +49,7 @@ def __init__( Args: dimensions (Union[str, list[str]]): - Specified order of dimensions to tranpose dataset to + Specified order of dimensions to transpose dataset to append (bool, optional): Append extra dims, if false, prepend dims. Defaults to True. preserve_order (bool, optional): @@ -88,20 +93,43 @@ def undo_func(self, sample: T) -> T: return sample +def weak_cast_to_int(value): + """ + Basically, turns integer floats to int types, otherwise + does nothing. Used in CoordinateFlatten. 
+ """ + try: + if int(value) == value: + value = int(value) + except Exception: + pass + return value + + class CoordinateFlatten(Operation): - """Flatten and Expand on a coordinate""" + """Flatten a coordinate in a dataset into separate variables.""" _override_interface = "Serial" - def __init__(self, coordinate: Union[Hashable, list[Hashable]], *coords: Hashable, skip_missing: bool = False): + def __init__(self, coordinate: Hashable, skip_missing: bool = False): """ - Flatten and expand on coordinate/s + Flatten a coordinate in an xarray Dataset, putting the data at each value of the coordinate into a separate + data variable. + + The output data variables will be named "". For example, if the input + Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values + [100, 200, 500], then the output Dataset will have variables called t100, t200 and t500. Args: - coordinate (Union[Hashable,list[Hashable]]): + coordinate (Hashable): Coordinate to flatten and expand on. skip_missing (bool, optional): - Whether to skip data without the dims. Defaults to False + Whether to skip data that does not have any of the listed coordinates. If True, will return such data + unchanged. Defaults to False. + + Raises: + ValueError: + If coordinate not found in the dataset and skip_missing==False. """ super().__init__( split_tuples=True, @@ -110,12 +138,111 @@ def __init__(self, coordinate: Union[Hashable, list[Hashable]], *coords: Hashabl ) self.record_initialisation() - coordinate = [coordinate, *coords] if not isinstance(coordinate, (list, tuple)) else [*coordinate, *coords] - self.coords = coordinate + self._coordinate = coordinate self._skip_missing = skip_missing - def apply_func(self, ds): - return pyearthtools.data.transforms.coordinates.Flatten(self.coords, skip_missing=self._skip_missing)(ds) + def apply_func(self, dataset: xr.Dataset) -> xr.Dataset: + discovered_coord = list(set(self._coordinate).intersection(set(dataset.coords))) + + if len(discovered_coord) == 0: + if self._skip_missing: + return dataset + + raise ValueError( + f"{self._coordinate} could not be found in dataset with coordinates {list(dataset.coords)}.\n" + "Set 'skip_missing' to True to skip this." 
+ ) + + discovered_coord = str(discovered_coord[0]) + + coords = dataset.coords + new_ds = xr.Dataset(coords={co: v for co, v in coords.items() if not co == discovered_coord}) + new_ds.attrs.update( + {f"{discovered_coord}-dtype": str(dataset[discovered_coord].encoding.get("dtype", "int32"))} + ) + + for var in dataset: + if discovered_coord not in dataset[var].coords: + new_ds[var] = dataset[var] + continue + + coord_size = dataset[var][discovered_coord].values + coord_size = coord_size if isinstance(coord_size, np.ndarray) else np.array(coord_size) + + if coord_size.size == 1 and False: + coord_val = weak_cast_to_int(dataset[var][discovered_coord].values) + new_ds[f"{var}{coord_val}"] = Drop(discovered_coord, ignore_missing=True)(dataset[var]) + + else: + for coord_val in dataset[discovered_coord]: + coord_val = weak_cast_to_int(coord_val.values.item()) + + selected = dataset[var].sel(**{discovered_coord: coord_val}) # type: ignore + selected = selected.drop_vars(discovered_coord) # type: ignore + selected.attrs.update(**{discovered_coord: coord_val}) + + new_ds[f"{var}{coord_val}"] = selected + return new_ds + + def undo_func(self, ds): + return pyearthtools.pipeline.operations.xarray.reshape.coordinate_expand(self._coordinate)(ds) + + +@BackwardsCompatibility(CoordinateFlatten) +def coordinate_flatten(*args, **kwargs) -> Operation: ... + + +class CoordinateExpand(Operation): + """Inverse operation to `CoordinateFlatten`""" + + def __init__(self, coordinate: Hashable): + """ + Inverse operation to [flatten][pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten] + + Will find flattened variables and regroup them upon the extra coordinate + + Args: + coordinate (Hashable): + Coordinate to unflatten. + """ + super().__init__() + self.record_initialisation() + + if not isinstance(coordinate, (list, tuple)): + coordinate = (coordinate,) + + self._coordinate = coordinate + + def apply_func(self, dataset: xr.Dataset) -> xr.Dataset | xr.DataArray: + dataset = type(dataset)(dataset) + + for coord in self._coordinate: + dtype = dataset.attrs.get(f"{coord}-dtype", "int32") + components = [] + for var in list(dataset.data_vars): + var_data = dataset[var] + if coord in var_data.attrs: + value = var_data.attrs.pop(coord) + var_data = ( + var_data.to_dataset(name=var.replace(str(value), "")) + .assign_coords(**{coord: [value]}) + .set_coords(coord) + ) + components.append(var_data) + + dataset = xr.combine_by_coords(components) # type: ignore + dataset = SetType(**{str(coord): dtype})(dataset) + + ## Add stored encoding if there + if f"{coord}-dtype" in dataset.attrs: + dtype = dataset.attrs.pop(f"{coord}-dtype") + dataset[coord].encoding.update(dtype=dtype) + + return dataset def undo_func(self, ds): - return pyearthtools.data.transforms.coordinates.Expand(self.coords)(ds) + return pyearthtools.pipeline.operations.xarray.reshape.coordinate_flatten(self._coordinate)(ds) + + +@BackwardsCompatibility(CoordinateExpand) +def coordinate_expand(*args, **kwargs) -> Operation: ... diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py new file mode 100644 index 00000000..48dd8d57 --- /dev/null +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -0,0 +1,132 @@ +# Copyright Commonwealth of Australia, Bureau of Meteorology 2025. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pyearthtools.pipeline.operations.xarray import reshape + +import xarray as xr +import pytest + +SIMPLE_DA1 = xr.DataArray( + [ + [ + [0.9, 0.0, 5], + [0.7, 1.4, 2.8], + [0.4, 0.5, 2.3], + ], + [ + [1.9, 1.0, 1.5], + [1.7, 2.4, 1.1], + [1.4, 1.5, 3.3], + ], + ], + coords=[[10, 20], [0, 1, 2], [5, 6, 7]], + dims=["height", "lat", "lon"], +) + +SIMPLE_DA2 = xr.DataArray([[9.1, 2.3, 3.2], [2.2, 1.1, 0.2]], coords=[[1, 2], [3, 4, 5]], dims=["a", "b"]) + +SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) +SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) + +COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) + + +def test_Dimensions(): + d = reshape.Dimensions(["lat", "lon", "height"]) + output = d.apply_func(SIMPLE_DA1) + assert output.dims == ("lat", "lon", "height") + + +def test_Dimensions_one_input(): + d = reshape.Dimensions(["lat"]) + output = d.apply_func(SIMPLE_DA1) + assert output.dims[0] == "lat" + + +def test_Dimensions_prepend(): + d = reshape.Dimensions(["lat"], append=False) + output = d.apply_func(SIMPLE_DA1) + assert output.dims[-1] == "lat" + + +def test_Dimensions_preserve_order(): + d = reshape.Dimensions(["lat"], preserve_order=True) + output = d.apply_func(SIMPLE_DA1) + reversed_output = d.undo_func(output) + assert reversed_output.dims == output.dims + + +def test_weak_cast_to_int(): + + wcti = reshape.weak_cast_to_int + + assert wcti(5.0) == 5 + assert isinstance(wcti(5.0), int) + + assert wcti("hello") == "hello" + + +def test_CoordinateFlatten(): + f = reshape.CoordinateFlatten(["height"]) + output = f.apply(SIMPLE_DS2) + variables = list(output.keys()) + for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: + assert vbl in variables + + +def test_CoordinateFlatten_complicated_dataset(): + """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" + f = reshape.CoordinateFlatten(["height"]) + output = f.apply(COMPLICATED_DS1) + variables = list(output.keys()) + for vbl in ["Temperature10", "Temperature20", "MSLP"]: + assert vbl in variables + + +def test_CoordinateFlatten_skip_missing(): + f = reshape.CoordinateFlatten(["scrupulosity"]) + with pytest.raises(ValueError): + f.apply(SIMPLE_DS1) + f2 = reshape.CoordinateFlatten(["scrupulosity"], skip_missing=True) + output2 = f2.apply(SIMPLE_DS1) + assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." 
+ + +def test_undo_CoordinateFlatten(): + f = reshape.CoordinateFlatten(["height"]) + f_output = f.apply(SIMPLE_DS2) + f_undone = f.undo(f_output) + variables = list(f_undone.keys()) + for vbl in ["Temperature", "Humidity", "WombatsPerKm2"]: + assert vbl in variables + + +def test_CoordinateExpand_reverses_CoordinateFlatten(): + f = reshape.CoordinateFlatten(["height"]) + f_output = f.apply(SIMPLE_DS2) + e = reshape.CoordinateExpand(["height"]) + e_output = e.apply(f_output) + variables = list(e_output.keys()) + assert "Temperature" in variables + + +def test_undo_CoordinateExpand(): + f = reshape.CoordinateFlatten(["height"]) + f_output = f.apply(SIMPLE_DS2) + e = reshape.CoordinateExpand(["height"]) + e_output = e.apply(f_output) + e_undone = e.undo(e_output) + variables = list(e_undone.keys()) + for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: + assert vbl in variables
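
For orientation, here is a minimal usage sketch of the `CoordinateFlatten` / `CoordinateExpand` operations introduced in this change. It mirrors the new tests (`apply`/`undo` on a small in-memory Dataset); the example data and variable names below are illustrative only and are not part of the diff.

```python
import xarray as xr

from pyearthtools.pipeline.operations.xarray import reshape

# Illustrative Dataset with a "height" coordinate to flatten.
da = xr.DataArray(
    [[[0.9, 0.0], [0.7, 1.4]], [[1.9, 1.0], [1.7, 2.4]]],
    coords=[[10, 20], [0, 1], [5, 6]],
    dims=["height", "lat", "lon"],
)
ds = xr.Dataset({"Temperature": da})

# Flatten "height" into per-level variables: Temperature10, Temperature20.
flatten = reshape.CoordinateFlatten(["height"])
flat = flatten.apply(ds)
print(sorted(flat.data_vars))  # ['Temperature10', 'Temperature20']

# Recover the original layout either by undoing the flatten ...
restored = flatten.undo(flat)

# ... or by applying the explicit inverse operation.
expanded = reshape.CoordinateExpand(["height"]).apply(flat)
print(sorted(expanded.data_vars))  # ['Temperature']
```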