@@ -26,9 +26,10 @@ use databend_common_expression::FunctionContext;
use databend_common_expression::HashMethodKind;
use databend_common_sql::plans::JoinType;

use crate::pipelines::processors::transforms::memory::outer_left_join::OuterLeftHashJoin;
use crate::pipelines::processors::transforms::new_hash_join::common::CStyleCell;
use crate::pipelines::processors::transforms::new_hash_join::grace::GraceHashJoinState;
use super::common::CStyleCell;
use super::grace::GraceHashJoinState;
use super::memory::outer_left_join::OuterLeftHashJoin;
use super::memory::NestedLoopJoin;
use crate::pipelines::processors::transforms::BasicHashJoinState;
use crate::pipelines::processors::transforms::GraceHashJoin;
use crate::pipelines::processors::transforms::InnerHashJoin;
@@ -126,13 +127,29 @@ impl HashJoinFactory {
}

match typ {
JoinType::Inner => Ok(Box::new(InnerHashJoin::create(
&self.ctx,
self.function_ctx.clone(),
self.hash_method.clone(),
self.desc.clone(),
self.create_basic_state(id)?,
)?)),
JoinType::Inner => {
let state = self.create_basic_state(id)?;
let nested_loop_desc = self
.desc
.create_nested_loop_desc(&settings, &self.function_ctx)?;

let inner = InnerHashJoin::create(
&settings,
self.function_ctx.clone(),
self.hash_method.clone(),
self.desc.clone(),
state.clone(),
nested_loop_desc
.as_ref()
.map(|desc| desc.nested_loop_join_threshold)
.unwrap_or_default(),
)?;

match nested_loop_desc {
Some(desc) => Ok(Box::new(NestedLoopJoin::create(inner, state, desc))),
None => Ok(Box::new(inner)),
}
}
JoinType::Left => Ok(Box::new(OuterLeftHashJoin::create(
&self.ctx,
self.function_ctx.clone(),
@@ -148,11 +165,12 @@ impl HashJoinFactory {
match typ {
JoinType::Inner => {
let inner_hash_join = InnerHashJoin::create(
&self.ctx,
&self.ctx.get_settings(),
self.function_ctx.clone(),
self.hash_method.clone(),
self.desc.clone(),
self.create_basic_state(id)?,
0,
)?;

Ok(Box::new(GraceHashJoin::create(
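For orientation, the reworked `JoinType::Inner` arm above builds the basic state, asks the descriptor for an optional nested-loop descriptor, and layers `NestedLoopJoin` over `InnerHashJoin` only when that descriptor exists (otherwise the threshold defaults to zero). A minimal, standalone sketch of that wrap-or-not decision; the trait and types here are hypothetical stand-ins, not the crate's real `Join`, `InnerHashJoin`, or `NestedLoopJoin`:

```rust
// Standalone sketch of the wrap-or-not decision in the factory. All names are
// hypothetical stand-ins for InnerHashJoin, NestedLoopJoin, and NestedLoopDesc.
trait Join {
    fn name(&self) -> String;
}

struct Inner {
    nested_loop_threshold: usize,
}

struct NestedLoopWrapper<T> {
    inner: T,
}

impl Join for Inner {
    fn name(&self) -> String {
        format!("inner(threshold={})", self.nested_loop_threshold)
    }
}

impl<T: Join> Join for NestedLoopWrapper<T> {
    fn name(&self) -> String {
        format!("nested_loop({})", self.inner.name())
    }
}

struct Desc {
    nested_loop_join_threshold: usize,
}

fn create_inner_join(nested_loop_desc: Option<Desc>) -> Box<dyn Join> {
    // The threshold only matters when a nested-loop descriptor exists;
    // otherwise it falls back to zero, mirroring `unwrap_or_default()` above.
    let inner = Inner {
        nested_loop_threshold: nested_loop_desc
            .as_ref()
            .map(|d| d.nested_loop_join_threshold)
            .unwrap_or_default(),
    };
    match nested_loop_desc {
        Some(_) => Box::new(NestedLoopWrapper { inner }),
        None => Box::new(inner),
    }
}

fn main() {
    let with_desc = Some(Desc { nested_loop_join_threshold: 128 });
    println!("{}", create_inner_join(with_desc).name());
    println!("{}", create_inner_join(None).name());
}
```

The wrap-or-not split means the plain hash path gains no extra indirection when nested-loop joining is not configured.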
@@ -354,17 +354,10 @@ impl BasicHashJoin {
let mut progress = ProgressValues::default();
let mut plain = vec![];
while let Some(chunk_index) = self.state.steal_chunk_index() {
let chunk_mut = &mut self.state.chunks.as_mut()[chunk_index];

let mut chunk_block = DataBlock::empty();
std::mem::swap(chunk_mut, &mut chunk_block);

let chunk_block = &self.state.chunks[chunk_index];
progress.rows += chunk_block.num_rows();
progress.bytes += chunk_block.memory_size();

*chunk_mut = chunk_block.clone();

plain.push(chunk_block);
plain.push(chunk_block.clone());
}
debug_assert!(matches!(
*self.state.hash_table,
@@ -16,7 +16,6 @@ use std::ops::Deref;
use std::sync::Arc;

use databend_common_base::base::ProgressValues;
use databend_common_catalog::table_context::TableContext;
use databend_common_column::bitmap::Bitmap;
use databend_common_exception::ErrorCode;
use databend_common_exception::Result;
@@ -27,10 +26,10 @@ use databend_common_expression::DataBlock;
use databend_common_expression::FilterExecutor;
use databend_common_expression::FunctionContext;
use databend_common_expression::HashMethodKind;
use databend_common_settings::Settings;

use super::basic::BasicHashJoin;
use super::basic_state::BasicHashJoinState;
use super::LoopJoinStream;
use crate::pipelines::processors::transforms::build_runtime_filter_packet;
use crate::pipelines::processors::transforms::new_hash_join::hashtable::basic::ProbeStream;
use crate::pipelines::processors::transforms::new_hash_join::hashtable::basic::ProbedRows;
@@ -41,10 +40,8 @@ use crate::pipelines::processors::transforms::new_hash_join::join::JoinStream;
use crate::pipelines::processors::transforms::new_hash_join::performance::PerformanceContext;
use crate::pipelines::processors::transforms::HashJoinHashTable;
use crate::pipelines::processors::transforms::JoinRuntimeFilterPacket;
use crate::pipelines::processors::transforms::NestedLoopDesc;
use crate::pipelines::processors::transforms::RuntimeFiltersDesc;
use crate::pipelines::processors::HashJoinDesc;
use crate::sessions::QueryContext;

pub struct InnerHashJoin {
pub(crate) basic_hash_join: BasicHashJoin,
@@ -53,35 +50,23 @@ pub struct InnerHashJoin {
pub(crate) function_ctx: FunctionContext,
pub(crate) basic_state: Arc<BasicHashJoinState>,
pub(crate) performance_context: PerformanceContext,
nested_loop_filter: Option<FilterExecutor>,
nested_loop_field_reorder: Option<Vec<usize>>,
}

impl InnerHashJoin {
pub fn create(
ctx: &QueryContext,
settings: &Settings,
function_ctx: FunctionContext,
method: HashMethodKind,
desc: Arc<HashJoinDesc>,
state: Arc<BasicHashJoinState>,
nested_loop_join_threshold: usize,
) -> Result<Self> {
let settings = ctx.get_settings();
let block_size = settings.get_max_block_size()? as usize;

let context = PerformanceContext::create(block_size, desc.clone(), function_ctx.clone());

let (nested_loop_filter, nested_loop_field_reorder, nested_loop_join_threshold) =
match desc.create_nested_loop_desc(&settings, &function_ctx)? {
Some(NestedLoopDesc {
filter,
field_reorder,
nested_loop_join_threshold,
}) => (Some(filter), field_reorder, nested_loop_join_threshold),
None => (None, None, 0),
};

let basic_hash_join = BasicHashJoin::create(
&settings,
settings,
function_ctx.clone(),
method,
desc.clone(),
@@ -95,8 +80,6 @@ impl InnerHashJoin {
function_ctx,
basic_state: state,
performance_context: context,
nested_loop_filter,
nested_loop_field_reorder,
})
}
}
@@ -131,23 +114,6 @@ impl Join for InnerHashJoin {

self.basic_hash_join.finalize_chunks();

match &*self.basic_state.hash_table {
HashJoinHashTable::Null => {
return Err(ErrorCode::AbortedQuery(
"Aborted query, because the hash table is uninitialized.",
))
}
HashJoinHashTable::NestedLoop(build_blocks) => {
let nested = Box::new(LoopJoinStream::new(data, build_blocks));
return Ok(InnerHashJoinFilterStream::create(
nested,
self.nested_loop_filter.as_mut().unwrap(),
self.nested_loop_field_reorder.as_deref(),
));
}
_ => (),
}

let probe_keys = self.desc.probe_key(&data, &self.function_ctx)?;

let mut keys = DataBlock::new(probe_keys, data.num_rows());
@@ -175,7 +141,12 @@ impl Join for InnerHashJoin {
&mut self.performance_context.probe_result,
)
}
HashJoinHashTable::Null | HashJoinHashTable::NestedLoop(_) => unreachable!(),
HashJoinHashTable::Null => {
return Err(ErrorCode::AbortedQuery(
"Aborted query, because the hash table is uninitialized.",
));
}
HashJoinHashTable::NestedLoop(_) => unreachable!(),
});

match &mut self.performance_context.filter_executor {
@@ -292,7 +263,7 @@ impl<'a> JoinStream for InnerHashJoinStream<'a> {
}
}

struct InnerHashJoinFilterStream<'a> {
pub(super) struct InnerHashJoinFilterStream<'a> {
inner: Box<dyn JoinStream + 'a>,
filter_executor: &'a mut FilterExecutor,
field_reorder: Option<&'a [usize]>,
@@ -13,16 +13,72 @@
// limitations under the License.

use std::collections::VecDeque;
use std::sync::Arc;

use databend_common_base::base::ProgressValues;
use databend_common_exception::Result;
use databend_common_expression::types::DataType;
use databend_common_expression::BlockEntry;
use databend_common_expression::DataBlock;
use databend_common_expression::Scalar;

use super::inner_join::InnerHashJoinFilterStream;
use crate::pipelines::processors::transforms::new_hash_join::join::EmptyJoinStream;
use crate::pipelines::processors::transforms::BasicHashJoinState;
use crate::pipelines::processors::transforms::HashJoinHashTable;
use crate::pipelines::processors::transforms::Join;
use crate::pipelines::processors::transforms::JoinRuntimeFilterPacket;
use crate::pipelines::processors::transforms::JoinStream;
use crate::pipelines::processors::transforms::NestedLoopDesc;
use crate::pipelines::processors::transforms::RuntimeFiltersDesc;

pub struct LoopJoinStream<'a> {
pub struct NestedLoopJoin<T> {
inner: T,
basic_state: Arc<BasicHashJoinState>,
desc: NestedLoopDesc,
}

impl<T> NestedLoopJoin<T> {
pub fn create(inner: T, basic_state: Arc<BasicHashJoinState>, desc: NestedLoopDesc) -> Self {
Self {
inner,
basic_state,
desc,
}
}
}

impl<T: Join> Join for NestedLoopJoin<T> {
fn add_block(&mut self, data: Option<DataBlock>) -> Result<()> {
self.inner.add_block(data)
}

fn final_build(&mut self) -> Result<Option<ProgressValues>> {
self.inner.final_build()
}

fn build_runtime_filter(&self, desc: &RuntimeFiltersDesc) -> Result<JoinRuntimeFilterPacket> {
self.inner.build_runtime_filter(desc)
}

fn probe_block(&mut self, data: DataBlock) -> Result<Box<dyn JoinStream + '_>> {
if data.is_empty() || *self.basic_state.build_rows == 0 {
return Ok(Box::new(EmptyJoinStream));
}
let HashJoinHashTable::NestedLoop(build_blocks) = &*self.basic_state.hash_table else {
return self.inner.probe_block(data);
};

let nested = Box::new(LoopJoinStream::new(data, build_blocks));
Ok(InnerHashJoinFilterStream::create(
nested,
&mut self.desc.filter,
self.desc.field_reorder.as_deref(),
))
}
}

struct LoopJoinStream<'a> {
probe_rows: VecDeque<Vec<Scalar>>,
probe_types: Vec<DataType>,
build_blocks: &'a [DataBlock],
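The `NestedLoopJoin` wrapper above forwards `add_block`, `final_build`, and `build_runtime_filter` to the join it wraps, and only takes over `probe_block` when the build side was materialized as plain blocks (`HashJoinHashTable::NestedLoop`); anything else falls through to the inner hash join. A small sketch of that `let … else` dispatch, with hypothetical stand-ins for the hash-table enum and the resulting streams:

```rust
// Sketch of the probe-time dispatch in NestedLoopJoin::probe_block.
// `Table` stands in for HashJoinHashTable; the returned strings stand in
// for the produced join streams.
enum Table {
    Hashed,
    NestedLoop(Vec<Vec<u64>>),
}

struct Wrapper {
    table: Table,
}

impl Wrapper {
    fn probe(&self, probe_rows: usize) -> String {
        // Intercept only when the build side was kept as plain blocks;
        // every other hash-table kind falls through to the wrapped hash join.
        let Table::NestedLoop(build_blocks) = &self.table else {
            return format!("hash join over {probe_rows} probe rows");
        };
        format!(
            "nested loop join: {probe_rows} probe rows x {} build blocks",
            build_blocks.len()
        )
    }
}

fn main() {
    let hashed = Wrapper { table: Table::Hashed };
    let looped = Wrapper {
        table: Table::NestedLoop(vec![vec![1, 2], vec![3]]),
    };
    println!("{}", hashed.probe(10));
    println!("{}", looped.probe(10));
}
```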
@@ -15,6 +15,7 @@
use std::any::Any;
use std::fmt::Debug;
use std::fmt::Formatter;
use std::marker::PhantomPinned;
use std::sync::Arc;

use databend_common_exception::Result;
@@ -42,6 +43,7 @@ pub struct TransformHashJoin {
stage_sync_barrier: Arc<Barrier>,
projection: ColumnSet,
rf_desc: Arc<RuntimeFiltersDesc>,
_p: PhantomPinned,
}

impl TransformHashJoin {
@@ -67,6 +69,7 @@ impl TransformHashJoin {
finished: false,
build_data: None,
}),
_p: PhantomPinned,
}))
}
}
@@ -117,8 +120,7 @@ impl Processor for TransformHashJoin {
}
}

#[allow(clippy::missing_transmute_annotations)]
fn process(&mut self) -> Result<()> {
fn process<'a>(&'a mut self) -> Result<()> {
match &mut self.stage {
Stage::Finished => Ok(()),
Stage::Build(state) => {
@@ -144,7 +146,9 @@ impl Processor for TransformHashJoin {
if let Some(probe_data) = state.input_data.take() {
let stream = self.join.probe_block(probe_data)?;
// This is safe because both join and stream are properties of the struct.
state.stream = Some(unsafe { std::mem::transmute(stream) });
state.stream = Some(unsafe {
std::mem::transmute::<Box<dyn JoinStream + 'a>, Box<dyn JoinStream>>(stream)
});
}

if let Some(mut stream) = state.stream.take() {
@@ -161,7 +165,11 @@ impl Processor for TransformHashJoin {
if let Some(final_stream) = self.join.final_probe()? {
state.initialize = true;
// This is safe because both join and stream are properties of the struct.
state.stream = Some(unsafe { std::mem::transmute(final_stream) });
state.stream = Some(unsafe {
std::mem::transmute::<Box<dyn JoinStream + 'a>, Box<dyn JoinStream>>(
final_stream,
)
});
} else {
state.finished = true;
}
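The `process` change above swaps the blanket `#[allow(clippy::missing_transmute_annotations)]` for a named lifetime plus fully annotated transmutes, spelling out that only the `JoinStream` trait object's lifetime is erased; the added `PhantomPinned` opts the processor out of `Unpin`, signalling that it holds self-referential state (the stream borrows from the join stored in the same struct). A minimal sketch of that lifetime-erasure pattern; `Stream` and `Counter` are hypothetical stand-ins for the crate's `JoinStream` types:

```rust
// Hypothetical stand-in for the JoinStream trait object being erased.
trait Stream {
    fn next_value(&mut self) -> Option<u64>;
}

struct Counter<'a> {
    remaining: &'a mut u64,
}

impl Stream for Counter<'_> {
    fn next_value(&mut self) -> Option<u64> {
        if *self.remaining == 0 {
            return None;
        }
        *self.remaining -= 1;
        Some(*self.remaining)
    }
}

// Safety contract: the caller must keep whatever the stream borrows alive for
// as long as the erased box is used. In the processor this holds because the
// join and the stream live in the same struct.
fn erase_lifetime<'a>(stream: Box<dyn Stream + 'a>) -> Box<dyn Stream> {
    // Source and destination types differ only in the trait object's lifetime.
    unsafe { std::mem::transmute::<Box<dyn Stream + 'a>, Box<dyn Stream>>(stream) }
}

fn main() {
    let mut budget = 3u64;
    let bounded: Box<dyn Stream + '_> = Box::new(Counter { remaining: &mut budget });
    let mut erased = erase_lifetime(bounded);
    while let Some(v) = erased.next_value() {
        println!("{v}");
    }
    drop(erased); // dropped before `budget` goes out of scope, upholding the contract
}
```

Writing the source and destination types in the turbofish keeps the unsafe block reviewable at a glance: the two types must differ only in the lifetime bound.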