@@ -66,3 +66,32 @@ def test_pipeline_run_loader(self,):
6666 assert elements .__class__ .__name__ == 'MultiModalLoader'
6767 assert len (elements ) == 14
6868 assert elements .elements [0 ].metadata .to_dict ()['filename' ] == 'Multimodal_sample_file.pdf'
69+
def test_pipeline_summarize(self):
    """Run the PDF pipeline with an ``ImageSummarizer`` step and check its output.

    The pipeline partitions ``PDF_FILE_PATH`` with the ``by_title`` chunking
    strategy, cleans extra whitespace and then applies ``ImageSummarizer``.
    With ``loader=False`` the run is expected to return a plain list of 15
    elements, where:

    * the first element carries the source filename, page number and the
      extracted e-mail address in its metadata;
    * the element at index 6 is a ``Table``;
    * the second-to-last element is an ``Image`` flagged as original and
      carrying an ``input_id``;
    * the last element is a ``CompositeElement`` flagged as not original
      whose ``source_input_id`` is ``'summarized_'`` followed by that
      image's ``input_id``.
    """
    from clarifai_datautils.multimodal import Pipeline
    from clarifai_datautils.multimodal.pipeline.cleaners import Clean_extra_whitespace
    from clarifai_datautils.multimodal.pipeline.PDF import PDFPartitionMultimodal
    from clarifai_datautils.multimodal.pipeline.summarizer import ImageSummarizer

    pipeline = Pipeline(
        name='pipeline-1',
        transformations=[
            PDFPartitionMultimodal(chunking_strategy="by_title", max_characters=1024),
            Clean_extra_whitespace(),
            ImageSummarizer(),
        ])
    elements = pipeline.run(files=PDF_FILE_PATH, loader=False)

    assert len(elements) == 15
    assert isinstance(elements, list)

    # Build the metadata dict once instead of once per checked key.
    first_metadata = elements[0].metadata.to_dict()
    assert first_metadata['filename'] == 'Multimodal_sample_file.pdf'
    assert first_metadata['page_number'] == 1
    assert first_metadata['email_address'] == ['test_extraction@gmail.com']

    assert elements[6].__class__.__name__ == 'Table'

    # The last two elements are the image and the element linked back to it
    # (presumably the summary appended by ImageSummarizer -- confirm).
    image_element = elements[-2]
    summary_element = elements[-1]

    assert image_element.__class__.__name__ == 'Image'
    # Compared with == (not `is`) so the check does not depend on the flag
    # being a builtin bool.
    assert image_element.metadata.is_original == True
    assert image_element.metadata.input_id is not None
    # Named to avoid shadowing the builtin ``id``.
    image_input_id = image_element.metadata.input_id

    assert summary_element.__class__.__name__ == 'CompositeElement'
    assert summary_element.metadata.is_original == False
    assert summary_element.metadata.source_input_id == 'summarized_' + image_input_id
0 commit comments