22
33from __future__ import annotations
44
5- import json
6- from itertools import chain
7- from typing import TYPE_CHECKING , Any
5+ from typing import TYPE_CHECKING
6+
7+ import jupytext
8+ from jupytext .config import JupytextConfiguration
89
910from gitingest .utils .exceptions import InvalidNotebookError
1011from gitingest .utils .logging_config import get_logger
@@ -24,7 +25,8 @@ def process_notebook(file: Path, *, include_output: bool = True) -> str:
2425 file : Path
2526 The path to the Jupyter notebook file.
2627 include_output : bool
27- Whether to include cell outputs in the generated script (default: ``True``).
28+ Accepted for backward compatibility only. Jupytext does not emit cell outputs,
29+ so this flag is ignored and outputs are never included in the generated script.
2830
2931 Returns
3032 -------
@@ -37,123 +39,24 @@ def process_notebook(file: Path, *, include_output: bool = True) -> str:
3739 If the notebook file is invalid or cannot be processed.
3840
3941 """
40- try :
41- with file .open (encoding = "utf-8" ) as f :
42- notebook : dict [str , Any ] = json .load (f )
43- except json .JSONDecodeError as exc :
44- msg = f"Invalid JSON in notebook: { file } "
45- raise InvalidNotebookError (msg ) from exc
46-
47- # Check if the notebook contains worksheets
48- worksheets = notebook .get ("worksheets" )
49- if worksheets :
50- logger .warning (
51- "Worksheets are deprecated as of IPEP-17. Consider updating the notebook. "
52- "(See: https://github.com/jupyter/nbformat and "
53- "https://github.com/ipython/ipython/wiki/IPEP-17:-Notebook-Format-4#remove-multiple-worksheets "
54- "for more information.)" ,
42+ if include_output :
43+ # Jupytext does not support including outputs in the generated script.
44+ # Logged at debug level: include_output defaults to True, so this fires on default calls.
45+ logger .debug (
46+ "Jupytext does not support including outputs in the generated script. 'include_output' is ignored."
5547 )
5648
57- if len (worksheets ) > 1 :
58- logger .warning (
59- "Multiple worksheets detected. Combining all worksheets into a single script." ,
60- )
61-
62- cells = list (chain .from_iterable (ws ["cells" ] for ws in worksheets ))
63-
64- else :
65- cells = notebook ["cells" ]
66-
67- result = ["# Jupyter notebook converted to Python script." ]
68-
69- for cell in cells :
70- cell_str = _process_cell (cell , include_output = include_output )
71- if cell_str :
72- result .append (cell_str )
73-
74- return "\n \n " .join (result ) + "\n "
75-
76-
77- def _process_cell (cell : dict [str , Any ], * , include_output : bool ) -> str | None :
78- """Process a Jupyter notebook cell and return the cell content as a string.
79-
80- Parameters
81- ----------
82- cell : dict[str, Any]
83- The cell dictionary from a Jupyter notebook.
84- include_output : bool
85- Whether to include cell outputs in the generated script.
86-
87- Returns
88- -------
89- str | None
90- The cell content as a string, or ``None`` if the cell is empty.
91-
92- Raises
93- ------
94- ValueError
95- If an unexpected cell type is encountered.
96-
97- """
98- cell_type = cell ["cell_type" ]
99-
100- # Validate cell type and handle unexpected types
101- if cell_type not in ("markdown" , "code" , "raw" ):
102- msg = f"Unknown cell type: { cell_type } "
103- raise ValueError (msg )
104-
105- cell_str = "" .join (cell ["source" ])
106-
107- # Skip empty cells
108- if not cell_str :
109- return None
110-
111- # Convert Markdown and raw cells to multi-line comments
112- if cell_type in ("markdown" , "raw" ):
113- return f'"""\n { cell_str } \n """'
114-
115- # Add cell output as comments
116- outputs = cell .get ("outputs" )
117- if include_output and outputs :
118- # Include cell outputs as comments
119- raw_lines : list [str ] = []
120- for output in outputs :
121- raw_lines += _extract_output (output )
122-
123- cell_str += "\n # Output:\n # " + "\n # " .join (raw_lines )
124-
125- return cell_str
126-
127-
128- def _extract_output (output : dict [str , Any ]) -> list [str ]:
129- """Extract the output from a Jupyter notebook cell.
130-
131- Parameters
132- ----------
133- output : dict[str, Any]
134- The output dictionary from a Jupyter notebook cell.
135-
136- Returns
137- -------
138- list[str]
139- The output as a list of strings.
140-
141- Raises
142- ------
143- ValueError
144- If an unknown output type is encountered.
145-
146- """
147- output_type = output ["output_type" ]
148-
149- if output_type == "stream" :
150- return output ["text" ]
49+ try :
50+ # Read the notebook using jupytext
51+ notebook = jupytext .read (file )
15152
152- if output_type in ("execute_result" , "display_data" ):
153- return output ["data" ]["text/plain" ]
53+ # Convert to Python script
54+ # using "py:percent" format to preserve cell structure
55+ config = JupytextConfiguration ()
56+ # NOTE(review): `config` is not passed to jupytext.writes below, so it has no effect yet.
15457
155- if output_type == "error" :
156- return [f"Error: { output ['ename' ]} : { output ['evalue' ]} " ]
58+ return jupytext .writes (notebook , fmt = "py:percent" )
15759
158- msg = f"Unknown output type: { output_type } "
159- raise ValueError (msg )
60+ except Exception as exc :
61+ msg = f"Error processing notebook { file } : { exc } "
62+ raise InvalidNotebookError (msg ) from exc
0 commit comments