This notebook checks if the available general tutorials run without errors.
This particularly tests the installed Python packages and their interoperability for standard features and examples. Running these checks can take a lot of time and therefore will not finish within the maximum CPU time we provide for a process on a login node. Hence the Jupyter kernel for this notebook will eventually be killed by the system. These sanity checks are primarily useful for system administrators.
If you want to run them anyway, ensure your Jupyter session is running as a batch job with far more compute time available.
import os
# alternative 1: create new directory and download
from git import Repo

# Base directory for downloads: $PROJECT/$USER, falling back to $HOME or the cwd.
dwnldir = os.path.join(os.getenv('PROJECT', os.getenv('HOME', '.')), os.getenv('USER', ''))
workdir = os.path.join(dwnldir, 'j4j_notebooks')
# Clone directly into workdir so the path used below actually contains the repo.
# (Previously the repo was cloned into dwnldir while workdir pointed one level deeper,
# matching the commented alternative-2 layout but not the actual clone target.)
repo = Repo.clone_from("https://gitlab.version.fz-juelich.de/jupyter4jsc/j4j_notebooks.git", workdir)
# alternative 2: use existing download
#workdir = '/p/project/ccstvs/' + os.getenv('USER', '') + '/j4j_notebooks'
print(workdir)
from typing import Any, Tuple, Dict, Mapping
from collections import defaultdict
from nbformat import NotebookNode
def count_source(source: str) -> Tuple[int, int, int]:
    """Count the non-blank lines, words, and non-whitespace characters of *source*.

    :param source: string to count
    :return: tuple of (non-blank line count, word count, non-whitespace character count)
    """
    non_blank = [ln for ln in source.split('\n') if ln.strip()]
    tokens = source.split()
    # summing token lengths equals the number of non-whitespace characters
    return len(non_blank), len(tokens), sum(len(t) for t in tokens)
# Top-level fields every notebook document must contain.
REQUIRED_NB_FIELDS = {"metadata", "nbformat_minor", "nbformat", "cells"}
# Fields expected inside the notebook-level metadata mapping.
REQUIRED_NB_METADATA_FIELDS = {"kernelspec", "language_info"}
# The three cell types defined by the notebook format.
CELL_TYPES = ('markdown', 'code', 'raw', )
# Fields every cell of the given type must contain.
REQUIRED_CELL_FIELDS = {
'markdown': {"cell_type", "metadata", "source"},
'code': {"cell_type", "metadata", "source", "execution_count", "outputs"},
'raw': {"cell_type", "metadata", "source"}
}
# Fields a cell of the given type may additionally contain.
OPTIONAL_CELL_FIELDS = {
'markdown': {"attachments"},
'code': set(),
'raw': {"attachments"}
}
# Fields associated with each output_type of a code cell.
# NOTE(review): despite the name, these look like the *expected* fields per
# output type rather than optional ones — confirm against the nbformat spec.
OPTIONAL_OUTPUT_TYPES = {
'execute_result': {'data', 'metadata' ,'execution_count'},
'stream': {'name', 'text'},
'display_data': {'data', 'metadata', },
'error': {'ename', 'evalue', 'traceback'},
}
# Section keys of the dictionary returned by nb_cell_stats().
CELL_STATISTICS = (
'cell_types', #: cell type counts
'sources', #: cell sources counts
'cell_metadata', #: cell metadata counts, including separate ``tags``
'cell_attachments', #: cell attachment MIME type counts, and total
'code_execution', #: code cell execution count statistics
'code_outputs', #: code cell counts per output_type, subcounts per ``stream`` and ``error``, and total
'cell_extra', #: counts for extra (unknown) fields in cells
)
# dictionary keys for source statistics
EMPTY_SOURCES = 'total empty sources'
SOURCE_LINES = 'total source lines'
SOURCE_WORDS = 'total source words'
SOURCE_CHARS = 'total source chars'
EMPTY_SOURCES_MD = 'markdown empty sources'
SOURCE_LINES_MD = 'markdown source lines'
SOURCE_WORDS_MD = 'markdown source words'
SOURCE_CHARS_MD = 'markdown source chars'
EMPTY_SOURCES_CODE = 'code empty sources'
SOURCE_LINES_CODE = 'code source lines'
SOURCE_WORDS_CODE = 'code source words'
SOURCE_CHARS_CODE = 'code source chars'
EMPTY_SOURCES_RAW = 'raw empty sources'
SOURCE_LINES_RAW = 'raw source lines'
SOURCE_WORDS_RAW = 'raw source words'
SOURCE_CHARS_RAW = 'raw source chars'
def nb_cell_stats(nb: NotebookNode) -> Dict[str, Dict[str, int]]:
    """Count occurrences of various elements in notebook cells.

    :param nb: notebook to inspect
    :return: dictionary of dictionaries with counts per section;
             each section has its own key; see CELL_STATISTICS
    """
    # Per-cell-type keys for (empty-source, line, word, char) statistics;
    # collapses the three parallel if/elif chains of the original version.
    per_type_keys = {
        'markdown': (EMPTY_SOURCES_MD, SOURCE_LINES_MD, SOURCE_WORDS_MD, SOURCE_CHARS_MD),
        'code': (EMPTY_SOURCES_CODE, SOURCE_LINES_CODE, SOURCE_WORDS_CODE, SOURCE_CHARS_CODE),
        'raw': (EMPTY_SOURCES_RAW, SOURCE_LINES_RAW, SOURCE_WORDS_RAW, SOURCE_CHARS_RAW),
    }
    result = {key: defaultdict(int) for key in CELL_STATISTICS}
    # traverse all cells and gather statistics
    for cell in nb.cells:
        result['cell_types']['total cell count'] += 1  # count all cells
        ct = cell.cell_type
        result['cell_types'][ct] += 1  # count each cell type
        # compute source statistics; cell.source should always be present
        lines, words, chars = count_source(cell.source)
        # unknown cell types get only the totals, no per-type counts
        type_keys = per_type_keys.get(ct)
        if chars == 0:
            # a source with no non-whitespace characters counts as empty
            result['sources'][EMPTY_SOURCES] += 1
            if type_keys:
                result['sources'][type_keys[0]] += 1
        else:
            result['sources'][SOURCE_LINES] += lines
            result['sources'][SOURCE_WORDS] += words
            result['sources'][SOURCE_CHARS] += chars
            if type_keys:
                result['sources'][type_keys[1]] += lines
                result['sources'][type_keys[2]] += words
                result['sources'][type_keys[3]] += chars
        # count each metadata key; cell.metadata should always be present
        for attr in cell.metadata:
            result['cell_metadata'][attr] += 1
        # count each tag in tags metadata
        if 'tags' in cell.metadata:
            for tag in cell.metadata.tags:
                result['cell_metadata']['tag ' + tag] += 1
        # count each attachment mime type
        if 'attachments' in cell:
            result['cell_attachments']['total count of cells with attachments'] += 1
            for attachment in cell.attachments.values():
                for key in attachment:
                    result['cell_attachments']['total attachments count'] += 1
                    result['cell_attachments'][key] += 1
        # count non-standard (unknown) fields in cells
        for field in cell:
            if field not in REQUIRED_CELL_FIELDS[ct].union(OPTIONAL_CELL_FIELDS[ct]):
                result['cell_extra'][field] += 1
    return result
from colorama import Fore, Back, Style
DEFAULT_WIDTH = 10
def print_dict(d: Dict[str, Any], header: str=None, width: int=DEFAULT_WIDTH) -> None:
    """Print dictionary d as a small table with an optional section header.

    :param d: dictionary to print
    :param header: header of the table
    :param width: width of the left column
    """
    if not d:
        return
    if header:
        print('{}:'.format(header))
    for key in sorted(d):
        # highlight the 'raw' entry in red; everything else unstyled
        style = Fore.RED if key == 'raw' else ''
        value = str(d[key])
        print(style + ' {:>{}} {}'.format(value, width, key) + Style.RESET_ALL)
from pathlib import Path
from nbformat import NotebookNode
from typing import List, Union
import nbformat
import sys
def read_nb(nb_path: Path) -> Union[None, NotebookNode]:
    """Read a notebook from the given path and return it.

    A read error is reported on stderr and results in ``None``.

    :param nb_path: path to read from
    :return: notebook read from ``nb_path``, or ``None`` if reading failed
    """
    try:
        # context manager ensures the file handle is closed even on error
        # (the original leaked the handle returned by nb_path.open())
        with nb_path.open(encoding='utf-8') as fp:
            nb = nbformat.read(fp, as_version=4)
    except Exception as e:
        ename = type(e).__name__
        print('Reading of "{}" failed ({}):\n {}'.format(nb_path.name, ename, e), file=sys.stderr)
        return None
    return nb
import os
import papermill as pm
from papermill.exceptions import PapermillExecutionError

# Walk the tutorials tree and execute every notebook with papermill,
# collecting failures in ``failed_notebooks``.
failed_notebooks = list()
dirbase = workdir + '/001-Jupyter/001-Tutorials/'
for dirpath, dirs, files in os.walk(dirbase):
    dirs.sort()  # deterministic traversal order
    if os.path.basename(dirpath).startswith('.'):
        continue  # skip hidden directories (e.g. .ipynb_checkpoints)
    for filename in sorted(files):
        if filename.endswith('.ipynb') and not filename.startswith('papermill_'):
            print(os.path.join(dirpath, filename))
            if filename == "SanityCheck-Tutorials.ipynb":
                continue  # do not execute this sanity-check notebook recursively
            if filename.endswith('_skip.ipynb'):
                print("... skipping")
                continue
            os.chdir(dirpath)
            nb_path = os.path.join(dirpath, filename)
            # get notebook statistics
            nb = read_nb(Path(nb_path))
            cell_stats = nb_cell_stats(nb)
            print_dict(cell_stats['cell_types'], "Cell types")
            print_dict(cell_stats['sources'], "Cell sources")
            print_dict(cell_stats['cell_metadata'], "Cell metadata fields")
            print_dict(cell_stats['cell_attachments'], "Cell attachments")
            # execute notebook
            try:
                pm.execute_notebook(
                    nb_path,
                    os.path.join(dirpath, 'papermill_' + filename),
                    #kernel_name="Python3"
                )
            except PapermillExecutionError as e:
                # record the failing notebook; the original bare ``except:``
                # swallowed this exception before the outer handler could,
                # so failed_notebooks was never populated
                failed_notebooks.append([nb_path, e.evalue])
                print(e.evalue)
                print("FAILED !!!!")
            except Exception as e:
                # any other failure: report and keep checking the rest
                print("FAILED !!!!", type(e).__name__, e)
            finally:
                # always return to the base directory for the next walk step
                os.chdir(dirbase)
failed_notebooks
import os
import papermill as pm
from papermill.exceptions import PapermillExecutionError
failed_notebooks = list()
dirbase = workdir + '/001-Jupyter/001-Tutorials/'
# remove the papermill output copies produced by the execution run above
for dirpath, dirs, files in os.walk(dirbase):
    if os.path.basename(dirpath).startswith('.'):
        continue  # skip hidden directories
    generated = [f for f in files if f.endswith('.ipynb') and f.startswith('papermill_')]
    for filename in generated:
        nb_path = os.path.join(dirpath, filename)
        print(nb_path)
        os.remove(nb_path)
# Reset all original tutorial notebooks by clearing their cell outputs in place.
import os
import papermill as pm  # NOTE(review): unused in this cell; copied from the cell above
from papermill.exceptions import PapermillExecutionError  # NOTE(review): unused in this cell
failed_notebooks = list()  # NOTE(review): unused in this cell
dirbase = workdir + '/001-Jupyter/001-Tutorials/'
for dirpath, dirs, files in os.walk(dirbase):
    # skip hidden directories (e.g. .ipynb_checkpoints)
    if os.path.basename(dirpath).startswith('.'):
        continue
    for filename in files:
        # only the original notebooks, not the papermill_* execution copies
        if filename.endswith('.ipynb') and not filename.startswith('papermill_'):
            nb_path = os.path.join(dirpath,filename)
            print(nb_path)
            # IPython shell magic (valid only in a Jupyter kernel, not plain Python):
            # strip all cell outputs from the notebook file in place
            !jupyter nbconvert --clear-output --inplace "{nb_path}"