Source code for dxchange.writer

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# #########################################################################
# Copyright (c) 2015, UChicago Argonne, LLC. All rights reserved.         #
#                                                                         #
# Copyright 2015. UChicago Argonne, LLC. This software was produced       #
# under U.S. Government contract DE-AC02-06CH11357 for Argonne National   #
# Laboratory (ANL), which is operated by UChicago Argonne, LLC for the    #
# U.S. Department of Energy. The U.S. Government has rights to use,       #
# reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR    #
# UChicago Argonne, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR        #
# ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is     #
# modified to produce derivative works, such modified software should     #
# be clearly marked, so as not to confuse it with the version available   #
# from ANL.                                                               #
#                                                                         #
# Additionally, redistribution and use in source and binary forms, with   #
# or without modification, are permitted provided that the following      #
# conditions are met:                                                     #
#                                                                         #
#     * Redistributions of source code must retain the above copyright    #
#       notice, this list of conditions and the following disclaimer.     #
#                                                                         #
#     * Redistributions in binary form must reproduce the above copyright #
#       notice, this list of conditions and the following disclaimer in   #
#       the documentation and/or other materials provided with the        #
#       distribution.                                                     #
#                                                                         #
#     * Neither the name of UChicago Argonne, LLC, Argonne National       #
#       Laboratory, ANL, the U.S. Government, nor the names of its        #
#       contributors may be used to endorse or promote products derived   #
#       from this software without specific prior written permission.     #
#                                                                         #
# THIS SOFTWARE IS PROVIDED BY UChicago Argonne, LLC AND CONTRIBUTORS     #
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT       #
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS       #
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UChicago     #
# Argonne, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,        #
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,    #
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;        #
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER        #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT      #
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN       #
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         #
# POSSIBILITY OF SUCH DAMAGE.                                             #
# #########################################################################

"""
Module for data exporting data files.
"""

import dxchange.dtype as dt
import numpy as np
import os
import h5py
import logging
import re
from itertools import cycle

__author__ = "Doga Gursoy, Francesco De Carlo"
__copyright__ = "Copyright (c) 2015-2016, UChicago Argonne, LLC."
__version__ = "0.1.0"
__docformat__ = 'restructuredtext en'
__all__ = ['write_dxf',
           'write_hdf5',
           'write_netcdf4',
           'write_npy',
           'write_tiff',
           'write_tiff_stack',
           'write_vtr',
           'write_aps_1id_report',
           ]

logger = logging.getLogger(__name__)


def _check_import(modname):
    try:
        return __import__(modname)
    except ImportError:
        logger.warn('Warning: ' + modname + ' module not found')
        return None

dxfile = _check_import('dxfile')
netCDF4 = _check_import('netCDF4')


def get_body(fname):
    """
    Get file name after extension removed.
    """
    body = os.path.splitext(fname)[0]
    return body


def get_extension(fname):
    """
    Get file extension.
    """
    return '.' + fname.split(".")[-1]


def remove_trailing_digits(text):
    digit_string = re.search(r'\d+$', text)
    if digit_string is not None:
        number_of_digits = len(digit_string.group())
        text = ''.join(text[:-number_of_digits])
        return (text, number_of_digits)
    else:
        return (text, 0)


def _init_dirs(fname):
    """
    Initialize directories for saving output files.

    Parameters
    ----------
    fname : str
        Output file name.
    """
    dname = os.path.dirname(os.path.abspath(fname))
    if not os.path.exists(dname):
        os.makedirs(dname)


def _suggest_new_fname(fname, digit):
    """
    Suggest new string with an attached (or increased) value indexing
    at the end of a given string.

    For example if "myfile.tiff" exist, it will return "myfile-1.tiff".

    Parameters
    ----------
    fname : str
        Output file name.
    digit : int, optional
        Number of digits in indexing stacked files.

    Returns
    -------
    str
        Indexed new string.
    """
    if os.path.isfile(fname):
        body = get_body(fname)
        ext = get_extension(fname)
        indq = 1
        file_exist = False
        while not file_exist:
            fname = body + '-' + '{0:0={1}d}'.format(indq, digit) + ext
            if not os.path.isfile(fname):
                file_exist = True
            else:
                indq += 1
    return fname


def _init_write(arr, fname, ext, dtype, overwrite):
    if not (isinstance(fname, str)):
        fname = 'tmp/data' + ext
    else:
        if not fname.endswith(ext):
            fname = fname + ext
    fname = os.path.abspath(fname)
    if not overwrite:
        fname = _suggest_new_fname(fname, digit=1)
    _init_dirs(fname)
    if dtype is not None:
        arr = dt.as_dtype(arr, dtype)
    return fname, arr


def _write_hdf5_dataset(h5object, data, dname, appendaxis, maxshape):
    """
    Create a dataset and write data to a specific hdf5 object
    (file or group).

    Parameters
    ----------
    h5object: h5py object
        hdf5 object to write the dataset to.
    data : ndarray
        Array data to be saved.
    dname : str
        dataset name
    appendaxis : int
        Axis of dataset to which data will be appended.
        If given will create a resizable dataset.
    maxshape: tuple
        maxshape of resizable dataset.
        Only used if dataset has not been created.
    """
    if appendaxis is not None:
        if dname not in h5object:
            h5object.create_dataset(dname, data=data,
                                    maxshape=maxshape)
        else:
            size = h5object[dname].shape
            newsize = list(size)
            newsize[appendaxis] += data.shape[appendaxis]
            h5object[dname].resize(newsize)

            slices = 3 * [slice(None, None, None), ]
            slices[appendaxis] = slice(size[appendaxis], None, None)
            h5object[dname][tuple(slices)] = data
    else:
        h5object.create_dataset(dname, data=data)


[docs]def write_hdf5(
        data, fname='tmp/data.h5', gname='exchange', dname='data',
        dtype=None, overwrite=False, appendaxis=None, maxsize=None):
    """
    Write data to hdf5 file in a specific group.

    Parameters
    ----------
    data : ndarray
        Array data to be saved.
    fname : str
        File name to which the data is saved. ``.h5`` extension
        will be appended if it does not already have one.
    gname : str, optional
        Path to the group inside hdf5 file where data will be written.
    dname : str, optional
        Name for dataset where data will be written.
    dtype : data-type, optional
        By default, the data-type is inferred from the input data.
    overwrite: bool, optional
        if True, overwrites the existing file if the file exists.
    appendaxis: int, optional
        Axis where data is to be appended to.
        Must be given when creating a resizable dataset.
    maxsizee: int, optional
        Maximum size that the dataset can be resized to along the
        given axis.
    """
    mode = 'w' if overwrite else 'a'

    if appendaxis is not None:
        overwrite = True  # True if appending to file so fname is not changed
        maxshape = list(data.shape)
        maxshape[appendaxis] = maxsize
    else:
        maxshape = maxsize
    fname, data = _init_write(data, fname, '.h5', dtype, overwrite)

    with h5py.File(fname, mode=mode) as f:
        if 'implements' not in f:
            f.create_dataset('implements', data=gname)
        if gname not in f:
            f.create_group(gname)
        _write_hdf5_dataset(f[gname], data, dname,
                            appendaxis, maxshape)

[docs]def write_netcdf4(
        data, fname='tmp/data.nc', dname='VOLUME',
        dtype=None, overwrite=False, appendaxis=None, maxsize=None):
    """
    Write data to netCDF file.

    Parameters
    ----------
    data : ndarray
        Array data to be saved.
    fname : str
        File name to which the data is saved. 
    """
    mode = 'w' if overwrite else 'a'

    ncfile = netCDF4.Dataset(fname,mode='w',format='NETCDF3_64BIT') 
    x_dim = ncfile.createDimension('NX', data.shape[0])
    y_dim = ncfile.createDimension('NY', data.shape[1])
    z_dim = ncfile.createDimension('NZ', data.shape[2])
    d = ncfile.createVariable(dname, data.dtype, ('NX', 'NY', 'NZ'))
    d[:,:,:] = data
    ncfile.close()
    

[docs]def write_dxf(
        data, fname='tmp/data.h5', axes='theta:y:x',
        dtype=None, overwrite=False):
    """
    Write data to a data exchange hdf5 file.

    Parameters
    ----------
    data : ndarray
        Array data to be saved.
    fname : str
        File name to which the data is saved. ``.h5`` extension
        will be appended if it does not already have one.
    axes : str
        Attribute labels for the data array axes.
    dtype : data-type, optional
        By default, the data-type is inferred from the input data.
    overwrite: bool, optional
        if True, overwrites the existing file if the file exists.
    """
    fname, data = _init_write(data, fname, '.h5', dtype, overwrite)
    f = dxfile.dxtomo.File(fname, mode='w')
    f.add_entry(dxfile.dxtomo.Entry.data(data={
                'value': data, 'units': 'counts',
                'description': 'transmission', 'axes': axes}))
    f.close()


[docs]def write_npy(
        data, fname='tmp/data.npy', dtype=None, overwrite=False):
    """
    Write data to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    data : ndarray
        Array data to be saved.
    fname : str
        File name to which the data is saved. ``.npy`` extension
        will be appended if it does not already have one.
    """
    fname, data = _init_write(data, fname, '.npy', dtype, overwrite)
    np.save(fname, data)


[docs]def write_tiff(
        data, fname='tmp/data.tiff', dtype=None, overwrite=False):
    """
    Write image data to a tiff file.

    Parameters
    ----------
    data : ndarray
        Array data to be saved.
    fname : str
        File name to which the data is saved. ``.tiff`` extension
        will be appended if it does not already have one.
    dtype : data-type, optional
        By default, the data-type is inferred from the input data.
    overwrite: bool, optional
        if True, overwrites the existing file if the file exists.
    """
    fname, data = _init_write(data, fname, '.tiff', dtype, overwrite)
    import tifffile
    tifffile.imsave(fname, data)


[docs]def write_tiff_stack(
        data, fname='tmp/data.tiff', dtype=None, axis=0, digit=5,
        start=0, overwrite=False):
    """
    Write data to stack of tiff file.

    Parameters
    ----------
    data : ndarray
        Array data to be saved.
    fname : str
        Base file name to which the data is saved. ``.tiff`` extension
        will be appended if it does not already have one.
    dtype : data-type, optional
        By default, the data-type is inferred from the input data.
    axis : int, optional
        Axis along which stacking is performed.
    start : int, optional
        First index of file in stack for saving.
    digit : int, optional
        Number of digits in indexing stacked files.
    overwrite: bool, optional
        if True, overwrites the existing file if the file exists.
    """
    fname, data = _init_write(data, fname, '.tiff', dtype, True)
    body = get_body(fname)
    ext = get_extension(fname)
    _data = np.swapaxes(data, 0, axis)
    for m in range(start, start + data.shape[axis]):
        _fname = body + '_' + '{0:0={1}d}'.format(m, digit) + ext
        if not overwrite:
            _fname = _suggest_new_fname(_fname, digit=1)
        write_tiff(_data[m - start], _fname, overwrite=overwrite)


[docs]def write_vtr(data, fname='tmp/data.vtr', down_sampling=(5, 5, 5)):
    """
    Write the reconstructed data (img stackes) to vtr file (retangular grid)

    Parameters
    ----------
    data          :  np.3darray
        reconstructed 3D image stacks with axis=0 as the omega
    fname         :  str
        file name of the output vtr file
    down_sampling :  tuple 
        down sampling steps along three axes

    Returns
    -------
    None
    """
    # vtk is only used here, therefore doing an in module import
    import vtk
    from vtk.util import numpy_support
    
    # convert to unit8 can significantly reduce the output vtr file
    # size, or just do a severe down-sampling
    data = _normalize_imgstacks(data[::down_sampling[0], 
                                     ::down_sampling[1], 
                                     ::down_sampling[2],]) * 255
    
    # --init rectangular grid
    rGrid = vtk.vtkRectilinearGrid()
    coordArray = [vtk.vtkDoubleArray(),
                  vtk.vtkDoubleArray(),
                  vtk.vtkDoubleArray(),
                 ]
    coords = np.array([np.arange(data.shape[i]) for i in range(3)])
    coords = [0.5 * np.array([3.0 * coords[i][0] - coords[i][0 + int(len(coords[i]) > 1)]] + \
                             [coords[i][j-1] + coords[i][j] for j in range(1,len(coords[i]))] + \
                             [3.0 * coords[i][-1] - coords[i][-1 - int(len(coords[i]) > 1)]]
                            ) 
              for i in range(3)
             ]
    grid = np.array(list(map(len,coords)),'i')
    rGrid.SetDimensions(*grid)
    for i,points in enumerate(coords):
        for point in points:
            coordArray[i].InsertNextValue(point)

    rGrid.SetXCoordinates(coordArray[0])
    rGrid.SetYCoordinates(coordArray[1])
    rGrid.SetZCoordinates(coordArray[2])
    
    # vtk requires x to be the fast axis
    # NOTE:
    #    Proper coordinate transformation is required to connect the 
    #    tomography data with other down-stream analysis (such as FF-HEDM
    #    and NF-HEDM).
    imgstacks = np.swapaxes(data, 0, 2)
    
    VTKarray = numpy_support.numpy_to_vtk(num_array=imgstacks.flatten().astype(np.uint8),
                                          deep=True,
                                          array_type=vtk.VTK_UNSIGNED_CHAR,
                                         )
    VTKarray.SetName('img')
    rGrid.GetCellData().AddArray(VTKarray)
    
    rGrid.Modified()
    if vtk.VTK_MAJOR_VERSION <= 5: 
        rGrid.Update()

    # output to file
    writer = vtk.vtkXMLRectilinearGridWriter()
    writer.SetFileName(fname)
    writer.SetDataModeToBinary()
    writer.SetCompressorTypeToZLib()
    if vtk.VTK_MAJOR_VERSION <= 5: 
        writer.SetInput(rGrid)
    else:                          
        writer.SetInputData(rGrid)
    writer.Write()


[docs]def write_aps_1id_report(df_scanmeta, reportfn):
    """
    Generate report of beam conditions based on given DataFrame of the 
    metadata

    Parameters
    ----------
    df_scanmeta  :  pd.DataFrame
        DataFrame of the parsed metadata
            dxreader.read_aps_1id_metafile(log_file)
    reportfn     :  str
        Output report file name (include path)

    Returns
    -------
    pd.DataFrame
        Updated Dataframe with added beam conditions
    """
    import matplotlib.pyplot as plt

    # add calculation of four beam quality
    # -- Temporal Beam Stability
    df_scanmeta['TBS'] = df_scanmeta['IC-E3']/df_scanmeta['IC-E3'].values[0]
    # -- Vertical Beam Stability
    df_scanmeta['VBS'] = df_scanmeta['IC-E1']/df_scanmeta['IC-E2']
    # -- Beam Loss at Slit
    df_scanmeta['BLS'] = (df_scanmeta['IC-E1'] + df_scanmeta['IC-E2'])/df_scanmeta['IC-E3']
    # -- Beam Loss during Travel 
    df_scanmeta['BLT'] = df_scanmeta['IC-E5']/df_scanmeta['IC-E3']
    # -- corresponding color code
    pltlbs  = ['TBS',  'VBS',     'BLS', 'BLT' ]
    pltclrs = ['red', 'blue', 'magenta', 'cyan']

    # start plot
    fig = plt.figure(figsize=(8,3))
    ax  = fig.add_subplot(111)
    lnclrs = cycle(['gray', 'lime', 'gray', 'black'])
    # -- plot one segment at a time
    for lb, clr in zip(pltlbs, pltclrs):
        addlabel=True
        for layerID in df_scanmeta['layerID'].unique():
            tmpdf = df_scanmeta[df_scanmeta['layerID'] == layerID]
            for imgtype in tmpdf['type'].unique():
                # plot the main curve
                currentSlice = tmpdf[tmpdf['type'] == imgtype]

                ax.plot(currentSlice['Date'], currentSlice[lb], 
                        linewidth=0.2, 
                        color=clr,
                        label=lb if addlabel else '_nolegend_',
                        alpha=0.5,
                        )
                addlabel = False
                # add the vertical guard
                tmpx = currentSlice['Date'].values
                tmpclr = next(lnclrs)
                for x in [tmpx[0], tmpx[-1]]:
                    ax.plot([x, x], [1e-4, 1e2], 
                            color=tmpclr,
                            linewidth=0.05,
                            linestyle='dashed',
                            alpha=0.1,
                           )
    # -- set canvas property
    ax.set_yscale('log')
    plt.legend(loc=0)
    plt.ylim([0.9, 2.0])  # 10% as cut range
    plt.xticks(rotation=45)
    # -- save the figure (both pdf and png)
    plt.savefig(reportfn, 
                transparent=True, 
                bbox_inches='tight', 
                pad_inches=0.1,
               )
    # -- clear/close figure
    plt.close()
    
    return df_scanmeta


def _normalize_imgstacks(img):
    """
    Normalize image stacks on a per layer base

    Parameters
    ----------
    img  :  np.3darray
        img stacks to be normalized
    
    Returns
    -------
    np.3darray
        normalized image stacks
    """
    return img/np.amax(img.reshape(img.shape[0], 
                                   img.shape[1]*img.shape[2],
                                  ), 
                       axis=1,
                      ).reshape(img.shape[0],1,1)