Source code for dxchange.writer

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# #########################################################################
# Copyright (c) 2015, UChicago Argonne, LLC. All rights reserved.         #
#                                                                         #
# Copyright 2015. UChicago Argonne, LLC. This software was produced       #
# under U.S. Government contract DE-AC02-06CH11357 for Argonne National   #
# Laboratory (ANL), which is operated by UChicago Argonne, LLC for the    #
# U.S. Department of Energy. The U.S. Government has rights to use,       #
# reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR    #
# UChicago Argonne, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR        #
# ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is     #
# modified to produce derivative works, such modified software should     #
# be clearly marked, so as not to confuse it with the version available   #
# from ANL.                                                               #
#                                                                         #
# Additionally, redistribution and use in source and binary forms, with   #
# or without modification, are permitted provided that the following      #
# conditions are met:                                                     #
#                                                                         #
#     * Redistributions of source code must retain the above copyright    #
#       notice, this list of conditions and the following disclaimer.     #
#                                                                         #
#     * Redistributions in binary form must reproduce the above copyright #
#       notice, this list of conditions and the following disclaimer in   #
#       the documentation and/or other materials provided with the        #
#       distribution.                                                     #
#                                                                         #
#     * Neither the name of UChicago Argonne, LLC, Argonne National       #
#       Laboratory, ANL, the U.S. Government, nor the names of its        #
#       contributors may be used to endorse or promote products derived   #
#       from this software without specific prior written permission.     #
#                                                                         #
# THIS SOFTWARE IS PROVIDED BY UChicago Argonne, LLC AND CONTRIBUTORS     #
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT       #
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS       #
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UChicago     #
# Argonne, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,        #
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,    #
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;        #
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER        #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT      #
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN       #
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         #
# POSSIBILITY OF SUCH DAMAGE.                                             #
# #########################################################################

"""
Module for data exporting data files.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import dxchange.dtype as dt
import numpy as np
import os
import six
import h5py
import logging
import re
from itertools import cycle

__author__ = "Doga Gursoy, Francesco De Carlo"
__copyright__ = "Copyright (c) 2015-2016, UChicago Argonne, LLC."
__version__ = "0.1.0"
__docformat__ = 'restructuredtext en'
__all__ = ['write_dxf',
           'write_hdf5',
           'write_netcdf4',
           'write_npy',
           'write_tiff',
           'write_tiff_stack',
           'write_vtr',
           'write_aps_1id_report',
           ]

logger = logging.getLogger(__name__)


def _check_import(modname):
    try:
        return __import__(modname)
    except ImportError:
        logger.warn('Warning: ' + modname + ' module not found')
        return None

dxfile = _check_import('dxfile')
netCDF4 = _check_import('netCDF4')


def get_body(fname):
    """
    Get file name after extension removed.
    """
    body = os.path.splitext(fname)[0]
    return body


def get_extension(fname):
    """
    Get file extension.
    """
    return '.' + fname.split(".")[-1]


def remove_trailing_digits(text):
    digit_string = re.search('\d+$', text)
    if digit_string is not None:
        number_of_digits = len(digit_string.group())
        text = ''.join(text[:-number_of_digits])
        return (text, number_of_digits)
    else:
        return (text, 0)


def _init_dirs(fname):
    """
    Initialize directories for saving output files.

    Parameters
    ----------
    fname : str
        Output file name.
    """
    dname = os.path.dirname(os.path.abspath(fname))
    if not os.path.exists(dname):
        os.makedirs(dname)


def _suggest_new_fname(fname, digit):
    """
    Suggest new string with an attached (or increased) value indexing
    at the end of a given string.

    For example if "myfile.tiff" exist, it will return "myfile-1.tiff".

    Parameters
    ----------
    fname : str
        Output file name.
    digit : int, optional
        Number of digits in indexing stacked files.

    Returns
    -------
    str
        Indexed new string.
    """
    if os.path.isfile(fname):
        body = get_body(fname)
        ext = get_extension(fname)
        indq = 1
        file_exist = False
        while not file_exist:
            fname = body + '-' + '{0:0={1}d}'.format(indq, digit) + ext
            if not os.path.isfile(fname):
                file_exist = True
            else:
                indq += 1
    return fname


def _init_write(arr, fname, ext, dtype, overwrite):
    if not (isinstance(fname, six.string_types)):
        fname = 'tmp/data' + ext
    else:
        if not fname.endswith(ext):
            fname = fname + ext
    fname = os.path.abspath(fname)
    if not overwrite:
        fname = _suggest_new_fname(fname, digit=1)
    _init_dirs(fname)
    if dtype is not None:
        arr = dt.as_dtype(arr, dtype)
    return fname, arr


def _write_hdf5_dataset(h5object, data, dname, appendaxis, maxshape):
    """
    Create a dataset and write data to a specific hdf5 object
    (file or group).

    Parameters
    ----------
    h5object: h5py object
        hdf5 object to write the dataset to.
    data : ndarray
        Array data to be saved.
    dname : str
        dataset name
    appendaxis : int
        Axis of dataset to which data will be appended.
        If given will create a resizable dataset.
    maxshape: tuple
        maxshape of resizable dataset.
        Only used if dataset has not been created.
    """
    if appendaxis is not None:
        if dname not in h5object:
            h5object.create_dataset(dname, data=data,
                                    maxshape=maxshape)
        else:
            size = h5object[dname].shape
            newsize = list(size)
            newsize[appendaxis] += data.shape[appendaxis]
            h5object[dname].resize(newsize)

            slices = 3 * [slice(None, None, None), ]
            slices[appendaxis] = slice(size[appendaxis], None, None)
            h5object[dname][tuple(slices)] = data
    else:
        h5object.create_dataset(dname, data=data)


[docs]def write_hdf5( data, fname='tmp/data.h5', gname='exchange', dname='data', dtype=None, overwrite=False, appendaxis=None, maxsize=None): """ Write data to hdf5 file in a specific group. Parameters ---------- data : ndarray Array data to be saved. fname : str File name to which the data is saved. ``.h5`` extension will be appended if it does not already have one. gname : str, optional Path to the group inside hdf5 file where data will be written. dname : str, optional Name for dataset where data will be written. dtype : data-type, optional By default, the data-type is inferred from the input data. overwrite: bool, optional if True, overwrites the existing file if the file exists. appendaxis: int, optional Axis where data is to be appended to. Must be given when creating a resizable dataset. maxsizee: int, optional Maximum size that the dataset can be resized to along the given axis. """ mode = 'w' if overwrite else 'a' if appendaxis is not None: overwrite = True # True if appending to file so fname is not changed maxshape = list(data.shape) maxshape[appendaxis] = maxsize else: maxshape = maxsize fname, data = _init_write(data, fname, '.h5', dtype, overwrite) with h5py.File(fname, mode=mode) as f: if 'implements' not in f: f.create_dataset('implements', data=gname) if gname not in f: f.create_group(gname) _write_hdf5_dataset(f[gname], data, dname, appendaxis, maxshape)
[docs]def write_netcdf4( data, fname='tmp/data.nc', dname='VOLUME', dtype=None, overwrite=False, appendaxis=None, maxsize=None): """ Write data to netCDF file. Parameters ---------- data : ndarray Array data to be saved. fname : str File name to which the data is saved. """ mode = 'w' if overwrite else 'a' ncfile = netCDF4.Dataset(fname,mode='w',format='NETCDF3_64BIT') x_dim = ncfile.createDimension('NX', data.shape[0]) y_dim = ncfile.createDimension('NY', data.shape[1]) z_dim = ncfile.createDimension('NZ', data.shape[2]) d = ncfile.createVariable(dname, data.dtype, ('NX', 'NY', 'NZ')) d[:,:,:] = data ncfile.close()
[docs]def write_dxf( data, fname='tmp/data.h5', axes='theta:y:x', dtype=None, overwrite=False): """ Write data to a data exchange hdf5 file. Parameters ---------- data : ndarray Array data to be saved. fname : str File name to which the data is saved. ``.h5`` extension will be appended if it does not already have one. axes : str Attribute labels for the data array axes. dtype : data-type, optional By default, the data-type is inferred from the input data. overwrite: bool, optional if True, overwrites the existing file if the file exists. """ fname, data = _init_write(data, fname, '.h5', dtype, overwrite) f = dxfile.dxtomo.File(fname, mode='w') f.add_entry(dxfile.dxtomo.Entry.data(data={ 'value': data, 'units': 'counts', 'description': 'transmission', 'axes': axes})) f.close()
[docs]def write_npy( data, fname='tmp/data.npy', dtype=None, overwrite=False): """ Write data to a binary file in NumPy ``.npy`` format. Parameters ---------- data : ndarray Array data to be saved. fname : str File name to which the data is saved. ``.npy`` extension will be appended if it does not already have one. """ fname, data = _init_write(data, fname, '.npy', dtype, overwrite) np.save(fname, data)
[docs]def write_tiff( data, fname='tmp/data.tiff', dtype=None, overwrite=False): """ Write image data to a tiff file. Parameters ---------- data : ndarray Array data to be saved. fname : str File name to which the data is saved. ``.tiff`` extension will be appended if it does not already have one. dtype : data-type, optional By default, the data-type is inferred from the input data. overwrite: bool, optional if True, overwrites the existing file if the file exists. """ fname, data = _init_write(data, fname, '.tiff', dtype, overwrite) import tifffile tifffile.imsave(fname, data)
[docs]def write_tiff_stack( data, fname='tmp/data.tiff', dtype=None, axis=0, digit=5, start=0, overwrite=False): """ Write data to stack of tiff file. Parameters ---------- data : ndarray Array data to be saved. fname : str Base file name to which the data is saved. ``.tiff`` extension will be appended if it does not already have one. dtype : data-type, optional By default, the data-type is inferred from the input data. axis : int, optional Axis along which stacking is performed. start : int, optional First index of file in stack for saving. digit : int, optional Number of digits in indexing stacked files. overwrite: bool, optional if True, overwrites the existing file if the file exists. """ fname, data = _init_write(data, fname, '.tiff', dtype, True) body = get_body(fname) ext = get_extension(fname) _data = np.swapaxes(data, 0, axis) for m in range(start, start + data.shape[axis]): _fname = body + '_' + '{0:0={1}d}'.format(m, digit) + ext if not overwrite: _fname = _suggest_new_fname(_fname, digit=1) write_tiff(_data[m - start], _fname, overwrite=overwrite)
[docs]def write_vtr(data, fname='tmp/data.vtr', down_sampling=(5, 5, 5)): """ Write the reconstructed data (img stackes) to vtr file (retangular grid) Parameters ---------- data : np.3darray reconstructed 3D image stacks with axis=0 as the omega fname : str file name of the output vtr file down_sampling : tuple down sampling steps along three axes Returns ------- None """ # vtk is only used here, therefore doing an in module import import vtk from vtk.util import numpy_support # convert to unit8 can significantly reduce the output vtr file # size, or just do a severe down-sampling data = _normalize_imgstacks(data[::down_sampling[0], ::down_sampling[1], ::down_sampling[2],]) * 255 # --init rectangular grid rGrid = vtk.vtkRectilinearGrid() coordArray = [vtk.vtkDoubleArray(), vtk.vtkDoubleArray(), vtk.vtkDoubleArray(), ] coords = np.array([np.arange(data.shape[i]) for i in range(3)]) coords = [0.5 * np.array([3.0 * coords[i][0] - coords[i][0 + int(len(coords[i]) > 1)]] + \ [coords[i][j-1] + coords[i][j] for j in range(1,len(coords[i]))] + \ [3.0 * coords[i][-1] - coords[i][-1 - int(len(coords[i]) > 1)]] ) for i in range(3) ] grid = np.array(list(map(len,coords)),'i') rGrid.SetDimensions(*grid) for i,points in enumerate(coords): for point in points: coordArray[i].InsertNextValue(point) rGrid.SetXCoordinates(coordArray[0]) rGrid.SetYCoordinates(coordArray[1]) rGrid.SetZCoordinates(coordArray[2]) # vtk requires x to be the fast axis # NOTE: # Proper coordinate transformation is required to connect the # tomography data with other down-stream analysis (such as FF-HEDM # and NF-HEDM). imgstacks = np.swapaxes(data, 0, 2) VTKarray = numpy_support.numpy_to_vtk(num_array=imgstacks.flatten().astype(np.uint8), deep=True, array_type=vtk.VTK_UNSIGNED_CHAR, ) VTKarray.SetName('img') rGrid.GetCellData().AddArray(VTKarray) rGrid.Modified() if vtk.VTK_MAJOR_VERSION <= 5: rGrid.Update() # output to file writer = vtk.vtkXMLRectilinearGridWriter() writer.SetFileName(fname) writer.SetDataModeToBinary() writer.SetCompressorTypeToZLib() if vtk.VTK_MAJOR_VERSION <= 5: writer.SetInput(rGrid) else: writer.SetInputData(rGrid) writer.Write()
[docs]def write_aps_1id_report(df_scanmeta, reportfn): """ Generate report of beam conditions based on given DataFrame of the metadata Parameters ---------- df_scanmeta : pd.DataFrame DataFrame of the parsed metadata dxreader.read_aps_1id_metafile(log_file) reportfn : str Output report file name (include path) Returns ------- pd.DataFrame Updated Dataframe with added beam conditions """ import matplotlib.pyplot as plt # add calculation of four beam quality # -- Temporal Beam Stability df_scanmeta['TBS'] = df_scanmeta['IC-E3']/df_scanmeta['IC-E3'].values[0] # -- Vertical Beam Stability df_scanmeta['VBS'] = df_scanmeta['IC-E1']/df_scanmeta['IC-E2'] # -- Beam Loss at Slit df_scanmeta['BLS'] = (df_scanmeta['IC-E1'] + df_scanmeta['IC-E2'])/df_scanmeta['IC-E3'] # -- Beam Loss during Travel df_scanmeta['BLT'] = df_scanmeta['IC-E5']/df_scanmeta['IC-E3'] # -- corresponding color code pltlbs = ['TBS', 'VBS', 'BLS', 'BLT' ] pltclrs = ['red', 'blue', 'magenta', 'cyan'] # start plot fig = plt.figure(figsize=(8,3)) ax = fig.add_subplot(111) lnclrs = cycle(['gray', 'lime', 'gray', 'black']) # -- plot one segment at a time for lb, clr in zip(pltlbs, pltclrs): addlabel=True for layerID in df_scanmeta['layerID'].unique(): tmpdf = df_scanmeta[df_scanmeta['layerID'] == layerID] for imgtype in tmpdf['type'].unique(): # plot the main curve currentSlice = tmpdf[tmpdf['type'] == imgtype] ax.plot(currentSlice['Date'], currentSlice[lb], linewidth=0.2, color=clr, label=lb if addlabel else '_nolegend_', alpha=0.5, ) addlabel = False # add the vertical guard tmpx = currentSlice['Date'].values tmpclr = next(lnclrs) for x in [tmpx[0], tmpx[-1]]: ax.plot([x, x], [1e-4, 1e2], color=tmpclr, linewidth=0.05, linestyle='dashed', alpha=0.1, ) # -- set canvas property ax.set_yscale('log') plt.legend(loc=0) plt.ylim([0.9, 2.0]) # 10% as cut range plt.xticks(rotation=45) # -- save the figure (both pdf and png) plt.savefig(reportfn, transparent=True, bbox_inches='tight', pad_inches=0.1, ) # -- clear/close figure plt.close() return df_scanmeta
def _normalize_imgstacks(img): """ Normalize image stacks on a per layer base Parameters ---------- img : np.3darray img stacks to be normalized Returns ------- np.3darray normalized image stacks """ return img/np.amax(img.reshape(img.shape[0], img.shape[1]*img.shape[2], ), axis=1, ).reshape(img.shape[0],1,1)