Source code for dnaMD.dnaMD

#!/usr/bin/env python
#
#
# This file is part of do_x3dna
#
# Author: Rajendra Kumar
# Copyright (C) 2014-2025  Rajendra Kumar
#
# do_x3dna uses 3DNA package (http://x3dna.org).
# Please cite the original publication of the 3DNA package:
# Xiang-Jun Lu & Wilma K. Olson (2003)
# 3DNA: a software package for the analysis, rebuilding and visualization of
# three-dimensional nucleic acid structures
# Nucleic Acids Res. 31(17), 5108-21.
#
# do_x3dna is a free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# do_x3dna is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with do_x3dna.  If not, see <http://www.gnu.org/licenses/>.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#============================================================================

import numpy as np
import re
import os
import sys
import string
import random
import math
import subprocess as sub
try:
    from scipy.interpolate import splprep, splev
    from scipy.spatial import distance as sp_dist
    scipy_imported = True
except:
    scipy_imported = False

HAVE_H5PY = False
try:
    import h5py
    HAVE_H5PY = True
except:
    pass


# parameter count -- equal to basepairs
basePairParameters = ['shear', 'stretch', 'stagger', 'buckle', 'propeller', 'opening' ]
backboneDihedrals =  [  'alpha S1', 'beta S1', 'gamma S1', 'delta S1', 'epsilon S1', 'zeta S1', 'chi S1',
                        'alpha S2', 'beta S2', 'gamma S2', 'delta S2', 'epsilon S2', 'zeta S2', 'chi S2'
                     ]

# parameter count -- less than one of basepairs
baseStepParameters = [ 'shift', 'slide', 'rise', 'tilt', 'roll', 'twist' ]
helicalBaseStepParameters = [ 'x-disp', 'y-disp', 'h-rise', 'inclination', 'tip', 'h-twist' ]
helicalRadiusParameters = [ 'radius s-1', 'radius s-2']
helicalAxisParameters = [   'helical x-axis', 'helical y-axis', 'helical z-axis' ]
helicalAxisSmoothParameters =  [ 'helical x-axis smooth', 'helical y-axis smooth', 'helical z-axis smooth' ]
groovesParameters = [ 'major groove', 'major groove refined',
                      'minor groove', 'minor groove refined' ]
bendingParameters = [ 'curvature',  'tangent']


# Parameters affected by masked
maskedParameters = helicalAxisSmoothParameters + bendingParameters

# Combine pasepair type parameters
basePairTypeParameters = basePairParameters + backboneDihedrals

# Combine basestep type parameters
baseStepTypeParameters = baseStepParameters + helicalBaseStepParameters \
                + helicalRadiusParameters + helicalAxisParameters \
                + helicalAxisSmoothParameters + groovesParameters \
                + bendingParameters


def getParameterType(param):
    if param in basePairTypeParameters:
        return 'bp'

    if param in baseStepTypeParameters:
        return 'bps'




[docs]
class DNA:
    """DNA class stores all data obtained from the input files.

    **To initialize this class:** ::

        dna = DNA(60)       # 60 is the number of basepairs

    This class also contains several methods (functions) that are discussed in following sections.

    The data inside the class is organized as a dictionary in following architecture:

    .. code-block:: bash

                 |------- bp  ---- BP number      ----  Parameter array (1D/2D)
        data-----|
                 |------- bps ---- BP Step Number ----  Parameter array (1D/2D)


    Parameters
    ----------
    num_bp : int
        Number of base-pairs in the DNA.
    filename : str
        Name of HDF5 file
    startBP : int
        Number ID of first basepair.


    Attributes
    ----------
    num_bp : int
        Number of base-pairs in the DNA.
    num_step : int
        Number of base-steps in the DNA.
    filename : str
        Name of HDF5 file
    startBP : int
        Number ID of first basepair.
    smooth_axis : bool
        If axis is smoothed and global axis is already determined.
    data : dictionary
        Dictionary of data. All data are stored in this dictionary.
    time : ndarray
        Trajectory time.
    mask : ndarray
        Boolean array indicating which frame of trajectory should be masked.
    h5 : object
        h5py.File object to read the data directly from file.


    """

    def __init__(self, num_bp, filename=None, startBP=1):
        self.num_bp = num_bp
        self.num_step = num_bp - 1
        self.filename = filename
        self.startBP = startBP
        self.smooth_axis = False
        self.data = dict()
        self.data['bp'] = dict()
        self.data['bps'] = dict()
        self.time = []
        self.mask = None
        self.h5 = None

        # Check h5py is installed or not
        if self.filename is not None and not HAVE_H5PY:
            raise ImportError( 'h5py module is not installed. Please use "pip install h5py" or "pip3 install h5py" to install h5py.' )

        for i in range(self.startBP, self.startBP + num_bp):
            self.data['bp'][str(i)] = dict()

        for i in range(self.startBP, self.startBP + self.num_step):
            self.data['bps'][str(i)] = dict()


        self._openFile()

    def __del__(self):
        if self.filename is not None and self.h5 is not None:
            try:
                self.h5.close()
            except:
                pass


[docs]
    def _openFile(self):
        """ Open the HDF5 file for reading/writing.

        This methad is called during the initialization of this class.
        """
        if self.filename is not None:
            self.h5 = h5py.File(self.filename, 'a')
        else:
            return

        if 'bp' not in self.h5:
            self.h5.create_group('bp')

        if 'bps' not in self.h5:
            self.h5.create_group('bps')

        for bp_type in ['bp', 'bps']:
            for bp_num in self.data[bp_type]:

                # Create group in case if new hdf5 file is opened
                if bp_num not in self.h5[bp_type]:
                    self.h5[bp_type].create_group(bp_num)

                # Sync data if old file is opened
                for parameters in self.h5[bp_type][bp_num]:
                    self.data[bp_type][bp_num][parameters] = self.h5[bp_type][bp_num][parameters][:]

        if 'mask' in self.h5:
            self.mask = self.h5['mask'][:]

        if 'time' in self.h5:
            self.time = self.h5['time'][:]

        if 'num_bp' in self.h5.attrs:
            self.num_bp = self.h5.attrs['num_bp']
        else:
            self.h5.attrs['num_bp'] = self.num_bp

        if 'num_step' in self.h5.attrs:
            self.num_step = self.h5.attrs['num_step']
        else:
            self.h5.attrs['num_step'] = self.num_step

        if 'startBP' in self.h5.attrs:
            self.startBP = self.h5.attrs['startBP']
        else:
            self.h5.attrs['startBP'] = self.startBP



[docs]
    def _set_time(self, time):
        """ Set time in both class and hdf5 file
        """
        if len(self.time) == 0 :
            self.time = np.array(time)
            if self.h5 is not None:
                self.h5.create_dataset('time', self.time.shape, dtype=self.time.dtype, data=self.time, compression="gzip", shuffle=True, scaleoffset=3)
        else:
            if(len(time) != len(self.time)):
                raise AssertionError("\nTime or number of frame mismatch in input files.\n Exiting...\n")



[docs]
    def _set_mask(self, mask):
        """ Set mask array in both class and hdf5 file
        """
        self.mask = mask.copy()
        if self.h5 is not None:
            if 'mask' in self.h5:
                self.h5.pop('mask')
            self.h5.create_dataset('mask', mask.shape, dtype=self.mask.dtype, data=mask, compression="gzip", shuffle=True)


    def _set_data(self, data, bp_type, bp_num, parameter, scaleoffset=3):
        if self.h5 is None:
            self.data[bp_type][bp_num][parameter] = data
        else:
            if parameter in self.h5[bp_type][bp_num]:          # Remove old data
                self.h5[bp_type][bp_num].pop( parameter )

            self.h5[bp_type][bp_num].create_dataset(parameter, data.shape, dtype=data.dtype, data=data, compression="lzf", shuffle=True, scaleoffset=scaleoffset)
            self.data[bp_type][bp_num][parameter] = self.h5[bp_type][bp_num][parameter]


[docs]
    def get_parameters(self, parameter, bp, bp_range=True, masked=False):
        """To get the parameters over all frame for the given range of base pair/steps

        parameters
        ----------
        parameter : str
            Input parameter name.
            Parameter from following list are accepted:

                * ``shear``
                * ``stretch``
                * ``stagger``
                * ``buckle``
                * ``propeller``
                * ``opening``
                * ``shift``
                * ``slide``
                * ``rise``
                * ``tilt``
                * ``roll``
                * ``twist``
                * ``x-disp``
                * ``y-disp``
                * ``h-Rise``
                * ``inclination``
                * ``tip``
                * ``h-Twist``
                * ``helical X-axis``
                * ``helical Y-axis``
                * ``helical z-axis``
                * ``helical X-axis smooth``
                * ``helical Y-axis smooth``
                * ``helical z-axis smooth``
                * ``curvature``
                * ``tangent``
                * ``radius S-1``
                * ``radius S-2``
                * ``major Groove``
                * ``major Groove Refined``
                * ``minor Groove``
                * ``minor Groove Refined``
                * ``alpha S1``
                * ``beta S1``
                * ``gamma S1``
                * ``delta S1``
                * ``epsilon S1``
                * ``zeta S1``
                * ``chi S1``
                * ``alpha S2``
                * ``beta S2``
                * ``gamma S2``
                * ``delta S2``
                * ``epsilon S2``
                * ``zeta S2``
                * ``chi S2``

        bp : 1D list
            List of base-pairs to analyze
            Example: ::

                bp = [6]                                # bp_range = False
                bp = [4,15]                             # bp_range = True
                bp = range(4,15)                        # bp_range = False
                bp = np.arange(4,15)                    # bp_range = False
                bp = [2,5,6,7,9,12,18]                  # bp_range = False

        bp_range : bool
            ``Default=True``: As shown above, if ``True``, bp is taken as a range otherwise list or numpy array.

        masked : bool
            ``Default=False``: To skip specific frames/snapshots. dnaMD.DNA.mask array should
            be set to use this functionality. This array contains boolean (either ``True`` or ``False``)
            value for each frame to mask the frames. Presently, mask array is automatically generated
            during :meth:`DNA.generate_smooth_axis` method to skip those frames where 3D fitting
            curve was not successful within the given critera.


        Returns
        -------
        parameters : 2D list
            ``parameters[bp][nframe] (2D list)``: where bp is number of base pairs/steps and nframe is total number of frames in the trajectory.

        """

        bpIndex, dum = get_idx_of_bp_parameters(bp, [], bp_range, startBP=self.startBP)
        append = False
        empty = False
        key = 'dummy'
        idx = 0
        data = []
        midx = []

        # Masking values according to mask array
        if masked and self.mask is None:
            print(" WARNING: mask is not set. Mask is set during helical axis smoothing. \n")
            masked = False

        for i in range(len(self.time)):
            if masked:
                if self.mask[i] == False:
                    midx.append(i)
            else:
                midx.append(i)

        param_type = getParameterType(parameter)
        if param_type is None:
            raise ValueError('ERROR: Incorrect parameter keyword: \"{0}\" .\n' .format(parameter))

        outData, bp_nums = [], []
        for i in range(len(bpIndex)):
            num = str( bpIndex[i]+self.startBP )
            if parameter in self.data[param_type][num]:
                tData = self.data[param_type][num][parameter][:]
                outData.append(tData[midx])
                bp_nums.append(num)
            else:
                raise ValueError('ERROR: The parameter \"{0}\" for base pair/step \"{1}\" is not set/loaded.\n' .format(parameter, num))

        return outData, bpIndex



[docs]
    def time_vs_parameter(self, parameter, bp, merge=False, merge_method='mean', masked=False):
        """To get the parameter of either a specfic base-pair/step or a DNA segment as a function of time.

        parameters
        ----------
        parameter : str
            Name of a base-pair or base-step or helical parameter.
            For details about accepted keywords, see ``parameter`` in the
            method :meth:`DNA.get_parameters`.

        bp : 1D list or array
            base-pairs to analyze
            Example: ::

                bp = [6]                                  # merge = False
                bp = [4,15]                               # merge = True

        merge : bool
            ``Default=False``. As shown above, if ``True``, bp should a list of
            range otherwise a list of single value. If ``bp = True``, the
            parameter for the respective DNA segment could be merged or
            calculated by ``merge_method``.

        merge_method : str
            Method to calculate the parameter of a DNA segment from local
            parameters of all base-pairs/steps that are between the range
            given through ``bp``.
            Currently accepted keywords are as follows:

                * ``merge_method = mean``: Average of local parameters
                * ``merge_method = sum``: Sum of local parameters

        masked : bool
            ``Default=False``. To skip specific frames/snapshots.
            ``DNA.mask`` array should be set to use this functionality.
            This array contains boolean (either ``True`` or ``False``) value
            for each frame to mask the frames. Presently, mask array is
            automatically generated during :meth:`DNA.generate_smooth_axis` to
            skip those frames where 3D fitting curve was not successful within
            the given criteria.


        Returns
        -------
        time : 1D array
            Array containing time of each frame from trajectory
        value : 1D array
            Array containing parameter values for each frame from trajectory

        """
        if not (isinstance(bp, list) or isinstance(bp, np.ndarray)):
            raise AssertionError(
                "type %s is not list or np.ndarray" % type(bp))

        if (len(bp) > 1) and (merge == False):
            raise AssertionError(
                "bp %s contains more than two values, whereas merge=False. Use either one value in bp or merge=True" % bp)
            exit(1)

        if len(bp) == 1:
            merge = False

        if (merge == True) and not ((merge_method == 'mean') or (merge_method == 'sum')):
            raise AssertionError(
                "merge method %s is not available." % merge_method)

        # Masking values according to mask array
        midx = []
        if masked and self.mask is None:
            print(" WARNING: mask is not set. Mask is set during helical axis smoothing. \n")
            masked = False

        for i in range(len(self.time)):
            if masked:
                if self.mask[i] == False:
                    midx.append(i)
            else:
                midx.append(i)

        if len(bp) == 1:
            param_value, bp_idx = self.get_parameters(parameter, bp, bp_range=False, masked=masked)
        else:
            param_value, bp_idx = self.get_parameters(parameter, bp, bp_range=True, masked=masked)

        if (merge == True) and (merge_method == 'mean'):
            return self.time, np.mean(param_value, axis=0)

        elif (merge == True) and (merge_method == 'sum'):
            return self.time[midx], np.sum(param_value, axis=0)

        else:
            return self.time[midx], param_value[0]



[docs]
    def parameter_distribution(self, parameter, bp, bins=30, merge=False, merge_method='mean', masked=False):
        """To get the parameter distribution of either a specific base-pair/step or a DNA segment over the MD simulation.

        parameters
        ----------
        parameter : str
            Name of a base-pair or base-step or helical parameter
            For details about accepted keywords, see ``parameter`` in the method
            :meth:`DNA.get_parameters`.

        bp : 1D list or array
            base-pairs to analyze
            Example: ::

                bp = [6]                                  # merge = False
                bp = [4,15]                               # merge = True

        bins int
            Number of bins to calculate histogram

        merge : bool
            ``Default=False``: As shown above, if ``True``, bp should a list of
            range otherwise a list of single value. If ``bp = True``, the
            parameter for the respective DNA segment could be merged or
            calculated by ``merge_method``.

        merge_method : str
            Method to calculate the parameter of a DNA segment from local
            parameters of all base-pairs/steps that are between the range given
            through ``bp``.
            Currently accepted keywords are as follows:

                * ``merge_method = mean``: Average of local parameters
                * ``merge_method = sum``: Sum of local parameters

        masked : bool
            ``Default=False``. To skip specific frames/snapshots.
            ``DNA.mask`` array should be set to use this functionality.
            This array contains boolean (either ``True`` or ``False``) value
            for each frame to mask the frames. Presently, mask array is
            automatically generated during :meth:`DNA.generate_smooth_axis` to
            skip those frames where 3D fitting curve was not successful within
            the given criteria.


        Returns
        -------
        values : 1D array
            Array containing parameter values
        density : 1D array
            Array containing density for respective parameter values

        """
        if not (isinstance(bp, list) or isinstance(bp, np.ndarray)):
            raise AssertionError(
                "type %s is not list or np.ndarray" % type(bp))

        if (len(bp) > 1) and (merge == False):
            raise AssertionError(
                "bp %s contains more than two values, whereas merge=False. Use either one value in bp or merge=True" % bp)
            exit(1)

        if len(bp) == 1:
            merge = False

        if (merge == True) and not ((merge_method == 'mean') or (merge_method == 'sum')):
            raise AssertionError(
                "merge method %s is not available." % merge_method)
            exit(1)

        if len(bp) == 1:
            param_value, bp_idx = self.get_parameters(
                parameter, bp, bp_range=False, masked=masked)
        else:
            param_value, bp_idx = self.get_parameters(
                parameter, bp, bp_range=True, masked=masked)

        if (merge == True) and (merge_method == 'mean'):
            param_value = np.mean(param_value, axis=0)

        elif (merge == True) and (merge_method == 'sum'):
            param_value = np.sum(param_value, axis=0)

        else:
            param_value = param_value[0]

        density, bin_edges = np.histogram(param_value, bins=bins, density=True)
        bin_width = bin_edges[1] - bin_edges[0]

        density = np.insert(density, 0, 0.0)
        density = np.append(density, 0.0)

        values = []
        for i in range(len(bin_edges) - 1):
            values.append((bin_edges[i] + bin_edges[i + 1]) / 2)

        values = np.asarray(values)
        values = np.append(values, values[-1] + bin_width)
        values = np.insert(values, 0, values[0] - bin_width)

        return np.array(values), density



[docs]
    def set_base_pair_parameters(self, filename, bp, parameters='all', bp_range=True):
        """To read and store basepairs parameters (shear, stretch, stagger, buckle, propeller and opening) from an input file.

        parameters
        ----------
        filename : str
            Input file, which is generated from do_x3dna. e.g. L-BP_g.dat

        bp : 1D list or array
            base-pairs to analyze
            Example: ::

                bp = [6]                                # bp_range = False
                bp = [4,15]                             # bp_range = True
                bp = range(4,15)                        # bp_range = False
                bp = np.arange(4,15)                    # bp_range = False
                bp = [2,5,6,7,9,12,18]                  # bp_range = False

        parameters : 1D list
            List of base-pairs parameters as follows:

                * ``shear``
                * ``stretch``
                * ``stagger``
                * ``buckle4``
                * ``propeller``
                * ``opening6``

            By default all six parameters will be extracted from the file.

        bp_range : bool
            ``Default=True``: As shown above, if ``True``, bp is taken as a range otherwise list or numpy array.

        """
        if not (isinstance(bp, list) or isinstance(bp, np.ndarray)):
            raise AssertionError(
                "type %s is not list or np.ndarray" % type(bp))

        if not (isinstance(parameters, list) or isinstance(parameters, np.ndarray)):
            if parameters == 'all':
                parameters = ['shear', 'stretch', 'stagger', 'buckle', 'propeller', 'opening']
            else:
                raise ValueError("ERROR: {0} is not accepted parameters!!! It should be either \"all\" or list of parameter names.".format(parameters))

        targetParameters = { 1:'shear', 2:'stretch', 3:'stagger', 4:'buckle', 5:'propeller', 6:'opening' }
        targetParametersReverse = dict((v,k) for k,v in targetParameters.items())

        # Check if requested parameters found within input file
        gotParametersInputFile = checkParametersInputFile(filename)
        if gotParametersInputFile is None:
            raise IOError(' Something wrong in input file {0}.\n Cannot read parameters.\n File should be an output from do_x3dna.'.format(filename))
        for parameter in parameters:
            if parameter not in gotParametersInputFile:
                raise ValueError(' Parameter {0} not found in input file. \n This file contains following parameters: \n {1}'.format(parameter, gotParametersInputFile))


        InputParamIndex = []
        for parameter in parameters:
            if parameter in targetParameters.values():
                InputParamIndex.append( targetParametersReverse[parameter] )
            else:
                print('\nWARNING: base pair parameters \"{0}\" not accepted. Skipping it !!\n' .format(parameter))

        if not InputParamIndex:
            raise ValueError("No acceptable base-pair parameters found!!!")

        data, time = read_param_file(filename, InputParamIndex, bp, bp_range, startBP=self.startBP)
        self._set_time(time)
        bpIndex, OutParamIndex = get_idx_of_bp_parameters(bp, InputParamIndex, bp_range, startBP=self.startBP)

        for i in range(len(data)):
            for j in range(len(data[i])):
                bp_num = str( bpIndex[i]+self.startBP )
                param = targetParameters[OutParamIndex[j]+1]
                self._set_data(data[i][j], 'bp', bp_num, param, scaleoffset=2)



[docs]
    def set_major_minor_groove(self, filename, bp_step, parameters='all', step_range=True):
        """To read and store Major and Minor grooves from an input file.

        * Minor groove : direct P-P distance
        * Minor Groove Refined : refined P-P distance which take into account the directions of the sugar-phosphate backbones
        * Major groove : direct P-P distance
        * Major Groove Refined : refined P-P distance which take into account the directions of the sugar-phosphate backbones

        .. warning::

                * The major and minor grooves (direct P-P) cannot be calculated for first and last two base-steps
                * The major and minor grooves (refined P-P) cannot be calculated for first and last three base-steps

        Parameters
        ----------
        filename : str
            Input file, which is generated from do_x3dna. e.g. MGroove_g.dat

        bp_step : 1D list or array
            base-steps to analyze
            Example: ::

                bp_step = [6]                                # step_range = False
                bp_step = [4,15]                             # step_range = True
                bp_step = range(4,15)                        # step_range = False
                bp_step = np.arange(4,15)                    # step_range = False
                bp_step = [2,5,6,7,9,12,18]                  # step_range = False

        parameters : 1D list
            List of groove parameter names as follows:
                * ``minor groove``
                * ``minor groove refined``
                * ``major groove``
                * ``major groove refined``

            By default all four groove parameters will be extracted from the file.

        step_range : bool
            ``Default=True``: As shown above, if ``True``, bp_step is taken as a range otherwise list or numpy array.

        """
        if not (isinstance(bp_step, list) or isinstance(bp_step, np.ndarray)):
            raise AssertionError(
                "type %s is not list or np.ndarray" % type(bp_step))

        if not (isinstance(parameters, list) or isinstance(parameters, np.ndarray)):
            if parameters == 'all':
                parameters = ['minor groove', 'minor groove refined', 'major groove', 'major groove refined']
            else:
                raise ValueError("ERROR: {0} is not accepted parameters!!! It should be either \"all\" or list of parameter names.".format(parameters))

        targetParameters = { 1:'minor groove', 2:'minor groove refined', 3:'major groove', 4:'major groove refined' }
        targetParametersReverse = dict((v,k) for k,v in targetParameters.items())

        # Check if requested parameters found within input file
        gotParametersInputFile = checkParametersInputFile(filename)
        if gotParametersInputFile is None:
            raise IOError(' Something wrong in input file {0}.\n Cannot read parameters.\n File should be an output from do_x3dna.'.format(filename))
        for parameter in parameters:
            if parameter not in gotParametersInputFile:
                raise ValueError(' Parameter {0} not found in input file. \n This file contains following parameters: \n {1}'.format(parameter, gotParametersInputFile))

        InputParamIndex = []
        for parameter in parameters:
            if parameter in targetParameters.values():
                InputParamIndex.append( targetParametersReverse[parameter] )
            else:
                print('\nWARNING: base pair parameters \"{0}\" not accepted. Skipping it !!\n' .format(parameter))

        data, time = read_param_file(filename, InputParamIndex, bp_step, step_range, word=True, startBP=self.startBP)
        self._set_time(time)

        bpIndex, OutParamIndex = get_idx_of_bp_parameters(bp_step, InputParamIndex, step_range, startBP=self.startBP)

        for i in range(len(data)):
            for j in range(len(data[i])):
                # terminal base-steps do not have major and minor grooves
                if data[i][j][0] != None:
                    bp_num = str( bpIndex[i]+self.startBP )
                    param = targetParameters[OutParamIndex[j]+1]
                    self._set_data(np.asarray(data[i][j], dtype=float), 'bps', bp_num, param, scaleoffset=1)



[docs]
    def set_backbone_dihedrals(self, filename, bp, parameters='all', bp_range=True):
        """To read and store backbone dihedrals (alpha, beta, gamma, delta, epsilon and zeta) and chi dihedral of both strands from an input file.

        .. note::

                * alpha:   O3'(i-1)-P-O5'-C5'
                * beta:    P-O5'-C5'-C4'
                * gamma:   O5'-C5'-C4'-C3'
                * delta:   C5'-C4'-C3'-O3'
                * epsilon: C4'-C3'-O3'-P(i+1)
                * zeta:    C3'-O3'-P(i+1)-O5'(i+1)
                * chi for pyrimidines(Y): O4'-C1'-N1-C2
                * chi for purines(R): O4'-C1'-N9-C4


        Parameters
        ----------
        filename : str
            Input file, which is generated from do_x3dna. e.g. BackBoneCHiDihedrals_g.dat

        bp : 1D list or array
            base-pairs to analyze
            Example: ::

                bp = [6]                                # bp_range = False
                bp = [4,15]                             # bp_range = True
                bp = range(4,15)                        # bp_range = False
                bp = np.arange(4,15)                    # bp_range = False
                bp = [2,5,6,7,9,12,18]                  # bp_range = False

        parameters : str or 1D list
            Either ``All`` to extract all angles or list of angles as follows:

                * ``alpha S1``
                * ``beta S1``
                * ``gamma S1``
                * ``delta S1``
                * ``epsilon S1``
                * ``zeta S1``
                * ``chi S1``
                * ``alpha S2``
                * ``beta S2``
                * ``gamma S2``
                * ``delta S2``
                * ``epsilon S2``
                * ``zeta S2``
                * ``chi S2``

        bp_range : bool
            ``Default=True``: As shown above, if ``True``, bp is taken as a range otherwise list or numpy array.

        """
        if not (isinstance(bp, list) or isinstance(bp, np.ndarray)):
            raise AssertionError(
                "type %s is not list or np.ndarray" % type(bp))

        if not (isinstance(parameters, list) or isinstance(parameters, np.ndarray)):
            if parameters == 'all':
                parameters = [ 'alpha S1', 'beta S1', 'gamma S1', 'delta S1', 'epsilon S1', 'zeta S1', 'chi S1',
                               'alpha S2', 'beta S2', 'gamma S2', 'delta S2', 'epsilon S2', 'zeta S2', 'chi S2'  ]
            else:
                raise ValueError(" ERROR: {0} is not accepted parameters!!! It should be either \"all\" or list of parameter names.".format(parameters) )

        targetParameters = { 1:'alpha S1', 2:'beta S1',  3:'gamma S1',  4:'delta S1',  5:'epsilon S1',  6:'zeta S1',  7:'chi S1',
                             8:'alpha S2', 9:'beta S2', 10:'gamma S2', 11:'delta S2', 12:'epsilon S2', 13:'zeta S2', 14:'chi S2'  }
        targetParametersReverse = dict((v,k) for k,v in targetParameters.items())

        # Check if requested parameters found within input file
        gotParametersInputFile = checkParametersInputFile(filename)
        if gotParametersInputFile is None:
            raise IOError(' Something wrong in input file {0}.\n Cannot read parameters.\n File should be an output from do_x3dna.'.format(filename))
        for parameter in parameters:
            if parameter not in gotParametersInputFile:
                raise ValueError(' Parameter {0} not found in input file. \n This file contains following parameters: \n {1}'.format(parameter, gotParametersInputFile))

        InputParamIndex = []
        for parameter in parameters:
            if parameter in targetParameters.values():
                InputParamIndex.append( targetParametersReverse[parameter] )
            else:
                print('\nWARNING: base pair parameters \"{0}\" not accepted. Skipping it !!\n' .format(parameter))

        if not InputParamIndex:
            raise ValueError("No acceptable base-pair parameters found!!!")

        data, time = read_param_file(filename, InputParamIndex, bp, bp_range, word=True, startBP=self.startBP)
        self._set_time(time)

        bpIndex, OutParamIndex = get_idx_of_bp_parameters(bp, InputParamIndex, bp_range, startBP=self.startBP)

        for i in range(len(data)):
            bp_num = str( bpIndex[i]+self.startBP )
            for j in range(len(data[i])):
                if data[i][j][0] != None:
                    param = targetParameters[OutParamIndex[j]+1]
                    self._set_data(np.asarray(data[i][j], dtype=float), 'bp', bp_num, param, scaleoffset=1)



[docs]
    def set_helical_radius(self, filename, bp, atomname='P', full=False, bp_range=True):
        """To read and set local helical radius of both strand

        Parameters
        ----------
        filename : str
            Input file, which is generated from do_x3dna. e.g. HelixRad_g.dat

        bp : 1D list or array
            base-pairs to analyze
            Example: ::

                bp = [6]                                # bp_range = False
                bp = [4,15]                             # bp_range = True
                bp = range(4,15)                        # bp_range = False
                bp = np.arange(4,15)                    # bp_range = False
                bp = [2,5,6,7,9,12,18]                  # bp_range = False

        atomname : str
            Atom name to consider for the DNA helix (accepted keywords:
            ``P``, ``O4*``, ``O4'``, ``C1*`` and ``C1``)

        full : bool
            To calculate full helical radius. Overrides atomname option and
            uses atom ``P``, and 1 A is added to the radius calculated by 3DNA
            package

        bp_range : bool
            ``Default=True``: As shown above, if ``True``, bp is taken as a
            range otherwise list or numpy array.

        """
        if not (isinstance(bp, list) or isinstance(bp, np.ndarray)):
            raise AssertionError(
                "type %s is not list or np.ndarray" % type(bp))

        if not ((atomname == 'P') or (atomname == 'O4*') or (atomname == 'C1*') or (atomname == 'O4\'') or (atomname == 'C1\'')):
            print (
                '\n This atomname {0} is not implemented... Exiting\n' .format(atomname))

        # Check if requested parameters found within input file
        gotParametersInputFile = checkParametersInputFile(filename)
        if gotParametersInputFile is None:
            raise IOError(' Something wrong in input file {0}.\n Cannot read parameters.\n File should be an output from do_x3dna.'.format(filename))
        for p in helicalRadiusParameters:
            if p not in gotParametersInputFile:
                raise ValueError(' Helical radius not found in input file. \n This file contains following parameters: \n {0}'.format(gotParametersInputFile))

        parameter = []
        if (atomname == 'P') or (full):
            parameter = [1, 4]

        if (atomname == 'O4*' or atomname == 'O4\'') and (not full):
            parameter = [2, 5]

        if (atomname == 'C1*' or atomname == 'C1\'') and (not full):
            parameter = [3, 6]


        data, time = read_param_file(filename, [1, 2, 3, 4, 5, 6], bp, bp_range, startBP=self.startBP)
        self._set_time(time)

        bp_idx, param_idx = get_idx_of_bp_parameters(bp, parameter, bp_range, startBP=self.startBP)

        if full:
            data = np.add(data, 1.0)

        for i in range(len(data)):
            bp_num = str( bp_idx[i]+self.startBP )
            if (atomname == 'P') or (full):
                self._set_data(data[i][0], 'bps', bp_num, 'radius s-1', scaleoffset=1)
                self._set_data(data[i][3], 'bps', bp_num, 'radius s-2', scaleoffset=1)

            if (atomname == 'O4*' or atomname == 'O4\''):
                self._set_data(data[i][1], 'bps', bp_num, 'radius s-1', scaleoffset=1)
                self._set_data(data[i][4], 'bps', bp_num, 'radius s-2', scaleoffset=1)

            if (atomname == 'C1*' or atomname == 'C1\''):
                self._set_data(data[i][2], 'bps', bp_num, 'radius s-1', scaleoffset=1)
                self._set_data(data[i][5], 'bps', bp_num, 'radius s-2', scaleoffset=1)



[docs]
    def set_base_step_parameters(self, filename, bp_step, parameters='all', step_range=True, helical=False):
        """To read and store base-step (Shift, Slide, Rise, Tilt, Roll and Twist) and
        helical base-step (X-disp, Y-disp, h-Rise, Inclination, Tip and h-Twist) parameters from an input file

        Parameters
        ----------
        filename : str
            Input file, which is generated from do_x3dna. e.g. L-BPS_g.dat or L-BPH_g.dat.

        bp_step : 1D list or array
            base-steps to analyze
            Example: ::

                bp_step = [6]                                # step_range = False
                bp_step = [4,15]                             # step_range = True
                bp_step = range(4,15)                        # step_range = False
                bp_step = np.arange(4,15)                    # step_range = False
                bp_step = [2,5,6,7,9,12,18]                  # step_range = False

        parameters : str or 1D list
            Either ``All`` to extract all parameter available in input file
            or list of either base-step or helical base-step parameter as follows:

            If helical = ``False``:

                * ``shift``
                * ``slide``
                * ``rise``
                * ``tilt``
                * ``roll``
                * ``twist``

            If helical = ``True``:

                * ``x-disp``
                * ``y-disp``
                * ``h-rise``
                * ``inclination``
                * ``tip``
                * ``h-twist``

        step_range : bool
            ``Dfault=True``: As shown above, if ``True``, bp_step is taken as a range otherwise list or numpy array.

        helical : bool
            If ``True``, parameters in input file will be considered as helical base-steps parameters
            If ``False``, parameters will be considered as base-steps parameters.

        """
        if not (isinstance(bp_step, list) or isinstance(bp_step, np.ndarray)):
            raise AssertionError(
                "type %s is not list or np.ndarray" % type(bp_step))

        if not (isinstance(parameters, list) or isinstance(parameters, np.ndarray)):
            if parameters == 'all' and not helical:
                parameters = ['shift', 'slide', 'rise', 'tilt', 'roll', 'twist']
            elif parameters == 'all' and helical:
                parameters = ['x-disp', 'y-disp', 'h-rise', 'inclination', 'tip', 'h-twist']
            else:
                raise ValueError(" ERROR: {0} is not accepted parameters!!! It should be either \"all\" or list of parameter names.".format(parameters) )

        targetParameters = None
        targetParametersReverse = None
        if helical:
            targetParameters = { 1:'x-disp', 2:'y-disp', 3:'h-rise', 4:'inclination', 5:'tip', 6:'h-twist' }
        else:
            targetParameters = { 1:'shift', 2:'slide', 3:'rise', 4:'tilt', 5:'roll', 6:'twist' }
        targetParametersReverse = dict((v,k) for k,v in targetParameters.items())

        # Check if requested parameters found within input file
        gotParametersInputFile = checkParametersInputFile(filename)
        if gotParametersInputFile is None:
            raise IOError(' Something wrong in input file {0}.\n Cannot read parameters.\n File should be an output from do_x3dna.'.format(filename))
        for parameter in parameters:
            if parameter not in gotParametersInputFile:
                raise ValueError(' Parameter {0} not found in input file. \n This file contains following parameters: \n {1}'.format(parameter, gotParametersInputFile))

        InputParamIndex = []
        for parameter in parameters:
            if parameter in targetParameters.values():
                InputParamIndex.append( targetParametersReverse[parameter] )
            else:
                print('\nWARNING: base pair parameters \"{0}\" not accepted. Skipping it !!\n' .format(parameter))

        if not InputParamIndex:
            raise ValueError("No acceptable base-pair parameters found!!!")

        data, time = read_param_file(filename, InputParamIndex, bp_step, step_range, startBP=self.startBP)
        self._set_time(time)

        bpIndex, OutParamIndex = get_idx_of_bp_parameters(bp_step, InputParamIndex, step_range, startBP=self.startBP)

        for i in range(len(data)):
            for j in range(len(data[i])):
                bp_num = str( bpIndex[i]+self.startBP )
                param = targetParameters[OutParamIndex[j]+1]
                self._set_data(data[i][j], 'bps', bp_num, param, scaleoffset=2)



[docs]
    def get_mean_error(self, bp, parameter, err_type='std', bp_range=True, merge_bp=1, merge_method='mean', masked=False, tool='gmx analyze'):
        """To calculate average and error of the given parameter for the given set of base-pairs/steps

        .. warning::
                To calculate errors by using ``error = 'acf'`` or ``error = 'block'``,
                GROMACS tool ``g_analyze`` or ``gmx analyze`` should be present in ``$PATH``.

        Parameters
        ----------

        bp : 1D list or array
            base-pairs to analyze
            Example: ::

                bp = [6]                                # bp_range = False
                bp = [4,15]                             # bp_range = True
                bp = range(4,15)                        # bp_range = False
                bp = np.arange(4,15)                    # bp_range = False
                bp = [2,5,6,7,9,12,18]                  # bp_range = False

        parameter : str
            Name of a parameter. For details about accepted keywords, see ``parameter`` in the method :meth:`DNA.get_parameters`.

        error : str
            Method of error estimation.
            Currently accepted method as follows:

                * ``error = 'std'``   : Standard Deviation
                * ``error = 'acf'``   : Standard error using autocorrelation time (requires: ``g_analyze`` or ``gmx analyze``)
                * ``error = 'block'`` : Standard error using block averaging method (requires: ``g_analyze`` or ``gmx analyze``)

        bp_range : bool
            ``Default=True``: As shown above, if ``True``, bp is taken as a range otherwise list or numpy array.

        merge_bp : int
            Number of base-pairs or steps to merge for creating the small DNA segments

        merge_method : str
            Method to calculate the parameter of a DNA segment from local
            parameters of all base-pairs/steps that are between the range
            given through ``bp``.
            Currently accepted keywords are as follows:

                * ``merge_method = mean``: Average of local parameters
                * ``merge_method = sum``: Sum of local parameters

        masked : bool
            ``Default=False``. To skip specific frames/snapshots.
            ``DNA.mask`` array should be set to use this functionality.
            This array contains boolean (either ``True`` or ``False``) value
            for each frame to mask the frames. Presently, mask array is
            automatically generated during :meth:`DNA.generate_smooth_axis` to
            skip those frames where 3D fitting curve was not successful within
            the given criteria

        tool : str
            Gromacs tool ``g_analyze`` or ``gmx analyze`` or ``gmx_mpi analyze`` etc.
            It will be used to calculate autocorrelation time or  block averaging error.
            It should be present in ``$PATH``

        Returns
        -------
        basepairs/steps : 1D array
            Number of base pair-steps. If ``merge_bp>1``, middle number will be returned.
        values : 1D array
            Average values of the parameter
        errors : 1D array
            Error values for corresponding average values

        """

        merge = False

        if(not bp_range) and (merge_bp > 1):
            print (
                "\nERROR: Merging of base pairs/steps only possible with given base pair/steps range\n")
            exit(1)

        if(bp_range) and (merge_bp > 1):
            merge = True

        if (merge == True) and not ((merge_method == 'mean') or (merge_method == 'sum')):
            raise AssertionError(
                "merge method %s is not available." % merge_method)
            exit(1)

        data, bp_idx = self.get_parameters(parameter, bp, bp_range, masked=masked)

        bp_number = np.add(bp_idx, self.startBP)
        data = np.array(data)

        # Merging base pairs/step data for the given parameters
        merge_data = []
        mid_bin = []
        if(merge):
            i = 0
            while(1):
                start = i
                if((i + merge_bp) >= len(bp_idx)):
                    end = i + (len(bp_idx) - i)
                else:
                    end = i + merge_bp

                if(start < end):
                    mid_bin.append(
                        int(start + (end - start) / 2) + bp_number[0])
                    if (merge_method == 'mean'):
                        merge_data.append(np.mean(data[start:end], axis=0))
                    if (merge_method == 'sum'):
                        merge_data.append(np.sum(data[start:end], axis=0))

                if(i >= len(bp_idx)):
                    break

                i += merge_bp

            merge_data = np.array(merge_data)

        if (err_type == 'std'):
            if(merge):
                error = np.std(merge_data, axis=1)
            else:
                error = np.std(data, axis=1)

        if (err_type == 'acf') or (err_type == 'block'):
            if(merge):
                error = get_error(self.time, merge_data, len(mid_bin), err_type=err_type, tool=tool)
            else:
                error = get_error(self.time, data, len(bp_idx), err_type=err_type, tool=tool)

        if(merge):
            return mid_bin, np.mean(merge_data, axis=1), error
        else:
            return bp_number, np.mean(data, axis=1), error



[docs]
    def set_helical_axis(self, filename, step_range=False, step=None):
        """
        To read and set local helical-axis positions from an input file.

        Parameters
        ----------
        filename : str
            Input file, which is generated from do_x3dna. e.g. HelAxis_g.dat
        step_range : bool
            * ``step_range = True`` : read axis coordinates of base-steps for the given range of base-steps
            * ``step_range = False``: read axis coordinates of all base-steps
        step : list
            List containing lower and higher limit of base-steps range.
                * This option only works with ``step_range=True``.
                * This list should not contain more than two number.
                * First number should be less than second number.

            Example for base-step 4 to 15:
                ``step = [4,15]         # step_range = True``

        """

        if (step_range):
            if not isinstance(step, list):
                raise AssertionError("type %s is not list" % type(step))
            if (len(step) > 2):
                print (
                    "ERROR: Range for helical axis should be list of two numbers, e.g. step=[1, 20] \n")
                exit(1)

        if (step_range) and (step == None):
            raise ValueError(
                "See, documentation for step  and step_range usage!!!")

        # Check if requested parameters found within input file
        gotParametersInputFile = checkParametersInputFile(filename)
        if gotParametersInputFile is None:
            raise IOError(' Something wrong in input file {0}.\n Cannot read parameters.\n File should be an output from do_x3dna.'.format(filename))
        for p in helicalAxisParameters:
            if p not in gotParametersInputFile:
                raise ValueError(' Helical axis not found in input file. \n This file contains following parameters: \n {0}'.format(gotParametersInputFile))

        targetParameters = { 1:'helical x-axis', 2:'helical y-axis', 3:'helical z-axis' }

        if (step_range):
            if (len(step) != 2):
                raise ValueError("See, documentation for step usage!!!")

            if step[0] > step[1]:
                raise ValueError("See, documentation for step usage!!!")
            data, time = read_param_file(filename, [1, 2, 3], step, True, startBP=self.startBP)
        else:
            data, time = read_param_file(filename, [1, 2, 3], [1, self.num_step], True, startBP=self.startBP)

        self._set_time(time)

        if (step_range):
            bp_idx, param_idx = get_idx_of_bp_parameters(step, [], True, startBP=self.startBP)
        else:
            bp_idx, param_idx = get_idx_of_bp_parameters([1, self.num_step], [], True, startBP=self.startBP)

        for i in range(len(data)):
            for j in range(len(data[i])):
                bp_num = str( bp_idx[i]+self.startBP )
                param = targetParameters[j+1]
                self._set_data(data[i][j], 'bps', bp_num, param, scaleoffset=2)



[docs]
    def generate_smooth_axis(self, step_range=False, step=None, smooth=500.0, spline=3, fill_point=6, cut_off_angle=20):
        """To determine the global helical axis by smoothing local axis using spline interpolation.

        .. note::
            A 3D curve is fitted on local helical axis that are calculated using
            ``do_x3dna`` tool. Sometimes in few frames, fitting **may not** be
            accurate and produces artifact. To record these frames, ``DNA.mask``
            array containing boolean values are generated. If value is ``True``,
            fitting might not be correct and vice versa. This array could be
            used in later analysis to skip/mask the frames containing inaccurate
            axis.

        .. warning::
                This function requires `SciPy package <http://www.scipy.org/>`_.

        Parameters
        ----------
        step_range : bool
            * ``step_range = True`` : Make axis smooth for the given range of base-steps
            * ``step_range = False``: Make axis smooth for for entire DNA. If original helical-axis of any base-step
                will be found to be not available, error will be raised.

        step : list
            List containing lower and higher limit of base-steps range.
                * This option only works with ``step_range=True``.
                * This list should not contain more than two number.
                * First number should be less than second number.

            Example for base-step 4 to 15:
                ``step = [4,15]         # step_range = True``

        smooth : float
            A smoothing condition. For more details, see about ``s = None``, which is passed into
            `scipy.interpolate.splprep() <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.interpolate.splprep.html>`_ method.

            .. warning ::
                * Lower value may lead to an artifact of local sharp kink in the smoothed axis.
                * Higher value may lead to the calculation of wrong helical axis.

        spline : int
            Degree of spline. For more details, see about ``k = 3``, which is passed into
            `scipy.interpolate.splprep() <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.interpolate.splprep.html>`_ method.

        fill_point : int
            Number of intrapolated points between two adjacent helical-axis coordinates.

        cut_off_angle : float
            Cut-off bending angle to define sharp kink in fitted curve. If angle
            in fitted curve is larger than this cut-off, refitting will be performed
            after deleting few of the original helical axis positions. If after
            this deletions, bending angle will not reduce below cut-off angle,
            value of ``smooth`` will be increased by 100 and entire cycle of
            fitting-refitting will be performed. When, value of ``smooth`` increases
            to more than 10000 during this fitting-refitting cycles, fitting process
            will be stopped with a warning message.

            To record the frames with bad fitting, ``True`` value will be
            stored in ``DNA.mask`` array for respective frame.

        """

        if not scipy_imported:
            raise ImportError(
                "SciPy package is not available. Please visit http://www.scipy.org/install.html for download and installation instructions.\n")

        if (step_range) and (step == None):
            raise ValueError(
                "See, documentation for step  and step_range usage!!!")

        bp_idx = []

        if step_range:
            if (len(step) != 2):
                raise ValueError("See, documentation for step usage!!!")

            if step[0] > step[1]:
                raise ValueError("See, documentation for step usage!!!")
            RawX, bp_idx = self.get_parameters( 'helical x-axis', step, bp_range=True)
            RawY, dummy = self.get_parameters( 'helical y-axis', step, bp_range=True)
            RawZ, dummy = self.get_parameters( 'helical z-axis', step, bp_range=True)
        else:
            RawX, bp_idx = self.get_parameters( 'helical x-axis', [1, self.num_step], bp_range=True)
            RawY, dummy = self.get_parameters( 'helical y-axis', [1, self.num_step], bp_range=True)
            RawZ, dummy = self.get_parameters( 'helical z-axis', [1, self.num_step], bp_range=True)

        RawX = np.array(RawX).T
        RawY = np.array(RawY).T
        RawZ = np.array(RawZ).T

        smoothX, smoothY, smoothZ = [], [], []

        nframes = len(self.time)

        maskArray = np.zeros(nframes, dtype=bool)

        for i in range(nframes):

            frame_number = i + 1
            if((frame_number < 100) and (frame_number % 10 == 0)) or ((frame_number < 1000) and (frame_number % 100 == 0)) or (frame_number % 1000 == 0):
                sys.stdout.write("\rFitting spline curve on helical axis of frame %d out of %d frames" % (
                    frame_number, nframes))
                sys.stdout.flush()

            xsmooth, ysmooth, zsmooth, mask = fit_axis(bp_idx, frame_number, RawX[i], RawY[i], RawZ[i], smooth, spline, fill_point, cut_off_angle)
            maskArray[i] = mask

            smoothX.append(xsmooth)
            smoothY.append(ysmooth)
            smoothZ.append(zsmooth)

        sys.stdout.write("\nFinished spline curve fitting...\n")
        sys.stdout.flush()

        smoothX = np.asarray(smoothX).T
        smoothY = np.asarray(smoothY).T
        smoothZ = np.asarray(smoothZ).T

        self.smooth_axis = True
        for i in range(len(bp_idx)):
            bp_num = str( bp_idx[i]+self.startBP )
            self._set_data(smoothX[i], 'bps', bp_num, 'helical x-axis smooth', scaleoffset=2)
            self._set_data(smoothY[i], 'bps', bp_num, 'helical y-axis smooth', scaleoffset=2)
            self._set_data(smoothZ[i], 'bps', bp_num, 'helical z-axis smooth', scaleoffset=2)

        # Set mask array
        self._set_mask(maskArray)



[docs]
    def write_haxis_pdb(self, filename='helical_axis.pdb', step_range=False, step=None, write_smooth_axis=True, write_orig_axis=False, write_curv=False, scale_curv=1):
        """To write trajectory of helcial-axis as a PDB format file.

        Both local helical axis and global (smoothed) axis can be written to PDB file.
        For global axis, curvature could be written in B-factor field of PDB file.

        Parameters
        ----------
        filename : str
            Name of the output PDB format file.

        step_range : bool
            * ``step_range = True`` : Make axis smooth for the given range of base-steps
            * ``step_range = False``: Make axis smooth for entire DNA. If original helical-axis of any base-step will
                be found to be not available, error will be raised.

        step : list
            List containing lower and higher limit of base-steps range.
                * This option only works with ``step_range=True``.
                * This list should not contain more than two number.
                * First number should be less than second number.

            Example for base-step 4 to 15:
                ``step = [4,15]         # step_range = True``

        write_smooth_axis : bool
            Write coordinates of smoothed helical axis as chain A.

        write_orig_axis : bool
            Write coordinates of original helical axis (output from do_x3dna) as chain B.

        write_curv : bool
            Write curvature of smoothed helical axis in B-factor column of PDB file.

        scale_curv : int
            Scaling of curvature. ``curvature * scale_curv`` is written in  B-factor column of PDB file.

        """

        if (step_range) and (step == None):
            raise ValueError(
                "See, documentation for step  and step_range usage!!!")

        if not write_orig_axis and not write_smooth_axis:
            raise ValueError(
                "Nothing to write as both \"write_orig_axis=False\" and \"write_smooth_axis=False\" !!!")

        if step_range:
            if (len(step) != 2):
                raise ValueError("See, documentation for step usage!!!")

            if step[0] > step[1]:
                raise ValueError("See, documentation for step usage!!!")

            # Original helical axis
            if (write_orig_axis):
                RawX, bp_idx = self.get_parameters(
                    'helical x-axis', step, bp_range=True)
                RawY, dummy = self.get_parameters(
                    'helical y-axis', step, bp_range=True)
                RawZ, dummy = self.get_parameters(
                    'helical z-axis', step, bp_range=True)

            # Smoothed helical axis
            if (write_smooth_axis):
                SmoothX, bp_idx = self.get_parameters(
                    'helical x-axis smooth', step, bp_range=True)
                SmoothY, bp_idx = self.get_parameters(
                    'helical y-axis smooth', step, bp_range=True)
                SmoothZ, bp_idx = self.get_parameters(
                    'helical z-axis smooth', step, bp_range=True)

            # curvature
            if (write_curv):
                curvature, bp_idx = self.get_parameters(
                    'curvature', step, bp_range=True)

        else:

            # Original helical axis
            if (write_orig_axis):
                RawX, bp_idx = self.get_parameters(
                    'helical x-axis', [1, self.num_step], bp_range=True)
                RawY, dummy = self.get_parameters(
                    'helical y-axis', [1, self.num_step], bp_range=True)
                RawZ, dummy = self.get_parameters(
                    'helical z-axis', [1, self.num_step], bp_range=True)

            # Smoothed helical axis
            if (write_smooth_axis):
                SmoothX, bp_idx = self.get_parameters(
                    'helical x-axis smooth', [1, self.num_step], bp_range=True)
                SmoothY, bp_idx = self.get_parameters(
                    'helical y-axis smooth', [1, self.num_step], bp_range=True)
                SmoothZ, bp_idx = self.get_parameters(
                    'helical z-axis smooth', [1, self.num_step], bp_range=True)

            # curvature
            if (write_curv):
                curvature, bp_idx = self.get_parameters(
                    'curvature', [1, self.num_step], bp_range=True)

        if (write_orig_axis):
            RawX = np.array(RawX).T
            RawY = np.array(RawY).T
            RawZ = np.array(RawZ).T

        if (write_smooth_axis):
            SmoothX = np.array(SmoothX).T
            SmoothY = np.array(SmoothY).T
            SmoothZ = np.array(SmoothZ).T

        if (write_curv):
            curvature = np.array(curvature).T

        f = open(filename, 'w')

        for i in range(len(self.time)):
            f.write('%-6s    %4d\n' % ("MODEL", i + 1))

            bfactor = 0.00

            if (write_smooth_axis):
                for j in range(len(SmoothX[i])):

                    if (write_curv):
                        bfactor = curvature[i][j] * scale_curv

                    f.write('%-6s%5d %4s%1s%3s %1s%4d%1s   %8.3f%8.3f%8.3f%6.2f%6.2f\n' % ("ATOM", j + 1, "CA",
                                                                                           " ", "AXS", "A", j + 1, " ", SmoothX[i][j], SmoothY[i][j], SmoothZ[i][j], 1.00, bfactor))

                for j in range(len(SmoothX[i]) - 1):
                    f.write('CONECT %4d %4d\n' % (j + 1, j + 2))

                f.write("TER\n")

            if (write_orig_axis):
                atomstart = 0
                if (write_smooth_axis):
                    atomstart = len(SmoothX[i])

                for j in range(len(RawX[i])):
                    f.write('%-6s%5d %4s%1s%3s %1s%4d%1s   %8.3f%8.3f%8.3f%6.2f%6.2f\n' % ("ATOM", j + 1 +
                                                                                           atomstart, "O", " ", "AXS", "B", j + 1, " ", RawX[i][j], RawY[i][j], RawZ[i][j], 1.00, 0.00))

                for j in range(len(RawX[i]) - 1):
                    f.write('CONECT %4d %4d\n' %
                            (j + 1 + atomstart, j + 2 + atomstart))

                f.write("TER\n")

            f.write("ENDMDL\n")

        f.close()



[docs]
    def calculate_curvature_tangent(self, step_range=False, step=None, store_tangent=False):
        """To calculate curvatures and tangent vectors along the helical axis.

        The curvature and tangent vectors are calculated using Frenet-Serret formula.
        The calculated values are stored in ``DNA.data`` dictionary and also in HDF5 file.

        Parameters
        ----------
        step_range : bool
            * ``step_range = True`` : Calculate curvature and tangent vectors for the given range of base-steps
            * ``step_range = False``: Calculate curvature and tangent vectors for entire DNA. If smoothed helical-axis of any base-step will be found to be not available, error will be raised.

        step : list
            List containing lower and higher limit of base-steps range.
                * This option only works with ``step_range=True``.
                * This list should not contain more than two number.
                * First number should be less than second number.

            Example for base-step 4 to 15:
                ``step = [4,15]         # step_range = True``

        store_tangent : bool
            * ``store_tangent = True`` : The calculated tangent vectors will be stored for later use.
            * ``store_tangent = False``:  The calculated tangent vectors will be discarded.

            In case of HDF5 file, calculated tangents will be stored in this
            file and it will not add cost to memory. However, without HDF5 file,
            storing tangents in ``DNA.data`` will be expansive for memory.

        """

        if not self.smooth_axis:
            raise ValueError(
                "The helical axis is not smooth. At first, smooth the axis using generate_smooth_axis() method as described in http://rjdkmr.github.io/do_x3dna/apidoc.html#dnaMD.DNA.generate_smooth_axis.")

        if (step_range) and (step == None):
            raise ValueError(
                "See, documentation for step  and step_range usage!!!")

        if step_range:
            if (len(step) != 2):
                raise ValueError("See, documentation for step usage!!!")

            if step[0] > step[1]:
                raise ValueError("See, documentation for step usage!!!")

            X, bp_idx = self.get_parameters(
                'helical x-axis smooth', step, bp_range=True)
            Y, bp_idx = self.get_parameters(
                'helical y-axis smooth', step, bp_range=True)
            Z, bp_idx = self.get_parameters(
                'helical z-axis smooth', step, bp_range=True)
        else:
            X, bp_idx = self.get_parameters(
                'helical x-axis smooth', [1, self.num_step], bp_range=True)
            Y, bp_idx = self.get_parameters(
                'helical y-axis smooth', [1, self.num_step], bp_range=True)
            Z, bp_idx = self.get_parameters(
                'helical z-axis smooth', [1, self.num_step], bp_range=True)

        X = np.asarray(X).T
        Y = np.asarray(Y).T
        Z = np.asarray(Z).T

        curvature, tangent = [], []

        for i in range(len(self.time)):

            # Curvature calculation
            xyz = np.vstack((X[i], Y[i], Z[i])).T
            T, N, B, k_temp, t_temp = frenet_serret(xyz)

            curvature.append(k_temp.flatten())

            if(store_tangent):
                tangent.append(T)

        curvature = np.asarray(curvature).T
        for i in range(len(bp_idx)):
            bp_num = str( bp_idx[i]+self.startBP )
            self._set_data(curvature[i], 'bps', bp_num, 'curvature',  scaleoffset=3)

        if(store_tangent):
            tangent = np.asarray(tangent)
            final_tan = []
            for i in range(len(tangent[0])):
                temp = []
                for j in range(len(tangent)):
                    temp.append(tangent[j][i])
                final_tan.append(np.asarray(temp))

            for i in range(len(bp_idx)):
                bp_num = str( bp_idx[i]+self.startBP )
                self._set_data(np.asarray(final_tan[i]), 'bps', bp_num, 'tangent',  scaleoffset=3)



[docs]
    def calculate_angle_bw_tangents(self, base_step, cumulative=False, masked=False):
        """To calculate angle (Radian) between two tangent vectors of global helical axis.

        Parameters
        ----------
        base_step : 1D list
            List of two base-steps for which angle will be calculated.
            For example: **base_step** = ``[5, 50]`` either of following can be calculated.

                (1) angle between tangent vector 5th and 50th base-steps.
                (2) summation over 44 angles that are formed between adjacent tangent
                    vectors of 5-50 bp DNA segment.

            See below two choose between these two types.

        cumulative : bool)
            ``Default: False``: If it is false, first type of angle is calculated
            otherwise second type of angle is calculated as explained in the above
            example of option ``base_step``.

        masked : bool
            ``Default=False``. To skip specific frames/snapshots.
            ``DNA.mask`` array should be set to use this functionality.
            This array contains boolean (either ``True`` or ``False``) value
            for each frame to mask the frames. Presently, mask array is
            automatically generated during :meth:`DNA.generate_smooth_axis` to
            skip those frames where 3D fitting curve was not successful within
            the given criteria.

        Returns
        -------
        angle : 1D array
            Array of calculated angle of length is equal to number of frames.
            When ``masked`` is applied, length of this array can be smaller than
            total number of frames.

        """

        if (len(base_step) != 2):
            raise ValueError("See, documentation for step usage!!!")

        if base_step[0] > base_step[1]:
            raise ValueError("See, documentation for step usage!!!")

        angle = []
        if cumulative:
            angle_tmp = []
            tangent, idx = self.get_parameters(
                'tangent', bp=base_step, bp_range=True, masked=masked)
            for i in range(len(idx) - 1):
                angle_tmp.append(vector_angle(tangent[i], tangent[i + 1]))
            angle = np.asarray(angle_tmp).sum(axis=0)

        else:
            tangent1, idx1 = self.get_parameters(
                'tangent', bp=[base_step[0]], bp_range=False, masked=masked)
            tangent2, idx2 = self.get_parameters(
                'tangent', bp=[base_step[1]], bp_range=False, masked=masked)
            angle = vector_angle(tangent1[0], tangent2[0])

        return np.asarray(angle)



[docs]
    def calculate_2D_angles_bw_tangents(self, paxis, base_step, masked=True):
        """ To calculate angles (Radian) in 2D plane between two tangent vectors of global helical axis.

        .. list-table::  Principal axis and respective 2D planes
            :widths: 1, 4
            :header-rows: 1
            :name: gui-table

            * - Principal Axis
              - 2D planes

            * - X-axis
              - XY and XZ planes

            * - Y-axis
              - XY and YZ planes

            * - Z-axis
              - XZ and YZ planes

        Parameters
        ----------
        paxis : str
            Principal axis parallel to the DNA axis. For example, IF DNA helical
            axis is parallel or aligned with Z-axis, paxis should be ``'Z'``.
        base_step : 1D list
            List of two base-steps for which angle will be calculated.
            For example: **base_step** = ``[5, 50]`` angle between tangent vector
            5th and 50th base-steps will be calculated.
        masked : bool
            ``Default=False``. To skip specific frames/snapshots.
            ``DNA.mask`` array should be set to use this functionality.
            This array contains boolean (either ``True`` or ``False``) value
            for each frame to mask the frames. Presently, mask array is
            automatically generated during :meth:`DNA.generate_smooth_axis` to
            skip those frames where 3D fitting curve was not successful within
            the given criteria.

        Returns
        -------
        angle-one : 1D array
            Array of calculated angle in first plane of length is equal to
            number of frames. When ``masked`` is applied, length of this array
            can be smaller than total number of frames.
        angle-two : 1D array
            Array of calculated angle in second plane of length is equal to
            number of frames. When ``masked`` is applied, length of this array
            can be smaller than total number of frames.

        """

        if (len(base_step) != 2):
            raise ValueError("See, documentation for step usage!!!")

        if base_step[0] > base_step[1]:
            raise ValueError("See, documentation for step usage!!!")

        axisDict = {'X': 0, 'Y': 1, 'Z': 2 }

        if paxis == 'Z':
            planes = [0, 1]
        elif paxis == 'X':
            planes = [1, 2]
        elif paxis == 'Y':
            planes = [0, 2]
        else:
            raise ValueError('{0} is not accepted keyword. USE: X Y or Z for DNA-axis.')

        tangent1, idx1 = self.get_parameters('tangent', bp=[base_step[0]], bp_range=False, masked=masked)
        tangent2, idx2 = self.get_parameters('tangent', bp=[base_step[1]], bp_range=False, masked=masked)


        angles = []
        for plane in planes:
            if plane == 0:
                norm = [1, 0, 0]
            elif plane == 1:
                norm = [0, 1, 0]
            else:
                norm = [0, 0, 1]

            temp1 = tangent1[0].T[plane].copy()
            temp2 = tangent2[0].T[plane].copy()

            tangent1[0].T[plane] = np.zeros(tangent1[0].T[plane].shape)
            tangent2[0].T[plane] = np.zeros(tangent2[0].T[plane].shape)

            angle = vector_angle(tangent1[0], tangent2[0], norm=norm)

            tangent1[0].T[plane]  = temp1
            tangent2[0].T[plane]  = temp2

            angles.append(angle)
        '''
        angles = []
        shape = (tangent1[0].shape[0], 2)
        for plane in planes:
            if plane == 0:
                norm = [1, 0, 0]
            elif plane == 1:
                norm = [0, 1, 0]
            else:
                norm = [0, 0, 1]

            temp1 = np.zeros(shape)
            temp2 = np.zeros(shape)

            print(tangent1[0].T.shape, temp1.T.shape)

            temp1[:, 0] = tangent1[0].T[plane][:]
            temp1[:, 1] = tangent1[0].T[axisDict[paxis]][:]
            temp2[:, 0] = tangent2[0].T[plane][:]
            temp2[:, 1] = tangent2[0].T[axisDict[paxis]][:]


            #tangent1[0].T[plane] = np.zeros(tangent1[0].T[plane].shape)
            #tangent2[0].T[plane] = np.zeros(tangent2[0].T[plane].shape)

            #angle = vector_angle(temp1, temp2)
            angle = []
            #print(np.linalg.det([temp1.T,temp2.T]).shape)
            for i in range(temp1.shape[0]):
                a = np.math.atan2(np.linalg.det([temp1[i],temp2[1]]), np.dot(temp1[i],temp2[1]))
                angle.append(a)
            angle = np.asarray(angle)

            #tangent1[0].T[plane]  = temp1
            #tangent2[0].T[plane]  = temp2

            angles.append(angle)
        '''



        return np.asarray(angles[0]), np.asarray(angles[1])




def checkParametersInputFile(filename):
    """Check the do_x3dna output file and return list of parameters present in the file.
    """
    fin = open(filename, 'r')
    line = fin.readline()
    line2 = fin.readline()
    fin.close()

    temp = re.split('\s+', line)
    temp2 = re.split('\s+', line2)

    if temp[0] == '#Minor':
        return groovesParameters

    if temp[0] == '#Shift':
        return baseStepParameters

    if temp[0] == '#X-disp':
        return helicalBaseStepParameters

    if temp[0] == '#Shear':
        return basePairParameters

    if temp[0] == '#Position':
        return helicalAxisParameters

    if temp2[0] == '#P':
        return helicalRadiusParameters

    if temp2[0] == '#alpha':
        return backboneDihedrals



[docs]
def setParametersFromFile(dna, filename, parameters=None, bp=None):
    """Read a specific parameter from the do_x3dna output file.
    It automatically load the input parameter from a file to dna object or HDF5 file.
    It automatically decides from input parameter, what will be format of input file.

    Parameters
    ----------
    dna : :class:`DNA`
        Input :class:`DNA` instance.
    filename : str
        Input filename. This file should be output from do_x3dna.
    parameter : str, list, None
        Name of parameter. For details about accepted keywords, see ``parameter`` in the method :meth:`DNA.get_parameters`.
        Note that parameter that are calculated from do_x3dna cannot be used here.
        In case of `Ǹone`, parameters name will be automatically determine from the input file.
    bp : list
        List containing lower and higher limit of base-pair/step range.
            * This list should not contain more than two number.
            * First number should be less than second number.

        Example for base-pairs/steps 4 to 15:
            ``bp = [4,15]         # step_range = True``

        If ``None``, all base-pairs/steps will be considered.

    """

    gotParameterList = False
    param_type = None

    # In case of none try to determine from file
    if parameters is None:
        parameters = checkParametersInputFile(filename)
        if parameters is None:
            raise AssertionError(" Cannot determine the parameters name from file {0}.".format(filename))

    if isinstance(parameters, list) or isinstance(parameters, np.ndarray):
        gotParameterList = True
        parameter = list(parameters)
        param_type = getParameterType(parameter[0])
    else:
        param_type = getParameterType(parameters)

    if bp is None:
        if param_type == 'bps':
            bp = [dna.startBP, dna.num_step]
        else:
            bp = [dna.startBP, dna.num_bp]

    if len(bp) == 1:
        bp_range = False
    else:
        bp_range = True

    if not gotParameterList:
        tempParamName = parameters
        inputParameter = [ parameters ]
    else:
        tempParamName = parameters[0]
        inputParameter = parameter

    sys.stdout.write('\nLoading parameters: {0}'.format(inputParameter))

    success = False
    if tempParamName in basePairParameters:
        dna.set_base_pair_parameters(filename, bp, parameters=inputParameter, bp_range=bp_range)
        success = True

    if tempParamName in baseStepParameters:
        dna.set_base_step_parameters(filename, bp, parameters=inputParameter, step_range=bp_range, helical=False)
        success = True

    if tempParamName in helicalBaseStepParameters:
        dna.set_base_step_parameters(filename, bp, parameters=inputParameter, step_range=bp_range, helical=True)
        success = True

    if tempParamName in groovesParameters:
        dna.set_major_minor_groove(filename, bp, parameters=inputParameter, step_range=bp_range)
        success = True

    if tempParamName in backboneDihedrals:
        dna.set_backbone_dihedrals(filename, bp, parameters=inputParameter, bp_range=bp_range)
        success = True

    if tempParamName in helicalRadiusParameters:
        dna.set_helical_radius(filename, bp, full=True, bp_range=bp_range)
        success = True

    if tempParamName in helicalAxisParameters:
        if len(bp) == 1:
            raise AssertionError("Axis cannot be read for a single base-step.\n Use a segment spanned over several basepairs.")
        dna.set_helical_axis(filename, step_range=True, step=bp)
        success = True

    if not success:
        raise ValueError ('Not able to load these parameters: {0}... '.format(parameter))




[docs]
def localDeformationVsBPS(dnaRef, bpRef, dnaProbe, bpProbe, parameter, err_type='std', bp_range=True, merge_bp=1,
                          merge_method='mean', masked=False, tool='gmx analyze'):
    """To calculate deformation of local parameters in probe DNA with respect to a reference DNA as a function of the bp/s.

    .. note:: Deformation = Reference_DNA(parameter) - Probe_DNA(parameter)

    .. warning:: Number of segments/bp/bps should match between probe and reference DNA.

    .. warning::
            To calculate errors by using ``error = 'acf'`` or ``error = 'block'``,
            GROMACS tool ``g_analyze`` or ``gmx analyze`` should be present in ``$PATH``.

    Parameters
    ----------
    dnaRef  : :class:`DNA`
        Reference DNA

    bpRef : 1D list array
        base-pairs or base-steps to consider from Reference DNA
        Example: ::

            bp = [6]                                # bp_range = False
            bp = [4,15]                             # bp_range = True
            bp = range(4,15)                        # bp_range = False
            bp = np.arange(4,15)                    # bp_range = False


    dnaProbe : :class:`DNA`
        probe DNA. Number of base-pairs in Reference and probe DNA **should be** same.

    bpProbe : 1D list or array
        base-pairs or base-steps to consider from Reference DNA. Foe more, see above example of ``bpSubj``.

    parameter : str
        Name of a base-pair or base-step or helical base-step parameter
        For details about accepted keywords, see ``parameter`` in the method :meth:`DNA.get_parameters`.

    error_type : str
        Method of error estimation.
        Currently accepted method as follows:

            * ``error = 'std'``   : Standard Deviation
            * ``error = 'acf'``   : Standard error using autocorrelation time (requires: ``g_analyze`` or ``gmx analyze``)
            * ``error = 'block'`` : Standard error using block averaging method (requires: ``g_analyze`` or ``gmx analyze``)

    bp_range : bool
        ``Default=True``: As shown above, if ``True``, bp is taken as a range otherwise list or numpy array.

    merge_bp : int
        Number of base-pairs or steps to merge for creating the small DNA segments

    merge_method : str
        Method to calculate the parameter of a DNA segment from local
        parameters of all base-pairs/steps that are between the range
        given through ``bp``.
        Currently accepted keywords are as follows:

            * ``merge_method = mean``: Average of local parameters
            * ``merge_method = sum``: Sum of local parameters

    masked : bool
        ``Default=False``. To skip specific frames/snapshots.
        ``DNA.mask`` array should be set to use this functionality.
        This array contains boolean (either ``True`` or ``False``) value
        for each frame to mask the frames. Presently, mask array is
        automatically generated during :meth:`DNA.generate_smooth_axis` to
        skip those frames where 3D fitting curve was not successful within
        the given criteria

    tool : str
        Gromacs tool ``g_analyze`` or ``gmx analyze`` or ``gmx_mpi analyze`` etc.
        It will be used to calculate autocorrelation time or  block averaging error.
        It should be present in ``$PATH``

    Returns
    -------
    bpRef  : 1D array)
        base-pair/step numbers of reference DNA. If ``merge_bp>1``, middle number will is returned.`
    bpProbe : 1D array
        base-pair/step numbers of probe DNA. If ``merge_bp>1``, middle number will is returned.`
    deviation  : 1D array
        Deviation in the parameter of probe DNA with respect to reference DNA.
    error  : 1D array
        Standard error of respective deviation

    """

    bpRef, RefAvgValue, RefError = dnaRef.get_mean_error(bpRef, parameter, err_type=err_type, bp_range=bp_range,
                                                         merge_bp=merge_bp, merge_method=merge_method, tool=tool,
                                                         masked=masked)

    bpProbe, ProbeAvgValue, ProbeError = dnaProbe.get_mean_error(bpProbe, parameter, err_type=err_type, bp_range=bp_range,
                                                             merge_bp=merge_bp, merge_method=merge_method, tool=tool,
                                                             masked=masked)

    if len(bpRef) != len(bpProbe):
        raise ValueError(
            "Number (%d) of bp/bps/segments in reference DNA does not match with the number (%d) of probe DNA." % (len(bpRef), len(bpProbe)))

    deviation = RefAvgValue - ProbeAvgValue
    error = np.sqrt((RefError**2) + (ProbeError**2))

    return bpRef, bpProbe, deviation, error




[docs]
def dev_parameters_vs_axis(dnaRef, dnaSubj, parameter, bp, axis='Z', bp_range=True, windows=10, err_type='block', tool='gmx analyze'):
    """To calculate deviation in the given parameters of a Subject DNA to Reference DNA along the given axis.

    .. note:: Deviation = Reference_DNA(parameter) - Subject_DNA(parameter)

    .. warning::
            To calculate errors by using ``error = 'acf'`` or ``error = 'block'``,
            GROMACS tool ``g_analyze`` or ``gmx analyze`` should be present in ``$PATH``.

    Parameters
    ----------
    dnaRef  : :class:`DNA`
        Reference DNA
    dnaSubj : :class:`DNA`
        Subject DNA. Number of base-pairs in Reference and Subject DNA **should be** same.
    parameter : str
        Name of a base-pair or base-step or helical base-step parameter
        For details about accepted keywords, see ``parameter`` in the method :meth:`DNA.get_parameters`.

    bp : 1D list or array
        base-pairs to analyze
        Example: ::

            bp = [6]                                # bp_range = False
            bp = [4,15]                             # bp_range = True
            bp = range(4,15)                        # bp_range = False
            bp = np.arange(4,15)                    # bp_range = False
            bp = [2,5,6,7,9,12,18]                  # bp_range = False

    bp_range : bool
        ``Default=True``: As shown above, if ``True``, bp is taken as a range otherwise list or numpy array.

    axis : str
        Axis along which DNA axis is parallel. Keywords: ``X``, ``Y`` and ``Z``.

    windows  : int
        Number of bins along the axis

    err_type : str
        Method of error estimation.
        Currently accepted method as follows:

            * ``error = 'std'``   : Standard Deviation
            * ``error = 'acf'``   : Standard error using autocorrelation time (requires: ``g_analyze`` or ``gmx analyze``)
            * ``error = 'block'`` : Standard error using block averaging method (requires: ``g_analyze`` or ``gmx analyze``)

    tool : str
        Gromacs tool ``g_analyze`` or ``gmx analyze`` or ``gmx_mpi analyze`` etc.
        It will be used to calculate autocorrelation time or  block averaging error.
        It should be present in ``$PATH``

    Returns
    -------
    deviation : 1D array
        length of no. of windows; Deviation in the parameter for two given DNA
    deviation_error : 1D array
        length of no. of windows; Standard error in deviation fo each window/bin
    axis  : 1D array
        length of no. of windows; average position of window/bin along given axis
    axis_error  : 1D array
        length of no. of windows; Standard error in average position of window/bin along given axis

    """
    RefParam, ref_bp_idx = dnaRef.get_parameters(parameter, bp, bp_range)
    RefAxis, dummy = dnaRef.get_parameters(
        'Helical {0}-axis' .format(axis), bp, bp_range)
    SubjParam, subj_bp_idx = dnaSubj.get_parameters(parameter, bp, bp_range)

    mean_axis = np.mean(RefAxis, axis=1)
    meanRefParam = np.mean(RefParam, axis=1)
    meanSubjParam = np.mean(SubjParam, axis=1)

    maxAxis = np.amax(mean_axis)
    minAxis = np.amin(mean_axis)
    axis_range = (maxAxis - minAxis) / windows

    Ref_param_error = get_error(dnaRef.time, RefParam, len(ref_bp_idx), err_type=err_type, tool=tool)
    Ref_axis_error = get_error(dnaRef.time, RefAxis, len(ref_bp_idx), err_type=err_type, tool=tool)
    subj_param_error = get_error(dnaSubj.time, SubjParam, len(subj_bp_idx), err_type=err_type, tool=tool)

    merged_ref_param = []
    merged_subj_Param = []
    merged_Ref_param_error = []
    merged_Ref_axis_error = []
    merged_subj_param_error = []

    final_axis = []

    for i in range(windows):
        start = minAxis + (i * axis_range)
        end = start + axis_range
        idx = []
        for j in range(len(mean_axis)):
            if((start <= mean_axis[j]) and (end > mean_axis[j])):
                idx.append(j)
        if(len(idx) > 0):
            merged_ref_param.append(meanRefParam[idx])
            merged_subj_Param.append(meanSubjParam[idx])
            final_axis.append(start + (end - start) / 2)
            merged_Ref_param_error.append(Ref_param_error[idx])
            merged_Ref_axis_error.append(Ref_axis_error[idx])
            merged_subj_param_error.append(subj_param_error[idx])

    final_ref_param = []
    final_subj_param = []
    final_ref_param_error = []
    final_ref_axis_error = []
    final_subj_param_error = []
    for i in range(len(merged_ref_param)):
        final_ref_param.append(np.sum(merged_ref_param[i]))
        final_subj_param.append(np.sum(merged_subj_Param[i]))

        final_ref_axis_error.append(
            np.sqrt((merged_Ref_axis_error[i]**2).sum()))
        final_ref_param_error.append(
            np.sqrt((merged_Ref_param_error[i]**2).sum()))
        final_subj_param_error.append(
            np.sqrt((merged_subj_param_error[i]**2).sum()))

    deviation, error = get_deviation(
        final_ref_param, final_ref_param_error, final_subj_param, final_subj_param_error)

    return deviation, error, final_axis, final_ref_axis_error




[docs]
def get_error(time, x, sets, err_type='block', tool='gmx analyze'):
    """To estimate error using block averaging method

    .. warning::
            To calculate errors by using ``error = 'acf'`` or ``error = 'block'``,
            GROMACS tool ``g_analyze`` or ``gmx analyze`` should be present in ``$PATH``.

    Parameters
    ----------
    time : 1D list or array
        :attr:`DNA.time`
    x   : 2D list or array
        Shape of (nset, nframe); where *nset* is number of set and *nframe* is
        total number of frames. *nframe* should be equal to length of time
        list/array
    sets : int
        Number of sets (*nset*)
    err_type : str
        Error estimation by autocorrelation method ``err_type='acf'`` or
        block averaging method ``err_type='block'``
    tool : str
        GROMACS tool to calculate error. In older versions it is `g_analyze` while in
        newer versions (above 2016) it is `gmx analyze`.

    Returns
    -------
    error : 1D array
        Of length = number of sets (*nset*)

    """
    for i in range(sets):
        if (len(time) != len(x[i])):
            raise ValueError('\nError: number of frame in time {0} mismatched with {1} of x[{2}]!!\n' .format(
                len(time), len(x[i]), i))

    if not((err_type == 'block') or (err_type == 'acf')):
        print('\nWarning: Method {0} is not implemented. Switching to \'acf\'.\n' .format(
            err_type))
        err_type = 'acf'

    error = []
    char_set = string.ascii_lowercase
    name = ''.join(random.sample(string.ascii_lowercase, 10))

    filename = name + '.xvg'
    eefile = 'ee_' + name + '.xvg'
    acfile = 'acf_' + name + '.xvg'

    fout = open(filename, 'w')
    for i in range(len(time)):
        fout.write('{0}' .format(time[i]))
        for j in range(sets):
            fout.write('    {0}' .format(x[j][i]))
        fout.write("\n")
    fout.close()

    command = '{0} -f {1} -ee {2} -ac {3} -fitfn exp' .format(tool, filename, eefile, acfile)

    try:
        p = sub.Popen(command.split(), stdout=sub.PIPE, stderr=sub.PIPE, universal_newlines=True)
        out, outputerror = p.communicate()
    except Exception as e:
        os.remove(filename)
        raise e

    lines = out.split('\n')

    if (err_type == 'block'):
        for line in lines:
            if(re.match('Set', line)):
                temp = line.split()
                error.append(float(temp[3]))

    if (err_type == 'acf'):
        acf_time = []
        for line in lines:
            if re.match('COR: Correlation time', line) is not None:
                temp = line.split('=')
                acf_time.append(abs(float(temp[1].split()[0])))

        total_time = float(time[-1]) - float(time[0])
        dt = total_time / len(time)
        for i in range(sets):
            if(acf_time[i] >= dt):
                n_indp = total_time / acf_time[i]
                tmp_err = np.std(x[i]) / np.sqrt(n_indp)
            else:
                tmp_err = np.std(x[i]) / np.sqrt(len(time))
            error.append(tmp_err)

    os.remove(filename)
    os.remove(eefile)
    os.remove(acfile)
    if os.path.isfile('fitlog.log'):
        os.remove('fitlog.log')

    return np.array(error)



def get_deviation(Ref, RefErr, x, xerr):
    if (len(Ref) != len(x)):
        print (
            "\nError: Number of base pairs/steps mismatch from reference to target!!\n")
        exit(1)
    Ref = np.array(Ref)
    RefErr = np.array(RefErr)
    x = np.array(x)
    xerr = np.array(xerr)

    covariance = np.cov(Ref, x)

    deviation = Ref - x
    error = np.sqrt((RefErr * RefErr) + (xerr * xerr))

    return deviation, error


def frenet_serret(xyz):
    ''' Frenet-Serret Space Curve Invariants

    Taken from "Diffusion Imaging in Python" `DiPy package<http://nipy.org/dipy/>`_

    Calculates the 3 vector and 2 scalar invariants of a space curve defined by vectors r = (x,y,z).  If z is omitted (i.e. the array xyz has shape (N,2), then the curve is only 2D (planar), but the equations are still valid.

    Similar to http://www.mathworks.com/matlabcentral/fileexchange/11169

    In the following equations the prime ($'$) indicates differentiation with respect to the parameter $s$ of a parametrised curve $\mathbf{r}(s)$.

    - $\mathbf{T}=\mathbf{r'}/|\mathbf{r'}|\qquad$ (Tangent vector)}

    - $\mathbf{N}=\mathbf{T'}/|\mathbf{T'}|\qquad$ (Normal vector)

    - $\mathbf{B}=\mathbf{T}\times\mathbf{N}\qquad$ (Binormal vector)

    - $\kappa=|\mathbf{T'}|\qquad$ (Curvature)

    - $\mathrm{\tau}=-\mathbf{B'}\cdot\mathbf{N}$ (Torsion)

    **Arguments:**

            * xyz : array-like shape (N,3)
                            array representing x,y,z of N points in a track

    **Returns:**

                    * T : array shape (N,3)
                            array representing the tangent of the curve xyz
                    * N : array shape (N,3)
                            array representing the normal of the curve xyz
            * B : array shape (N,3)
                    array representing the binormal of the curve xyz
            * k : array shape (N,1)
                    array representing the curvature of the curve xyz
            * t : array shape (N,1)
                    array representing the torsion of the curve xyz

    Examples:

    Create a helix and calculate its tangent, normal, binormal, curvature and torsion

    >>> from dipy.tracking import metrics as tm
    >>> import numpy as np
    >>> theta = 2*np.pi*np.linspace(0,2,100)
    >>> x=np.cos(theta)
    >>> y=np.sin(theta)
    >>> z=theta/(2*np.pi)
    >>> xyz=np.vstack((x,y,z)).T
    >>> T,N,B,k,t=tm.frenet_serret(xyz)

    '''

    def magn(xyz, n=1):
        ''' magnitude of vector


        '''
        mag = np.sum(xyz**2, axis=1)**0.5
        imag = np.where(mag == 0)
        mag[imag] = np.finfo(float).eps

        if n > 1:
            return np.tile(mag, (n, 1)).T

        return mag.reshape(len(mag), 1)

    xyz = np.asarray(xyz)
    n_pts = xyz.shape[0]

    if n_pts == 0:
        raise ValueError('xyz array cannot be empty')

    dxyz = np.gradient(xyz)[0]
    ddxyz = np.gradient(dxyz)[0]

    # Tangent
    T = np.divide(dxyz, magn(dxyz, 3))

    # Derivative of Tangent
    dT = np.gradient(T)[0]

    # Normal
    N = np.divide(dT, magn(dT, 3))

    # Binormal
    B = np.cross(T, N)

    # Curvature
    k = magn(np.cross(dxyz, ddxyz), 1) / (magn(dxyz, 1)**3)

    # Torsion
    #(In matlab was t=dot(-B,N,2))
    t = np.sum(-B * N, axis=1)
    # return T,N,B,k,t,dxyz,ddxyz,dT

    return T, N, B, k, t


def read_data_file(FileName, cols_equal=True):
    infile = open(FileName, 'r')
    data = []
    len_data = 0
    i = 1
    for line in infile:
        line = line.rstrip('\n')
        if not line.strip():
            continue
        if(re.match('#|@', line) == None):
            temp = map(float, line.split())
            if(cols_equal):
                if (i == 1):
                    len_data = len(temp)
                if (len(temp) != len_data):
                    print('WARNING: Number of column mis match at line {0} in {1}; skipping remaining part\n' .format(
                        i, FileName))
                    break
                data.append(temp)
            i = i + 1
    data = np.array(data).T
    return data


def get_idx_of_bp_parameters(bp, parameters, bp_range, startBP=1):
    param_idx = []
    if(bp_range):
        bp_idx = np.arange(bp[0] - startBP, bp[1])
    else:
        bp_idx = np.subtract(bp, startBP)

    if(len(parameters) != 0):
        #param_idx = np.hstack((np.subtract(parameters,1),[parameters[-1]]))
        param_idx = np.subtract(parameters, 1)

    return bp_idx, param_idx



[docs]
def read_param_file(FileName, parameters, bp, bp_range, word=False, startBP=1):
    """ Read parameters from do_x3dna file.

    It is the main function, which is used to read and extract the parameters
    values from the do_x3dna output files.

    Parameters
    ----------
    FileName : str
        Parameter file produced from do_x3dna.
    parameters : list
        List of column indices that has to be extracted. indices here start
        with one.

    bp : 1D list or array
        base-pairs to analyze
        Example: ::

            bp = [6]                                # bp_range = False
            bp = [4,15]                             # bp_range = True
            bp = range(4,15)                        # bp_range = False
            bp = np.arange(4,15)                    # bp_range = False
            bp = [2,5,6,7,9,12,18]                  # bp_range = False
    bp_range : bool
        ``Default=True``: As shown above, if ``True``, bp is taken as a range otherwise list or numpy array.
    word : bool
        In some parameters, in place of value, ``'---'`` is present in the file.
        If parameter values contain this, use ``True``.
    startBP : int
        Number ID of first base-pair.

    Returns
    -------
    data : 3D array
        Extracted parameters as a 3D array of shape (bp, parameters, time).
    time : 1D array
        Time of each frame
    """

    sys.stdout.write("\nReading file : %s\n" % FileName)
    sys.stdout.flush()

    def get_frame_data(block, parameters, bp_idx):
        block = np.array(block).T
        temp_data = (block[parameters, :])[:, bp_idx].copy()
        return temp_data

    def get_time(line):
        dummy, temp_time = line.split('=')
        return float( temp_time )

    infile = open(FileName, 'r')
    data = []
    time = []
    frame_number = 0

    bp_idx, param_idx = get_idx_of_bp_parameters(bp, parameters, bp_range, startBP=startBP)

    block = []
    for line in infile:

        # Removing last new line character
        line = line.rstrip('\n')

        # Skipping blank/empty line
        if not line.strip():
            continue

        # Getting Time tag and time => Starting of new frame
        if(re.match('# Time', line) != None):

            if((frame_number < 100) and (frame_number % 10 == 0)) or ((frame_number < 1000) and (frame_number % 100 == 0)) or ((frame_number < 10000) and (frame_number % 1000 == 0)) or ((frame_number < 100000) and (frame_number % 10000 == 0)) or ((frame_number < 1000000) and (frame_number % 100000 == 0)):
                sys.stdout.write("\rReading frame %d" % frame_number)
                sys.stdout.flush()

            frame_number += 1

            # if(frame_number==5000):
            # break

            # Getting time
            time.append(get_time(line))

            # Getting parameters/values for base-pairs
            if(len(block) > 0):
                data.append(get_frame_data(block, param_idx, bp_idx))
            block = []
            continue

        # Skipping other lines starting with '#' tag
        if(re.match('#', line) != None):
            continue

        if not word:
            block.append(list(map(float, line.split())))
        else:
            temp = []
            split_line = line.split()
            for word in split_line:
                if word != '---':
                    temp.append(float(word))
                else:
                    temp.append(None)
            block.append(temp)

    # For last frame
    data.append(get_frame_data(block, param_idx, bp_idx))
    block = []
    data_transpose = np.array(data).T

    sys.stdout.write(
        "\nFinished reading.... Total number of frame read =  %d\n" % frame_number)
    sys.stdout.flush()

    return data_transpose, time



def distance(x, y):
    x = np.asarray(x)
    y = np.asarray(y)
    return np.linalg.norm(x - y)



[docs]
def vector_angle(x, y, multiple=True, norm=None):
    """Calculate angle between two vector/s

    Parameters
    ----------
    x : 1D or 2D array
        First vector or array of first vectors
    y : 1D or 2D array
        Second vector or array of second vectors
    multiple : bool
        If ``x`` and ``y`` are array of vector then use ``True`` otherwise
        use ``False``
    norm : vector
        Normal vector. useful to determine signed angle.

    Returns
    -------
    value : float or 1D array
        Calculated angle value. If ``x`` and ``y`` are array of vector, then array
        of angle is returned otherwise a single angle value is returned.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    angle = []
    if multiple:
        # Element-wise angle between arrays of vectors. Taken from following links
        # http://codereview.stackexchange.com/questions/54347/calculating-element-wise-the-angles-between-two-lists-of-vectors
        dot_x_y = np.einsum('ij, ij->i', x, y)
        dot_x_x = np.einsum('ij, ij->i', x, x)
        dot_y_y = np.einsum('ij, ij->i', y, y)
        angle = np.arccos(dot_x_y / (np.sqrt(dot_x_x) * np.sqrt(dot_y_y)))

        if norm is not None:
            cross_x_y = np.cross(x, y)
            sign = np.einsum('ij, ij->i', cross_x_y, np.tile(norm, (x.shape[0], 1)))
            angle = angle * np.sign(sign)
    else:
        # Angle between two vectors
        dot = np.dot(x, y)
        cross = np.cross(x, y)
        cross_modulus = np.sqrt((cross * cross).sum())
        angle = np.arctan2(cross_modulus, dot)
    return angle


####################################


def fit_axis(bp_idx, nframe, RawX, RawY, RawZ, smooth, spline, fill_point, cut_off_angle):

    orig_x = RawX.copy()
    orig_y = RawY.copy()
    orig_z = RawZ.copy()

    orig_s = smooth

    xsmooth, ysmooth, zsmooth = [], [], []

    bsmooth = False
    del_idx = []
    count = 0
    scount = 0
    mask = False
    while not bsmooth:

        count += 1

        if count > 4:
            mask = False
            sys.stdout.write('\n|frame:{0:>10}| WARNING: Fitting failed with \"smooth = {1}\"; Trying with \"smooth = {2}\".....\n' .format(
                nframe, smooth, smooth + 100))
            sys.stdout.flush()

            smooth = smooth + 100
            count = 1

            del orig_x
            del orig_y
            del orig_z

            orig_x = RawX.copy()
            orig_y = RawY.copy()
            orig_z = RawZ.copy()

        points = fill_point * len(orig_x)

        nest = -1
        tckp, u = splprep([orig_x, orig_y, orig_z], s=smooth, k=spline, nest=nest)

        xnew, ynew, znew = splev(np.linspace(0, 1, points), tckp)

        new_axis = np.array([xnew, ynew, znew]).T

        angle = []
        dist = []
        del_idx = []
        last_idx = len(orig_x) - 1

        for nbp in range(len(bp_idx)):
            start = nbp * fill_point
            end = start + fill_point
            xsmooth.append(xnew[start:end].mean())
            ysmooth.append(ynew[start:end].mean())
            zsmooth.append(znew[start:end].mean())

        tmp_vec1, tmp_vec2 = [], []
        for j in range(1, len(xsmooth) - 1):
            prev = np.array([xsmooth[j - 1], ysmooth[j - 1], zsmooth[j - 1]])
            curr = np.array([xsmooth[j], ysmooth[j], zsmooth[j]])
            nex = np.array([xsmooth[j + 1], ysmooth[j + 1], zsmooth[j + 1]])
            tmp_vec1.append(prev - curr)
            tmp_vec2.append(curr - nex)
        angle = np.degrees(vector_angle(tmp_vec1, tmp_vec2))
        del tmp_vec1
        del tmp_vec2

        for j in range(1, len(orig_x) - 1):
            prev = np.array([orig_x[j - 1], orig_y[j - 1], orig_z[j - 1]])
            curr = np.array([orig_x[j], orig_y[j], orig_z[j]])
            nex = np.array([orig_x[j + 1], orig_y[j + 1], orig_z[j + 1]])
            dist.append(distance(prev, curr) + distance(curr, nex))

        for j in range(len(angle)):

            if angle[j] > cut_off_angle and not angle[j] > (180 - cut_off_angle):

                del orig_x
                del orig_y
                del orig_z
                bsmooth = False

                max_idx = np.argsort(dist)[::-1]

                for k in range(count):
                    del_idx.append(max_idx[k] + 1)
                    del_idx.append(max_idx[k] + 2)
                    if max_idx[k] == 0:
                        del_idx.append(0)
                    if max_idx[k] == last_idx:
                        del_idx.append(last_idx)

                del_idx = list(set(del_idx))

                sys.stdout.write('\r|frame:{0:>10}| WARNING: Bending angle [{1}-{2}-{3}] = {4:.2f} is more than cut-off angle {5};\n                     Four maximum distances between three adjacent axis positions = ({6[0]:.1f}, {6[1]:.1f}, {6[2]:.1f}, {6[3]:.1f});\n                     Deleting {7} original helical axis positions to remove possible fitting artifact...\n' .format(
                    nframe, j - 1, j, j + 1, angle[j], cut_off_angle, np.sort(dist)[::-1], del_idx))
                sys.stdout.flush()
                mask = True

                if smooth >= 10000:
                    sys.stdout.write('\n\n|frame:{0:>10}| WARNING: Maximum Bending Angle = {1:.2f} at index {2}, which might be artifact. Please, check by visualizing PDB trajectory file...\n\n' .format(
                        nframe, angle[j], j))
                    sys.stdout.flush()
                    bsmooth = True
                    mask = True
                    break

                orig_x = np.delete(RawX.copy(), del_idx)
                orig_y = np.delete(RawY.copy(), del_idx)
                orig_z = np.delete(RawZ.copy(), del_idx)

                xsmooth, ysmooth, zsmooth = [], [], []

                break
            else:
                bsmooth = True

        del angle
        del dist

    if orig_s != smooth:
        sys.stdout.write('\n')

    return xsmooth, ysmooth, zsmooth, mask
#################################################