Source code for recipes

# recipes.py - Here are some general purpose convenience functions
# 
# Author: Stefan Fuertinger [stefan.fuertinger@esi-frankfurt.de]
# Created: June 25 2013
# Last modified: <2017-09-15 16:07:08>

from __future__ import division
import sys
import re
import fnmatch
import os
from numpy.linalg import norm
import numpy as np
from texttable import Texttable
from datetime import datetime, timedelta
from scipy import ndimage
import matplotlib.pyplot as plt

##########################################################################################
def query_yes_no(question, default=None):
    """
    Ask a yes/no question on the console and return the answer.

    Parameters
    ----------
    question : str
        The question to be printed in the prompt
    default : str
        The presumed answer that is used in case the <Return> key is pressed
        (must be either "yes" or "no"). 
        If `default` is `None` then a definitive answer is required 
        (pressing <Return> will re-print `question` in the prompt)

    Returns
    -------
    answer : bool
        `True` if the input was "yes"/"ye"/"y", `False` if it was "no"/"n"
        (case-insensitive, surrounding whitespace is ignored). Any other 
        input re-prints the prompt until a valid answer is given. 

    Raises
    ------
    TypeError
        If `question` is not a string
    ValueError
        If `default` is neither `None`, "yes" nor "no"

    Notes
    -----
    This code is a slightly modified version of recipe no. 577058 from ActiveState 
    written by Trent Mick. 

    See also
    --------
    ActiveState : ActiveState Code Recipe #577058 currently available 
                  `here <http://code.activestate.com/recipes/577058/>`_
    """

    # `unicode` and `raw_input` only exist in Python 2 - fall back to their 
    # Python 3 counterparts so that the routine works with both interpreters
    try:
        string_types = (str, unicode)  # noqa: F821
    except NameError:
        string_types = (str,)
    try:
        read_line = raw_input  # noqa: F821
    except NameError:
        read_line = input

    # Check mandatory input
    if not isinstance(question, string_types):
        raise TypeError('Input question has to be a string!')

    # Parse optional `default` answer
    valid = {"yes": True, "y": True, "ye": True,
             "no": False, "n": False}
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        # Wrap in a tuple so that tuple-valued `default`s do not break the formatting
        raise ValueError("Invalid default answer: '%s'" % (default,))

    # Do the actual work: keep asking until we get something we understand
    while True:
        sys.stdout.write(question + prompt)
        sys.stdout.flush()
        choice = read_line().strip().lower()
        if default is not None and choice == '':
            return valid[default]
        if choice in valid:
            return valid[choice]
        sys.stdout.write("Please respond with 'yes' or 'no' "
                         "(or 'y' or 'n').\n")

##########################################################################################
def natural_sort(lst):
    """
    Sort a list/NumPy 1darray in a "natural" way

    Parameters
    ----------
    lst : list or NumPy 1darray
        Python list or 1darray of strings

    Returns
    -------
    lst_sort : list
        Naturally sorted version of the input `lst`. The result is always 
        a Python list whose entries are the string-converted elements of `lst`. 

    Raises
    ------
    TypeError
        If `lst` is neither a Python list nor a NumPy array
    ValueError
        If the entries of `lst` cannot be converted to strings

    Notes
    -----
    This function was originally intended to perform a natural sorting of a file-listing
    (see Coding Horror's 
    `note <http://www.codinghorror.com/blog/2007/12/sorting-for-humans-natural-sort-order.html>`_ 
    on this topic for more details). Briefly, an input list `lst` of strings 
    containing digits is sorted such that the actual numerical value of the 
    digits is respected (see Examples for more details). The comparison of 
    the non-digit parts is case-insensitive. 
    The code below is based on a Stackoverflow submission by Mark Byers, currently available 
    `here <http://stackoverflow.com/questions/4836710/does-python-have-a-built-in-function-for-string-natural-sort>`_. 

    Examples
    --------
    Calling `glob` in a directory containing files named `Elm` and `elm` plus 
    two-digit suffixes will result in a file listing sorted as follows:

    >>> lst = ['Elm11', 'Elm12', 'Elm2', 'elm0', 'elm1', 'elm10', 'elm13', 'elm9']

    Using `natural_sort` to order `lst` yields

    >>> natural_sort(lst)
    ['elm0', 'elm1', 'Elm2', 'elm9', 'elm10', 'Elm11', 'Elm12', 'elm13']

    See also
    --------
    None
    """

    # Check our single mandatory input argument
    if not isinstance(lst, (list, np.ndarray)):
        raise TypeError('Input has to be a Python list or NumPy 1darray, not '+type(lst).__name__+'!')

    # Convert all list entries to strings to avoid any trouble below
    try:
        entries = np.array(lst, dtype=str).flatten()
    except Exception:
        raise ValueError("Input must be a list/NumPy 1darray of strings!")

    # Splitting at a capturing group yields alternating text/digit chunks: even 
    # positions hold text (possibly empty), odd positions hold the ASCII digit runs. 
    # Deciding by position (and not via `isdigit`) guarantees that keys only ever 
    # compare text with text and numbers with numbers
    digit_runs = re.compile(r'([0-9]+)')

    def alphanum_key(entry):
        chunks = digit_runs.split(entry)
        return [int(chunk) if pos % 2 else chunk.lower()
                for pos, chunk in enumerate(chunks)]

    # Do the actual sorting
    return sorted(entries, key=alphanum_key)

##########################################################################################
def get_numlines(fname):
    """
    Get number of lines of a text file

    Parameters
    ----------
    fname : str
        File to be read (a leading `~` is expanded to the user's home directory)

    Returns
    -------
    lineno : int
        Number of lines in the file (0 for an empty file)

    Raises
    ------
    TypeError
        If `fname` is not a string
    ValueError
        If the directory part of `fname` does not point to an existing directory

    Notes
    -----
    This routine is based on this 
    `Stackoverflow submission <http://stackoverflow.com/questions/845058/how-to-get-line-count-cheaply-in-python>`_

    See also
    --------
    None
    """

    # `unicode` only exists in Python 2, in Python 3 `str` covers all text
    try:
        string_types = (str, unicode)  # noqa: F821
    except NameError:
        string_types = (str,)

    # Check if input makes sense
    if not isinstance(fname, string_types):
        raise TypeError('Filename has to be a string!')
    fname = os.path.expanduser(str(fname))

    # Only validate the directory part if there is one; `dirname` (unlike slicing 
    # at the last separator) maps files located directly under the root to the root
    parent = os.path.dirname(fname)
    if parent and not os.path.isdir(parent):
        raise ValueError('Invalid path to file: '+fname+'!')

    # Cycle through lines of the file and just count them (an empty file yields 0)
    with open(fname) as f:
        return sum(1 for _ in f)

##########################################################################################
def myglob(flpath, spattern):
    """
    Return a glob-like list of paths matching a shell-style wildcard pattern 

    Parameters
    ----------
    flpath : str
        Path to search (to search current directory use `flpath=''` or `flpath='.'`). 
        A leading `~` is expanded to the user's home directory. 
    spattern : str
        Shell-style wildcard pattern (as understood by `fnmatch`, **not** a 
        regular expression) to search for in `flpath`

    Returns
    -------
    flist : list
        A Python list of all entries found in `flpath` whose names match the 
        input pattern `spattern`, each prefixed by `flpath`. The order is 
        whatever `os.listdir` returns, i.e., the list is not sorted. 

    Raises
    ------
    TypeError
        If `flpath` or `spattern` is not a string
    ValueError
        If the parent directory of `flpath` does not exist

    Examples
    --------
    List all png/PNG files in the folder `MyHolidayFun` found under `Documents`

    >>> myglob('Documents/MyHolidayFun','*.[Pp][Nn][Gg]')
    ['Documents/MyHolidayFun/img1.PNG','Documents/MyHolidayFun/img1.png']
        
    See also
    --------
    glob : Unix-style path-name and pattern expansion in Python
    fnmatch : Unix-style filename pattern matching in Python
    """

    # `unicode` only exists in Python 2, in Python 3 `str` covers all text
    try:
        string_types = (str, unicode)  # noqa: F821
    except NameError:
        string_types = (str,)

    # Make sure provided path is a string and makes sense
    if not isinstance(flpath, string_types):
        raise TypeError('Filepath has to be a string!')
    flpath = os.path.expanduser(str(flpath))

    # Only validate the parent if there is one; `dirname` (unlike slicing at the 
    # last separator) maps top-level directories such as `/tmp` to the root 
    # instead of to an empty string that would wrongly be rejected
    parent = os.path.dirname(flpath)
    if parent and not os.path.isdir(parent):
        raise ValueError('Invalid path: '+flpath+'!')
    if not isinstance(spattern, string_types):
        raise TypeError('Pattern has to be a string!')

    # If user wants to search current directory, make sure that works as expected
    if not flpath.strip(' '):
        flpath = '.'

    # Append trailing slash to filepath
    elif not flpath.endswith(os.sep):
        flpath = flpath + os.sep

    # Return glob-like list
    return [os.path.join(flpath, fnm) for fnm in fnmatch.filter(os.listdir(flpath), spattern)]

##########################################################################################
def moveit(fname):
    """
    Check if a file/directory exists, if yes, rename it

    Parameters
    ----------
    fname : str
        A string specifying (the path to) the file/directory to be renamed (if existing). 
        A leading `~` is expanded to the user's home directory. 

    Returns
    -------
    Nothing : None

    Raises
    ------
    TypeError
        If `fname` is not a string

    Notes
    -----
    An existing file/directory is renamed by appending the suffix 
    `_bak_<year>_<month>_<day>_<hour>_<minute>_<second>` (current local time, 
    no zero-padding) to its name. For files, the suffix is inserted in front 
    of the file extension. If `fname` does not exist, nothing happens. 

    See also
    --------
    None
    """

    # Imported locally since the module-level imports do not provide `shutil`
    import shutil

    # `unicode` only exists in Python 2, in Python 3 `str` covers all text
    try:
        string_types = (str, unicode)  # noqa: F821
    except NameError:
        string_types = (str,)

    # Check if input makes sense
    if not isinstance(fname, string_types):
        raise TypeError("File-/Directory-name has to be a string!")
    fname = os.path.expanduser(str(fname))

    # Time-stamp used to tag the renamed file/directory
    now = datetime.now()
    stamp = "_bak_" + "_".join(str(part) for part in (now.year, now.month, now.day,
                                                      now.hour, now.minute, now.second))

    # If file already exists, rename it
    if os.path.isfile(fname):

        # `splitext` only looks at the last path component, so dots in directory 
        # names (e.g., `../file` or `my.dir/file`) are not mistaken for an extension
        root, ext = os.path.splitext(fname)
        newname = root + stamp + ext
        print("WARNING: File "+fname+" already exists, renaming it to: "+newname+"!")
        os.rename(fname, newname)

    # If directory already exists, rename it
    elif os.path.isdir(fname):

        # Remove a trailing separator so that the suffix ends up in the directory name
        if fname.endswith(os.sep):
            fname = fname[:-1]
        newname = fname + stamp
        print("WARNING: Directory "+fname+" already exists, renaming it to: "+newname+"!")
        shutil.move(fname, newname)
    
##########################################################################################
def regexfind(arr, expr):
    """
    Find regular expression in a NumPy array

    Parameters
    ----------
    arr : NumPy 1darray
        Array (or Python list) of strings to search. Arrays with additional 
        singleton dimensions are flattened before searching. 
    expr : str
        Regular expression to search for in the components of `arr`. The 
        expression is matched against the **beginning** of each element 
        (the elements are tested using `re.match`). 

    Returns
    -------
    ind : NumPy 1darray
        Index array of elements in `arr` that match expression `expr`. If `expr` was not found
        anywhere in `arr` (or if `arr` is empty) an empty array is returned

    Raises
    ------
    TypeError
        If `arr` cannot be converted to a NumPy array or if `expr` is not a string
    ValueError
        If `arr` is not one-dimensional or contains non-string elements

    Examples
    --------
    Suppose the array `arr` is given by

    >>> arr
    array(['L_a', 'L_b', 'R_a', 'R_b'], 
      dtype='|S3')

    If we want to find all elements of `arr` starting with `l_` or `L_` we could use

    >>> regexfind(arr,"[Ll]_*")
    array([0, 1])

    See also
    --------
    None
    """

    # `unicode` only exists in Python 2, in Python 3 `str` covers all text
    try:
        string_types = (str, unicode)  # noqa: F821
    except NameError:
        string_types = (str,)

    # Sanity checks
    try:
        arr = np.array(arr)
    except Exception:
        raise TypeError("Input must be a NumPy array/Python list, not "+type(arr).__name__+"!")

    # Accept anything that has at most one non-singleton dimension and flatten it. 
    # Checking the shape directly (and not the shape of the squeezed array) keeps 
    # single-element inputs valid, which would otherwise collapse to 0-d
    if arr.ndim != 1:
        if arr.ndim == 0 or arr.size != max(arr.shape):
            raise ValueError("Input must be a NumPy 1darray or Python list!")
        arr = arr.reshape(-1)
    for el in arr:
        if not isinstance(el, string_types):
            raise ValueError("Every element in the input array has to be a string!")
    if not isinstance(expr, string_types):
        raise TypeError("Input expression has to be a string, not "+type(expr).__name__+"!")

    # Now do something: start by compiling the input expression
    regex = re.compile(expr)

    # Test every element (a plain comprehension also copes with empty input)
    hits = np.array([regex.match(el) is not None for el in arr], dtype=bool)

    # Get matching indices and return
    return np.flatnonzero(hits)