import numpy as np
from numpy import newaxis

def f(a, b, c):
    """Evaluate ``a**b - c``.

    Only the ``**`` and ``-`` operators are used, so the function accepts
    plain numbers as well as NumPy arrays; with arrays the operations are
    applied element-wise and follow the usual broadcasting rules.
    """
    raised = a ** b
    return raised - c
    
def compute_ogrid(K, L, M):
    """Return the mean of ``f`` sampled on a ``K x L x M`` grid over [0, 1]^3.

    The sample points come from ``np.ogrid``, i.e. three "open" coordinate
    arrays that are only expanded to the full grid by broadcasting inside
    ``f``.
    """
    # An imaginary step ``n*1j`` requests ``n`` evenly spaced points with
    # both end points included.
    axes = tuple(slice(0, 1, n * 1j) for n in (K, L, M))
    grid_a, grid_b, grid_c = np.ogrid[axes]
    return f(grid_a, grid_b, grid_c).mean()

def compute_mgrid(K, L, M):
    """Return the mean of ``f`` sampled on a ``K x L x M`` grid over [0, 1]^3.

    The sample points come from ``np.mgrid``, which materialises all three
    coordinate arrays at the full grid shape before ``f`` is evaluated.
    """
    # An imaginary step ``n*1j`` requests ``n`` evenly spaced points with
    # both end points included.
    axes = tuple(slice(0, 1, n * 1j) for n in (K, L, M))
    grid_a, grid_b, grid_c = np.mgrid[axes]
    return f(grid_a, grid_b, grid_c).mean()

def compute_bcast(K, L, M):
    """Return the mean of ``f`` sampled on a ``K x L x M`` grid over [0, 1]^3.

    The open ``np.ogrid`` coordinate arrays are explicitly broadcast against
    each other with ``np.broadcast_arrays`` before being passed to ``f``.
    """
    # An imaginary step ``n*1j`` requests ``n`` evenly spaced points with
    # both end points included.
    axes = tuple(slice(0, 1, n * 1j) for n in (K, L, M))
    open_a, open_b, open_c = np.ogrid[axes]
    full_a, full_b, full_c = np.broadcast_arrays(open_a, open_b, open_c)
    return f(full_a, full_b, full_c).mean()

if __name__ == "__main__":
    # Each ``print`` call takes a single pre-formatted string so that the
    # script produces the same output on Python 2 and also runs on Python 3.
    print("Exact: %s" % (np.log(2) - 0.5))
    print("Approximation (24, 12, 6): %s" % compute_ogrid(24, 12, 6))
    print("Approximation (50, 50, 50): %s" % compute_ogrid(50, 50, 50))

    # Do some profiling
    #
    # Note: IPython's %timeit is way nicer than Python's own timeit module
    #
    import timeit

    repeats = 100
    benchmarks = (
        ("ogrid", compute_ogrid),
        ("mgrid", compute_mgrid),
        ("broadcast", compute_bcast),
    )
    for label, func in benchmarks:
        # Passing a callable avoids the ``import __main__`` setup string and
        # works regardless of how the script was launched.
        total_seconds = timeit.timeit(lambda: func(50, 50, 50),
                                      number=repeats)
        # Average time per call, converted from seconds to milliseconds.
        per_call_ms = total_seconds / repeats * 1000
        print("Time (%s 50,50,50) %s ms" % (label, per_call_ms))

    # The ogrid version is much faster than the mgrid and explicitly
    # broadcasted ones, and the explicitly broadcasted version is
    # slightly faster than mgrid.
    #
    # The reason is the following: the ogrid version does fewer computations.
    # In the expression
    #
    #     a**b - c
    #
    # the part ``a**b`` is computed first, and this broadcasts to an array of
    # shape ``(K, L, 1)``. In the subtraction step broadcasting occurs again,
    # and one gets an array of shape ``(K, L, M)``.
    #
    # In the other approaches, one immediately gets a ``(K, L, M)``
    # shape array out from ``a**b``, so one has to do ``M`` times more
    # exponentiation! This explains the speed difference.
    #
    # The speed difference between the explicitly broadcasted arrays
    # and the mgrid one (at least for Numpy 1.6) is then more
    # difficult to explain. One possibility is that it is due to
    # memory bandwidth issues. The explicitly broadcasted arrays are
    # "ghost arrays" and have therefore less data than the full arrays
    # generated by mgrid. So, less data needs to be moved from the
    # slow main memory to the CPU (see Francesc's talk on Thursday),
    # which makes things faster.
    #
    # Another possibility is that Numpy's internal access pattern
    # optimizations manage to deal more efficiently with data
    # containing 0-strides.
    #
