import h5py
import numpy as np
# Number of observations.
N_OBS = 542
# Number of model cells.
N_MODEL = 179171
[docs]def load_niklas(path):
    """ Load Niklas data.
    Parameters
    ----------
    path: string
        Path to the HDF5.
    Returns
    -------
    Dict[F, dsm, coords]
    """
    dataset = h5py.File(path, 'r')
    # Pre-check dimensions.
    assert(np.allclose(dataset['F_land/data'].shape[0] / (N_OBS * N_MODEL), 1.0))
    assert(np.allclose(dataset['d_land'].shape[1] - 1, N_OBS))
    # The forward operator has been flattened into a list,
    # so we have to rebuild it.
    F = np.reshape(dataset['F_land/data'], (N_OBS, N_MODEL), order = 'F')
    # Make contigous in memory.
    F = np.ascontiguousarray(F, dtype=np.float32)
    # Measurement vector.
    # It has one element too much compared to what F expects,
    # hence we remove the first element, since it is 0.
    # (Maybe was included as reference point.)
    d = np.array(dataset['d_land'], dtype=np.float32)[0, 1:]
    # Coordinates.
    xi = dataset['xi'][:, 0]
    yi = dataset['yi'][:, 0]
    zi = dataset['zi'][:, 0]
    # Have to subtract one to indices due to difference between matlab and python.
    ind = np.array(dataset['ind'], dtype=int) - 1
    # DSM
    # We have arrays of arrays, so we flatten to be one dimensional.
    dsm_x = np.ndarray.flatten(np.array(dataset['x'], dtype=np.float32))
    dsm_y = np.ndarray.flatten(np.array(dataset['y'], dtype=np.float32))
    dsm_z = np.array(dataset['z'], dtype=np.float32)
    # Build a dsm matrix.
    dsm = []
    for i in range(dsm_x.size):
        for j in range(dsm_y.size):
            dsm.append([dsm_x[i], dsm_y[j], dsm_z[i, j]])
    dsm = np.array(dsm, dtype=np.float32)
    # Build a coords matrix.
    coords = []
    for i in range(ind.shape[1]):
        # Read the indices in ind.
        ind_x = ind[2, i]
        ind_y = ind[1, i]
        ind_z = ind[0, i]
        # Get corresponding coords and add to list.
        coords.append([xi[ind_x], yi[ind_y], zi[ind_z]])
    # We put results in a numpy array for ease of use, it makes subsetting
    # easier.
    coords = np.array(coords, dtype=np.float32)
    # IMPORTANT.
    # We make the array Fortran contiguous. This means that column subsetting
    # will return contiguous data, i.e., when we select one of the coordinates
    # (say x) we will get the x-coordinates of all the cells as a contiguous
    # array, so we can loop faster.
    coords = np.asfortranarray(coords)
    # Extract the data points locations.
    data_x = dataset['long'][0, :]
    data_y = dataset['lat'][0, :]
    data_z = dataset['h_true'][0, :]
    # Produce a list of coordinate tuples from that.
    # TODO: This might not be the most elegant way to store the coordinates of
    # the data points. Look into it.
    data_coords = []
    for x, y, z in zip(data_x, data_y, data_z):
        data_coords.append((x, y, z))
    return {'F': F, 'd': d, 'dsm': dsm, 'coords': coords,
            'data_coords': data_coords}