Source code for volcapy.loading

import h5py
import numpy as np

# Number of observations.
N_OBS = 542

# Number of model cells.
N_MODEL = 179171

[docs]def load_niklas(path): """ Load Niklas data. Parameters ---------- path: string Path to the HDF5. Returns ------- Dict[F, dsm, coords] """ dataset = h5py.File(path, 'r') # Pre-check dimensions. assert(np.allclose(dataset['F_land/data'].shape[0] / (N_OBS * N_MODEL), 1.0)) assert(np.allclose(dataset['d_land'].shape[1] - 1, N_OBS)) # The forward operator has been flattened into a list, # so we have to rebuild it. F = np.reshape(dataset['F_land/data'], (N_OBS, N_MODEL), order = 'F') # Make contigous in memory. F = np.ascontiguousarray(F, dtype=np.float32) # Measurement vector. # It has one element too much compared to what F expects, # hence we remove the first element, since it is 0. # (Maybe was included as reference point.) d = np.array(dataset['d_land'], dtype=np.float32)[0, 1:] # Coordinates. xi = dataset['xi'][:, 0] yi = dataset['yi'][:, 0] zi = dataset['zi'][:, 0] # Have to subtract one to indices due to difference between matlab and python. ind = np.array(dataset['ind'], dtype=int) - 1 # DSM # We have arrays of arrays, so we flatten to be one dimensional. dsm_x = np.ndarray.flatten(np.array(dataset['x'], dtype=np.float32)) dsm_y = np.ndarray.flatten(np.array(dataset['y'], dtype=np.float32)) dsm_z = np.array(dataset['z'], dtype=np.float32) # Build a dsm matrix. dsm = [] for i in range(dsm_x.size): for j in range(dsm_y.size): dsm.append([dsm_x[i], dsm_y[j], dsm_z[i, j]]) dsm = np.array(dsm, dtype=np.float32) # Build a coords matrix. coords = [] for i in range(ind.shape[1]): # Read the indices in ind. ind_x = ind[2, i] ind_y = ind[1, i] ind_z = ind[0, i] # Get corresponding coords and add to list. coords.append([xi[ind_x], yi[ind_y], zi[ind_z]]) # We put results in a numpy array for ease of use, it makes subsetting # easier. coords = np.array(coords, dtype=np.float32) # IMPORTANT. # We make the array Fortran contiguous. This means that column subsetting # will return contiguous data, i.e., when we select one of the coordinates # (say x) we will get the x-coordinates of all the cells as a contiguous # array, so we can loop faster. coords = np.asfortranarray(coords) # Extract the data points locations. data_x = dataset['long'][0, :] data_y = dataset['lat'][0, :] data_z = dataset['h_true'][0, :] # Produce a list of coordinate tuples from that. # TODO: This might not be the most elegant way to store the coordinates of # the data points. Look into it. data_coords = [] for x, y, z in zip(data_x, data_y, data_z): data_coords.append((x, y, z)) return {'F': F, 'd': d, 'dsm': dsm, 'coords': coords, 'data_coords': data_coords}