/*
 *  $Id: hdf5file.c 25394 2023-05-31 16:01:44Z yeti-dn $
 *  Copyright (C) 2020-2023 David Necas (Yeti), Petr Klapetek.
 *  E-mail: yeti@gwyddion.net, klapetek@gwyddion.net.
 *
 *  This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along with this program; if not, write to the
 *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
 * [FILE-MAGIC-USERGUIDE]
 * Asylum Research Ergo HDF5
 * .h5
 * Read
 **/

/**
 * [FILE-MAGIC-USERGUIDE]
 * Shilps Sciences Lucent HDF5
 * .h5
 * Read
 **/

/**
 * [FILE-MAGIC-MISSING]
 * Avoiding clash with a standard file format.
 **/

/*
 * HDF5 changes its APIs between versions incompatibly.  Forward compatibility is mostly preserved, but not
 * guaranteed.  To prevent breakage we need to know which specific version of the API we use and tell the library to
 * provide this one through compatibility macros.
 *
 * Therefore, this file must be compiled with -DH5_USE_18_API
 */

#include "config.h"
#include <stdlib.h>
#include <string.h>
#include <hdf5.h>
#include <hdf5_hl.h>
#include <libgwyddion/gwymacros.h>
#include <libgwyddion/gwymath.h>
#include <libgwyddion/gwyutils.h>
#include <libprocess/datafield.h>
#include <libgwymodule/gwymodule-file.h>
#include <app/gwyapp.h>
#include <app/gwymoduleutils-file.h>

#include "err.h"

#define MAGIC "\x89HDF\r\n\x1a\n"
#define MAGIC_SIZE (sizeof(MAGIC)-1)

#define EXTENSION ".h5"

typedef struct _GenericHDF5File GenericHDF5File;

/*
 * Optional format-specific attribute hook.
 *
 * It is invoked by process_attribute() for every attribute after the generic processing, with the same location id
 * and attribute name.  At that moment ghfile->path holds the full path of the attribute (object path, a slash and the
 * attribute name).
 */
typedef void (*AttrHandlerFunc)(GenericHDF5File *ghfile,
                                hid_t loc_id,
                                const char *attr_name);

/* Description of one Ergo channel.  The name is gathered during the file scan; the rest is filled when reading. */
typedef struct {
    /* Whitespace-stripped copy of the channel name, owned by the structure. */
    gchar *name;
    /* Lateral unit, parsed from the "DimUnits" attribute of the channel. */
    GwySIUnit *xyunit;
    /* Value unit, parsed from the "DataUnits" attribute of the channel. */
    GwySIUnit *zunit;
    /* Powers of 10 returned by the unit parser for the lateral and value units, respectively. */
    gint xypower10;
    gint zpower10;
    /* Content of the 2×2 "DimScaling" attribute; items 0 and 1 are the y range ends, items 2 and 3 the x range ends. */
    gdouble realcoords[4];
} ErgoChannel;

/* State shared by the generic HDF5 tree scan and the format-specific readers. */
struct _GenericHDF5File {
    /* Addresses (haddr_t) of objects on the path currently being scanned; used by scan_file() to detect loops. */
    GArray *addr;
    /* Path of the object or attribute currently being processed by the scan. */
    GString *path;
    /* Scratch buffer; holds the formatted attribute value during the scan and is reused for paths by the readers. */
    GString *buf;
    /* Textual values of all attributes that could be read, keyed by the full attribute path. */
    GwyContainer *meta;

    /* Generic gathering of some numeric values. */
    /* Integers collected by enumerate_indexed() for objects of type idwhat; presumably the numbers following
     * idprefix in the object path -- TODO confirm against enumerate_indexed(). */
    GArray *idlist;
    const gchar *idprefix;
    H5O_type_t idwhat;

    /* File type implementation specifics. */
    /* Optional hook run for each attribute; may be NULL. */
    AttrHandlerFunc attr_handler;
    /* Format-specific data, not owned by the structure (for Ergo it points to an ErgoFile). */
    gpointer impl;
};

/* Ergo-specific data attached to GenericHDF5File.impl. */
typedef struct {
    /* Array of ErgoChannel, one item per channel name found in /DataSetInfo/ChannelNames. */
    GArray *channels;
    /* Value of the "NumFrames" attribute of DataSetInfo. */
    gint nframes;
} ErgoFile;

static gboolean module_register(void);

/* Ergo */
static gint          ergo_detect       (const GwyFileDetectInfo *fileinfo,
                                        gboolean only_name);
static GwyContainer* ergo_load         (const gchar *filename,
                                        GwyRunType mode,
                                        GError **error);
static void          ergo_attr_handler (GenericHDF5File *ghfile,
                                        hid_t loc_id,
                                        const char *attr_name);
static GwyContainer* ergo_read_channels(hid_t file_id,
                                        GenericHDF5File *ghfile,
                                        GError **error);
static GwyDataField* ergo_read_image   (hid_t file_id,
                                        guint r,
                                        ErgoChannel *c,
                                        gint frameid,
                                        const gint *xyres,
                                        GString *str,
                                        GError **error);

/* Lucent */
static gint          shilps_detect       (const GwyFileDetectInfo *fileinfo,
                                          gboolean only_name);
static GwyContainer* shilps_load         (const gchar *filename,
                                          GwyRunType mode,
                                          GError **error);
static GwyContainer* shilps_read_channels(hid_t file_id,
                                          GenericHDF5File *ghfile,
                                          GError **error);
static GwyDataField* shilps_read_image   (hid_t file_id,
                                          gint id,
                                          gint xres,
                                          gint yres,
                                          gdouble xreal,
                                          gdouble yreal,
                                          GString *str,
                                          GError **error);

/* Generic */
static hid_t    quick_check_hdf5         (const GwyFileDetectInfo *fileinfo,
                                          gboolean only_name);
static void     generic_hdf5_init        (GenericHDF5File *ghfile);
static void     generic_hdf5_free        (GenericHDF5File *ghfile);
static hid_t    make_string_type_for_attr(hid_t attr_type);
static herr_t   scan_file                (hid_t loc_id,
                                          const char *name,
                                          const H5L_info_t *info,
                                          void *user_data);
static herr_t   process_attribute        (hid_t loc_id,
                                          const char *attr_name,
                                          const H5A_info_t *ainfo,
                                          void *user_data);
static gboolean get_ints_attr            (hid_t file_id,
                                          const gchar *obj_path,
                                          const gchar *attr_name,
                                          gint expected_rank,
                                          const gint *expected_dims,
                                          gint *v,
                                          GError **error);
static gboolean get_int_attr             (hid_t file_id,
                                          const gchar *obj_path,
                                          const gchar *attr_name,
                                          gint *v,
                                          GError **error);
static gboolean get_floats_attr          (hid_t file_id,
                                          const gchar *obj_path,
                                          const gchar *attr_name,
                                          gint expected_rank,
                                          const gint *expected_dims,
                                          gdouble *v,
                                          GError **error);
static gboolean get_float_attr           (hid_t file_id,
                                          const gchar *obj_path,
                                          const gchar *attr_name,
                                          gdouble *v,
                                          GError **error);
static gboolean get_strs_attr            (hid_t file_id,
                                          const gchar *obj_path,
                                          const gchar *attr_name,
                                          gint expected_rank,
                                          const gint *expected_dims,
                                          gchar **v,
                                          GError **error);
static gboolean get_str_attr             (hid_t file_id,
                                          const gchar *obj_path,
                                          const gchar *attr_name,
                                          gchar **v,
                                          GError **error);
static gboolean enumerate_indexed        (GString *path,
                                          const gchar *prefix,
                                          GArray *array);
static void     err_HDF5                 (GError **error,
                                          const gchar *where,
                                          glong code);

/* Module description (registration function, blurb, author, version, copyright holders and year) which is passed to
 * GWY_MODULE_QUERY2() below. */
static GwyModuleInfo module_info = {
    GWY_MODULE_ABI_VERSION,
    &module_register,
    N_("Imports files based on Hierarchical Data Format (HDF), version 5."),
    "Yeti <yeti@gwyddion.net>",
    "1.1",
    "David Nečas (Yeti) & Petr Klapetek",
    "2020",
};

GWY_MODULE_QUERY2(module_info, hdf5file)

/*
 * Module registration function.
 *
 * Initialises the HDF5 library and, when that succeeds, registers the two supported file types (detection and loading
 * only).  Returns FALSE, after emitting a warning, when HDF5 cannot be initialised.
 */
static gboolean
module_register(void)
{
    if (H5open() >= 0) {
        /* Outside debugging builds clear the automatic HDF5 error handler; failures are reported through GError. */
#ifndef DEBUG
        H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
#endif

        gwy_file_func_register("ergofile",
                               N_("Asylum Research Ergo HDF5 files (.h5)"),
                               (GwyFileDetectFunc)&ergo_detect,
                               (GwyFileLoadFunc)&ergo_load,
                               NULL,
                               NULL);

        gwy_file_func_register("shilpsfile",
                               N_("Shilps Sciences Lucent HDF5 files (.h5)"),
                               (GwyFileDetectFunc)&shilps_detect,
                               (GwyFileLoadFunc)&shilps_load,
                               NULL,
                               NULL);

        return TRUE;
    }

    g_warning("H5open() failed.");
    return FALSE;
}

/*******************************************************************************************************************
 *
 * Asylum Research Ergo
 *
 *******************************************************************************************************************/

/*
 * Detection function for Asylum Research Ergo files.
 *
 * A file is recognised (score 100) when it is an HDF5 file whose root object carries a string attribute "ARFormat"
 * and a three-item integer attribute "ARVersion".  Name-only detection always gives 0.
 */
static gint
ergo_detect(const GwyFileDetectInfo *fileinfo,
            gboolean only_name)
{
    gint arversion[3], nversion = 3;
    gchar *arformat = NULL;
    gboolean is_ergo = FALSE;
    hid_t file_id = quick_check_hdf5(fileinfo, only_name);

    if (file_id < 0)
        return 0;

    /* The attribute values are not examined, only their presence and type. */
    if (get_str_attr(file_id, ".", "ARFormat", &arformat, NULL)) {
        is_ergo = get_ints_attr(file_id, ".", "ARVersion", 1, &nversion, arversion, NULL);
        H5free_memory(arformat);
    }

    H5Fclose(file_id);

    return is_ergo ? 100 : 0;
}

/*
 * Loads an Asylum Research Ergo file.
 *
 * The entire HDF5 tree is scanned first: attributes are gathered as metadata, channel names are collected by
 * ergo_attr_handler() and the numbers of "/DataSet/Resolution N" groups are enumerated.  Then the images are read
 * for all resolutions, channels and frames.
 *
 * Returns a newly created container, or NULL with @error set when the file cannot be opened or read.
 */
static GwyContainer*
ergo_load(const gchar *filename,
          G_GNUC_UNUSED GwyRunType mode,
          GError **error)
{
    GwyContainer *container = NULL;
    GenericHDF5File ghfile;
    ErgoFile efile;
    hid_t file_id;
    herr_t status;
    H5O_info_t infobuf;
    guint i;

    /* Do not continue with an invalid handle; everything below would fail and the error would be misleading. */
    if ((file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT)) < 0) {
        err_HDF5(error, "H5Fopen", file_id);
        return NULL;
    }
    gwy_debug("file_id %d", (gint)file_id);

    /* The root object address seeds the loop detection; it must not be used when it could not be obtained. */
    if ((status = H5Oget_info(file_id, &infobuf)) < 0) {
        err_HDF5(error, "H5Oget_info", status);
        H5Fclose(file_id);
        return NULL;
    }
    gwy_debug("status %d", status);

    generic_hdf5_init(&ghfile);
    ghfile.impl = &efile;
    ghfile.attr_handler = ergo_attr_handler;
    ghfile.idprefix = "/DataSet/Resolution ";
    ghfile.idwhat = H5O_TYPE_GROUP;
    g_array_append_val(ghfile.addr, infobuf.addr);

    gwy_clear(&efile, 1);
    efile.channels = g_array_new(FALSE, FALSE, sizeof(ErgoChannel));

    /* A failed or incomplete scan is not fatal by itself; missing pieces are reported when they are needed. */
    status = H5Literate(file_id, H5_INDEX_NAME, H5_ITER_NATIVE, NULL, scan_file, &ghfile);
    gwy_debug("status %d", status);
    H5Aiterate2(file_id, H5_INDEX_NAME, H5_ITER_NATIVE, NULL, process_attribute, &ghfile);

    if (get_int_attr(file_id, "DataSetInfo", "NumFrames", &efile.nframes, error)) {
        gwy_debug("nframes %d", efile.nframes);
        container = ergo_read_channels(file_id, &ghfile, error);
    }

    status = H5Fclose(file_id);
    gwy_debug("status %d", status);

    /* Channel names and units are owned by the channel array items. */
    for (i = 0; i < efile.channels->len; i++) {
        ErgoChannel *c = &g_array_index(efile.channels, ErgoChannel, i);
        g_free(c->name);
        GWY_OBJECT_UNREF(c->xyunit);
        GWY_OBJECT_UNREF(c->zunit);
    }
    g_array_free(efile.channels, TRUE);
    generic_hdf5_free(&ghfile);

    return container;
}

/*
 * Reads all images of an Ergo file, i.e. all frames of all channels in all resolutions found by the scan.
 *
 * For each channel the units and physical ranges are taken from attributes of
 * "DataSetInfo/Global/Channels/NAME/ImageDims" and the pixel dimensions from its "Resolution N" subgroup.  Each image
 * gets the channel name as the title and a copy of the gathered metadata.
 *
 * Returns a new container, or NULL with @error set when anything cannot be read or there is no image at all.
 */
static GwyContainer*
ergo_read_channels(hid_t file_id, GenericHDF5File *ghfile, GError **error)
{
    ErgoFile *efile = (ErgoFile*)ghfile->impl;
    GwyContainer *meta, *container = NULL;
    GArray *channels = efile->channels;
    GArray *resolutions = ghfile->idlist;
    GString *str = ghfile->buf;
    GwyDataField *dfield;
    gint expected2[2] = { 2, 2 }, xyres[2];
    gchar *s, *s2[2];
    gint frameid, id = 0;
    guint i, ri, r;

    for (ri = 0; ri < resolutions->len; ri++) {
        r = g_array_index(resolutions, guint, ri);
        for (i = 0; i < channels->len; i++) {
            ErgoChannel *c = &g_array_index(channels, ErgoChannel, i);

            g_string_printf(str, "DataSetInfo/Global/Channels/%s/ImageDims", c->name);

            if (!get_str_attr(file_id, str->str, "DataUnits", &s, error))
                goto fail;
            gwy_debug("zunit of %s is %s", c->name, s);
            /* The channel is visited once per resolution; release the unit created in the previous pass. */
            GWY_OBJECT_UNREF(c->zunit);
            c->zunit = gwy_si_unit_new_parse(s, &c->zpower10);
            H5free_memory(s);

            if (!get_strs_attr(file_id, str->str, "DimUnits", 1, expected2, s2, error))
                goto fail;
            gwy_debug("xyunits of %s are %s and %s", c->name, s2[0], s2[1]);
            if (!gwy_strequal(s2[0], s2[1]))
                g_warning("X and Y units differ, using X");
            /* Item 1 is x, see the note on dimension order below. */
            GWY_OBJECT_UNREF(c->xyunit);
            c->xyunit = gwy_si_unit_new_parse(s2[1], &c->xypower10);
            H5free_memory(s2[0]);
            H5free_memory(s2[1]);

            /* NB: In all dimensions y is first, then x. */
            if (!get_floats_attr(file_id, str->str, "DimScaling", 2, expected2, c->realcoords, error))
                goto fail;
            gwy_debug("dims of %s are [%g, %g], [%g, %g]",
                      c->name, c->realcoords[2], c->realcoords[3], c->realcoords[0], c->realcoords[1]);

            g_string_append_printf(str, "/Resolution %u", r);
            if (!get_ints_attr(file_id, str->str, "DimExtents", 1, expected2, xyres, error))
                goto fail;
            gwy_debug("resid %u res %dx%d", r, xyres[1], xyres[0]);
            /* The resolutions come from the file; refuse nonsense before it is used to create data fields. */
            if (err_DIMENSION(error, xyres[0]) || err_DIMENSION(error, xyres[1]))
                goto fail;

            for (frameid = 0; frameid < efile->nframes; frameid++) {
                /* NB: This overwrites str with the dataset path. */
                if (!(dfield = ergo_read_image(file_id, r, c, frameid, xyres, str, error)))
                    goto fail;

                if (!container)
                    container = gwy_container_new();

                gwy_container_set_object(container, gwy_app_get_data_key_for_id(id), dfield);
                g_object_unref(dfield);

                gwy_container_set_const_string(container, gwy_app_get_data_title_key_for_id(id), c->name);

                meta = gwy_container_duplicate(ghfile->meta);
                gwy_container_set_object(container, gwy_app_get_data_meta_key_for_id(id), meta);
                g_object_unref(meta);

                id++;
            }
        }
    }

    if (container)
        return container;

    err_NO_DATA(error);

fail:
    GWY_OBJECT_UNREF(container);
    return NULL;
}

/*
 * Reads one Ergo image, i.e. dataset "DataSet/Resolution R/Frame F/CHANNEL/Image".
 *
 * @xyres holds the pixel dimensions with y first, then x.  Physical dimensions, offsets and units are taken from
 * channel @c.  String @str is overwritten with the dataset path.
 *
 * Returns a new data field, or NULL with @error set when the dataset cannot be opened, has an unexpected number of
 * items or cannot be read.
 */
static GwyDataField*
ergo_read_image(hid_t file_id,
                guint r, ErgoChannel *c, gint frameid, const gint *xyres,
                GString *str, GError **error)
{
    GwyDataField *dfield;
    hid_t dataset, space;
    gdouble *data;
    gint nitems;
    gdouble q, xreal, yreal, xoff, yoff;
    herr_t status;

    g_string_printf(str, "DataSet/Resolution %u/Frame %d/%s/Image", r, frameid, c->name);
    if ((dataset = H5Dopen(file_id, str->str, H5P_DEFAULT)) < 0) {
        err_HDF5(error, "H5Dopen", dataset);
        return NULL;
    }
    gwy_debug("dataset %s is %d", str->str, (gint)dataset);

    if ((space = H5Dget_space(dataset)) < 0) {
        err_HDF5(error, "H5Dget_space", space);
        H5Dclose(dataset);
        return NULL;
    }
    nitems = H5Sget_simple_extent_npoints(space);
    gwy_debug("dataset space is %d with %d items", (gint)space, nitems);
    if (nitems != xyres[0]*xyres[1]) {
        g_set_error(error, GWY_MODULE_FILE_ERROR, GWY_MODULE_FILE_ERROR_DATA,
                    _("Dataset %s has %d items, which does not match image resolution %d×%d."),
                    str->str, nitems, xyres[0], xyres[1]);
        H5Sclose(space);
        H5Dclose(dataset);
        return NULL;
    }

    q = pow10(c->xypower10);

    /* NB: In all dimensions y is first, then x. */
    xreal = c->realcoords[3] - c->realcoords[2];
    sanitise_real_size(&xreal, "x size");
    xoff = MIN(c->realcoords[2], c->realcoords[3]);

    yreal = c->realcoords[1] - c->realcoords[0];
    sanitise_real_size(&yreal, "y size");
    yoff = MIN(c->realcoords[0], c->realcoords[1]);

    dfield = gwy_data_field_new(xyres[1], xyres[0], q*xreal, q*yreal, FALSE);
    gwy_data_field_set_xoffset(dfield, q*xoff);
    gwy_data_field_set_yoffset(dfield, q*yoff);
    gwy_si_unit_assign(gwy_data_field_get_si_unit_xy(dfield), c->xyunit);
    gwy_si_unit_assign(gwy_data_field_get_si_unit_z(dfield), c->zunit);

    /* The memory type must describe the buffer, which is native doubles.  The library converts from whatever the
     * file contains.  A fixed little-endian memory type would produce byte-swapped values on big-endian machines. */
    data = gwy_data_field_get_data(dfield);
    status = H5Dread(dataset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data);

    H5Sclose(space);
    H5Dclose(dataset);

    /* Check the status first.  The field is created uninitialised so there is nothing to process upon failure. */
    if (status < 0) {
        err_HDF5(error, "H5Dread", status);
        g_object_unref(dfield);
        return NULL;
    }

    gwy_data_field_invert(dfield, TRUE, FALSE, FALSE);
    if (c->zpower10)
        gwy_data_field_multiply(dfield, pow10(c->zpower10));

    return dfield;
}

/* Appends a new zero-filled channel to @channels, giving it a whitespace-stripped copy of @name.  The copy is owned
 * by the array item. */
static void
append_channel_name(GArray *channels, const gchar *name)
{
    ErgoChannel channel;

    gwy_debug("found channel %s", name);
    gwy_clear(&channel, 1);
    channel.name = g_strstrip(g_strdup(name));
    g_array_append_val(channels, channel);
}

/*
 * XXX: Handle /DataSetInfo/ChannelNames which do not have unique paths and we just build them during the scan.
 *
 * Attribute hook for Ergo files.  It reacts only when the current path is exactly "/DataSetInfo/ChannelNames" and
 * the attribute holds variable-length strings.  Each string is then added as a channel to the ErgoFile in
 * ghfile->impl.  All other attributes, and attributes which cannot be opened or read, are silently ignored.
 */
static void
ergo_attr_handler(GenericHDF5File *ghfile, hid_t loc_id, const char *attr_name)
{
    ErgoFile *efile = (ErgoFile*)ghfile->impl;
    hid_t attr, attr_type = -1, space = -1, str_type;
    gchar **names;
    gint nitems, i;

    if (!gwy_strequal(ghfile->path->str, "/DataSetInfo/ChannelNames"))
        return;

    gwy_debug("handling /DataSetInfo/ChannelNames");
    if ((attr = H5Aopen(loc_id, attr_name, H5P_DEFAULT)) < 0)
        return;
    if ((attr_type = H5Aget_type(attr)) < 0 || (space = H5Aget_space(attr)) < 0)
        goto finish;

    /* Only variable-length strings are handled.  A negative H5Tis_variable_str() result means failure. */
    if (H5Tget_class(attr_type) != H5T_STRING || H5Tis_variable_str(attr_type) <= 0)
        goto finish;

    nitems = H5Sget_simple_extent_npoints(space);
    if (nitems <= 0)
        goto finish;

    /* A single string is just an array with one item.  Zero-fill the array so that unset items are NULL. */
    names = g_new0(gchar*, nitems);
    if ((str_type = make_string_type_for_attr(attr_type)) >= 0) {
        if (H5Aread(attr, str_type, names) >= 0) {
            for (i = 0; i < nitems; i++) {
                /* Skip missing strings; a NULL name cannot identify a channel. */
                if (!names[i])
                    continue;
                append_channel_name(efile->channels, names[i]);
                H5free_memory(names[i]);
            }
        }
        H5Tclose(str_type);
    }
    g_free(names);

finish:
    if (space >= 0)
        H5Sclose(space);
    if (attr_type >= 0)
        H5Tclose(attr_type);
    H5Aclose(attr);
}

/*******************************************************************************************************************
 *
 * Shilps Sciences Lucent
 *
 *******************************************************************************************************************/

/*
 * Detection function for Shilps Sciences Lucent files.
 *
 * A file is recognised (score 100) when it is an HDF5 file whose root object has string attribute "Company" equal to
 * "Shilps Sciences".  Name-only detection always gives 0.
 */
static gint
shilps_detect(const GwyFileDetectInfo *fileinfo,
              gboolean only_name)
{
    gboolean is_shilps = FALSE;
    gchar *company;
    hid_t file_id = quick_check_hdf5(fileinfo, only_name);

    if (file_id < 0)
        return 0;

    if (get_str_attr(file_id, ".", "Company", &company, NULL)) {
        is_shilps = gwy_strequal(company, "Shilps Sciences");
        H5free_memory(company);
    }

    H5Fclose(file_id);

    return is_shilps ? 100 : 0;
}

/*
 * Loads a Shilps Sciences Lucent file.
 *
 * The entire HDF5 tree is scanned first: attributes are gathered as metadata and the numbers of
 * "/Session/Scan/ImageN" datasets are enumerated.  Then all the enumerated images are read.
 *
 * Returns a newly created container, or NULL with @error set when the file cannot be opened or read, or when it
 * contains no images.
 */
static GwyContainer*
shilps_load(const gchar *filename,
            G_GNUC_UNUSED GwyRunType mode,
            GError **error)
{
    GwyContainer *container = NULL;
    GenericHDF5File ghfile;
    hid_t file_id;
    herr_t status;
    H5O_info_t infobuf;

    /* Do not continue with an invalid handle; everything below would fail and no error would be reported. */
    if ((file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT)) < 0) {
        err_HDF5(error, "H5Fopen", file_id);
        return NULL;
    }
    gwy_debug("file_id %d", (gint)file_id);

    /* The root object address seeds the loop detection; it must not be used when it could not be obtained. */
    if ((status = H5Oget_info(file_id, &infobuf)) < 0) {
        err_HDF5(error, "H5Oget_info", status);
        H5Fclose(file_id);
        return NULL;
    }
    gwy_debug("status %d", status);

    generic_hdf5_init(&ghfile);
    ghfile.idprefix = "/Session/Scan/Image";
    ghfile.idwhat = H5O_TYPE_DATASET;
    g_array_append_val(ghfile.addr, infobuf.addr);

    status = H5Literate(file_id, H5_INDEX_NAME, H5_ITER_NATIVE, NULL, scan_file, &ghfile);
    gwy_debug("status %d", status);
    H5Aiterate2(file_id, H5_INDEX_NAME, H5_ITER_NATIVE, NULL, process_attribute, &ghfile);

    if (ghfile.idlist->len) {
        gwy_debug("nimages %u", ghfile.idlist->len);
        container = shilps_read_channels(file_id, &ghfile, error);
    }
    else {
        /* Returning NULL must always be accompanied by an error. */
        err_NO_DATA(error);
    }

    status = H5Fclose(file_id);
    gwy_debug("status %d", status);
    generic_hdf5_free(&ghfile);

    return container;
}

/*
 * Reads all images enumerated in ghfile->idlist from a Lucent file.
 *
 * Pixel and physical dimensions are common to all images and come from attributes "X No", "Y No", "X Range" and
 * "Y Range" of group "Session/Scan".  Each image is titled "CHANNEL (SCAN CYCLE)", or just "CHANNEL", according to
 * which dataset attributes exist, and gets a copy of the gathered metadata.
 *
 * Returns a new container, or NULL with @error set upon failure.
 */
static GwyContainer*
shilps_read_channels(hid_t file_id, GenericHDF5File *ghfile, GError **error)
{
    static const gchar scan_group[] = "Session/Scan";
    GString *pathbuf = ghfile->buf;
    GArray *image_ids = ghfile->idlist;
    GwyContainer *data, *meta;
    GwyDataField *field;
    gchar *channel, *cycle, *title;
    gdouble xreal, yreal;
    gint xres, yres;
    guint k;

    if (!get_int_attr(file_id, scan_group, "X No", &xres, error))
        return NULL;
    if (!get_int_attr(file_id, scan_group, "Y No", &yres, error))
        return NULL;
    if (!get_float_attr(file_id, scan_group, "X Range", &xreal, error))
        return NULL;
    if (!get_float_attr(file_id, scan_group, "Y Range", &yreal, error))
        return NULL;

    gwy_debug("xres %d, yres %d", xres, yres);
    if (err_DIMENSION(error, xres))
        return NULL;
    if (err_DIMENSION(error, yres))
        return NULL;

    gwy_debug("xreal %g, yreal %g", xreal, yreal);
    sanitise_real_size(&xreal, "x size");
    sanitise_real_size(&yreal, "y size");
    /* The dimensions seem to be in microns. */
    xreal *= 1e-6;
    yreal *= 1e-6;

    data = gwy_container_new();
    for (k = 0; k < image_ids->len; k++) {
        field = shilps_read_image(file_id, g_array_index(image_ids, gint, k),
                                  xres, yres, xreal, yreal, pathbuf, error);
        if (!field) {
            g_object_unref(data);
            return NULL;
        }

        gwy_container_set_object(data, gwy_app_get_data_key_for_id(k), field);
        g_object_unref(field);

        /* shilps_read_image() fills pathbuf->str with the correct prefix. */
        if (!get_str_attr(file_id, pathbuf->str, "Channel", &channel, NULL))
            gwy_app_channel_title_fall_back(data, k);
        else {
            g_strstrip(channel);
            if (!get_str_attr(file_id, pathbuf->str, "Scan cycle", &cycle, NULL))
                gwy_container_set_const_string(data, gwy_app_get_data_title_key_for_id(k), channel);
            else {
                g_strstrip(cycle);
                title = g_strdup_printf("%s (%s)", channel, cycle);
                H5free_memory(cycle);
                gwy_container_set_const_string(data, gwy_app_get_data_title_key_for_id(k), title);
                g_free(title);
            }
            H5free_memory(channel);
        }

        meta = gwy_container_duplicate(ghfile->meta);
        gwy_container_set_object(data, gwy_app_get_data_meta_key_for_id(k), meta);
        g_object_unref(meta);
    }

    /* The container exists at this point even when the image list is empty. */
    return data;
}

/*
 * Reads one Lucent image, i.e. dataset "Session/Scan/ImageID".
 *
 * The pixel and physical dimensions are given by the caller; lateral units are metres.  The value unit is taken from
 * the optional dataset attribute "Units".  String @str is overwritten with the dataset path, which the caller uses
 * for reading further attributes.
 *
 * Returns a new data field, or NULL with @error set when the dataset cannot be opened, has an unexpected number of
 * items or cannot be read.
 */
static GwyDataField*
shilps_read_image(hid_t file_id, gint id,
                  gint xres, gint yres, gdouble xreal, gdouble yreal,
                  GString *str, GError **error)
{
    GwyDataField *dfield = NULL;
    hid_t dataset, space;
    gdouble *data;
    gfloat *fdata;
    gint i, nitems, power10 = 0;
    gchar *zunitstr;
    gdouble q;
    herr_t status = -1;

    g_string_printf(str, "Session/Scan/Image%d", id);
    if ((dataset = H5Dopen(file_id, str->str, H5P_DEFAULT)) < 0) {
        err_HDF5(error, "H5Dopen", dataset);
        return NULL;
    }
    gwy_debug("dataset %s is %d", str->str, (gint)dataset);

    if ((space = H5Dget_space(dataset)) < 0) {
        err_HDF5(error, "H5Dget_space", space);
        H5Dclose(dataset);
        return NULL;
    }
    nitems = H5Sget_simple_extent_npoints(space);
    gwy_debug("dataset space is %d with %d items", (gint)space, nitems);
    if (nitems != xres*yres) {
        g_set_error(error, GWY_MODULE_FILE_ERROR, GWY_MODULE_FILE_ERROR_DATA,
                    _("Dataset %s has %d items, which does not match image resolution %d×%d."),
                    str->str, nitems, xres, yres);
        H5Sclose(space);
        H5Dclose(dataset);
        return NULL;
    }

    /* The unit attribute is optional; a NULL string is passed to the unit parser when it is missing. */
    if (!get_str_attr(file_id, str->str, "Units", &zunitstr, NULL))
        zunitstr = NULL;

    dfield = gwy_data_field_new(xres, yres, xreal, yreal, TRUE);
    gwy_si_unit_set_from_string(gwy_data_field_get_si_unit_xy(dfield), "m");
    gwy_si_unit_set_from_string_parse(gwy_data_field_get_si_unit_z(dfield), zunitstr, &power10);
    H5free_memory(zunitstr);

    data = gwy_data_field_get_data(dfield);
    /* NOTE(review): the buffer is twice as large as nitems floats; kept as is -- confirm whether it is intentional. */
    fdata = g_new(gfloat, 2*nitems);
    /* The memory type must describe the buffer, which is native floats.  The library converts from whatever the file
     * contains.  A fixed little-endian memory type would produce byte-swapped values on big-endian machines. */
    status = H5Dread(dataset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, fdata);

    H5Sclose(space);
    H5Dclose(dataset);

    if (status < 0) {
        err_HDF5(error, "H5Dread", status);
        GWY_OBJECT_UNREF(dfield);
    }
    else {
        /* Convert to doubles, applying the power of 10 from the unit prefix. */
        q = pow10(power10);
        for (i = 0; i < nitems; i++)
            data[i] = q*fdata[i];
    }
    g_free(fdata);

    return dfield;
}

/*******************************************************************************************************************
 *
 * More or less general HDF5 utility functions
 *
 *******************************************************************************************************************/

/*
 * Common first step of detection functions.
 *
 * Returns a negative value for name-only detection and for files whose head does not start with the HDF5 signature.
 * Otherwise it returns the result of opening the file read-only with H5Fopen(), which the caller has to close when
 * it is a valid identifier.
 */
static hid_t
quick_check_hdf5(const GwyFileDetectInfo *fileinfo,
                 gboolean only_name)
{
    gboolean has_magic;

    if (only_name)
        return -1;

    has_magic = (fileinfo->buffer_len > MAGIC_SIZE && memcmp(fileinfo->head, MAGIC, MAGIC_SIZE) == 0);
    if (!has_magic)
        return -1;

    return H5Fopen(fileinfo->name, H5F_ACC_RDONLY, H5P_DEFAULT);
}

/* Zero-fills @ghfile and creates its empty arrays, strings and metadata container.  The format-specific fields
 * (idprefix, idwhat, attr_handler, impl) are left cleared for the caller to set. */
static void
generic_hdf5_init(GenericHDF5File *ghfile)
{
    gwy_clear(ghfile, 1);

    ghfile->addr = g_array_new(FALSE, FALSE, sizeof(haddr_t));
    ghfile->idlist = g_array_new(FALSE, FALSE, sizeof(gint));

    ghfile->path = g_string_new(NULL);
    ghfile->buf = g_string_new(NULL);

    ghfile->meta = gwy_container_new();
}

/* Releases everything created by generic_hdf5_init().  The structure itself and the format-specific data in impl are
 * not freed. */
static void
generic_hdf5_free(GenericHDF5File *ghfile)
{
    GWY_OBJECT_UNREF(ghfile->meta);

    g_string_free(ghfile->buf, TRUE);
    g_string_free(ghfile->path, TRUE);

    g_array_free(ghfile->idlist, TRUE);
    g_array_free(ghfile->addr, TRUE);
}

/*
 * Link iteration callback which recursively walks the file.
 *
 * NB: loc_id is ‘parent’ location and name is particular item within it.
 *
 * The current path and the list of object addresses on it are kept in the GenericHDF5File passed as @user_data; both
 * are extended for the duration of the call and restored before a normal return.  Groups are descended into, objects
 * of the requested type are passed to enumerate_indexed() and attributes of every object go to process_attribute().
 *
 * Returns a negative value, which stops the iteration, when object information cannot be obtained or the object is
 * already on the current path (a loop).  Otherwise it returns the result of the nested iteration, or zero for
 * objects which are not groups.
 */
static herr_t
scan_file(hid_t loc_id,
          const char *name,
          G_GNUC_UNUSED const H5L_info_t *info,
          void *user_data)
{
    GenericHDF5File *ghfile = (GenericHDF5File*)user_data;
    GString *curpath = ghfile->path;
    GArray *visited = ghfile->addr;
    const guint parent_len = curpath->len;
    H5O_info_t oinfo;
    herr_t err, result = 0;
    guint k;

    if ((err = H5Oget_info_by_name(loc_id, name, &oinfo, H5P_DEFAULT)) < 0)
        return err;

    /* Detect loops. */
    k = 0;
    while (k < visited->len) {
        if (g_array_index(visited, haddr_t, k) == oinfo.addr)
            return -1;
        k++;
    }

    /* Enter the object. */
    g_array_append_val(visited, oinfo.addr);
    g_string_append_c(curpath, '/');
    g_string_append(curpath, name);
    gwy_debug("path %s", curpath->str);

    switch (oinfo.type) {
        case H5O_TYPE_GROUP:
        result = H5Literate_by_name(loc_id, name, H5_INDEX_NAME, H5_ITER_NATIVE,
                                    NULL, scan_file, user_data, H5P_DEFAULT);
        break;

        /* Nothing to do for other object types. */
        case H5O_TYPE_DATASET:
        case H5O_TYPE_NAMED_DATATYPE:
        break;

        default:
        gwy_debug("unknown type %d", oinfo.type);
        break;
    }

    if (ghfile->idprefix && oinfo.type == ghfile->idwhat)
        enumerate_indexed(curpath, ghfile->idprefix, ghfile->idlist);

    if (oinfo.num_attrs > 0) {
        hid_t this_id = H5Oopen(loc_id, name, H5P_DEFAULT);

        H5Aiterate2(this_id, H5_INDEX_NAME, H5_ITER_NATIVE, NULL, process_attribute, user_data);
        H5Oclose(this_id);
    }

    /* Leave the object. */
    g_string_truncate(curpath, parent_len);
    g_array_set_size(visited, visited->len-1);

    return result;
}

/*
 * Attribute iteration callback which converts one attribute to text and stores it as metadata.
 *
 * Integer, floating point and variable-length string attributes are handled, either single values or arrays whose
 * items are joined with "; ".  The text is stored to ghfile->meta under the key formed by the current path, a slash
 * and the attribute name.  Attributes of other kinds only produce a warning.  The format-specific attribute handler,
 * if any, is then run with ghfile->path set to the full attribute path.
 *
 * Always returns zero so that a troublesome attribute does not stop the iteration.
 */
static herr_t
process_attribute(hid_t loc_id,
                  const char *attr_name,
                  G_GNUC_UNUSED const H5A_info_t *ainfo,
                  void *user_data)
{
    GenericHDF5File *ghfile = (GenericHDF5File*)user_data;
    GString *path = ghfile->path, *buf = ghfile->buf;
    G_GNUC_UNUSED H5T_cset_t cset = H5T_CSET_ERROR;
    guint len = path->len;
    hid_t attr, attr_type, str_type, space;
    gboolean is_vlenstr = FALSE;
    H5T_class_t type_class;
    gint i, nitems;
    herr_t status = -1;

    /* Nothing can be done with an attribute which cannot even be opened; skip it and continue with the next one. */
    if ((attr = H5Aopen(loc_id, attr_name, H5P_DEFAULT)) < 0) {
        g_warning("Cannot open attribute %s", attr_name);
        return 0;
    }
    attr_type = H5Aget_type(attr);
    space = H5Aget_space(attr);
    nitems = H5Sget_simple_extent_npoints(space);
    type_class = H5Tget_class(attr_type);
    if (type_class == H5T_STRING) {
        /* A negative value means failure, not a variable-length string. */
        is_vlenstr = (H5Tis_variable_str(attr_type) > 0);
        cset = H5Tget_cset(attr_type);
    }

    gwy_debug("attr %s, type class %d (is_vlenstr: %d, cset: %d)", attr_name, type_class, is_vlenstr, cset);
    g_string_append_c(path, '/');
    g_string_append(path, attr_name);
    /* Try to read all attribute types used by Ergo; there are just a few. */
    if (type_class == H5T_INTEGER) {
        if (nitems == 1) {
            gint v;
            gwy_debug("integer");
            if ((status = H5Aread(attr, H5T_NATIVE_INT, &v)) >= 0)
                g_string_printf(buf, "%d", v);
        }
        else if (nitems > 0) {
            gint *v = g_new(gint, nitems);
            gwy_debug("integer array");
            if ((status = H5Aread(attr, H5T_NATIVE_INT, v)) >= 0) {
                g_string_printf(buf, "%d", v[0]);
                for (i = 1; i < nitems; i++)
                    g_string_append_printf(buf, "; %d", v[i]);
            }
            g_free(v);
        }
    }
    else if (type_class == H5T_FLOAT) {
        if (nitems == 1) {
            gdouble v;
            gwy_debug("float");
            if ((status = H5Aread(attr, H5T_NATIVE_DOUBLE, &v)) >= 0)
                g_string_printf(buf, "%.8g", v);
        }
        else if (nitems > 0) {
            gdouble *v = g_new(gdouble, nitems);
            gwy_debug("float array");
            if ((status = H5Aread(attr, H5T_NATIVE_DOUBLE, v)) >= 0) {
                g_string_printf(buf, "%.8g", v[0]);
                for (i = 1; i < nitems; i++)
                    g_string_append_printf(buf, "; %.8g", v[i]);
            }
            g_free(v);
        }
    }
    else if (type_class == H5T_STRING && is_vlenstr) {
        /* Fixed strings are read differently, into preallocated buffers, but fortunately no one uses them. */
        if (nitems == 1) {
            gchar *s = NULL;
            gwy_debug("string");
            str_type = make_string_type_for_attr(attr_type);
            /* The string is allocated by the library only when the read succeeds, so it is freed only then. */
            if ((status = H5Aread(attr, str_type, &s)) >= 0) {
                if (s)
                    g_strstrip(s);
                g_string_assign(buf, s ? s : "");
                H5free_memory(s);
            }
            H5Tclose(str_type);
        }
        else if (nitems > 0) {
            /* Zero-fill the array so that items the library did not set are NULL, not garbage. */
            gchar **s = g_new0(gchar*, nitems);
            gwy_debug("string array");
            str_type = make_string_type_for_attr(attr_type);
            if ((status = H5Aread(attr, str_type, s)) >= 0) {
                g_string_truncate(buf, 0);
                for (i = 0; i < nitems; i++) {
                    if (i)
                        g_string_append(buf, "; ");
                    /* Missing strings are represented as empty ones. */
                    if (s[i]) {
                        g_strstrip(s[i]);
                        g_string_append(buf, s[i]);
                        H5free_memory(s[i]);
                    }
                }
            }
            H5Tclose(str_type);
            g_free(s);
        }
    }
    H5Sclose(space);
    H5Tclose(attr_type);
    H5Aclose(attr);

    if (status >= 0) {
        gwy_debug("[%s] = <%s>", path->str, buf->str);
        gwy_container_set_const_string_by_name(ghfile->meta, path->str, buf->str);
    }
    else {
        g_warning("Cannot handle attribute %d(%d)[%d]", type_class, is_vlenstr, nitems);
    }

    /* The handler sees the full attribute path in ghfile->path, so it must run before the path is restored. */
    if (ghfile->attr_handler) {
        ghfile->attr_handler(ghfile, loc_id, attr_name);
    }

    g_string_truncate(path, len);

    return 0;
}

/*
 * Builds a memory datatype suitable for reading a string attribute whose file datatype is @attr_type.
 *
 * The result is a copy of H5T_C_S1 which is made variable-length when @attr_type is a variable-length string and
 * which takes over the character set of @attr_type.  For fixed-length @attr_type the size of the copy is left
 * untouched.
 *
 * Returns the new datatype id, which the caller has to release with H5Tclose(); or -1 if @attr_type is not of
 * string class.
 */
static hid_t
make_string_type_for_attr(hid_t attr_type)
{
    hid_t mem_type;

    g_return_val_if_fail(H5Tget_class(attr_type) == H5T_STRING, -1);

    mem_type = H5Tcopy(H5T_C_S1);
    if (H5Tis_variable_str(attr_type)) {
        H5Tset_size(mem_type, H5T_VARIABLE);
    }
    /* Keep the character set (ASCII vs. UTF-8) the file declares for the attribute. */
    H5Tset_cset(mem_type, H5Tget_cset(attr_type));

    return mem_type;
}

/*
 * Opens attribute @attr_name of object @obj_path and verifies it has the expected type class and shape.
 *
 * @file_id: Location identifier passed to H5Aopen_by_name().
 * @obj_path: Path of the object carrying the attribute, relative to @file_id.
 * @attr_name: Name of the attribute.
 * @expected_class: Datatype class the attribute must have.
 * @expected_rank: Required dataspace rank; ranks above 3 are never accepted.
 * @expected_dims: Required extent along each of the @expected_rank dimensions.  It is not accessed when the rank
 *                 is 0, so it may be NULL then.
 * @error: Return location for an error.
 *
 * Returns the open attribute id, which the caller has to release with H5Aclose().  On any failure all identifiers
 * opened here are closed, @error is set and -1 is returned.
 */
static hid_t
open_and_check_attr(hid_t file_id,
                    const gchar *obj_path,
                    const gchar *attr_name,
                    H5T_class_t expected_class,
                    gint expected_rank,
                    const gint *expected_dims,
                    GError **error)
{
    hid_t attr, attr_type, space;
    H5T_class_t type_class;
    gint i, rank, status;
    hsize_t dims[3];

    gwy_debug("looking for %s in %s, class %d, rank %d",
              attr_name, obj_path, expected_class, expected_rank);
    if ((attr = H5Aopen_by_name(file_id, obj_path, attr_name, H5P_DEFAULT, H5P_DEFAULT)) < 0) {
        err_MISSING_FIELD(error, attr_name);
        return -1;
    }

    attr_type = H5Aget_type(attr);
    type_class = H5Tget_class(attr_type);
    gwy_debug("found attr %d of type %d and class %d", (gint)attr, (gint)attr_type, type_class);
    if (type_class != expected_class) {
        H5Tclose(attr_type);
        H5Aclose(attr);
        err_UNSUPPORTED(error, attr_name);
        return -1;
    }

    if ((space = H5Aget_space(attr)) < 0) {
        err_HDF5(error, "H5Aget_space", space);
        H5Tclose(attr_type);
        H5Aclose(attr);
        /* Must bail out here.  Carrying on would use the invalid dataspace, close the identifiers a second time in
         * the fail path and try to set @error again. */
        return -1;
    }
    rank = H5Sget_simple_extent_ndims(space);
    gwy_debug("attr space is %d with rank %d", (gint)space, rank);
    /* The rank limit protects the fixed-size dims[] array below. */
    if (rank > 3 || rank != expected_rank) {
        err_UNSUPPORTED(error, attr_name);
        goto fail;
    }

    if ((status = H5Sget_simple_extent_dims(space, dims, NULL)) < 0) {
        gwy_debug("cannot get space %d extent dims", (gint)space);
        err_HDF5(error, "H5Sget_simple_extent_dims", status);
        goto fail;
    }
    for (i = 0; i < rank; i++) {
        gwy_debug("dims[%d]=%lu, expecting %lu", i, (gulong)dims[i], (gulong)expected_dims[i]);
        if (dims[i] != (hsize_t)expected_dims[i]) {
            err_UNSUPPORTED(error, attr_name);
            goto fail;
        }
    }

    /* Only the attribute itself is handed over to the caller. */
    H5Sclose(space);
    H5Tclose(attr_type);
    gwy_debug("attr %d seems OK", (gint)attr);
    return attr;

fail:
    H5Sclose(space);
    H5Tclose(attr_type);
    H5Aclose(attr);
    return -1;
}

/*
 * Reads an integer attribute of given rank and dimensions into @v as native ints.
 *
 * The attribute is located and validated with open_and_check_attr() (class H5T_INTEGER, @expected_rank,
 * @expected_dims).  @v must have room for all the items of the attribute.
 *
 * Returns TRUE on success.  On failure returns FALSE and sets @error.
 */
static gboolean
get_ints_attr(hid_t file_id,
              const gchar *obj_path,
              const gchar *attr_name,
              gint expected_rank,
              const gint *expected_dims,
              gint *v,
              GError **error)
{
    hid_t attr_id;
    gint read_status;

    attr_id = open_and_check_attr(file_id, obj_path, attr_name, H5T_INTEGER, expected_rank, expected_dims, error);
    if (attr_id < 0)
        return FALSE;

    /* The attribute is no longer needed once the values have been transferred, whether that worked or not. */
    read_status = H5Aread(attr_id, H5T_NATIVE_INT, v);
    H5Aclose(attr_id);
    if (read_status >= 0)
        return TRUE;

    err_HDF5(error, "H5Aread", read_status);
    return FALSE;
}

/*
 * Reads a single integer attribute into @v.
 *
 * Convenience wrapper around get_ints_attr() requiring a rank-0 dataspace, for which no dimensions are compared.
 * Returns TRUE on success; on failure returns FALSE and sets @error.
 */
static gboolean
get_int_attr(hid_t file_id,
             const gchar *obj_path, const gchar *attr_name,
             gint *v, GError **error)
{
    const gint rank0 = 0;
    const gint *no_dims = NULL;

    return get_ints_attr(file_id, obj_path, attr_name, rank0, no_dims, v, error);
}

/*
 * Reads a floating point attribute of given rank and dimensions into @v as native doubles.
 *
 * The attribute is located and validated with open_and_check_attr() (class H5T_FLOAT, @expected_rank,
 * @expected_dims).  @v must have room for all the items of the attribute.
 *
 * Returns TRUE on success.  On failure returns FALSE and sets @error.
 */
static gboolean
get_floats_attr(hid_t file_id,
                const gchar *obj_path,
                const gchar *attr_name,
                gint expected_rank,
                const gint *expected_dims,
                gdouble *v,
                GError **error)
{
    hid_t attr_id;
    gint read_status;

    attr_id = open_and_check_attr(file_id, obj_path, attr_name, H5T_FLOAT, expected_rank, expected_dims, error);
    if (attr_id < 0)
        return FALSE;

    /* The attribute is no longer needed once the values have been transferred, whether that worked or not. */
    read_status = H5Aread(attr_id, H5T_NATIVE_DOUBLE, v);
    H5Aclose(attr_id);
    if (read_status >= 0)
        return TRUE;

    err_HDF5(error, "H5Aread", read_status);
    return FALSE;
}

/*
 * Reads a single floating point attribute into @v.
 *
 * Convenience wrapper around get_floats_attr() requiring a rank-0 dataspace, for which no dimensions are compared.
 * Returns TRUE on success; on failure returns FALSE and sets @error.  Marked as possibly unused to keep the compiler
 * quiet when nothing calls it.
 */
G_GNUC_UNUSED
static gboolean
get_float_attr(hid_t file_id,
               const gchar *obj_path, const gchar *attr_name,
               gdouble *v, GError **error)
{
    const gint rank0 = 0;
    const gint *no_dims = NULL;

    return get_floats_attr(file_id, obj_path, attr_name, rank0, no_dims, v, error);
}

/*
 * Reads a variable-length string attribute of given rank and dimensions into the pointer array @v.
 *
 * The attribute is located and validated with open_and_check_attr() (class H5T_STRING, @expected_rank,
 * @expected_dims).  Only variable-length strings are handled; a fixed-length string attribute is reported as
 * unsupported.  @v must have room for one string pointer per item of the attribute.
 *
 * NOTE(review): the strings stored to @v are allocated by the HDF5 library during H5Aread(); presumably the caller
 * is expected to release them with H5free_memory() -- confirm against the callers.
 *
 * Returns TRUE on success.  On failure returns FALSE and sets @error.
 */
static gboolean
get_strs_attr(hid_t file_id,
              const gchar *obj_path,
              const gchar *attr_name,
              gint expected_rank,
              const gint *expected_dims,
              gchar **v,
              GError **error)
{
    hid_t attr_id, file_type, mem_type;
    gboolean variable_length;
    gint read_status;

    attr_id = open_and_check_attr(file_id, obj_path, attr_name, H5T_STRING, expected_rank, expected_dims, error);
    if (attr_id < 0)
        return FALSE;

    file_type = H5Aget_type(attr_id);
    if (file_type < 0) {
        H5Aclose(attr_id);
        err_HDF5(error, "H5Aget_type", file_type);
        return FALSE;
    }

    variable_length = H5Tis_variable_str(file_type);
    gwy_debug("attr %d is%s vlen string", (gint)attr_id, variable_length ? "" : " not");
    if (variable_length) {
        /* Read using a memory type mirroring the file type; all identifiers can be released right afterwards. */
        mem_type = make_string_type_for_attr(file_type);
        read_status = H5Aread(attr_id, mem_type, v);
        H5Tclose(file_type);
        H5Tclose(mem_type);
        H5Aclose(attr_id);
        if (read_status >= 0)
            return TRUE;

        err_HDF5(error, "H5Aread", read_status);
        return FALSE;
    }

    /* Fixed-length strings would have to be read into preallocated buffers, which is not implemented. */
    H5Tclose(file_type);
    H5Aclose(attr_id);
    /* XXX: Be more specific. */
    err_UNSUPPORTED(error, attr_name);
    return FALSE;
}

/*
 * Reads a single variable-length string attribute, storing the string pointer to @v.
 *
 * Convenience wrapper around get_strs_attr() requiring a rank-0 dataspace, for which no dimensions are compared.
 * Returns TRUE on success; on failure returns FALSE and sets @error.
 */
static gboolean
get_str_attr(hid_t file_id,
             const gchar *obj_path, const gchar *attr_name,
             gchar **v, GError **error)
{
    const gint rank0 = 0;
    const gint *no_dims = NULL;

    return get_strs_attr(file_id, obj_path, attr_name, rank0, no_dims, v, error);
}

/*
 * Checks whether @path has the form @prefix immediately followed by a decimal number and, if so, appends the number
 * to @array.
 *
 * @path: Name to examine.
 * @prefix: Required beginning of @path.
 * @array: Array to append the parsed index to.  The appended item is a guint (not exceeding G_MAXINT).
 *
 * The part after @prefix must be non-empty and consist entirely of ASCII digits.  Numbers larger than G_MAXINT are
 * rejected.
 *
 * Returns TRUE if @path matched and the index was appended, FALSE otherwise (@array is left unchanged then).
 */
static gboolean
enumerate_indexed(GString *path, const gchar *prefix, GArray *array)
{
    guint i, len = strlen(prefix);
    const gchar *p;
    guint64 value;

    if (strncmp(path->str, prefix, len))
        return FALSE;

    p = path->str + len;
    for (i = 0; g_ascii_isdigit(p[i]); i++)
        ;
    /* Require at least one digit and nothing else up to the end of the string. */
    if (!i || p[i])
        return FALSE;

    /* Parse into a 64bit integer and check the range before narrowing.  Overlong digit sequences saturate to
     * G_MAXUINT64 here and so get rejected too, instead of wrapping around to a bogus small index. */
    value = g_ascii_strtoull(p, NULL, 10);
    if (value > G_MAXINT)
        return FALSE;

    i = (guint)value;
    g_array_append_val(array, i);
    gwy_debug("found indexed %s[%u]", prefix, i);
    return TRUE;
}

/*
 * Sets @error to a module-specific file error reporting that HDF5 library function @where failed with status @code.
 */
static void
err_HDF5(GError **error, const gchar *where, glong code)
{
    g_set_error(error,
                GWY_MODULE_FILE_ERROR, GWY_MODULE_FILE_ERROR_SPECIFIC,
                _("HDF5 library error %ld in function %s."), code, where);
}

/* vim: set cin columns=120 tw=118 et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
