17.6.2 C Code for chdir() and stat()

Here is the C code for these extensions.(114)

The file includes a number of standard header files, and then includes the gawkapi.h header file, which provides the API definitions. Those are followed by the necessary variable declarations to make use of the API macros and boilerplate code (see Boilerplate Code):

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/stat.h>

#include "gawkapi.h"

#include "gettext.h"
#define _(msgid)  gettext(msgid)
#define N_(msgid) msgid

#include "gawkfts.h"
#include "stack.h"

/*
 * File-scope declarations that the surrounding text describes as necessary
 * for the gawkapi.h API macros and boilerplate code.
 */
static const gawk_api_t *api;    /* for convenience macros to work */
/* extension id; passed as the first argument to warning() in this file */
static awk_ext_id_t ext_id;
/* forward declaration; the initialization routine is defined later */
static awk_bool_t init_filefuncs(void);
/* points at the initialization routine; presumably called by the
   dl_load_func() boilerplate --- TODO confirm */
static awk_bool_t (*init_func)(void) = init_filefuncs;
/* version string identifying this extension */
static const char *ext_version = "filefuncs extension: version 1.0";

/* NOTE(review): presumably the symbol gawk checks for to confirm that the
   extension is GPL-compatible --- confirm against the gawk documentation */
int plugin_is_GPL_compatible;

By convention, for an awk function foo(), the C function that implements it is called do_foo(). The function should have three arguments. The first is an int, usually called nargs, that represents the number of actual arguments for the function. The second is a pointer to an awk_value_t structure, usually named result. The third is a pointer to a struct awk_ext_func; the functions shown here do not use it, so it is named unused:

/*  do_chdir --- provide dynamically loaded chdir() function for gawk */

static awk_value_t *
do_chdir(int nargs, awk_value_t *result, struct awk_ext_func *unused)
{
    awk_value_t newdir;
    int ret = -1;

    assert(result != NULL);

The newdir variable represents the new directory to change to, which is retrieved with get_argument(). Note that the first argument is numbered zero.

If the argument is retrieved successfully, the function calls the chdir() system call. If chdir() fails, the function updates ERRNO. If the argument cannot be retrieved, chdir() is not called and ret keeps its initial value of -1:

    if (get_argument(0, AWK_STRING, & newdir)) {
        ret = chdir(newdir.str_value.str);
        if (ret < 0)
            update_ERRNO_int(errno);
    }

Finally, the function returns the return value to the awk level:

    return make_number(ret, result);
}

The stat() extension is more involved. First comes a function that turns a numeric mode into a printable representation (e.g., octal 0644 becomes ‘-rw-r--r--’). This is omitted here for brevity:

/* format_mode --- turn a stat mode field into something readable */

static char *
format_mode(unsigned long fmode)
{
    ...
}

Next comes a function for reading symbolic links, which is also omitted here for brevity:

/* read_symlink --- read a symbolic link into an allocated buffer.
   ... */

static char *
read_symlink(const char *fname, size_t bufsize, ssize_t *linksize)
{
    ...
}

Two helper functions simplify entering values in the array that will contain the result of the stat():

/*
 * array_set --- store VALUE in ARRAY under the string subscript SUB
 *
 * SUB must be a NUL-terminated string; its length is taken with strlen().
 * Any result of set_array_element() is discarded.
 */

static void
array_set(awk_array_t array, const char *sub, awk_value_t *value)
{
    awk_value_t subscript;
    awk_value_t *key;

    /* build the subscript value first, then hand it to the API */
    key = make_const_string(sub, strlen(sub), & subscript);
    set_array_element(array, key, value);
}

/*
 * array_set_numeric --- store the number NUM in ARRAY under subscript SUB
 *
 * Wraps NUM in a temporary awk_value_t and delegates to array_set().
 */

static void
array_set_numeric(awk_array_t array, const char *sub, double num)
{
    awk_value_t numval;
    awk_value_t *wrapped = make_number(num, & numval);

    array_set(array, sub, wrapped);
}

The following function does most of the work to fill in the awk_array_t result array with values obtained from a valid struct stat. This work is done in a separate function to support the stat() function for gawk and also to support the fts() extension, which is included in the same file but whose code is not shown here (see File-Related Functions).

The first part of the function is variable declarations, including a table to map file types to strings:

/*
 * fill_stat_array --- do the work to fill an array with stat info
 *
 * name:  file name; stored under "name" and passed to read_symlink()
 *        when the file is a symbolic link.
 * array: destination array; cleared before any element is added.
 * sbuf:  stat information to copy from; it is only read here.
 *
 * Always returns 0.  Failure to read a symbolic link produces a warning
 * but does not change the return value.
 */

static int
fill_stat_array(const char *name, awk_array_t array, struct stat *sbuf)
{
    char *pmode;    /* printable mode */
    const char *type = "unknown";
    /* scratch value reused for each string element that is stored */
    awk_value_t tmp;
    /* maps the file-type bits of st_mode to the string stored under "type" */
    static struct ftype_map {
        unsigned int mask;
        const char *type;
    } ftype_map[] = {
        { S_IFREG, "file" },
        { S_IFBLK, "blockdev" },
        { S_IFCHR, "chardev" },
        { S_IFDIR, "directory" },
#ifdef S_IFSOCK
        { S_IFSOCK, "socket" },
#endif
#ifdef S_IFIFO
        { S_IFIFO, "fifo" },
#endif
#ifdef S_IFLNK
        { S_IFLNK, "symlink" },
#endif
#ifdef S_IFDOOR /* Solaris weirdness */
        { S_IFDOOR, "door" },
#endif
    };
    /* j: loop index; k: number of entries in ftype_map */
    int j, k;

The destination array is cleared, and then code fills in various elements based on values in the struct stat:

    /* empty out the array */
    clear_array(array);

    /* fill in the array */
    array_set(array, "name", make_const_string(name, strlen(name),
                                               & tmp));
    /* every numeric member goes through array_set_numeric(), i.e., it is
       stored as a double */
    array_set_numeric(array, "dev", sbuf->st_dev);
    array_set_numeric(array, "ino", sbuf->st_ino);
    array_set_numeric(array, "mode", sbuf->st_mode);
    array_set_numeric(array, "nlink", sbuf->st_nlink);
    array_set_numeric(array, "uid", sbuf->st_uid);
    array_set_numeric(array, "gid", sbuf->st_gid);
    array_set_numeric(array, "size", sbuf->st_size);
    array_set_numeric(array, "blocks", sbuf->st_blocks);
    array_set_numeric(array, "atime", sbuf->st_atime);
    array_set_numeric(array, "mtime", sbuf->st_mtime);
    array_set_numeric(array, "ctime", sbuf->st_ctime);

    /* for block and character devices, add rdev,
       major and minor numbers */
    if (S_ISBLK(sbuf->st_mode) || S_ISCHR(sbuf->st_mode)) {
        array_set_numeric(array, "rdev", sbuf->st_rdev);
        array_set_numeric(array, "major", major(sbuf->st_rdev));
        array_set_numeric(array, "minor", minor(sbuf->st_rdev));
    }

The latter part of the function makes selective additions to the destination array, depending upon the availability of certain members and/or the type of the file. It then returns zero, for success:

#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
    array_set_numeric(array, "blksize", sbuf->st_blksize);
#endif

    /* NOTE(review): pmode is not freed here; presumably format_mode()
       returns storage that it owns --- confirm */
    pmode = format_mode(sbuf->st_mode);
    array_set(array, "pmode", make_const_string(pmode, strlen(pmode),
                                                & tmp));

    /* for symbolic links, add a linkval field */
    if (S_ISLNK(sbuf->st_mode)) {
        char *buf;
        ssize_t linksize;

        /* st_size is passed as read_symlink()'s bufsize argument; linksize
           is then used as the length of the stored string.
           NOTE(review): buf is not freed here; presumably
           make_malloced_string() hands ownership to gawk --- confirm */
        if ((buf = read_symlink(name, sbuf->st_size,
                    & linksize)) != NULL)
            array_set(array, "linkval",
                      make_malloced_string(buf, linksize, & tmp));
        else
            warning(ext_id, _("stat: unable to read symbolic link `%s'"),
                    name);
    }

    /* add a type field */
    /* (repeats the initializer given at the declaration of type) */
    type = "unknown";   /* shouldn't happen */
    for (j = 0, k = sizeof(ftype_map)/sizeof(ftype_map[0]); j < k; j++) {
        /* compare only the bits selected by S_IFMT; first match wins */
        if ((sbuf->st_mode & S_IFMT) == ftype_map[j].mask) {
            type = ftype_map[j].type;
            break;
        }
    }

    array_set(array, "type", make_const_string(type, strlen(type), & tmp));

    return 0;
}

The third argument to stat() was not discussed previously. This argument is optional. If present, it causes do_stat() to use the stat() system call instead of the lstat() system call. This is done by using a function pointer: statfunc. By default, statfunc points to lstat(), so that if the file is a symbolic link, the information returned describes the link itself. However, if the third argument is included (only its presence is checked, not its value), statfunc is set to point to stat(), instead.

Here is the do_stat() function, which starts with variable declarations and argument checking:

/*
 * do_stat --- provide a stat() function for gawk
 *
 * awk-level arguments: the file name (string), the array that receives
 * the results, and an optional third argument.  When exactly three
 * arguments are passed, stat() is used instead of lstat().
 *
 * The value returned through result is -1 if the first two arguments
 * cannot be retrieved, the negative value returned by stat()/lstat() on
 * failure (ERRNO is updated in that case), and otherwise the value
 * returned by fill_stat_array().  The unused parameter is not referenced.
 */

static awk_value_t *
do_stat(int nargs, awk_value_t *result, struct awk_ext_func *unused)
{
    awk_value_t file_param, array_param;
    char *name;
    awk_array_t array;
    int ret;
    struct stat sbuf;
    /* default is lstat() */
    int (*statfunc)(const char *path, struct stat *sbuf) = lstat;

    assert(result != NULL);

Then comes the actual work. First, the function gets the arguments. Next, it gets the information for the file. If the called function (lstat() or stat()) returns an error, the code sets ERRNO and returns:

    /* file is first arg, array to hold results is second */
    if (   ! get_argument(0, AWK_STRING, & file_param)
        || ! get_argument(1, AWK_ARRAY, & array_param)) {
        warning(ext_id, _("stat: bad parameters"));
        return make_number(-1, result);
    }

    /* only the argument count is checked; the value of the third
       argument is never fetched */
    if (nargs == 3) {
        statfunc = stat;
    }

    name = file_param.str_value.str;
    array = array_param.array_cookie;

    /* always empty out the array */
    /* (fill_stat_array() clears it again on success; clearing here means
       the array is also empty when the stat call below fails) */
    clear_array(array);

    /* stat the file; if error, set ERRNO and return */
    ret = statfunc(name, & sbuf);
    if (ret < 0) {
        update_ERRNO_int(errno);
        return make_number(ret, result);
    }

The tedious work is done by fill_stat_array(), shown earlier. When done, the function returns the result from fill_stat_array():

    ret = fill_stat_array(name, array, & sbuf);

    return make_number(ret, result);
}

Finally, it’s necessary to provide the “glue” that loads the new function(s) into gawk.

The filefuncs extension also provides an fts() function, which we omit here (see File-Related Functions). For its sake, there is an initialization function:

/* init_filefuncs --- initialization routine */

static awk_bool_t
init_filefuncs(void)
{
    ...
}

We are almost done. We need an array of awk_ext_func_t structures for loading each function into gawk:

static awk_ext_func_t func_table[] = {
    { "chdir", do_chdir, 1, 1, awk_false, NULL },
    { "stat",  do_stat, 3, 2, awk_false, NULL },
    ...
};

Each extension must have a routine named dl_load() to load everything that needs to be loaded. It is simplest to use the dl_load_func() macro in gawkapi.h:

/* define the dl_load() function using the boilerplate macro */

dl_load_func(func_table, filefuncs, "")

And that’s it!


Footnotes

(114)

This version is edited slightly for presentation. See extension/filefuncs.c in the gawk distribution for the complete version.