Chris Umbel

C from erlang via linked-in driver

Erlang truly is a useful language. It's fast, full-featured and elegant. Like all high-level languages, however, you sometimes need to access legacy or system code written in another language such as, you guessed it, C.

I came across this situation recently when porting my webcrawler to Erlang. Under pressure, http:request proved to be unreliable when used in a multi-threaded scenario. The solution I chose was to bridge Erlang with libcurl by way of an Erlang "linked-in driver". While I only used it for retrieving web page content into memory, I figure I'll be able to wrap all of libcurl in the future.

C

I'll start out with some C code that will function as the bridge between erlang and libcurl. This is the code that qualifies as a "linked-in" driver. Like all C code there's plenty of plumbing. See my comments for explanation.

Note that I'm using curl's multi interface which doesn't block on I/O.

#include "erl_driver.h"
#include "ei.h"
#include <stdio.h>
#include <curl/curl.h>
#include <string.h>
#include <stdlib.h>

/* per-port driver state: allocated in eurl_drv_start and released in
   eurl_drv_stop */
typedef struct 
{
  ErlDrvPort port;   /* port this instance serves; destination of driver_output */
} eurl_data;

/* erlang's firing us up
 *
 * port - the port being opened; remembered so replies can be sent to it
 * buff - the command string given to open_port (unused)
 *
 * Returns the per-port state (later passed to the other callbacks as
 * `handle`), or ERL_DRV_ERROR_GENERAL when the state cannot be allocated
 * so that opening the port fails instead of dereferencing NULL. */
static ErlDrvData eurl_drv_start(ErlDrvPort port, char *buff)
{
  eurl_data *d = (eurl_data *)driver_alloc(sizeof *d);

  (void)buff;

  if (d == NULL)
    return ERL_DRV_ERROR_GENERAL;

  d->port = port;
  return (ErlDrvData)d;
}

/* erlang's done with us: release the per-port state that
   eurl_drv_start allocated with driver_alloc */
static void eurl_drv_stop(ErlDrvData handle)
{
  char *state = (char *)handle;

  driver_free(state);
}

/* basic data structure that contains the content of a web page;
   grown chunk by chunk in write_chunk */
struct document
{
  char *memory;   /* heap buffer with the bytes received so far; NULL until the
                     first chunk arrives, NUL terminated by write_chunk */
  size_t size;    /* number of content bytes in memory (terminator not counted) */
};

/* Resize helper: grows an existing allocation with realloc, or creates a
 * fresh one with malloc when there is no allocation yet (ptr == NULL).
 * Returns the (possibly moved) buffer, or NULL if the allocation failed. */
static void *myrealloc(void *ptr, size_t size)
{
  return (ptr != NULL) ? realloc(ptr, size) : malloc(size);
}

/* write data into a structure for erlang
 *
 * libcurl write callback (installed with CURLOPT_WRITEFUNCTION).
 *
 * ptr   - the bytes just received (not NUL terminated)
 * size  - size of one element
 * nmemb - number of elements; size * nmemb bytes are available at ptr
 * data  - the struct document registered with CURLOPT_WRITEDATA
 *
 * Appends the chunk to the document and keeps the buffer NUL terminated.
 * Returns the number of bytes consumed.  When the buffer cannot be grown
 * the document is left exactly as it was and 0 is returned; a count that
 * differs from size * nmemb tells libcurl to abort the transfer instead of
 * carrying on with a lost buffer. */
static size_t write_chunk(void *ptr, size_t size, size_t nmemb, void *data)
{
  size_t realsize = size * nmemb;
  struct document *mem = (struct document *)data;
  char *grown;

  /* grow into a temporary so the old buffer is not leaked on failure */
  grown = myrealloc(mem->memory, mem->size + realsize + 1);
  if (grown == NULL)
    return 0;

  mem->memory = grown;
  memcpy(&(mem->memory[mem->size]), ptr, realsize);
  mem->size += realsize;
  mem->memory[mem->size] = 0;

  return realsize;
}

/* here's the business end.  don't be intimidated.  most of this code is
   just to satisfy libcurl and isn't plumbing required to talk to erlang.
   most of what you'll be interested in is the very beginning and the end.

   handle  - the eurl_data created by eurl_drv_start
   buff    - the url sent from erlang (not NUL terminated)
   bufflen - number of bytes in buff

   Fetches the url and sends the collected body back to the port with
   driver_output.  An empty payload is sent when the request could not be
   set up or produced no data.  The function only returns once the
   transfer has finished. */
static void eurl_drv_output(ErlDrvData handle, char *buff, int bufflen)
{
  eurl_data *d = (eurl_data *)handle;     /* <--ERLANG STUFF */
  CURLM *multi_handle = NULL;
  CURL *curl = NULL;
  char *url = NULL;
  int is_running = 0;
  struct document doc;

  doc.memory = NULL;
  doc.size = 0;

  /* grab the url from erlang and put
     it in a NUL terminated string */
  if (bufflen >= 0)
    url = malloc((size_t)bufflen + 1);
  if (url != NULL) {
    memcpy(url, buff, (size_t)bufflen);
    url[bufflen] = '\0';
  }

  /* initialize curl for non-blocking multi interface */
  /* NOTE(review): curl_global_init runs on every request and is never
     paired with curl_global_cleanup; consider moving it to the driver's
     init callback - confirm thread-safety for the libcurl version used */
  if (url != NULL && curl_global_init(CURL_GLOBAL_ALL) == CURLE_OK) {
    curl = curl_easy_init();
    multi_handle = curl_multi_init();
  }

  if (curl && multi_handle) {
    /* tell curl what we intend to do */
    curl_easy_setopt(curl, CURLOPT_URL, url);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_chunk);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&doc);

    /* connect the multi interface to the easy interface */
    curl_multi_add_handle(multi_handle, curl);

    while (CURLM_CALL_MULTI_PERFORM ==
           curl_multi_perform(multi_handle, &is_running))
      ;

    while (is_running) {
      struct timeval timeout;
      int rc;

      fd_set fdread;
      fd_set fdwrite;
      fd_set fdexcep;
      int maxfd = -1;

      long curl_timeo = -1;

      FD_ZERO(&fdread);
      FD_ZERO(&fdwrite);
      FD_ZERO(&fdexcep);

      /* default wait, used when curl has no timeout of its own */
      timeout.tv_sec = 10;
      timeout.tv_usec = 0;

      curl_multi_timeout(multi_handle, &curl_timeo);

      /* curl reports milliseconds; never wait longer than one second */
      if (curl_timeo >= 0) {
        timeout.tv_sec = curl_timeo / 1000;
        if (timeout.tv_sec > 1)
          timeout.tv_sec = 1;
        else
          timeout.tv_usec = (curl_timeo % 1000) * 1000;
      }

      /* ask curl for the file descriptors */
      curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);

      /* ask OS for their status */
      rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);

      /* on a select() failure nothing is performed this round and the
         loop simply tries again; on timeout or activity let curl work */
      if (rc != -1) {
        while (CURLM_CALL_MULTI_PERFORM ==
               curl_multi_perform(multi_handle, &is_running))
          ;
      }
    }

    /* !!! ERLANG STUFF !!! */
    /* send data back to erlang; never hand driver_output a NULL buffer */
    if (doc.memory)
      driver_output(d->port, doc.memory, doc.size);
    else
      driver_output(d->port, "", 0);
  } else {
    driver_output(d->port, "", 0);
  }

  /* NOTE(review): debug trace on the emulator's stdout - consider removing.
     doc.memory is NULL when nothing was received, so guard the %s. */
  printf("%d, %s", (int)doc.size, doc.memory ? doc.memory : "");

  /* tear down curl: detach the easy handle first, then release each handle
     with its own cleanup function (also covers the case where only one of
     the two could be created) */
  if (curl && multi_handle)
    curl_multi_remove_handle(multi_handle, curl);
  if (curl)
    curl_easy_cleanup(curl);
  if (multi_handle)
    curl_multi_cleanup(multi_handle);

  free(doc.memory);
  free(url);
}

/* mapping of the drivers functions */
static ErlDrvEntry eurl_driver_entry = 
  {
    NULL,
    eurl_drv_start,
    eurl_drv_stop, 
    eurl_drv_output,
    NULL,
    NULL,
    "eurl_drv",
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
     ERL_DRV_FLAG_USE_PORT_LOCKING
  };

/* driver entry point declared through the DRIVER_INIT macro: returns the
   address of eurl_driver_entry, the driver's callback table */
DRIVER_INIT(eurl_drv)
{   
  return &eurl_driver_entry;
}

Build

The next task is to build the driver as a shared library.

gcc -o eurl_drv.so -L/usr/lib/erlang/lib/erl_interface-3.5.7 -I/usr/lib/erlang/lib/erl_interface-3.5.7/include/ -fpic -rdynamic -shared -fno-common eurl.c -lerl_interface -lei -lcurl

Erlang wrapper

Depending on your point of view, the hard part is now done. Now I'll slap together a little thread-safe Erlang wrapper.

-module(eurl).
-export([start/0, curl/1]).

%% initialize the driver
%% loads the eurl_drv shared library from the current directory and
%% registers the calling process under the name eurl_proc;
%% exits with {error, could_not_load_driver} if the library cannot be loaded
start() ->
%% load the C code; a driver that is already loaded counts as success
  LoadResult = erl_ddll:load_driver(".", eurl_drv),
  case LoadResult of
    {error, already_loaded} -> ok;
    ok -> ok;
    _Other -> exit({error, could_not_load_driver})
  end,

  register(eurl_proc, self()).
  
%% perform a curl request and return string data
%% Url is the address to fetch, as a string; the result is the response
%% body converted to a list
curl(Url) ->
%% start the conversation with the driver
  P = open_port({spawn, eurl_drv}, [binary]),
%% instruct the driver to execute the curl request
  port_command(P, [list_to_binary(Url)]),
%% read the response; match on this port and the data tag so that
%% unrelated messages in the caller's mailbox are left untouched
  BinaryData =
    receive
      {P, {data, Data}} -> Data
    end,
  port_close(P),

%% convert to list and return
  binary_to_list(BinaryData).

Usage

Now let's test it from the erlang shell.

1> eurl:start().
2> Body = eurl:curl("http://erlang.org/").  

And as you'd have guessed the full HTML of erlang.org is now in the Body variable.

Wed Sep 16 2009 21:09:00 GMT+0000 (UTC)

Follow Chris
RSS Feed
Twitter
Facebook
CodePlex
github
LinkedIn
Google