C from erlang via linked-in driver

Erlang truly is a useful language. It's fast, full-featured and elegant. As with all high-level languages, however, you sometimes need to access legacy or system code written in another language such as, you guessed it, C.

I came across this situation recently when porting my webcrawler to erlang. Under pressure, http:request proved to be unreliable when used in a multi-threaded scenario. The solution I chose was to bridge erlang with libcurl by way of an erlang "linked-in driver". While I only used it for retrieving web page content into memory, I figure I'll be able to wrap all of libcurl in the future.

C

I'll start out with some C code that will function as the bridge between erlang and libcurl. This is the code that qualifies as a "linked-in" driver. As with all C code, there's plenty of plumbing. See my comments for an explanation.

#include "erl_driver.h"
#include "ei.h"
#include <stdio.h>
#include <curl/curl.h>
#include <string.h>
#include <stdlib.h>

/* per-port driver state: allocated in eurl_drv_start, handed back to every
   callback as the ErlDrvData handle, and released in eurl_drv_stop */
typedef struct 
{
       /* the port this driver instance serves; replies are sent to it */
       ErlDrvPort port;
} eurl_data;

/* erlang's firing us up: called when a port is opened on this driver.
 *
 * port: the port being opened; stored so replies can be sent to it later.
 * buff: the command string that came with the open request (unused).
 *
 * Returns the freshly allocated per-port state as the driver handle, or
 * ERL_DRV_ERROR_GENERAL when that state cannot be allocated. */
static ErlDrvData eurl_drv_start(ErlDrvPort port, char *buff)
{
       eurl_data *d = (eurl_data *)driver_alloc(sizeof *d);

       (void)buff;

       /* driver_alloc can fail; report it instead of dereferencing NULL */
       if (d == NULL)
               return ERL_DRV_ERROR_GENERAL;

       d->port = port;
       return (ErlDrvData)d;
}

/* erlang's done with us: the port is going away, so hand the per-port
   state that eurl_drv_start allocated back to the driver allocator */
static void eurl_drv_stop(ErlDrvData handle)
{
       void *state = (void *)handle;

       driver_free(state);
}

/* basic data structure that contains the content of a web page */
struct document
{
      /* heap buffer with the bytes received so far; starts out NULL and is
         grown (and kept NUL-terminated) by write_chunk */
      char *memory;
      /* number of content bytes in memory, not counting the terminator */
      size_t size;
};

/* Grow or shrink the block at ptr to size bytes; when ptr is NULL a fresh
   block of size bytes is allocated instead. Returns the resulting block,
   or NULL if the allocation fails. */
static void *myrealloc(void *ptr, size_t size)
{
   if (ptr == NULL)
     {
        return malloc(size);
     }

   return realloc(ptr, size);
}

/* write data into a structure for erlang
 *
 * libcurl write callback: appends the size * nmemb bytes at ptr to the
 * struct document passed through data, keeping the buffer NUL-terminated.
 *
 * Returns the number of bytes consumed. libcurl aborts the transfer when
 * the return value differs from size * nmemb, so 0 is returned when the
 * buffer cannot be grown; the bytes collected so far stay intact in that
 * case and remain owned by the document. */
static size_t write_chunk(void *ptr, size_t size, size_t nmemb, void *data)
{
   struct document *mem = (struct document *)data;
   size_t realsize;
   char *grown;

   /* refuse chunks whose byte count cannot be represented in a size_t */
   if (size != 0 && nmemb > (size_t)-1 / size)
     return 0;
   realsize = size * nmemb;

   /* ...and chunks that would overflow the running total plus terminator */
   if (realsize >= (size_t)-1 - mem->size)
     return 0;

   /* resize through a temporary so a failure neither leaks the old buffer
      nor leaves mem->size describing a buffer that no longer exists */
   grown = myrealloc(mem->memory, mem->size + realsize + 1);
   if (grown == NULL)
     return 0;

   mem->memory = grown;
   memcpy(mem->memory + mem->size, ptr, realsize);
   mem->size += realsize;
   mem->memory[mem->size] = '\0';

   return realsize;
}

/* here's the business end: erlang sent us a URL with port_command, so
 * download it with libcurl and send the body back through the port.
 *
 * handle:  per-port state created by eurl_drv_start.
 * buff:    the URL bytes; not NUL-terminated.
 * bufflen: number of bytes in buff.
 *
 * Exactly one reply is always sent: the downloaded bytes (possibly empty or
 * partial when the transfer failed), or the single byte "A" when the
 * request could not be set up at all. */
static void eurl_drv_output(ErlDrvData handle, char *buff, int bufflen)
{
   eurl_data *d = (eurl_data *)handle;
   struct document doc;
   CURL *curl;
   char *url;

   doc.memory = NULL;
   doc.size = 0;

   /* grab the url from erlang and put
      it in a NUL terminated string */
   url = (bufflen >= 0) ? malloc((size_t)bufflen + 1) : NULL;
   if (url == NULL)
     {
        /* still answer, otherwise the erlang side waits forever */
        driver_output(d->port, "A", 1);
        return;
     }
   memcpy(url, buff, (size_t)bufflen);
   url[bufflen] = '\0';

   /* initialize curl
      NOTE(review): curl_global_init is repeated on every request and never
      paired with curl_global_cleanup; it looks like it belongs in a
      once-per-driver init step instead -- confirm before moving it */
   curl_global_init(CURL_GLOBAL_ALL);
   curl = curl_easy_init();

   if (curl)
     {
        /* tell curl what we intend to do */
        curl_easy_setopt(curl, CURLOPT_URL, url);
        /* curl_easy_setopt is variadic and reads this option as a long,
           so the constant must be a long too */
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, 10L);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_chunk);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&doc);
        curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");

        /* invoke curl to download data; the result code is deliberately
           not acted on: whatever was received is sent back either way */
        (void)curl_easy_perform(curl);

        /* release curl's resources */
        curl_easy_cleanup(curl);

        /* send data back to erlang; hand over an empty string rather than
           a NULL buffer when nothing was received */
        driver_output(d->port, doc.memory ? doc.memory : "", doc.size);
     }
   else
     {
        driver_output(d->port, "A", 1);
     }

   free(doc.memory);
   free(url);
}

/* mapping of the drivers functions */
static ErlDrvEntry eurl_driver_entry = 
{
   NULL,
     eurl_drv_start,
     eurl_drv_stop, 
     eurl_drv_output,
     NULL,
     NULL,
     "eurl_drv",
     NULL,
     NULL,
     NULL,
     NULL,
     NULL,
     NULL,
     NULL,
     NULL,
     NULL,
     ERL_DRV_EXTENDED_MARKER,
     ERL_DRV_EXTENDED_MAJOR_VERSION,
     ERL_DRV_EXTENDED_MAJOR_VERSION,
     ERL_DRV_FLAG_USE_PORT_LOCKING
};

/* driver entry point, declared through the DRIVER_INIT macro with the same
   name as the driver name string in eurl_driver_entry; returns the address
   of that callback table */
DRIVER_INIT(eurl_drv)
{   
    return &eurl_driver_entry;
}

Build

The next task is to build the driver as a shared library.

# compile eurl.c into the shared library eurl_drv.so, linking against
# erl_interface, ei and libcurl; the erl_interface-3.5.7 paths are tied to
# one particular Erlang install, so adjust them to match your system
gcc -o eurl_drv.so \
-L/usr/lib/erlang/lib/erl_interface-3.5.7 \
-I/usr/lib/erlang/lib/erl_interface-3.5.7/include/ \
-fpic -rdynamic -shared -fno-common eurl.c -lerl_interface -lei -lcurl

Erlang wrapper

Depending on your point of view, the hard part is now done. Now I'll slap a little thread-safe erlang wrapper together.

-module(eurl).
-export([start/0, curl/1]).

%% initialize the driver
%% Loads the eurl_drv library from the current directory and registers the
%% calling process as eurl_proc. Safe to call more than once.
%% Returns true; exits with {error, could_not_load_driver} if the driver
%% cannot be loaded.
start() ->
  %% load the C code
  %% note the name of the library is
  %% eurl_drv in the current directory
  case erl_ddll:load_driver(".", eurl_drv) of
    ok -> ok;
    {error, already_loaded} -> ok;
    _ -> exit({error, could_not_load_driver})
  end,

  %% register/2 raises badarg when the name is already taken, which would
  %% make a repeated start() crash even though already_loaded is tolerated
  case whereis(eurl_proc) of
    undefined -> register(eurl_proc, self());
    _ -> true
  end.
  
%% perform a curl request and return string data
%% Url is a string (list of characters); the result is the reply from the
%% driver converted to a list of bytes.
curl(Url) ->
  %% start the conversation with the driver
  P = open_port({spawn, eurl_drv}, [binary]),
  %% instruct the driver to execute the curl request
  port_command(P, [list_to_binary(Url)]),
  %% read the response; match on this port and on the data tag so that
  %% unrelated messages in the caller's mailbox are left untouched
  BinaryData =
    receive
      {P, {data, Data}} -> Data
    end,
  port_close(P),
  %% convert to list and return
  binary_to_list(BinaryData).

Usage

Now let's test it from the erlang shell.

1> eurl:start().
2> Body = eurl:curl("http://erlang.org/").  

And, as you'd have guessed, the full HTML of erlang.org is now in the Body variable.

Created on 2009-09-16 21:17:00
Share on Facebook Facebook
Comment Feed
Nice intro, except for the 'noop' thing which is pretty ugly actually :) The rest was a really nice intro though. /Mazen 
by Mazen Harake on 2009-11-06 09:19:54
Hey, thanks for the feedback.  You're certainly right about that.  Can't imagine I intended to leave it that way.  I'm certainly cleaning that up. 
by chrisumbel on 2009-11-11 09:16:41
Add a Comment: (HTML not accepted. URLs will automatically be converted to links)
Body
Nickname (Login || Register)
Home Page
Email Addy(kept private)
Are you human?
Tags:
linq .Net performance sql 2008 sql server powershell indexes scripting reporting services filestream ruby ironruby entity framework EF testing .net framework 4.0 ADO.NET SSRS rs setpolicies vb cte c# podcasts webdav exchange server data warehousing Data Services Web Services Astoria jQuery database object oriented cql refactoring remoting simpledb cloud HTML GObject GNOME Vala BI couchdb django ORM python erlang functional C curl stackless concurrency Groovy Java JVM dynamic tools windows ironpython dlr systems programming go CAPTCHA appengine natural language full-text rails lucene wave clr parallel virtualization Oracle iPhone xml Objective-C Haiku security cocoa touch C++ BeOS Operating Systems Lucene monitoring Solr lisp VS 2010
Blog History:
Solrnet, a Solr Client Library for .Net - 03/08/2010
Monitoring Solr with LucidGaze - 02/21/2010
Haiku, an Open Source Continuation of BeOS - 02/10/2010
Basic Authentication with a NSURLRequest in Cocoa Touch - 01/24/2010
Asynchronous Programming in Cocoa Touch - 01/17/2010
NSXML-like XPath Support in Cocoa Touch with TouchXML - 01/03/2010
Using Solr in Django for Full-Text Searching via Solango - 01/01/2010
Using Entity Framework with Oracle - 12/22/2009
Solutions to Common VirtualBox Problems - 12/20/2009
Parallel Programming with the Task Parallel Library and PLINQ in .Net 4.0 - 12/14/2009
Clojure, A Lisp for the JVM and CLR - 12/13/2009
Google Wave Robots in Java - 12/07/2009
Employing Solr/Lucene with SQL Server for Full-Text Searching - 12/05/2009
Full-Text Indexing in Ruby Using Ferret - 11/28/2009
Home-Brewing a Full-Text Search in Google's AppEngine - 11/22/2009
Using reCAPTCHA With Django - 11/21/2009
Phat Go Code Launched - 11/19/2009
A Little More of Google's Go - 11/17/2009
First Impressions of Go, Google's New Systems Language - 11/14/2009
Scripting Your .Net Applications with IronPython - 11/03/2009
Windows Services in Python - 11/02/2009
My Tool List - 10/26/2009
Groovy: Dynamic Language for the JVM... Groovy! - 10/23/2009
Easy Concurrency with Stackless Python - 10/03/2009
C from erlang via linked-in driver - 09/16/2009
Templating with NDjango - 09/06/2009
A little bit o' Erlang - 08/23/2009
Tale of a Website, from Rails to ASP.NET to Django - 08/20/2009
Now in Django - 08/19/2009
Stored Procedures in Django - 08/09/2009
CouchDBExtension - 08/06/2009
POCO Entities in ADO.NET 4.0 - 07/30/2009
Accessing SimpleDB from SSRS - 07/22/2009
Easy GNOME Development with the Vala Programming Language - 07/16/2009
HTML Parsing with Ruby and Nokogiri - 07/12/2009
Amazon SimpleDB Batched PUTs Usage and Performance - 07/10/2009
PowerShell 2.0 Out-GridView, ISE and ScriptCmdlets - 07/05/2009
Asynchronous and remote execution with powershell 2 ctp3 - 06/30/2009
Understanding Source Code with NDepend and CQL - 06/22/2009
Object Oriented Databases with db4o - 06/07/2009
ADO.Net Data Services with jQuery - 05/29/2009
Exchange webdav automation - 05/26/2009
Podcasts - 05/26/2009
Linq to Object Performance - 05/11/2009
SQL 2008 and powershell - 01/25/2009
SQL 2008 filtered indexes - 06/11/2008
SQL 2008's table valued parameters - 05/11/2008
SQL 2008's MERGE statement - 04/22/2008
ironruby - 04/11/2008
SSRS scripting with RS.EXE - 11/20/2007
SQL 2008 FILESTREAM - 08/04/2007
CTE Concatenation - 01/01/2007