Chris Umbel

C from erlang via linked-in driver

Erlang truly is a useful language. It's fast, full-featured and elegant. As with all high-level languages, however, you sometimes need to access legacy or system code written in another language like, you guessed it, C.

I came across this situation recently when porting my webcrawler to erlang. Under pressure, http:request proved to be unreliable when used in a multi-threaded scenario. The solution I chose was to bridge erlang with libcurl by way of an erlang "linked-in driver". While I only used it for retrieving web page content into memory, I figure I'll be able to wrap all of libcurl in the future.

C

I'll start out with some C code that will function as the bridge between erlang and libcurl. This is the code that qualifies as a "linked-in" driver. Like all C code there's plenty of plumbing. See my comments for explanation.

Note that I'm using curl's multi interface which doesn't block on I/O.

#include "erl_driver.h"
#include "ei.h"
#include <stdio.h>
#include <curl/curl.h>
#include <string.h>
#include <stdlib.h>

/* per-port driver state: created in eurl_drv_start, released in eurl_drv_stop */
typedef struct {
  ErlDrvPort port;   /* port this instance answers on via driver_output */
} eurl_data;

/*
 * Driver start callback: erlang's firing us up.
 *
 * port - handle of the port being opened; stored so replies can be sent
 *        to it later
 * buff - not used by this driver
 *
 * Returns the freshly allocated eurl_data cast to ErlDrvData, or
 * ERL_DRV_ERROR_GENERAL when the state cannot be allocated.
 */
static ErlDrvData eurl_drv_start(ErlDrvPort port, char *buff)
{
  eurl_data *d = driver_alloc(sizeof *d);

  (void)buff;

  /* refuse to open the port rather than write through a NULL pointer */
  if (d == NULL)
    return ERL_DRV_ERROR_GENERAL;

  d->port = port;
  return (ErlDrvData)d;
}

/*
 * Driver stop callback: erlang's done with us, so hand the per-port
 * state back to the driver allocator.
 */
static void eurl_drv_stop(ErlDrvData handle)
{
  char *state = (char *)handle;

  driver_free(state);
}

/* growable buffer that accumulates the content of a web page */
struct document {
  char *memory;   /* heap buffer holding the bytes received so far */
  size_t size;    /* number of content bytes stored in memory */
};

/*
 * Resize ptr to size bytes, or allocate a fresh block of size bytes when
 * ptr is NULL.  Returns whatever realloc/malloc returned.
 */
static void *myrealloc(void *ptr, size_t size)
{
  return (ptr != NULL) ? realloc(ptr, size) : malloc(size);
}

/*
 * libcurl write callback: append one received chunk to the document.
 *
 * ptr   - the bytes libcurl just received
 * size  - size of one element
 * nmemb - number of elements; the chunk is size * nmemb bytes long
 * data  - the struct document registered with CURLOPT_WRITEDATA
 *
 * Returns the number of bytes consumed.  When the buffer cannot be grown
 * the chunk is not stored and 0 is returned, which libcurl treats as a
 * write error for a non-empty chunk; the bytes collected so far stay valid.
 */
static size_t write_chunk(void *ptr, size_t size, size_t nmemb, void *data)
{
  size_t realsize = size * nmemb;
  struct document *mem = (struct document *)data;
  char *grown;

  /* one extra byte so the buffer can always be NUL terminated */
  grown = myrealloc(mem->memory, mem->size + realsize + 1);
  if (grown == NULL)
    return 0;   /* mem->memory is untouched, so the caller can still free it */

  mem->memory = grown;
  memcpy(&(mem->memory[mem->size]), ptr, realsize);
  mem->size += realsize;
  mem->memory[mem->size] = 0;

  return realsize;
}

/*
 * Port output callback: erlang wrote a URL to the port, so download it and
 * send the page back.  Most of this code is there to satisfy libcurl; the
 * erlang-specific parts are the very beginning and the end.
 *
 * handle  - the eurl_data created by eurl_drv_start
 * buff    - URL bytes from erlang (bufflen bytes, copied and NUL
 *           terminated here before use)
 * bufflen - number of bytes in buff
 *
 * The page is fetched with curl's multi interface, collected into a
 * struct document by write_chunk and handed to erlang with driver_output.
 * An empty reply is sent when the request cannot be set up or nothing was
 * received.
 *
 * NOTE(review): curl_global_init runs on every request and is never paired
 * with curl_global_cleanup; it looks like it belongs in a one-time driver
 * init hook -- confirm against the libcurl documentation before moving it.
 */
static void eurl_drv_output(ErlDrvData handle, char *buff, int bufflen)
{
  eurl_data *d = (eurl_data *)handle;     /* <--ERLANG STUFF */
  CURLM *multi_handle = NULL;
  CURL *curl = NULL;
  int is_running = 0;   /* stays 0 if curl_multi_perform never sets it */
  struct document doc;
  char *url;

  doc.memory = NULL;
  doc.size = 0;

  /* grab the url from erlang and put
     it in a NUL terminated string */
  url = malloc((size_t)bufflen + 1);
  if (url != NULL) {
    memcpy(url, buff, (size_t)bufflen);
    url[bufflen] = '\0';

    /* initialize curl for non-blocking multi interface */
    curl_global_init(CURL_GLOBAL_ALL);
    curl = curl_easy_init();
    multi_handle = curl_multi_init();
  }

  if (curl && multi_handle) {
    /* tell curl what we intend to do */
    curl_easy_setopt(curl, CURLOPT_URL, url);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_chunk);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&doc);

    /* connect the multi interface to the easy interface */
    curl_multi_add_handle(multi_handle, curl);

    while (CURLM_CALL_MULTI_PERFORM ==
           curl_multi_perform(multi_handle, &is_running))
      ;

    while (is_running) {
      struct timeval timeout;
      int rc;

      fd_set fdread;
      fd_set fdwrite;
      fd_set fdexcep;
      int maxfd = -1;

      long curl_timeo = -1;

      FD_ZERO(&fdread);
      FD_ZERO(&fdwrite);
      FD_ZERO(&fdexcep);

      /* default wait, used when curl has no timeout of its own */
      timeout.tv_sec = 10;
      timeout.tv_usec = 0;

      curl_multi_timeout(multi_handle, &curl_timeo);

      /* curl reports milliseconds; wait at most one second per round */
      if (curl_timeo >= 0) {
        timeout.tv_sec = curl_timeo / 1000;
        if (timeout.tv_sec > 1)
          timeout.tv_sec = 1;
        else
          timeout.tv_usec = (curl_timeo % 1000) * 1000;
      }

      /* ask curl for the file descriptors */
      curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);

      /* ask OS for their status */
      rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);

      /* on a select error (-1) nothing is performed this round and the
         loop simply tries again; a timeout (0) or ready descriptors (>0)
         both let curl make progress */
      if (rc != -1) {
        while (CURLM_CALL_MULTI_PERFORM ==
               curl_multi_perform(multi_handle, &is_running))
          ;
      }
    }

    /* detach the easy handle before either handle is destroyed */
    curl_multi_remove_handle(multi_handle, curl);

    /* !!! ERLANG STUFF !!! */
    /* send data back to erlang; there is no buffer at all when no
       chunk was ever received */
    if (doc.memory)
      driver_output(d->port, doc.memory, doc.size);
    else
      driver_output(d->port, "", 0);
  } else {
    driver_output(d->port, "", 0);
  }

  /* release whichever handles were created: easy handle first, then the
     multi handle it was attached to */
  if (curl)
    curl_easy_cleanup(curl);
  if (multi_handle)
    curl_multi_cleanup(multi_handle);

  /* debug trace of what was fetched; never hand printf a NULL string */
  printf("%d, %s", (int)doc.size, doc.memory ? doc.memory : "");

  free(doc.memory);
  free(url);
}

/* mapping of the drivers functions */
static ErlDrvEntry eurl_driver_entry = 
  {
    NULL,
    eurl_drv_start,
    eurl_drv_stop, 
    eurl_drv_output,
    NULL,
    NULL,
    "eurl_drv",
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
     ERL_DRV_FLAG_USE_PORT_LOCKING
  };

/* driver entry point declared through the DRIVER_INIT macro; returns the
   address of the eurl_driver_entry callback table */
DRIVER_INIT(eurl_drv)
{   
  return &eurl_driver_entry;
}

Build

The next task is to build the driver as a shared library.

gcc -o eurl_drv.so -L/usr/lib/erlang/lib/erl_interface-3.5.7 -I/usr/lib/erlang/lib/erl_interface-3.5.7/include/ -fpic -rdynamic -shared -fno-common eurl.c -lerl_interface -lei -lcurl

Erlang wrapper

Depending on your point of view the hard part is now done. Now I'll slap a little thread safe erlang wrapper together.

-module(eurl).
-export([start/0, curl/1]).

%% initialize the driver
%% Loads the eurl_drv library from the current directory and registers the
%% calling process as eurl_proc. Exits with {error, could_not_load_driver}
%% if the driver cannot be loaded.
start() ->
%% load the C code
%% note the name of the library is 
%% eurl_drv in the current directory
  case erl_ddll:load_driver(".", eurl_drv) of
    ok -> ok;
    {error, already_loaded} -> ok;
    _ -> exit({error, could_not_load_driver})
  end,

  register(eurl_proc, self()).

%% perform a curl request and return string data
%% Url is a string; the page content is returned as a list of bytes.
curl(Url) ->
%% start the conversation with the driver
  Port = open_port({spawn, eurl_drv}, [binary]),
%% instruct the driver to execute the curl request
  port_command(Port, [list_to_binary(Url)]),
%% read the response; match on this port and the data tag so that an
%% unrelated message sitting in the mailbox is not mistaken for the reply
  receive
    {Port, {data, BinaryData}} ->
      port_close(Port),
%% convert to list and return
      binary_to_list(BinaryData)
  end.

Usage

Now let's test it from the erlang shell.

1> eurl:start().
2> Body = eurl:curl("http://erlang.org/").  

And as you'd have guessed the full HTML of erlang.org is now in the Body variable.

Wed Sep 16 2009 21:09:00 GMT+0000 (UTC)

Comments

Templating with NDjango

It never ceases to amaze me how many great open source tools and libraries have been ported to .Net. NUnit... NHibernate... NAnt... All incredibly widely adopted.

Today, however, I'd like to focus on one that a coworker brought to my attention and which may actually get some use at the office: NDjango.

During my recent Django work I've become quite attached to Django's template language. It took me a while to warm up to it but I'm hooked. When I heard that the templating had been ported to .Net I was naturally quite interested.

Examples

As you'd expect, there really isn't all that much to it. Consider the following code

/* The data we'll pass into the template, looked up by key ("name", "profession") */
Dictionary<string, object> context = 
	new Dictionary<string, object>();

context.Add("name", "Chris Umbel");
context.Add("profession", "Database Administrator");

/* Create the templating engine: a provider hands out the template manager */
NDjango.TemplateManagerProvider provider = 
	new NDjango.TemplateManagerProvider();
NDjango.Interfaces.ITemplateManager manager = 
	provider.GetNewManager();

/* Apply the template file to the data; the rendered text comes back as a TextReader */
TextReader reader = manager.RenderTemplate(@"C:\template.html", context);

string results = reader.ReadToEnd();
Console.WriteLine(results);

Obviously I need to provide template.html as specified on line 15.

<table>
    <tr>
        <td>Name:</td>
        <td>{{ name }}</td>
    </tr>
    <tr>
        <td>Profession</td>
        <td>{{ profession }}</td>
    </tr>
</table>

After executing the program the following output is produced:

<table>
    <tr>
        <td>Name:</td>
        <td>Chris Umbel</td>
    </tr>
    <tr>
        <td>Profession</td>
        <td>Database Administrator</td>
    </tr>
</table>

The real power comes into play when you use Django's filters. With some minor modifications to the code:

context.Add("text", "Welcome to the official website and blog of Chris Umbel, database administrator and software developer. ");

and some minor additions to the table in the template:

<tr>
	<td>Text</td>
	<td>{{ text|truncatewords:10 }}</td>
</tr>    

and I've employed the truncatewords filter resulting in the following output.

<table>
    <tr>
        <td>Name:</td>
        <td>Chris Umbel</td>
    </tr>
    <tr>
        <td>Profession</td>
        <td>Database Administrator</td>
    </tr>
    <tr>
        <td>Text</td>
        <td>Welcome to the official website and blog of Chris Umbel, ...</td>
    </tr>
</table>

Templates can also iterate IEnumerables:

/* The list the template will loop over */
List<DateTime> dateTimes = new List<DateTime>
{
	new DateTime(2009, 1, 1),
	new DateTime(2009, 3, 1)
};

/* The data we'll pass into the template */
Dictionary<string, object> context = 
	new Dictionary<string, object>();

context.Add("datetimes", dateTimes);

/* Create the templating engine */
NDjango.TemplateManagerProvider provider = 
	new NDjango.TemplateManagerProvider();
NDjango.Interfaces.ITemplateManager manager = 
	provider.GetNewManager();

/* Apply the template to the data; the reader is disposed as soon as
   its content has been read */
string results;
using (TextReader reader = 
	manager.RenderTemplate(@"C:\template.html", context))
{
	results = reader.ReadToEnd();
}

Console.WriteLine(results);

with a template like:

<table>
    {% for dt in datetimes %}
    <tr>
        <td>{{ dt.Month }}</td>       
    </tr>
    <tr>
        <td>{{ dt.Year }}</td>       
    </tr>    
    {% endfor %}
</table>

produces:

	
<table>

    <tr>
        <td>1</td>
    </tr>
    <tr>
        <td>2009</td>
    </tr>

    <tr>
        <td>3</td>
    </tr>
    <tr>
        <td>2009</td>
    </tr>

</table>

Conclusion

I've just covered a basic example here. The possibilities with NDjango are pretty much limitless, however.

For some inspiration it's worth investigating some of the integrations already done such as Bistro and ASP.Net MVC.

Sun Sep 06 2009 17:09:00 GMT+0000 (UTC)

Comments

A little bit o' Erlang

Although it's not the subject of this article I've been fooling around with CouchDB a bit lately. CouchDB is perhaps most well known for being written in Erlang, a functional language developed by Ericsson. All of the fiddling around really got me interested in actually hacking out some Erlang to evaluate any practical purpose it may have to me.

I've messed around with functional languages in the past, namely F#, Caml and quite a bit of Lisp, but this was all in my spare time with no real goal in mind. In short, I am not an experienced functional programmer. I am familiar enough that if I bend my brain I can more or less get into the functional mindset although it certainly doesn't feel natural to me. Due to this please don't consider the code I'm going to share the work of anyone knowledgeable. If anything I'm looking for feedback on how to actually do this stuff right.

Disclaimer out of the way I figured I'd post the two little programs I whipped out to learn Erlang.

Classic Mathematical Example

Considering functional languages lend themselves to mathematics by their nature and I've always enjoyed probability & statistics I figured I'd take a stab whipping out some code to compute combinations and permutations. The first step to accomplishing either of these is a basic function to compute factors.

%% fact(M, A): product of A consecutive integers counting down from M,
%% i.e. M * (M - 1) * ... * (M - A + 1). fact(M, M) is M factorial.
%% A must be a non-negative integer; anything else fails with
%% function_clause instead of recursing without end.
fact(_M, 0) ->
  1;

fact(M, A) when is_integer(A), A > 0 ->
  M * fact(M - 1, A - 1).

This is actually one function named fact. The first parameter, "M", will serve as the number to factor and the second, "A", will serve as an accumulator which controls how many factors I'll compute (when M = A we get a factorial). Note how the first declaration ends with a semicolon. That indicates that there's more function to come. The period at the end of the second declaration indicates that I'm done defining my function. Essentially the Erlang VM will execute the second definition recursively until the accumulator reaches zero, in which case it'll execute the first definition.

Now I'll define functions to actually compute combinations and permutations

%% nCk: the K largest factors of N! divided by K!
combo(N, K) ->
  Numerator = fact(N, K),
  Denominator = fact(K, K),
  Numerator / Denominator.

%% nPk: N! divided by (N - K)!
permu(N, K) ->
  Remaining = N - K,
  fact(N, N) / fact(Remaining, Remaining).

which represent nCk and nPk respectively.

Now I'll actually package it up into a module named probability (in a file named probability.erl) so I can compile and use it:

-module(probability).
-export([combo/2, permu/2]).

%% fact(M, A): product of A consecutive integers counting down from M,
%% i.e. M * (M - 1) * ... * (M - A + 1). fact(M, M) is M factorial.
%% A must be a non-negative integer; anything else fails with
%% function_clause instead of recursing without end.
fact(_M, 0) ->
  1;

fact(M, A) when is_integer(A), A > 0 ->
  M * fact(M - 1, A - 1).

%% nCk as a float: the K largest factors of N! divided by K!
combo(N, K) ->
  fact(N, K) / fact(K, K).

%% nPk as a float: N! divided by (N - K)!
permu(N, K) ->
  fact(N, N) / fact(N - K, N - K).

Now from the Erlang shell I can compile my code with

> c(probability).

and test it with

> probability:combo(8, 6).

resulting in

28.0

and

> probability:permu(10, 3).

producing

720.0

A more fun example

Ok, well that's all fun and games, but... Actually, no. It's not much fun at all. So I figured I'd whip up a function to retrieve the status of a user on twitter.

-module(twitterstatus).
-export([getstatus/1]).

%% Look up UserName on twitter and return the text of their status.
getstatus(UserName) ->
%% bring up inets before making the request
  application:start(inets),

%% build the profile URL and the request headers
  Url = "http://twitter.com/users/" ++ UserName ++ ".xml",
  RequestHeaders = [{"User-Agent", "Erlang Status Lookerupper"}],

%% fetch the profile XML from twitter; anything but an ok reply is a badmatch
  {ok, {{_HttpVer, _Code, _Msg}, _ResponseHeaders, Body}} =
    http:request(get, {Url, RequestHeaders}, [], []),

%% parse the XML, insisting that no text is left over
  {RootElement, ""} =
    xmerl_scan:string(Body),

%% exactly one text node is expected from the XPath query
  [{_, [_ | _], _, [], StatusText, _}] =
    xmerl_xpath:string("/user/status/text/text()", RootElement),

%% hand the status text back
  StatusText.

Which can be called like

> twitterstatus:getstatus("chrisumbel").  

Conclusion

Will this be useful to me? I'm not sold either way yet. I found it to be concise but I think it'll take me a while to get used to reading it. I suppose I was productive enough, however, that I'll continue to explore it in more depth if time permits.

Sun Aug 23 2009 23:08:00 GMT+0000 (UTC)

Comments

Tale of a Website, from Rails to ASP.NET to Django

I hesitate to call it complete yet but for the most part www.chrisumbel.com has been ported from ASP.Net to Django. Sure, there really isn't a lot to this site so no port would have been incredibly painful but I'm quite pleased with the effort level (or lack thereof) required to get it done.

Aside from just this port it's been a long and interesting ride for just a bunch of blog posts and comments.

Ancient History: Rails in my basement

In reality the story began with Ruby on Rails several years back. This site still bears the look, feel, basic data structure and even a few blog posts from kilnaar.com which was based on a Rails and MySQL stack and spent most of its time running on various OpenBSD, Solaris and Linux machines in my basement.

While I was always pleased with kilnaar.com it got to the point that I became sick of hosting it myself. I was lucky to go all those years with no major hardware failures considering most of them were spent on a rickety old Sun Ultra 10 MySQL box and a PIII Dell PowerEdge 1300 webserver. Ultimately I ended up slapping it in a Linux VM which ran on a MacBook whose screen was actually held together with duct tape.

Enter ASP.Net

In January 2009 I decided it was time to move on and build a bit more of an identity. To that end I bought the domain name chrisumbel.com. While I was in the mood for fresh starts I figured, what the hell, I'll start over with a new codebase and go with plain-old-web-forms-ASP.Net, SQL Server and Entity Framework data access all hosted cheaply on godaddy.

Why the Microsoft stack? I honestly can't answer that. I'm intimately familiar with all the technologies involved considering I was a .Net developer for years and currently hold a post as a SQL Server DBA. That made the barrier to entry quite low to be sure. Still I was ending up with virtually the same application that already ran well under rails. I probably could have gotten away with just switching the logo. I guess it was just something to do.

It had been a few years since I'd worked with ASP.Net and I guess I forgot why I went to rails in the first place. Since I'm not getting paid to do this site, let alone by the hour or keystroke, there was no real benefit from me choosing that stack. Even within the confines of .Net I really should have investigated ASP.Net MVC as that would have likely allowed me to get away with a lot less thinking. I have far more important problems to solve with my time rather than gluing an architecture together.

I changed platforms for all the wrong reasons. I changed mainly for change's sake. There were no good financial or technical reasons.

The godaddy hosting also didn't end up working for me. Even for such a simple site I'm the kind of guy who needs (or at least wants) a bit of control over IIS. I was unable to use my own HttpHandlers and had no control over authentication schemes. For many people, if not most, that would be absolutely fine. I certainly won't badmouth it. It was, however, a thorn in my side. Because of this I ended up going to virtualized hosting which cost a bit more but I was happy to spend the money at that point. I was getting traffic and having enough fun doing it to spend the cash.

Present day: Django

Despite the fun I was having with my site a lot more effort was required to make changes because of my design. I'm not going to start blasting ASP.Net here. While I firmly believe it's a generally less productive platform than Rails or Django most of the problems resulted from my own mistakes in the design process. Perhaps a serious re-architecting or adopting MVC could have saved me but another option, Django, presented itself.

An asset I had from other projects I was working on was a dedicated and somewhat underutilized Linux EC2 instance. Since I had that instance anyway and it had plenty of surplus capacity I decided to abandon ASP.Net altogether. By moving to this instance my hosting essentially became free and I had total control.

To be honest my old rails code could have possibly been adapted to my recent changes but Django, a python based web framework, caught my eye. The way "applications" (which would probably be called something more like components in other frameworks) plug right in together seems to make adopting (and adapting) third party code disarmingly simple.

The template system, which initially turned me off a bit, also ultimately turned out to impress me. At first I wanted something more rails-like with the actual core language powering it. That's quite difficult for Django, however. Due to Python's significant white-space it's nearly impossible to fit it in as a template language without getting hairy.

I certainly warmed up to Django's template language as time went on, though. It's designed with a single purpose, to be a template language. Because it was designed with one purpose in mind all the goodies are there for typical HTML processing tasks. It definitely feels like it was built for the job. The template inheritance also let me get a lot of bang for my HTML buck.

Combined with its data access Django imposes a good design upon you much like rails. It makes it very easy to not make mistakes that lead to maintainability nightmares just by pushing you in the right direction.

Microsoft uses the phrase "fall into the pit of success". In my opinion Django's do-the-right-stuff-in-the-right-place and don't-repeat-yourself (much) architecture delivers on that philosophy a bit better than ASP.Net.

Conclusion

Through all of these iterations this site is still really just a blog engine. If I wanted it to stay that way I'd have just used an existing blog engine. I do, however, ultimately want far more out of this site and I don't want it to be difficult to get there. I believe Django is the key to achieving that goal but only time will tell.

Thu Aug 20 2009 18:08:00 GMT+0000 (UTC)

Comments
< 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 >
Follow Chris
RSS Feed
Twitter
Facebook
CodePlex
github
LinkedIn
Google