Jim Hague, 1986 IOCCC winner

Pierre Forcioli
5 min read · Oct 19, 2020

Let me explain how Jim Hague wrote, in C, a program that translates ASCII text read from standard input into Morse code. You may tell me that this is not very impressive, and in fact you would be right. But the way he did it is so impressive that I need to decrypt it with you. Let's begin!

According to Wikipedia, the International Obfuscated C Code Contest (abbreviated IOCCC) is a computer programming contest for the most creatively obfuscated C code. That means you have to write something that doesn't look like code but in reality is, like the main image at the top of this page.

The original code of the program was this:

#define DIT (
#define DAH )
#define __DAH ++
#define DITDAH *
#define DAHDIT for
#define DIT_DAH malloc
#define DAH_DIT gets
#define _DAHDIT char
_DAHDIT _DAH_[]="ETIANMSURWDKGOHVFaLaPJBXCYZQb54a3d2f16g7c8a90l?e'b.s;i,d:"
;main DIT DAH{_DAHDIT
DITDAH _DIT,DITDAH DAH_,DITDAH DIT_,
DITDAH _DIT_,DITDAH DIT_DAH DIT
DAH,DITDAH DAH_DIT DIT DAH;DAHDIT
DIT _DIT=DIT_DAH DIT 81 DAH,DIT_=_DIT
__DAH;_DIT==DAH_DIT DIT _DIT DAH;__DIT
DIT'\n'DAH DAH DAHDIT DIT DAH_=_DIT;DITDAH
DAH_;__DIT DIT DITDAH
_DIT_?_DAH DIT DITDAH DIT_ DAH:'?'DAH,__DIT
DIT' 'DAH,DAH_ __DAH DAH DAHDIT DIT
DITDAH DIT_=2,_DIT_=_DAH_; DITDAH _DIT_&&DIT
DITDAH _DIT_!=DIT DITDAH DAH_>='a'? DITDAH
DAH_&223:DITDAH DAH_ DAH DAH; DIT
DITDAH DIT_ DAH __DAH,_DIT_ __DAH DAH
DITDAH DIT_+= DIT DITDAH _DIT_>='a'? DITDAH _DIT_-'a':0
DAH;}_DAH DIT DIT_ DAH{ __DIT DIT
DIT_>3?_DAH DIT DIT_>>1 DAH:'\0'DAH;return
DIT_&1?'-':'.';}__DIT DIT DIT_ DAH _DAHDIT
DIT_;{DIT void DAH write DIT 1,&DIT_,1 DAH;}

As you can see, there are a lot of defines that can be greatly simplified.
To do that, we're going to use one of the commands we have already seen before:

gcc -E hague.c

It runs only the preprocessing stage of gcc, which is exactly what we want, because in this step the preprocessor replaces every macro defined at the top of our code. So here it is:

char _DAH_[]="ETIANMSURWDKGOHVFaLaPJBXCYZQb54a3d2f16g7c8a90l?e'b.s;i,d:"
;main ( ){char
* _DIT,* DAH_,* DIT_,
* _DIT_,* malloc (
),* gets ( );for
( _DIT=malloc ( 81 ),DIT_=_DIT
++;_DIT==gets ( _DIT );__DIT
('\n') ) for ( DAH_=_DIT;*
DAH_;__DIT ( *
_DIT_?_DAH ( * DIT_ ):'?'),__DIT
(' '),DAH_ ++ ) for (
* DIT_=2,_DIT_=_DAH_; * _DIT_&&(
* _DIT_!=( * DAH_>='a'? *
DAH_&223:* DAH_ ) ); (
* DIT_ ) ++,_DIT_ ++ )
* DIT_+= ( * _DIT_>='a'? * _DIT_-'a':0
);}_DAH ( DIT_ ){ __DIT (
DIT_>3?_DAH ( DIT_>>1 ):'\0');return
DIT_&1?'-':'.';}__DIT ( DIT_ ) char
DIT_;{( void ) write ( 1,&DIT_,1 );}

This looks more like the C code we know, but let's make it more readable by indenting it correctly and putting things in the right place:

char _DAH_[]="ETIANMSURWDKGOHVFaLaPJBXCYZQb54a3d2f16g7c8a90l?e'b.s;i,d:";
main()
{
char *_DIT, *DAH_, *DIT_, *_DIT_, *malloc(), *gets();
for(_DIT = malloc(81), DIT_ = _DIT++ ; _DIT == gets(_DIT) ;
__DIT('\n'))
for (DAH_=_DIT; *DAH_ ;
__DIT(*_DIT_ ? _DAH(*DIT_) : '?'), __DIT(' '),DAH_++)
for (*DIT_ = 2, _DIT_ = _DAH_ ; *_DIT_ && (*_DIT_ != (*DAH_ >= 'a' ? * DAH_ & 223 : *DAH_)) ; (*DIT_)++ , _DIT_++)
*DIT_ += (*_DIT_ >= 'a' ? *_DIT_ - 'a' : 0);
}
_DAH(DIT_)
{
__DIT(DIT_ > 3 ? _DAH(DIT_ >> 1) : '\0');
return DIT_ & 1 ? '-' : '.';
}
__DIT(DIT_) char DIT_;
{
(void)write(1, &DIT_, 1);
}

Again, it looks a little bit better, but we don't usually code like this, do we? As we can see, there is a string on the first line called _DAH_. It seems to be some kind of encoding string, maybe something to compare with something else? At the bottom, we can also see a function of a kind we are used to writing: it looks like _putchar, doesn't it? Let's rename the __DIT function to _putchar and do some reorganisation, such as moving the two functions that come after our main to before it, so that we can omit the prototypes.

void _putchar(char DIT_)
{
write(1, &DIT_, 1);
}
_DAH(DIT_)
{
_putchar(DIT_ > 3 ? _DAH(DIT_ >> 1) : '\0');
return DIT_ & 1 ? '-' : '.';
}
char _DAH_[]="ETIANMSURWDKGOHVFaLaPJBXCYZQb54a3d2f16g7c8a90l?e'b.s;i,d:";
main()
{
char *_DIT, *DAH_, *DIT_, *_DIT_, *malloc(), *gets();
for(_DIT = malloc(81), DIT_ = _DIT++ ; _DIT == gets(_DIT) ;
_putchar('\n'))
for (DAH_=_DIT ; *DAH_ ;
_putchar(*_DIT_ ? _DAH(*DIT_) : '?'), _putchar(' '),DAH_++)
for (*DIT_ = 2, _DIT_ = _DAH_ ; *_DIT_ && (*_DIT_ != (*DAH_ >= 'a' ? * DAH_ & 223 : *DAH_)) ; (*DIT_)++ , _DIT_++)
*DIT_ += (*_DIT_ >= 'a' ? *_DIT_ - 'a' : 0);
}

Ewww, it's a little bit more readable! But now, let's see what this program is doing. In our main, we have 3 for loops; I will add braces to make them more visible and to understand what they do.

for(_DIT = malloc(81), DIT_ = _DIT++ ; _DIT == gets(_DIT) ;
_putchar('\n'))
{
for (DAH_=_DIT ; *DAH_ ;
_putchar(*_DIT_ ? _DAH(*DIT_) : '?'), _putchar(' '),DAH_++)
{
for (*DIT_ = 2, _DIT_ = _DAH_ ; *_DIT_ && (*_DIT_ != (*DAH_ >= 'a' ? * DAH_ & 223 : *DAH_)) ; (*DIT_)++ , _DIT_++)
{
*DIT_ += (*_DIT_ >= 'a' ? *_DIT_ - 'a' : 0);
}
}
}

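So the first loop allocates memory and waits for a line of input; each time a line has been processed, it prints a \n.
The second one points a variable (DAH_) at the input the user gave and walks through it one character at a time until the end of the string. For each character, once the third loop has finished, it tests whether *_DIT_ is non-zero (that is, whether the character was found in the table): if so, it prints what the _DAH function produces (we'll decrypt that later), and if not, it prints "?". It then prints a space and increments DAH_.
The third loop searches the _DAH_ table for the current character, first converting it to uppercase if it is lowercase (for a lowercase letter, *DAH_ & 223 clears bit 5, which is the same as *DAH_ - 32). While searching, it counts in *DIT_, starting from 2: one for every table entry it skips, plus an extra (entry - 'a') whenever that entry is itself a lowercase letter. The resulting number is the value that the second loop gives to the _DAH function.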

The names of the variables and functions seem a little crazy, don't they? Now that we know roughly what they do, let's rename them.

/* Write the single character c to file descriptor 1 with write(). */
void _putchar(char c)
{
write(1, &c, 1);
}
/*
 * Turn the numeric code c into Morse symbols, one symbol per bit.
 *
 * While c > 3 the function first recurses on c >> 1 and prints the symbol
 * that the recursive call returns, so the symbols for the higher bits are
 * printed before the one for the lowest bit. Once c <= 3 the recursion
 * stops and a '\0' byte is printed instead.
 *
 * Returns '-' when the lowest bit of c is 1 and '.' when it is 0; this last
 * symbol is not printed here, it is left to the caller.
 */
tr(c)
{
/* Print the symbols for the higher bits first (or '\0' when c <= 3). */
_putchar(c > 3 ? tr(c >> 1) : '\0');
/* The lowest bit selects the symbol: 1 -> '-', 0 -> '.'. */
return c & 1 ? '-' : '.';
}
/*
 * Lookup table searched by main(). The position at which a character is
 * found determines the numeric code passed to tr(). Entries that are
 * >= 'a' additionally add (entry - 'a') to that code when they are stepped
 * over (see the innermost loop in main()).
 */
char morse[]="ETIANMSURWDKGOHVFaLaPJBXCYZQb54a3d2f16g7c8a90l?e'b.s;i,d:";
/*
 * Read lines with gets() and, for every character of each line, print the
 * output of tr() for the code computed from the morse[] table, or '?' when
 * the character is not found in the table, followed by a space. A '\n' is
 * printed after each line.
 */
main()
{
char *str, *copy, *c, *morse_copy, *malloc(), *gets();
/*
 * Allocate 81 bytes. c keeps pointing at the first byte, which is used as
 * the code counter (*c); str is advanced by one and the remaining bytes
 * serve as the line buffer. The loop repeats for as long as gets() returns
 * the buffer pointer, and prints '\n' after each processed line.
 * NOTE(review): gets() does not bound the input length, so a line that
 * does not fit in the remaining 80 bytes overflows the buffer.
 */
for(str = malloc(81), c = str++ ; str == gets(str) ;
_putchar('\n'))
{
/*
 * Walk the line one character at a time until the terminating NUL.
 * After the search below has run for the current character: if
 * *morse_copy is non-zero the character was found, so print the symbols
 * for code *c via tr(); otherwise print '?'. Then print a space and move
 * on to the next character.
 */
for (copy=str ; *copy ;
_putchar(*morse_copy ? tr(*c) : '?'), _putchar(' '),copy++)
{
/*
 * Search morse[] for the current character; a character >= 'a' is
 * compared as (*copy & 223), i.e. with bit 5 cleared. The code *c
 * starts at 2 and is incremented for every table entry stepped over.
 * The search stops at the first match or at the end of the table.
 */
for (*c = 2, morse_copy = morse ; *morse_copy && (*morse_copy != (*copy >= 'a' ? * copy & 223 : *copy)) ; (*c)++ , morse_copy++)
{
/* A table entry >= 'a' adds an extra (entry - 'a') to the code. */
*c += (*morse_copy >= 'a' ? *morse_copy - 'a' : 0);
}
}
}
}

I renamed the second function tr (because it is a function that takes the code computed for a character and translates it recursively, using the bitwise operation c >> 1), so this function outputs either '-' or '.', which are the two symbols used in Morse code. Each symbol is selected by one bit of the code, read from the most significant bit down: 1 corresponds to '-' and 0 to '.', and the leading 1 bit only serves as a start marker. For example, the letter O gets the code 15, which is 1111 in binary: after the leading marker bit the remaining bits are 111, so the function produces '-', then '-', and then '-'.

I have only one thing left to say :

... --- ... — I really need help sleeping after understanding this hard exercise! Thanks for reading, and if you have any questions, don't hesitate to ask in the comments!

[pierre@ubuntu 0x0D-preprocessor] $ ./a.out
Hello, Holberton
.... . .-.. .-.. --- --..-- ? .... --- .-.. -... . .-. - --- -.

Bye 🤘

--

--