This is my "Hacker News Reader". It converts HN to CSV (only selected fields of interest to me), and from there it is easily imported into kdb+ (a load sketch is at the end). I have more reusable, generalized lex techniques for other websites, but HN is so simple it can be done with a braindead one-off like the one below.

Requirements: flex, cc

Usage:

   fetch -4o yc.htm https://news.ycombinator.com
   yc < yc.htm 
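yc reads the saved page on stdin and writes CSV on stdout. fetch(1) is the BSD downloader; anything that saves the page to yc.htm works, e.g. curl -o yc.htm https://news.ycombinator.com.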
To compile this I use something like the following (-Crfa favours scanner speed over table size and bypasses stdio, -8 builds an 8-bit scanner, -i makes the patterns case-insensitive):

    flex -Crfa -8 -i yc.l;
    cc -Wall -pipe lex.yy.c -static -o yc;
Save the text below as yc.l, then compile as above.

    #define jmp BEGIN
    #define p printf
    #define x yytext
   %s aa bb cc dd ee ff gg hh
   %s ii jj kk ll mm nn oo 
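    /* aa: rank span, bb: start of an href, cc: id of the score span, dd: a bare ">" */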
   aa "span class=\"rank\""
   bb "a href=\""
   cc score_........
   dd \>
    /* #include  */
    /* #include  */
   %%
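    /* state chain per story: aa rank, bb/cc story url, dd/ee title, ff-hh points, ii/jj user link, kk/ll time link, mm/nn item link, oo comment count */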
   [^\12\40-\176]
   , p("%2c");
    /* rank (dont care) */
   {aa} jmp aa;
    /* [1-9][^<\.]* p("\n%s,",x);jmp bb; */
   <aa>[1-9][^<\.]* p("\n");jmp bb;
    /* url */
   <bb>{bb} jmp cc;
   <cc>http[^"]* p("%s,",x);jmp dd;
    /* title */
   
   <dd>{dd} jmp ee;
    /* [^><]* p("%s,",x);jmp ff; */
   <ee>[^><]* p("%s",x);jmp ff;
    /* host (omit) */
    /* points (dont care) */
   <ff>{cc} jmp gg;
   <gg>{dd} jmp hh;
    /* [1-9][^<> p]* p("%s,",x);jmp ii; */
   <hh>[1-9][^<> p]* p(",");jmp ii;
    /* user */
   <ii>{bb} jmp jj;
   <jj>http[^"]* p("%s,",x);jmp kk;
    /* time (dont care) */
   <kk>{bb} jmp ll;
    /* http[^"]* ; */
   <ll>http[^"]* jmp mm;
    /* unix time (dont care) */
    /* [1-9][^<]* { time_t t0; time_t t1; time_t t2; t1=time(&t0); t2=parsedate(x,&t1,0); p("%d,",t2); jmp mm; } */
    /* item */
   <mm>{bb} jmp nn;
    /* http[^"]* p("%s,",x);jmp oo; */
   <nn>http[^"]* p("%s",x);jmp oo;
    /* comments (dont care) */
   <oo>{dd} jmp oo;
    /* [1-9d][^ <]* p("%s",x);jmp 0; */
   <oo>[1-9d][^ <]* jmp 0;
   .
   \n
   %%
   int main(){ yylex(); return 0;}
   int yywrap(){ p("\n"); return 1;}
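
Each story comes out as one line of the form storyurl,title,userlink,itemlink (that is the order of the printf calls above). For the kdb+ side, here is a minimal q sketch, assuming the output was saved with yc < yc.htm > yc.csv, that every non-empty line really has those four comma-separated fields, and using column names that are just my own labels; rows with a comma inside the title (or a missing field) need extra care.

    / load the CSV produced above into a table of strings
    l:read0 `:yc.csv                / one string per line
    l:l where 0<count each l        / drop blank lines (each record is written with a leading newline)
    t:flip `url`title`user`item!flip ","vs/:l

Everything loads as strings; cast columns afterwards if needed.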