POLYGLOT PROGRAMMING

Better poly than sorry!

I wanted quick access to a Polish-English dictionary from the command line. I wanted it to be extremely simple and fast: just a tiny script which would scrape the dict.pl site.

I implemented it in a couple of languages while learning them or when trying to get the script to be faster.

In the end the Chicken Scheme implementation, when compiled via C, proved to be the fastest, with very little overhead on top of network IO.

But writing all of these was fun, and they can also serve as material for comparing the languages.

In Racket

#lang racket

(require net/http-client
         (prefix-in x: xml)
         html)

;; Fetch the dict.pl result page for the word given as the first
;; command-line argument and parse the response body into an element tree.
(require (only-in net/uri-codec uri-encode))

(define-values (status headers content)
  (let ([args (current-command-line-arguments)])
    ;; Without a word there is nothing to look up; say so instead of
    ;; failing with a vector index error.
    (when (zero? (vector-length args))
      (eprintf "usage: dict WORD\n")
      (exit 1))
    (http-sendrecv
     "dict.pl"
     (format "http://dict.pl/dict?LANG=EN&word=~a"
             ;; Percent-encode the word so that spaces, `&`, `#` and
             ;; non-ASCII (e.g. Polish) letters cannot corrupt the query.
             (uri-encode (vector-ref args 0))))))

;; `content` is the input port for the response body.
(define content-xml (read-html content))

;; Child nodes of EL when it is an element that can hold content
;; (an `html-full`); every other value is treated as having no children.
(define (children el)
  (if (html-full? el)
      (html-full-content el)
      '()))

;; Strings of all non-empty text (pcdata) nodes in NODES, in the order
;; in which they appear in NODES; every other node is dropped.
(define (nodes->str-list nodes)
  (define (text-node? n)
    (and (x:pcdata? n)
         (not (zero? (string-length (x:pcdata-string n))))))
  (for/list ([n nodes]
             #:when (text-node? n))
    (x:pcdata-string n)))

;; Apply FUN to every element of NODES (left to right) and concatenate
;; the results into one list.  A result that is a list is spliced in;
;; any other result is added as a single element.
;;
;; Uses `append-map` instead of repeatedly appending to an accumulator,
;; which copied the accumulated prefix on every step.
(define (flat-map fun nodes)
  (append-map (λ (el)
                (define res (fun el))
                (if (list? res) res (list res)))
              nodes))

;; Collect NODE together with every node reachable below it via `children`.
;; NOTE: each recursion level builds (node . sub-results) and then reverses
;; that list, so NODE comes last at its own level and the overall order is
;; not plain document order.  The code that filters and pairs up the result
;; cells consumes the list in exactly this order.
(define (descendants node)
  (reverse (cons node (flat-map descendants (children node)))))

;; Expand (attr->list a) into a two-element list holding the name and
;; the value of the xml attribute A.  A is evaluated twice.
(define-syntax attr->list
  (syntax-rules ()
    [(_ attr)
     (list (x:attribute-name attr)
           (x:attribute-value attr))]))

;; #t when ATTRS contains an attribute `class` whose value is exactly
;; "resWordCol", #f otherwise (including for an empty ATTRS).
(define (is-res-col? attrs)
  (for/or ([a attrs])
    (equal? (attr->list a) '(class "resWordCol"))))

;; #t when EL is a <td> element carrying class="resWordCol";
;; #f for every other value.
(define (is-td? el)
  (and (td? el)
       (is-res-col? (html-element-attributes el))))

;; One list of text strings per result cell found in the parsed page,
;; in the order in which `descendants` yields the cells.
(define tds
  (for/list ([el (descendants content-xml)]
             #:when (is-td? el))
    (nodes->str-list (descendants el))))

;; Print the result cells two at a time as "first---second", one pair
;; per line.  The text fragments of each cell are concatenated as-is.
(let loop ([tds tds])
  (match tds
    [(list* pl en more)
     (display (string-join pl ""))
     (display "---")
     (displayln (string-join en ""))
     (loop more)]
    ;; An odd number of cells leaves one without a partner; print it on
    ;; its own line instead of failing in `second`.
    [(list pl)
     (displayln (string-join pl ""))]
    [_ (void)]))

In Io

SGML
Socket

// Returns a new sequence: the receiver followed by `pad_str` repeated
// (len - receiver size) times.  `pad_str` defaults to a single space.
Sequence rpad := method(len, pad_str,
    if(pad_str isNil, pad_str := " ")
    filler := pad_str repeated(len - self size)
    self with(filler)
)

// Returns a new sequence: `pad_str` repeated (len - receiver size) times,
// followed by the receiver.  `pad_str` defaults to a single space.
Sequence pad := method(len, pad_str,
    if(pad_str isNil, pad_str := " ")
    filler := pad_str repeated(len - self size)
    filler with(self)
)


// Dictionary client prototype: holds the service URL and the value sent
// as the LANG query parameter.
Dict := Object clone do(
    url  := "http://dict.pl/dict"
    lang := "PL"
)

// Returns the lookup URL for `word`: the base URL followed by the query
// string built from the LANG and word parameters.
Dict queryString := method(word,
    params := Map clone
    params atPut("LANG", self lang)
    params atPut("word", word)
    self url with(params asQueryString)
)

// Fetches the result page for `word`, collects every element whose class
// attribute is "resWordCol" and whose tag is not "th", and prints the
// collected cells two per line: the first padded on the right to 40
// columns, then "--", then the second padded on the left to 40 columns.
// Always returns nil.
Dict printResults := method(word,
    doc := URL with(self queryString(word)) fetch asXML
    cells := doc search(block(node,
        cls := node attributes at("class")
        cls and cls == "resWordCol" and (node name == "th") not
    ))

    for(idx, 0, cells size - 1, 2,
        cells at(idx) allText rpad(40) print
        "--" print
        cells at(idx + 1) allText pad(40) println
    )
    nil
)

// Script entry point: look up the first command-line argument, if any.
if(System args size > 1,
    Dict printResults(System args at(1))
)

In (Chicken) Scheme

This is the implementation I actually use every day: it proved to be the fastest in terms of startup and displaying results. This is why, when I added a bit of functionality to the tool, I did it only in this implementation.

There are also half-working implementations in Dylan and Nimrod, which, by the way, could probably outperform Chicken Scheme. There was a bug in the Dylan HTTP client library (fixed now) which caused me to stop working on that implementation, and I never got back to it. It's different for Nimrod: I started writing another implementation of this tool, but then I realized I'd like to have a currency converter instead and went with that...

(use http-client)
(use html-parser)
(use uri-generic)
(use sxpath)

;; URL prefix of the pronunciation pages; `fetch-pron` appends the
;; URI-encoded word to it.
(define pronounciation-url
  "http://www.macmillandictionary.com/dictionary/american/")


;; Value sent as the LANG query parameter: the second command-line
;; argument when exactly two arguments were given, "EN" otherwise.
(define *lang*
  (let ((args (command-line-arguments)))
    (case (length args)
      ((2) (cadr args))
      (else "EN"))))

;; Download the dict.pl result page for WORD and return its body as a
;; string.  WORD is URI-encoded; *lang* is inserted as it is.
(define (fetch-page word)
  (with-input-from-request
   (format "http://dict.pl/dict?word=~a&LANG=~a" (uri-encode-string word) *lang*)
   #f
   read-string))

;; Download the pronunciation page for WORD (URI-encoded and appended to
;; `pronounciation-url`) and return its body as a string.
(define (fetch-pron word)
  (with-input-from-request
   (string-append pronounciation-url (uri-encode-string word))
   #f
   read-string))

;; Parsed (SXML) result page for the word given as the first
;; command-line argument.
(define xml
  (let ((args (command-line-arguments)))
    ;; Without a word there is nothing to look up; report that instead of
    ;; failing inside `car`.
    (when (null? args)
      (display "usage: dict WORD [LANG]\n" (current-error-port))
      (exit 1))
    (html->sxml (fetch-page (car args)))))

;; All <td> elements of the page whose class attribute is "resWordCol".
(define tds ((sxpath "//td[@class=\"resWordCol\"]") xml))

;; Concatenate, without separators, every text node selected from EL by
;; the sxpath query (// *text*).
(define (get-text el)
  (let ((text-nodes ((sxpath '(// *text*)) el)))
    (apply string-append text-nodes)))

;; Number of padding characters needed to bring STR up to 40 characters;
;; 0 when STR is already 40 characters long or longer.
(define (get-pad-len str)
  (max 0 (- 40 (string-length str))))

;; Build one output line: COL1 followed by its padding, "--", the padding
;; for COL2, COL2 itself and a newline.  The padding lengths come from
;; `get-pad-len`.
(define (format-line col1 col2)
  (define (padding-for col)
    (make-string (get-pad-len col) #\space))
  (string-append col1 (padding-for col1) "--" (padding-for col2) col2 "\n"))

;; Run /usr/bin/mpg123 with URL as its only argument and wait until that
;; process has finished.  The ports returned by `process*` are not used.
(define (play url)
  (receive (player-out player-in pid player-err)
      (process* "/usr/bin/mpg123" (list url))
    (process-wait pid)))

;; sxpath query selecting the data-src-mp3 attribute of every <img> whose
;; class attribute is exactly "sound audio_play_button".
(define mp3-path (sxpath "//img[@class=\"sound audio_play_button\"]/@data-src-mp3"))

;; Print the result cells two per line (first cell, "--", second cell)
;; while remembering the text of every second cell.  When all cells are
;; printed, take at most the first four remembered words, drop duplicates
;; and play the pronunciation of each one for which an mp3 is found.
(let loop ((tds tds)
           (en-words '()))
  (cond
   ((null? tds)
    (for-each (lambda (x)
                (let ((p (mp3-path (html->sxml (fetch-pron x)))))
                  (unless (null? p)
                    ;; The second item of the first match is passed to the player.
                    (play (cadar p)))))
              (delete-duplicates
               (let ((gh (reverse en-words)))
                 (if (> (length gh) 4)
                     (take gh 4)
                     gh)))))
   ;; An odd number of cells leaves one without a partner; print it with
   ;; an empty second column instead of failing in `cadr`.
   ((null? (cdr tds))
    (display (format-line (get-text (car tds)) ""))
    (loop '() en-words))
   (else
    (let ((pl (get-text (car tds)))
          (en (get-text (cadr tds))))
      (display (format-line pl en))
      (loop (cddr tds)
            (cons en en-words))))))

In Python

#! /usr/bin/env python

def check(words):
    """Look up each word on dict.pl and print the translation pairs found.

    For every word the result page is requested with LANG=EN and parsed;
    each table row of class "resRow" that holds exactly two cells of class
    "resWordCol" is printed as one right-aligned "first -- second" line.

    words -- iterable of the words to look up.
    """
    from contextlib import closing
    from BeautifulSoup import BeautifulSoup as BS
    from urllib import urlopen, urlencode
    url = "http://dict.pl/dict"

    for word in words:
        params = urlencode({"word" : word, "LANG" : "EN"})
        # Close the HTTP response as soon as the body has been read.
        with closing(urlopen("%s?%s" % (url, params))) as response:
            page = response.read()
        soup = BS(page)
        rows = soup.findAll("tr", "resRow")
        for row in rows:
            cells = [x.text for x in row("td", "resWordCol")]
            # Only rows with exactly two cells form a translation pair;
            # anything else would make the unpacking below raise.
            if len(cells) != 2:
                continue
            pl, en = cells
            # Parenthesised single argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%20s -- %20s" % (pl, en))

def main():
    """Parse the command line (one or more words) and look the words up."""
    from argparse import ArgumentParser
    cli = ArgumentParser(description='Check english to polish translation of word(s).')
    cli.add_argument("word", action="store", nargs="+")
    check(cli.parse_args().word)


# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()