Brojanje reci u Bibliji, Swift vs Rust vs C++

  • Začetnik teme Začetnik teme bmaxa
  • Datum pokretanja Datum pokretanja

bmaxa

Legenda
Poruka
70.808
Elem, ajde da vidimo kako ide update hash tabele i parsovanje Stringa procitanog iz fajla.
Obrada: prvo se izbace interpunkcijski karakteri, potom se tekst pretvori u lowercase i na kraju
splituje na reci koje se potom broje. Prikaze se prvih 20 sa najvecom frekvencijom.
Prvo Swift:
Swift:
import Foundation
/// Word-frequency counter: reads `bible.txt`, strips punctuation, lowercases the
/// text, splits it on whitespace and prints the 20 most frequent words.
@main
struct Main{
  /// Program entry point. Prints one line per word as rank, count and word,
  /// or an error message if the file cannot be read.
  static func main(){
  do {
      let bible = try
      String(contentsOfFile: "bible.txt",encoding:String.Encoding.ascii)
      let sep = CharacterSet(charactersIn:" \t\r\n")
      let components = bible.filter{!is_punct($0)}
                            .lowercased()
                            .components(separatedBy:sep)
      var words:[String:Int] = [:]
      // components(separatedBy:) yields an empty string between two adjacent
      // separators (double spaces, "\r\n", leading/trailing whitespace).
      // Those are not words, so they are skipped instead of being counted.
      for s in components where !s.isEmpty {
        words[s, default: 0] += 1
      }
      let sorted = words.sorted{$0.value>$1.value}
      // Ranks are 1-based; prefix(20) also copes with fewer than 20 words.
      for (k,entry) in sorted.prefix(20).enumerated(){
        print(String(format:"%10d%10d %10@",k+1,entry.value,entry.key))
      }
      } catch {
        print("error reading bible.txt")
      }
  }
  /// Returns true when `c` is one of the punctuation characters that are
  /// removed from the text before it is split into words.
  static func is_punct(_ c: Character)->Bool{
    c == "'" || c == "." || c == ";" || c == "(" || c == ")"
    || c == "\"" || c == "?" || c == "-" || c == "_" || c == "!"
    || c == "," || c == ":" || c == "|"
  }
}

Kod:
bmaxa@Branimirs-Air bible % swiftc -O bbl.swift -o bblswift -parse-as-library
bmaxa@Branimirs-Air bible % time ./bblswift
         1     63924 the
         2     51696 and
         3     34734 of
         4     13561 to
         5     12913 that
         6     12667 in
         7     10420 he
         8      9838 shall
         9      8997 unto
        10      8971 for
        11      8854 i
        12      8473 his
        13      8177 a
        14      7830 lord
        15      7376 they
        16      7013 be
        17      6989 is
        18      6659 him
        19      6596 not
        20      6430 them
./bblswift  1.25s user 0.02s system 99% cpu 1.273 total

Potom Rust (malo napredniji kod, sa threadovanjem):
Kod:
use std::fs::File;
use std::io;
use std::io::prelude::*;
use std::collections::*;
use std::sync::mpsc;
use std::thread;
// Divisor used to size the work chunks: `main` passes it to `chunks`, which cuts the
// word list into pieces of `len / CONC` words, and one thread is spawned per piece.
static CONC:usize = 16;

/// Counts word frequencies in `bible.txt` and prints the 20 most frequent words.
///
/// The text is stripped of punctuation, lowercased and split on whitespace. The word
/// list is then cut into chunks, each chunk is counted on its own thread, and the
/// per-thread maps are merged on the main thread.
///
/// # Errors
///
/// Returns the underlying I/O error if `bible.txt` cannot be opened or read as UTF-8.
///
/// # Panics
///
/// A worker thread panics if it cannot send its result back to the main thread.
fn main()->Result<(),std::io::Error> {
    let mut buf : String = String::new();
    let mut file = File::open("bible.txt")?;
    file.read_to_string(&mut buf)?;

    // Punctuation is removed before lowercasing and splitting.
    let filtered =
        buf.chars().filter(|c| !is_punct(*c)).collect::<String>()
         .to_lowercase();
    let filtered:Vec<String> = filtered.split_whitespace().map(|x| x.to_string()).collect();
    let chunks = chunks(CONC,&filtered);

    let (tx, rx) = mpsc::channel();

    for chunk in chunks {
        let tx = tx.clone();
        thread::spawn(move || {
            let mut local: HashMap<String,u32> = HashMap::new();
            for word in chunk {
                *local.entry(word).or_insert(0) += 1;
            }
            tx.send(local).unwrap();
        });
    }
    // Drop the original sender so the receiver loop below ends as soon as every
    // worker has finished, rather than blocking forever if a worker dies
    // before sending its map.
    drop(tx);

    let mut hm : HashMap<String,u32> = HashMap::new();
    for partial in rx {
        for (word,count) in partial {
            *hm.entry(word).or_insert(0) += count;
        }
    }

    // Sort ascending by (count, word) and walk it backwards for the top entries.
    // All tuples are distinct (words are unique keys), so an unstable sort is safe.
    let mut bm:Vec<(u32,String)> = hm.into_iter().map(|(word,count)| (count,word)).collect();
    bm.sort_unstable();
    for (rank,(count,word)) in bm.iter().rev().take(20).enumerate() {
        println!("{:2} {:8} {}",rank+1,count,word);
    }
    Ok(())
}
/// Splits `buf` into consecutive owned chunks of `buf.len() / n` elements each.
///
/// If the length is not a multiple of the chunk size, the leftover elements form a
/// final, shorter chunk. The chunk size is clamped to at least one element, so inputs
/// shorter than `n` produce one single-element chunk per item instead of looping
/// forever on empty chunks. `n == 0` is treated like `n == 1`. An empty `buf` yields
/// an empty vector.
fn chunks<T:Clone>(n: usize,buf : &[T])->Vec<Vec<T>> {
    // `max(1)` guards both the division (n == 0) and a zero chunk size (len < n).
    let clen = (buf.len() / n.max(1)).max(1);
    buf.chunks(clen).map(|chunk| chunk.to_vec()).collect()
}

/// Returns `true` if `c` is one of the punctuation characters that are stripped
/// from the text before it is split into words.
fn is_punct(c: char)->bool{
    matches!(
        c,
        '\'' | '.' | ';' | '(' | ')' | '"' | '?' | '-' | '_' | '!' | ',' | ':' | '|'
    )
}

Izvrsavanje:
Kod:
bmaxa@Branimirs-Air bible % time ./bblrs
 1    63924 the
 2    51696 and
 3    34734 of
 4    13561 to
 5    12913 that
 6    12667 in
 7    10420 he
 8     9838 shall
 9     8997 unto
10     8971 for
11     8854 i
12     8473 his
13     8177 a
14     7830 lord
15     7376 they
16     7013 be
17     6989 is
18     6659 him
19     6596 not
20     6430 them
./bblrs  1.05s user 1.26s system 511% cpu 0.452 total

I na kraju C++:
C++:
#include <map>
#include <unordered_map>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <iomanip>
using namespace std;
// Maps each word to the number of times it has been seen.
using Pairs = std::unordered_map<std::string,int>;

// Feeds one input character into the word counter.
//
// Punctuation is ignored, whitespace ends the current word and bumps its count
// in `pairs`, and any other character is appended in lowercase to the word
// being built. The word in progress is kept in a function-local static, so it
// is only counted once a whitespace character is passed in.
void fill( Pairs& pairs, char c )
{
   static std::string word;

   // The <cctype> functions require a value representable as unsigned char
   // (or EOF); passing a negative plain char is undefined behaviour.
   const unsigned char uc = static_cast<unsigned char>(c);

   if( std::ispunct(uc) ) return;

   if( std::isspace(uc) )
   {
     if( !word.empty() )
     {
       pairs[word]++;
       word.clear();
     }
   }
   else
   {
     word += static_cast<char>(std::tolower(uc));
   }
}

int main()
{
   ifstream bible {"bible.txt"};

   using citerator = istreambuf_iterator<char>;

   Pairs pairs;

   for_each( citerator(bible.rdbuf()), citerator(),
             [&pairs]( char c ){ fill( pairs, c ); } );

   multimap<unsigned,string> sorted;

   // Sort the {word, count} pairs.
   //
   for_each( pairs.begin(), pairs.end(),
             [&sorted]( const Pairs::value_type& p )
             { sorted.insert(make_pair(p.second,p.first)); } );

   // Print the top 20.
   //
   auto item = sorted.rbegin();

   for( auto n = 0; n < 20; ++n, ++item )
   {
     cout << "Position  " << setw(2) << n+1
          << ": count = " << setw(6) << item->first
          << "  " << item->second << '\n';
   }

   return 0;
}

I C++ ubedljivo najbrzi :p
Kod:
bmaxa@Branimirs-Air bible % g++ -O3 bbl.cpp -o bblcpp -std=c++11
bmaxa@Branimirs-Air bible % time ./bblcpp
Position   1: count =  63924  the
Position   2: count =  51696  and
Position   3: count =  34734  of
Position   4: count =  13561  to
Position   5: count =  12913  that
Position   6: count =  12667  in
Position   7: count =  10420  he
Position   8: count =   9838  shall
Position   9: count =   8997  unto
Position  10: count =   8971  for
Position  11: count =   8854  i
Position  12: count =   8473  his
Position  13: count =   8177  a
Position  14: count =   7830  lord
Position  15: count =   7376  they
Position  16: count =   7013  be
Position  17: count =   6989  is
Position  18: count =   6659  him
Position  19: count =   6596  not
Position  20: count =   6430  them
./bblcpp  0.08s user 0.01s system 96% cpu 0.085 total

Dakle imperativni fazon dobija funkcionalni fazon :p

a da evo ga : https://www.icloud.com/iclouddrive/0jUEF_8tClqffiGiHtFgv6LOw#bible
tu je bible.txt, ne secam se odakle sam skinuo
 
A sad ultimativni funkcionalni jezik, cist funkcionalan ko suza:
Kod:
bmaxa@Branimirs-Air bible % ghc -O2 bbl1.hs -o bbl1hs
Loaded package environment from /Users/bmaxa/.ghc/aarch64-darwin-8.10.5/environments/default
[1 of 1] Compiling Main             ( bbl1.hs, bbl1.o )
Linking bbl1hs ...
bmaxa@Branimirs-Air bible % time ./bbl1hs
   63924 the
   51695 and
   34734 of
   13561 to
   12913 that
   12667 in
   10420 he
    9838 shall
    8997 unto
    8971 for
    8854 i
    8473 his
    8177 a
    7830 lord
    7376 they
    7013 be
    6989 is
    6659 him
    6596 not
    6429 them
./bbl1hs  0.82s user 0.02s system 99% cpu 0.838 total

Kod:
import qualified Data.Map.Strict as Map
import Data.List
import Text.Printf
import Data.Char

-- | Count how many times each word occurs in the input list.
-- The result holds one @(word, count)@ pair per distinct word, in ascending
-- word order (the order produced by 'Map.toList').
wordFreq :: [String] -> [(String, Int)]
wordFreq = Map.toList . Map.fromListWith (+) . map (\w -> (w, 1))

-- | Read @bible.txt@, drop punctuation, lowercase the text, count the words
-- and print the 20 most frequent ones as "count word" lines.
main :: IO ()
main = do
    contents <- readFile "bible.txt"
    let cleaned = map toLower (filter (not . isPunctuation) contents)
        byCount = [ (n, w) | (w, n) <- wordFreq (words cleaned) ]
        -- Descending by (count, word); same order as reversing an ascending sort,
        -- because every (count, word) pair is distinct.
        ranked  = sortBy (flip compare) byCount
    mapM_ (\(n, w) -> printf "%8d %s\n" n w) (take 20 ranked)

Haskell mi je velika ljubav, posao se moze naci jedino u inostranstvu, no imao sam zbog njega ponudu
za posao u Google, koju sam morao da odbijem, jer je bio uslov selidba u inostranstvo :p
 

Back
Top