bmaxa
Legenda
- Poruka
- 70.808
Elem, ajde da vidimo kako ide update hash tabele i parsovanje Stringa procitanog iz fajla.
Obrada: prvo se izbace interpunkcijski karakteri, potom se tekst pretvori u lowercase i na kraju
splituje na reci koje se potom broje. Prikaze se prvih 20 reci sa najvecom frekvencijom.
Prvo Swift:
Potom Rust (malo napredniji kod, sa threadovanjem):
Izvrsavanje:
I na kraju C++:
I C++ ubedljivo najbrzi
Dakle imperativni fazon dobija funkcionalni fazon
a da evo ga : https://www.icloud.com/iclouddrive/0jUEF_8tClqffiGiHtFgv6LOw#bible
tu je bible.txt, ne secam se odakle sam skinuo
Obrada: prvo se izbace interpunkcijski karakteri, potom se tekst pretvori u lowercase i na kraju
splituje na reci koje se potom broje. Prikaze se prvih 20 reci sa najvecom frekvencijom.
Prvo Swift:
Swift:
import Foundation
/// Word-frequency counter for `bible.txt`.
///
/// Strips a fixed set of punctuation characters, lowercases the text, splits it
/// on whitespace and prints the 20 most frequent words with their counts.
@main
struct Main {
    /// Program entry point: reads the file, counts the words and prints the top 20.
    /// Prints an error message instead if the file cannot be read.
    static func main() {
        do {
            let bible = try String(contentsOfFile: "bible.txt", encoding: String.Encoding.ascii)
            let sep = CharacterSet(charactersIn: " \t\r\n")
            let components = bible.filter { !is_punct($0) }
                .lowercased()
                .components(separatedBy: sep)

            var words: [String: Int] = [:]
            // components(separatedBy:) produces "" between two adjacent separators
            // (e.g. "\r\n" or a double space); those are not words, so skip them.
            for s in components where !s.isEmpty {
                words[s, default: 0] += 1
            }

            // Highest count first; only the first 20 entries are printed.
            let sorted = words.sorted { $0.value > $1.value }
            for (index, entry) in sorted.prefix(20).enumerated() {
                print(String(format: "%10d%10d %10@", index + 1, entry.value, entry.key))
            }
        } catch {
            print("error reading bible.txt")
        }
    }

    /// Returns `true` when `c` is one of the punctuation characters removed
    /// from the text before it is split into words.
    static func is_punct(_ c: Character) -> Bool {
        c == "'" || c == "." || c == ";" || c == "(" || c == ")"
            || c == "\"" || c == "?" || c == "-" || c == "_" || c == "!"
            || c == "," || c == ":" || c == "|"
    }
}
Kod:
bmaxa@Branimirs-Air bible % swiftc -O bbl.swift -o bblswift -parse-as-library
bmaxa@Branimirs-Air bible % time ./bblswift
1 63924 the
2 51696 and
3 34734 of
4 13561 to
5 12913 that
6 12667 in
7 10420 he
8 9838 shall
9 8997 unto
10 8971 for
11 8854 i
12 8473 his
13 8177 a
14 7830 lord
15 7376 they
16 7013 be
17 6989 is
18 6659 him
19 6596 not
20 6430 them
./bblswift 1.25s user 0.02s system 99% cpu 1.273 total
Potom Rust (malo napredniji kod, sa threadovanjem):
Kod:
use std::fs::File;
use std::io;
use std::io::prelude::*;
use std::collections::*;
use std::sync::mpsc;
use std::thread;
/// Divisor handed to `chunks` when the word list is split up; every resulting
/// chunk is counted on its own thread.
const CONC: usize = 16;
fn main()->Result<(),std::io::Error> {
let mut buf : String = String::new();
let mut file = File::open("bible.txt")?;
let n = file.read_to_string(&mut buf)?;
let filtered =
buf.chars().filter(|c| !is_punct(*c)).collect::<String>()
.to_lowercase();
let filtered:Vec<String> = filtered.split_whitespace().map(|x| x.to_string()).collect();
let mut hm : HashMap<String,u32> = HashMap::new();
let chunks = chunks(CONC,&filtered);
let clen = chunks.len();
let (tx, rx) = mpsc::channel();
for i in chunks {
let tx = tx.clone();
thread::spawn(move || {
let mut hm: HashMap<String,u32> = HashMap::new();
for j in i {
let v = hm.entry(j).or_insert(0);
*v += 1;
}
tx.send(hm).unwrap();
});
}
for _ in 0..clen {
let lst = rx.recv().unwrap();
for (k,v) in lst {
let vv = hm.entry(k).or_insert(0);
*vv += v;
}
}
let mut bm:Vec<(u32,String)> = Vec::new();
for (i,j) in hm {
bm.push((j,i));
}
bm.sort();
let mut k = 1;
for (i,j) in bm.iter().rev().take(20) {
println!("{:2} {:8} {}",k,i,j);
k+=1;
}
Ok(())
}
/// Splits `buf` into consecutive owned chunks of `buf.len() / n` elements.
///
/// The last chunk holds whatever is left over and may be shorter, so the result
/// can contain more than `n` chunks when the length is not a multiple of `n`
/// (35 items with `n = 16` give 17 chunks of 2 followed by one chunk of 1).
/// An empty `buf` yields an empty result.
///
/// The chunk length is clamped to at least 1: with fewer than `n` items the
/// integer division is 0, which would otherwise never consume any input.
/// `n == 0` is treated like `n == 1` instead of dividing by zero.
fn chunks<T: Clone>(n: usize, buf: &[T]) -> Vec<Vec<T>> {
    let clen = (buf.len() / n.max(1)).max(1);
    buf.chunks(clen).map(|chunk| chunk.to_vec()).collect()
}
/// Returns `true` if `c` is one of the punctuation characters that are removed
/// from the text before it is split into words.
fn is_punct(c: char) -> bool {
    matches!(
        c,
        '\'' | '.' | ';' | '(' | ')' | '"' | '?' | '-' | '_' | '!' | ',' | ':' | '|'
    )
}
Izvrsavanje:
Kod:
bmaxa@Branimirs-Air bible % time ./bblrs
1 63924 the
2 51696 and
3 34734 of
4 13561 to
5 12913 that
6 12667 in
7 10420 he
8 9838 shall
9 8997 unto
10 8971 for
11 8854 i
12 8473 his
13 8177 a
14 7830 lord
15 7376 they
16 7013 be
17 6989 is
18 6659 him
19 6596 not
20 6430 them
./bblrs 1.05s user 1.26s system 511% cpu 0.452 total
I na kraju C++:
C++:
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <unordered_map>
using namespace std;
// Maps each word to the number of times it has been seen.
using Pairs = std::unordered_map<std::string, int>;

// Feeds one input character into the word counter.
//
// Punctuation is ignored, whitespace ends the current word (which is then
// counted in `pairs`), and any other character is appended in lowercase.
// The word under construction lives in a function-local static, so the
// function is not reentrant and a word is only counted once a whitespace
// character follows it.
void fill( Pairs& pairs, char c )
{
    static std::string word;
    // The <cctype> classifiers require a value representable as unsigned char;
    // passing a negative plain char (any non-ASCII byte) is undefined behaviour.
    const unsigned char uc = static_cast<unsigned char>(c);
    if( std::ispunct(uc) ) return;
    if( std::isspace(uc) )
    {
        if( !word.empty() )
        {
            ++pairs[word];
            word.clear();
        }
        return;
    }
    word += static_cast<char>(std::tolower(uc));
}
// Counts the words in bible.txt and prints the (up to) 20 most frequent ones.
// Returns 0 on success and 1 when the file cannot be opened.
int main()
{
    std::ifstream bible {"bible.txt"};
    if( !bible )
    {
        std::cerr << "error reading bible.txt\n";
        return 1;
    }
    using citerator = std::istreambuf_iterator<char>;
    Pairs pairs;
    std::for_each( citerator(bible.rdbuf()), citerator(),
        [&pairs]( char c ){ fill( pairs, c ); } );
    // fill() only commits a word when it sees whitespace; feed one trailing
    // separator so a final word not followed by a newline is still counted.
    fill( pairs, '\n' );
    // Sort the {word, count} pairs by count (ascending key order).
    //
    std::multimap<unsigned, std::string> sorted;
    for( const auto& p : pairs )
        sorted.emplace( p.second, p.first );
    // Print the top 20, walking backwards from the highest count. The loop is
    // also bounded by rend() because there may be fewer than 20 distinct words.
    //
    auto n = 0;
    for( auto item = sorted.rbegin(); item != sorted.rend() && n < 20; ++item, ++n )
    {
        std::cout << "Position " << std::setw(2) << n+1
            << ": count = " << std::setw(6) << item->first
            << " " << item->second << '\n';
    }
    return 0;
}
I C++ je ubedljivo najbrzi

Kod:
bmaxa@Branimirs-Air bible % g++ -O3 bbl.cpp -o bblcpp -std=c++11
bmaxa@Branimirs-Air bible % time ./bblcpp
Position 1: count = 63924 the
Position 2: count = 51696 and
Position 3: count = 34734 of
Position 4: count = 13561 to
Position 5: count = 12913 that
Position 6: count = 12667 in
Position 7: count = 10420 he
Position 8: count = 9838 shall
Position 9: count = 8997 unto
Position 10: count = 8971 for
Position 11: count = 8854 i
Position 12: count = 8473 his
Position 13: count = 8177 a
Position 14: count = 7830 lord
Position 15: count = 7376 they
Position 16: count = 7013 be
Position 17: count = 6989 is
Position 18: count = 6659 him
Position 19: count = 6596 not
Position 20: count = 6430 them
./bblcpp 0.08s user 0.01s system 96% cpu 0.085 total
Dakle imperativni fazon dobija funkcionalni fazon

a da evo ga : https://www.icloud.com/iclouddrive/0jUEF_8tClqffiGiHtFgv6LOw#bible
tu je bible.txt, ne secam se odakle sam skinuo