Starting with 128 threads

Total elapsed time: 4.99s
Total computed records: 1000000000
Validation passed: true
This commit is contained in:
dtookey 2024-08-02 13:27:36 -04:00
parent 3093a09c7c
commit b0bf9100a6
3 changed files with 62 additions and 70798 deletions

75
1brc.go
View File

@ -2,12 +2,14 @@ package main
import (
"bytes"
"encoding/json"
"fmt"
"hash/fnv"
"log"
"math"
"os"
"runtime/pprof"
"slices"
"sort"
"strings"
"sync"
@ -21,7 +23,7 @@ const benchFile = "results.txt"
const profile = false
var nGoRoutine = 64
var nGoRoutine = 128
const maxSeekLen = int64(100)
@ -32,15 +34,17 @@ type partitionRange struct {
type resultSet map[uint64]result
type fRS struct {
// fastResultSet is a fixed-size, direct-indexed table mapping a 64-bit
// name hash to its aggregated result. Slot selection is keyMap's modulo;
// a colliding key overwrites the slot's previous occupant, so the table
// is heavily oversized to avoid collisions in practice (see newFRS).
type fastResultSet struct {
v []result // slot storage, indexed by keyMap(key)
keys []uint64 // 64-bit key stored per occupied slot; 0 means empty (see getKeys)
size int // approximate occupied-slot count; used only as a capacity hint in getKeys
}
func newFRS() *fRS {
// this is actually awful: we have ~412 unique keys for our specific data set, yet we need ~100x that
// much space because our fast hash collides badly — but this oversized table works for now
// newFRS builds an empty fastResultSet backed by a fixed-size array.
// The table is deliberately much larger than the expected key count so
// that the modulo-based keyMap stays collision-free for this data set.
func newFRS() *fastResultSet {
	const size = 50000
	frs := &fastResultSet{
		v:    make([]result, size),
		keys: make([]uint64, size),
	}
	return frs
}
type result struct {
@ -48,7 +52,8 @@ type result struct {
count float64
min float64
max float64
rAvg float64
sum float64
//rAvg float64
}
var count = int64(0)
@ -80,7 +85,7 @@ func oneBRC() {
parts := createPartitions(bits, '\n', nGoRoutine)
wg := &sync.WaitGroup{}
results := make([]*fRS, len(parts))
results := make([]*fastResultSet, len(parts))
for i, part := range parts {
wg.Add(1)
@ -142,7 +147,7 @@ func stopProfiling(f *os.File) {
f.Close()
}
func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int, container []*fRS, wg *sync.WaitGroup) {
func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int, container []*fastResultSet, wg *sync.WaitGroup) {
defer wg.Done()
const delimiter = byte(';')
@ -186,7 +191,8 @@ func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int
}
r.count += 1.0
r.rAvg = ((r.rAvg * (r.count - 1.0)) + temp) / r.count
//r.rAvg = ((r.rAvg * (r.count - 1.0)) + temp) / r.count
r.sum = r.sum + temp
if temp > r.max {
r.max = temp
@ -246,14 +252,15 @@ func f2CharToInt(b1 byte, b2 byte) int {
func seekNextNewLine(b []byte, part partitionRange, last int64) int64 {
const step = 8
for i := last + step; i < part.end; {
switch b[i] {
case '\n':
return i + 1
case '\r':
return i + 2
case ';': // this will be minimum [;0.0\r]
i += 5
case '.':
return i + 4
default:
i += 2
}
@ -339,7 +346,8 @@ func (r *result) merge(other *result) {
r.min = other.min
}
r.rAvg = ((r.rAvg * r.count) + (other.rAvg * other.count)) / (r.count + other.count)
//r.rAvg = ((r.rAvg * r.count) + (other.rAvg * other.count)) / (r.count + other.count)
r.sum += other.sum
r.count += other.count
}
@ -356,7 +364,38 @@ func (rs resultSet) merge(other resultSet) {
rs[k] = tr
}
}
// Report renders a histogram of station-name lengths: one line per
// distinct length, listing the unique names of that length as a JSON
// array, e.g. `[6]{["Lisbon","Zagreb"]}`.
func (rs resultSet) Report() string {
	var bldr strings.Builder
	keyHist := make(map[int][]string)
	for _, v := range rs {
		name := string(v.name)
		key := len(name)
		// append-to-nil is fine in Go, so no presence check is needed;
		// Contains keeps each bucket free of duplicate names.
		if !slices.Contains(keyHist[key], name) {
			keyHist[key] = append(keyHist[key], name)
		}
	}
	keyList := make([]int, 0, len(keyHist))
	for k := range keyHist {
		keyList = append(keyList, k)
	}
	slices.Sort(keyList)
	for _, key := range keyList {
		names := keyHist[key]
		// Map iteration order is randomized per run, so sort each bucket
		// to make the report deterministic.
		slices.Sort(names)
		b, _ := json.Marshal(names)
		bldr.WriteString(fmt.Sprintf("[%d]{%s}\n", key, string(b)))
	}
	return bldr.String()
}
func (rs resultSet) String() string {
keys := make([]string, 0, len(rs))
@ -377,7 +416,7 @@ func (rs resultSet) String() string {
b.WriteString("=")
b.WriteString(fmt.Sprintf("%.1f", r.min))
b.WriteString("/")
b.WriteString(fmt.Sprintf("%.1f", r.rAvg))
b.WriteString(fmt.Sprintf("%.1f", r.sum/r.count))
b.WriteString("/")
b.WriteString(fmt.Sprintf("%.1f", r.max))
if i < len(keys)-1 {
@ -387,25 +426,23 @@ func (rs resultSet) String() string {
return b.String()
}
func (f *fRS) get(key uint64) (result, bool) {
// get looks up key and reports whether its slot holds a real entry;
// an occupied slot always carries a non-nil name.
func (f *fastResultSet) get(key uint64) (result, bool) {
	slot := f.keyMap(key)
	entry := f.v[slot]
	return entry, entry.name != nil
}
func (f *fRS) put(key uint64, v result) {
// put stores v under key. The slot index comes from keyMap, so a
// colliding key silently overwrites the slot's previous occupant.
func (f *fastResultSet) put(key uint64, v result) {
	idx := f.keyMap(key)
	// Count the slot only on first occupancy. Previously size was bumped
	// on every call — including the frequent update-in-place path — so it
	// grew toward the record count and getKeys pre-allocated a hugely
	// oversized slice from it.
	if f.keys[idx] == 0 {
		f.size += 1
	}
	f.v[idx] = v
	f.keys[idx] = key
}
func (f *fRS) keyMap(key uint64) uint64 {
//a := key & math.MaxUint32
//b := (key >> 32) & math.MaxUint32
// keyMap folds a 64-bit hash into a slot index via modulo over the
// backing array's length.
func (f *fastResultSet) keyMap(key uint64) uint64 {
	return key % uint64(len(f.v))
}
func (f *fRS) getKeys() []uint64 {
func (f *fastResultSet) getKeys() []uint64 {
keys := make([]uint64, 0, f.size)
for _, k := range f.keys {
if k > 0 {
@ -415,7 +452,7 @@ func (f *fRS) getKeys() []uint64 {
return keys
}
func (f *fRS) toResultSet() resultSet {
func (f *fastResultSet) toResultSet() resultSet {
rs := make(resultSet)
for _, key := range f.getKeys() {
v, _ := f.get(key)

70773
dummy.txt

File diff suppressed because it is too large Load Diff

View File

@ -80,7 +80,7 @@ Chiang Mai=-25.3/25.8/75.2,
Chicago=-39.5/9.8/62.0,
Chihuahua=-34.9/18.6/66.6,
Chittagong=-22.9/25.9/76.9,
Chi?in?u=-41.2/10.2/59.0,
Chișinău=-41.2/10.2/59.0,
Chongqing=-29.5/18.6/67.4,
Christchurch=-39.7/12.2/64.7,
City of San Marino=-38.6/11.8/59.6,
@ -347,7 +347,7 @@ St. Louis=-34.4/13.9/63.4,
Stockholm=-41.4/6.6/56.0,
Surabaya=-23.0/27.1/79.9,
Suva=-23.5/25.6/72.5,
Suwa?ki=-44.1/7.2/57.6,
Suwałki=-44.1/7.2/57.6,
Sydney=-33.8/17.7/64.2,
Ségou=-24.0/28.0/78.5,
Tabora=-28.0/23.0/72.1,
@ -374,7 +374,7 @@ Toliara=-26.4/24.1/72.9,
Toluca=-38.7/12.4/62.7,
Toronto=-42.2/9.4/58.3,
Tripoli=-29.0/20.0/68.3,
Troms?=-46.9/2.9/54.1,
Tromsø=-46.9/2.9/54.1,
Tucson=-30.3/20.9/72.3,
Tunis=-40.3/18.4/67.3,
Ulaanbaatar=-49.1/-0.4/52.2,
@ -398,7 +398,7 @@ Whitehorse=-53.0/-0.1/49.9,
Wichita=-40.5/13.9/66.3,
Willemstad=-20.8/28.0/74.1,
Winnipeg=-47.7/3.0/54.7,
Wroc?aw=-42.9/9.6/57.8,
Wrocław=-42.9/9.6/57.8,
Xi'an=-38.3/14.1/65.8,
Yakutsk=-56.4/-8.8/39.9,
Yangon=-21.7/27.5/75.3,
@ -410,4 +410,4 @@ Zagreb=-38.7/10.7/62.7,
Zanzibar City=-22.7/26.0/81.5,
Zürich=-43.7/9.3/62.4,
Ürümqi=-41.1/7.4/59.7,
?zmir=-33.2/17.9/66.4
İzmir=-33.2/17.9/66.4