Starting with 128 threads

Total elapsed time: 4.99s
Total computed records: 1000000000
Validation passed: true
This commit is contained in:
dtookey 2024-08-02 13:27:36 -04:00
parent 3093a09c7c
commit b0bf9100a6
3 changed files with 62 additions and 70798 deletions

75
1brc.go
View File

@ -2,12 +2,14 @@ package main
import ( import (
"bytes" "bytes"
"encoding/json"
"fmt" "fmt"
"hash/fnv" "hash/fnv"
"log" "log"
"math" "math"
"os" "os"
"runtime/pprof" "runtime/pprof"
"slices"
"sort" "sort"
"strings" "strings"
"sync" "sync"
@ -21,7 +23,7 @@ const benchFile = "results.txt"
const profile = false const profile = false
var nGoRoutine = 64 var nGoRoutine = 128
const maxSeekLen = int64(100) const maxSeekLen = int64(100)
@ -32,15 +34,17 @@ type partitionRange struct {
type resultSet map[uint64]result type resultSet map[uint64]result
type fRS struct { type fastResultSet struct {
v []result v []result
keys []uint64 keys []uint64
size int size int
} }
func newFRS() *fRS { // this is actually awful. we have ~412 unique keys for our specific data set, and we require f**kin' 100x space because
// our fast hash collides like it was designed to, but this one works for
func newFRS() *fastResultSet {
const size = 50000 const size = 50000
return &fRS{v: make([]result, size), keys: make([]uint64, size)} return &fastResultSet{v: make([]result, size), keys: make([]uint64, size)}
} }
type result struct { type result struct {
@ -48,7 +52,8 @@ type result struct {
count float64 count float64
min float64 min float64
max float64 max float64
rAvg float64 sum float64
//rAvg float64
} }
var count = int64(0) var count = int64(0)
@ -80,7 +85,7 @@ func oneBRC() {
parts := createPartitions(bits, '\n', nGoRoutine) parts := createPartitions(bits, '\n', nGoRoutine)
wg := &sync.WaitGroup{} wg := &sync.WaitGroup{}
results := make([]*fRS, len(parts)) results := make([]*fastResultSet, len(parts))
for i, part := range parts { for i, part := range parts {
wg.Add(1) wg.Add(1)
@ -142,7 +147,7 @@ func stopProfiling(f *os.File) {
f.Close() f.Close()
} }
func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int, container []*fRS, wg *sync.WaitGroup) { func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int, container []*fastResultSet, wg *sync.WaitGroup) {
defer wg.Done() defer wg.Done()
const delimiter = byte(';') const delimiter = byte(';')
@ -186,7 +191,8 @@ func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int
} }
r.count += 1.0 r.count += 1.0
r.rAvg = ((r.rAvg * (r.count - 1.0)) + temp) / r.count //r.rAvg = ((r.rAvg * (r.count - 1.0)) + temp) / r.count
r.sum = r.sum + temp
if temp > r.max { if temp > r.max {
r.max = temp r.max = temp
@ -246,14 +252,15 @@ func f2CharToInt(b1 byte, b2 byte) int {
func seekNextNewLine(b []byte, part partitionRange, last int64) int64 { func seekNextNewLine(b []byte, part partitionRange, last int64) int64 {
const step = 8 const step = 8
for i := last + step; i < part.end; { for i := last + step; i < part.end; {
switch b[i] { switch b[i] {
case '\n': case '\n':
return i + 1 return i + 1
case '\r': case '\r':
return i + 2 return i + 2
case ';': // this will be minimum [;0.0\r] case '.':
i += 5 return i + 4
default: default:
i += 2 i += 2
} }
@ -339,7 +346,8 @@ func (r *result) merge(other *result) {
r.min = other.min r.min = other.min
} }
r.rAvg = ((r.rAvg * r.count) + (other.rAvg * other.count)) / (r.count + other.count) //r.rAvg = ((r.rAvg * r.count) + (other.rAvg * other.count)) / (r.count + other.count)
r.sum += other.sum
r.count += other.count r.count += other.count
} }
@ -356,7 +364,38 @@ func (rs resultSet) merge(other resultSet) {
rs[k] = tr rs[k] = tr
} }
} }
func (rs resultSet) Report() string {
bldr := &strings.Builder{}
keyHist := make(map[int][]string)
for _, v := range rs {
name := string(v.name)
key := len(name)
container, ok := keyHist[key]
if !ok {
container = []string{}
}
if !slices.Contains(container, name) {
container = append(container, name)
keyHist[key] = container
}
}
keyList := []int{}
for k := range keyHist {
keyList = append(keyList, k)
}
slices.Sort(keyList)
for _, key := range keyList {
b, _ := json.Marshal(keyHist[key])
bldr.WriteString(fmt.Sprintf("[%d]{%s}\n", key, string(b)))
}
return bldr.String()
}
func (rs resultSet) String() string { func (rs resultSet) String() string {
keys := make([]string, 0, len(rs)) keys := make([]string, 0, len(rs))
@ -377,7 +416,7 @@ func (rs resultSet) String() string {
b.WriteString("=") b.WriteString("=")
b.WriteString(fmt.Sprintf("%.1f", r.min)) b.WriteString(fmt.Sprintf("%.1f", r.min))
b.WriteString("/") b.WriteString("/")
b.WriteString(fmt.Sprintf("%.1f", r.rAvg)) b.WriteString(fmt.Sprintf("%.1f", r.sum/r.count))
b.WriteString("/") b.WriteString("/")
b.WriteString(fmt.Sprintf("%.1f", r.max)) b.WriteString(fmt.Sprintf("%.1f", r.max))
if i < len(keys)-1 { if i < len(keys)-1 {
@ -387,25 +426,23 @@ func (rs resultSet) String() string {
return b.String() return b.String()
} }
func (f *fRS) get(key uint64) (result, bool) { func (f *fastResultSet) get(key uint64) (result, bool) {
p := f.v[f.keyMap(key)] p := f.v[f.keyMap(key)]
return p, p.name != nil return p, p.name != nil
} }
func (f *fRS) put(key uint64, v result) { func (f *fastResultSet) put(key uint64, v result) {
idx := f.keyMap(key) idx := f.keyMap(key)
f.v[idx] = v f.v[idx] = v
f.keys[idx] = key f.keys[idx] = key
f.size += 1 f.size += 1
} }
func (f *fRS) keyMap(key uint64) uint64 { func (f *fastResultSet) keyMap(key uint64) uint64 {
//a := key & math.MaxUint32
//b := (key >> 32) & math.MaxUint32
return (key) % uint64(len(f.v)) return (key) % uint64(len(f.v))
} }
func (f *fRS) getKeys() []uint64 { func (f *fastResultSet) getKeys() []uint64 {
keys := make([]uint64, 0, f.size) keys := make([]uint64, 0, f.size)
for _, k := range f.keys { for _, k := range f.keys {
if k > 0 { if k > 0 {
@ -415,7 +452,7 @@ func (f *fRS) getKeys() []uint64 {
return keys return keys
} }
func (f *fRS) toResultSet() resultSet { func (f *fastResultSet) toResultSet() resultSet {
rs := make(resultSet) rs := make(resultSet)
for _, key := range f.getKeys() { for _, key := range f.getKeys() {
v, _ := f.get(key) v, _ := f.get(key)

70773
dummy.txt

File diff suppressed because it is too large Load Diff

View File

@ -80,7 +80,7 @@ Chiang Mai=-25.3/25.8/75.2,
Chicago=-39.5/9.8/62.0, Chicago=-39.5/9.8/62.0,
Chihuahua=-34.9/18.6/66.6, Chihuahua=-34.9/18.6/66.6,
Chittagong=-22.9/25.9/76.9, Chittagong=-22.9/25.9/76.9,
Chi?in?u=-41.2/10.2/59.0, Chișinău=-41.2/10.2/59.0,
Chongqing=-29.5/18.6/67.4, Chongqing=-29.5/18.6/67.4,
Christchurch=-39.7/12.2/64.7, Christchurch=-39.7/12.2/64.7,
City of San Marino=-38.6/11.8/59.6, City of San Marino=-38.6/11.8/59.6,
@ -347,7 +347,7 @@ St. Louis=-34.4/13.9/63.4,
Stockholm=-41.4/6.6/56.0, Stockholm=-41.4/6.6/56.0,
Surabaya=-23.0/27.1/79.9, Surabaya=-23.0/27.1/79.9,
Suva=-23.5/25.6/72.5, Suva=-23.5/25.6/72.5,
Suwa?ki=-44.1/7.2/57.6, Suwałki=-44.1/7.2/57.6,
Sydney=-33.8/17.7/64.2, Sydney=-33.8/17.7/64.2,
Ségou=-24.0/28.0/78.5, Ségou=-24.0/28.0/78.5,
Tabora=-28.0/23.0/72.1, Tabora=-28.0/23.0/72.1,
@ -374,7 +374,7 @@ Toliara=-26.4/24.1/72.9,
Toluca=-38.7/12.4/62.7, Toluca=-38.7/12.4/62.7,
Toronto=-42.2/9.4/58.3, Toronto=-42.2/9.4/58.3,
Tripoli=-29.0/20.0/68.3, Tripoli=-29.0/20.0/68.3,
Troms?=-46.9/2.9/54.1, Tromsø=-46.9/2.9/54.1,
Tucson=-30.3/20.9/72.3, Tucson=-30.3/20.9/72.3,
Tunis=-40.3/18.4/67.3, Tunis=-40.3/18.4/67.3,
Ulaanbaatar=-49.1/-0.4/52.2, Ulaanbaatar=-49.1/-0.4/52.2,
@ -398,7 +398,7 @@ Whitehorse=-53.0/-0.1/49.9,
Wichita=-40.5/13.9/66.3, Wichita=-40.5/13.9/66.3,
Willemstad=-20.8/28.0/74.1, Willemstad=-20.8/28.0/74.1,
Winnipeg=-47.7/3.0/54.7, Winnipeg=-47.7/3.0/54.7,
Wroc?aw=-42.9/9.6/57.8, Wrocław=-42.9/9.6/57.8,
Xi'an=-38.3/14.1/65.8, Xi'an=-38.3/14.1/65.8,
Yakutsk=-56.4/-8.8/39.9, Yakutsk=-56.4/-8.8/39.9,
Yangon=-21.7/27.5/75.3, Yangon=-21.7/27.5/75.3,
@ -410,4 +410,4 @@ Zagreb=-38.7/10.7/62.7,
Zanzibar City=-22.7/26.0/81.5, Zanzibar City=-22.7/26.0/81.5,
Zürich=-43.7/9.3/62.4, Zürich=-43.7/9.3/62.4,
Ürümqi=-41.1/7.4/59.7, Ürümqi=-41.1/7.4/59.7,
?zmir=-33.2/17.9/66.4 İzmir=-33.2/17.9/66.4