~5.5s performance checkin

This commit is contained in:
dtookey 2024-08-01 08:46:19 -04:00
parent d7e78e6371
commit 47a4c95f3e

67
1brc.go
View File

@ -19,7 +19,7 @@ const testFile = "dummy.txt"
const resultFile = "my_results.txt" const resultFile = "my_results.txt"
const benchFile = "results.txt" const benchFile = "results.txt"
const profile = true const profile = false
var nGoRoutine = 64 var nGoRoutine = 64
@ -32,6 +32,17 @@ type partitionRange struct {
type resultSet map[uint64]result type resultSet map[uint64]result
// fRS is a fixed-size table of results addressed by a uint64 key.
//
// v and keys are parallel slices: the slot selected for a key stores the
// result in v and the key itself at the same index in keys.
type fRS struct {
	// v holds the stored results, one per slot.
	v []result
	// keys holds, per slot, the key most recently written there by put.
	keys []uint64
	// size is a counter maintained by put; getKeys reads it as a capacity hint.
	size int
}
// newFRS returns an empty fRS whose v and keys slices are both allocated
// with 50000 zero-valued slots.
func newFRS() *fRS {
	const slots = 50000

	table := new(fRS)
	table.v = make([]result, slots)
	table.keys = make([]uint64, slots)
	return table
}
type result struct { type result struct {
name []byte name []byte
count float64 count float64
@ -69,7 +80,7 @@ func oneBRC() {
parts := createPartitions(bits, '\n', nGoRoutine) parts := createPartitions(bits, '\n', nGoRoutine)
wg := &sync.WaitGroup{} wg := &sync.WaitGroup{}
results := make([]resultSet, len(parts)) results := make([]*fRS, len(parts))
for i, part := range parts { for i, part := range parts {
wg.Add(1) wg.Add(1)
@ -80,7 +91,7 @@ func oneBRC() {
finalRS := make(resultSet) finalRS := make(resultSet)
for _, rs := range results { for _, rs := range results {
finalRS.merge(rs) finalRS.merge(rs.toResultSet())
} }
os.WriteFile(resultFile, []byte(finalRS.String()), 0666) os.WriteFile(resultFile, []byte(finalRS.String()), 0666)
@ -100,13 +111,17 @@ func validate() {
meLines := bytes.Split(me, []byte{'\r', '\n'}) meLines := bytes.Split(me, []byte{'\r', '\n'})
refLines := bytes.Split(ref, []byte{'\r', '\n'}) refLines := bytes.Split(ref, []byte{'\r', '\n'})
errs := 0
for i, line := range meLines { for i, line := range meLines {
refLine := string(refLines[i]) refLine := string(refLines[i])
meLine := string(line) meLine := string(line)
if refLine != meLine { if refLine != meLine {
errs++
fmt.Printf("Validation tripped: reference[%s]\tme[%s]\n", refLine, meLine) fmt.Printf("Validation tripped: reference[%s]\tme[%s]\n", refLine, meLine)
} }
} }
fmt.Printf("Validation passed: %v\n", errs == 0)
} }
func startCpuProfile() *os.File { func startCpuProfile() *os.File {
@ -127,11 +142,11 @@ func stopProfiling(f *os.File) {
f.Close() f.Close()
} }
func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int, container []resultSet, wg *sync.WaitGroup) { func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int, container []*fRS, wg *sync.WaitGroup) {
defer wg.Done() defer wg.Done()
const delimiter = byte(';') const delimiter = byte(';')
rs := make(resultSet) rs := newFRS()
alloc := make([]result, 500) alloc := make([]result, 500)
aCnt := 0 aCnt := 0
@ -161,7 +176,7 @@ func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int
hasher.Write(line[:di]) hasher.Write(line[:di])
key := hasher.Sum64() key := hasher.Sum64()
hasher.Reset() hasher.Reset()
r, ok := rs[key] r, ok := rs.get(key)
if !ok { if !ok {
r = alloc[aCnt] r = alloc[aCnt]
r.name = line[:di] r.name = line[:di]
@ -181,7 +196,7 @@ func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int
r.min = temp r.min = temp
} }
rs[key] = r rs.put(key, r)
start = end start = end
if end == wPart.end { if end == wPart.end {
break break
@ -352,6 +367,7 @@ func (rs resultSet) String() string {
sort.Strings(keys) sort.Strings(keys)
b := &strings.Builder{} b := &strings.Builder{}
hasher := fnv.New64() hasher := fnv.New64()
for i, key := range keys { for i, key := range keys {
hasher.Write([]byte(key)) hasher.Write([]byte(key))
r := rs[hasher.Sum64()] r := rs[hasher.Sum64()]
@ -370,3 +386,40 @@ func (rs resultSet) String() string {
} }
return b.String() return b.String()
} }
// get returns the result stored under key and whether one was found.
//
// Lookup starts at the slot chosen by keyMap and probes forward (wrapping at
// the end of the table) until it finds the slot holding key or reaches a free
// slot. Comparing the stored key is what keeps two keys that map to the same
// slot from being mistaken for each other.
func (f *fRS) get(key uint64) (result, bool) {
	idx, found, _ := f.findSlot(key)
	if !found {
		return result{}, false
	}
	return f.v[idx], true
}

// put stores v under key, replacing any value previously stored for that key.
//
// size is incremented only when key occupies a new slot, so it counts slots
// taken rather than put calls. A slot counts as occupied when its result has
// a non-nil name, so callers must store results with a non-nil name. put
// panics if every slot is already occupied by other keys.
func (f *fRS) put(key uint64, v result) {
	idx, found, ok := f.findSlot(key)
	if !ok {
		panic("fRS: table is full")
	}
	f.v[idx] = v
	f.keys[idx] = key
	if !found {
		f.size++
	}
}

// findSlot probes linearly from key's home slot. It returns the index of the
// slot already holding key (found == true) or of the first free slot where
// key can be inserted (found == false). ok is false when the table has no
// slots or when every slot is occupied by a different key.
func (f *fRS) findSlot(key uint64) (idx uint64, found, ok bool) {
	n := uint64(len(f.v))
	if n == 0 {
		return 0, false, false
	}
	idx = f.keyMap(key)
	for probes := uint64(0); probes < n; probes++ {
		if f.v[idx].name == nil {
			// Free slot: key is not in the table.
			return idx, false, true
		}
		if f.keys[idx] == key {
			return idx, true, true
		}
		idx++
		if idx == n {
			idx = 0
		}
	}
	return 0, false, false
}
// keyMap reduces key to a slot index in the range [0, len(f.v)) by taking it
// modulo the number of slots. It panics (integer divide by zero) if the table
// has no slots.
func (f *fRS) keyMap(key uint64) uint64 {
	slots := uint64(len(f.v))
	return key % slots
}
// getKeys returns every non-zero key recorded in the table, in slot order.
//
// A zero entry in keys is treated as an unused slot, so a key whose value is
// exactly 0 is never reported.
func (f *fRS) getKeys() []uint64 {
	// size is only a capacity hint. Clamp it to the number of slots so that a
	// counter larger than the table (or a negative one) cannot trigger an
	// oversized allocation or a makeslice panic.
	hint := f.size
	if hint > len(f.keys) {
		hint = len(f.keys)
	}
	if hint < 0 {
		hint = 0
	}

	keys := make([]uint64, 0, hint)
	for _, k := range f.keys {
		if k != 0 {
			keys = append(keys, k)
		}
	}
	return keys
}
// toResultSet copies the table into a freshly allocated resultSet: for each
// key reported by getKeys, the value returned by get is stored under that key.
func (f *fRS) toResultSet() resultSet {
	out := resultSet{}
	keys := f.getKeys()
	for i := range keys {
		// The found flag from get is not consulted; whatever get returns is stored.
		entry, _ := f.get(keys[i])
		out[keys[i]] = entry
	}
	return out
}