From 47a4c95f3e35cc6e85b5182e3557cd19102502eb Mon Sep 17 00:00:00 2001 From: dtookey Date: Thu, 1 Aug 2024 08:46:19 -0400 Subject: [PATCH] ~5.5s performance checkin --- 1brc.go | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/1brc.go b/1brc.go index 35542f8..b9307e1 100644 --- a/1brc.go +++ b/1brc.go @@ -19,7 +19,7 @@ const testFile = "dummy.txt" const resultFile = "my_results.txt" const benchFile = "results.txt" -const profile = true +const profile = false var nGoRoutine = 64 @@ -32,6 +32,17 @@ type partitionRange struct { type resultSet map[uint64]result +type fRS struct { + v []result + keys []uint64 + size int +} + +func newFRS() *fRS { + const size = 50000 + return &fRS{v: make([]result, size), keys: make([]uint64, size)} +} + type result struct { name []byte count float64 @@ -69,7 +80,7 @@ func oneBRC() { parts := createPartitions(bits, '\n', nGoRoutine) wg := &sync.WaitGroup{} - results := make([]resultSet, len(parts)) + results := make([]*fRS, len(parts)) for i, part := range parts { wg.Add(1) @@ -80,7 +91,7 @@ func oneBRC() { finalRS := make(resultSet) for _, rs := range results { - finalRS.merge(rs) + finalRS.merge(rs.toResultSet()) } os.WriteFile(resultFile, []byte(finalRS.String()), 0666) @@ -100,13 +111,17 @@ func validate() { meLines := bytes.Split(me, []byte{'\r', '\n'}) refLines := bytes.Split(ref, []byte{'\r', '\n'}) + errs := 0 for i, line := range meLines { refLine := string(refLines[i]) meLine := string(line) if refLine != meLine { + errs++ fmt.Printf("Validation tripped: reference[%s]\tme[%s]\n", refLine, meLine) } } + + fmt.Printf("Validation passed: %v\n", errs == 0) } func startCpuProfile() *os.File { @@ -127,11 +142,11 @@ func stopProfiling(f *os.File) { f.Close() } -func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int, container []resultSet, wg *sync.WaitGroup) { +func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int, container []*fRS, wg *sync.WaitGroup) { defer wg.Done() const delimiter = byte(';') - rs := make(resultSet) + rs := newFRS() alloc := make([]result, 500) aCnt := 0 @@ -161,7 +176,7 @@ func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int hasher.Write(line[:di]) key := hasher.Sum64() hasher.Reset() - r, ok := rs[key] + r, ok := rs.get(key) if !ok { r = alloc[aCnt] r.name = line[:di] @@ -181,7 +196,7 @@ func workerComputePartition(aData []byte, wPart partitionRange, workerNumber int r.min = temp } - rs[key] = r + rs.put(key, r) start = end if end == wPart.end { break @@ -352,6 +367,7 @@ func (rs resultSet) String() string { sort.Strings(keys) b := &strings.Builder{} hasher := fnv.New64() + for i, key := range keys { hasher.Write([]byte(key)) r := rs[hasher.Sum64()] @@ -370,3 +386,40 @@ func (rs resultSet) String() string { } return b.String() } + +func (f *fRS) get(key uint64) (result, bool) { + p := f.v[f.keyMap(key)] + return p, p.name != nil +} + +func (f *fRS) put(key uint64, v result) { + idx := f.keyMap(key) + f.v[idx] = v + f.keys[idx] = key + f.size += 1 +} + +func (f *fRS) keyMap(key uint64) uint64 { + //a := key & math.MaxUint32 + //b := (key >> 32) & math.MaxUint32 + return (key) % uint64(len(f.v)) +} + +func (f *fRS) getKeys() []uint64 { + keys := make([]uint64, 0, f.size) + for _, k := range f.keys { + if k > 0 { + keys = append(keys, k) + } + } + return keys +} + +func (f *fRS) toResultSet() resultSet { + rs := make(resultSet) + for _, key := range f.getKeys() { + v, _ := f.get(key) + rs[key] = v + } + return rs +}