From ef6013faff0d10cfb2d4b9afbee8f0a1fe73fb94 Mon Sep 17 00:00:00 2001
From: ledgerwatch <akhounov@gmail.com>
Date: Sat, 12 Feb 2022 22:16:06 +0000
Subject: [PATCH] Automation for gathering erigon2 history data (#3501)

Co-authored-by: Alexey Sharp <alexeysharp@Alexeys-iMac.local>
---
 cmd/hack/hack.go | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/cmd/hack/hack.go b/cmd/hack/hack.go
index 010a2419ce..ba3a234e2d 100644
--- a/cmd/hack/hack.go
+++ b/cmd/hack/hack.go
@@ -15,6 +15,7 @@ import (
 	_ "net/http/pprof" //nolint:gosec
 	"os"
 	"os/signal"
+	"path/filepath"
 	"regexp"
 	"runtime"
 	"runtime/pprof"
@@ -2455,6 +2456,63 @@ func histStats() error {
 	return nil
 }
 
+func histStat1(chaindata string) error {
+	files, err := os.ReadDir(chaindata)
+	if err != nil {
+		return err
+	}
+	endBlockMap := map[uint64]struct{}{}
+	sizeMap := map[string]map[uint64]uint64{}
+	keys := []string{"ahistory", "shistory", "chistory", "abitmap", "sbitmap", "cbitmap"}
+	for _, k := range keys {
+		sizeMap[k] = map[uint64]uint64{}
+	}
+	re := regexp.MustCompile(fmt.Sprintf("(%s).([0-9]+)-([0-9]+).(dat|idx)", strings.Join(keys, "|")))
+	for _, f := range files {
+		name := f.Name()
+		subs := re.FindStringSubmatch(name)
+		if len(subs) != 5 {
+			if len(subs) != 0 {
+				log.Warn("File ignored by changes scan, more than 5 submatches", "name", name, "submatches", len(subs))
+			}
+			continue
+		}
+		var startBlock uint64
+		if startBlock, err = strconv.ParseUint(subs[2], 10, 64); err != nil {
+			return err
+		}
+		var endBlock uint64
+		if endBlock, err = strconv.ParseUint(subs[3], 10, 64); err != nil {
+			return err
+		}
+		if endBlock-startBlock < 499_999 {
+			continue
+		}
+		endBlockMap[endBlock] = struct{}{}
+		if fileInfo, err1 := os.Stat(filepath.Join(chaindata, name)); err1 == nil {
+			sizeMap[subs[1]][endBlock] += uint64(fileInfo.Size())
+		} else {
+			return err1
+		}
+	}
+	var endBlocks []uint64
+	for endBlock := range endBlockMap {
+		endBlocks = append(endBlocks, endBlock)
+	}
+	sort.Slice(endBlocks, func(i, j int) bool {
+		return endBlocks[i] < endBlocks[j]
+	})
+	fmt.Printf("endBlock,%s\n", strings.Join(keys, ","))
+	for _, endBlock := range endBlocks {
+		fmt.Printf("%d", endBlock)
+		for _, k := range keys {
+			fmt.Printf(",%.3f", float64(sizeMap[k][endBlock])/1024.0/1024.0/1024.0)
+		}
+		fmt.Printf("\n")
+	}
+	return nil
+}
+
 func main() {
 	debug.RaiseFdLimit()
 	flag.Parse()
@@ -2630,6 +2688,8 @@ func main() {
 		err = junkdb()
 	case "histStats":
 		err = histStats()
+	case "histStat1":
+		err = histStat1(*chaindata)
 	}
 
 	if err != nil {
-- 
GitLab