Skip to content

Commit

Permalink
feat/store-ranges-smaller-tree-and-deduplication (#3)
Browse files Browse the repository at this point in the history
- The IP tree is now binary
- Reduced IP tree size by ~3x
- Tree now properly deduplicates networks by keeping only the largest ones (duplicates and subnetworks are dropped).
- Ranges are now stored in `./ranges`, one `.txt` file per provider. They're extracted after building the tree, meaning that they benefit from the deduplication.
- go stringer was replaced by go-enum for enum generation, to allow for more customization, and checking at build time that the enum / sources are correctly mapped
  • Loading branch information
nohehf authored Oct 1, 2024
1 parent ae3affe commit 5dd4b84
Show file tree
Hide file tree
Showing 43 changed files with 19,317 additions and 422 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ all: lint generate pre-build build test
# Install deps
.PHONY: setup
setup:
go install golang.org/x/tools/cmd/stringer@latest
go install github.com/abice/go-enum@latest

.PHONY: lint
lint:
Expand All @@ -21,7 +21,7 @@ generate:

.PHONY: pre-build
pre-build: generate
go run cmd/pre-build/pre-build.go
go run cmd/pre-build/pre-build.go --write-ranges ranges

.PHONY: build
build: pre-build generate
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

Detect the cloud / hosting provider of a given host. Fast, static & offline.
Cloudfinder offers both a cli and a golang package.
Cloud provider ranges are also tracked and can be found in `./ranges`.

## CLI Usage

Expand Down
155 changes: 106 additions & 49 deletions cmd/pre-build/pre-build.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package main

import (
"encoding/hex"
"flag"
"fmt"
"os"
"slices"
"sort"
"strings"

"crypto/sha256"

Expand All @@ -19,6 +21,67 @@ const (
ipRangesHashPath = "internal/static/hash.txt"
)

// main fetches the IP ranges of every source and regenerates the IPv4 and
// IPv6 tree data files when those ranges differ from the previous run.
//
// Flags:
//   - --write-ranges DIR: after building the trees, also write the ranges
//     extracted from them into DIR (see writeRangesToDir).
//   - --force: rebuild the trees even when the ranges hash is unchanged.
//
// The hash of the sorted ranges is compared with the one stored at
// ipRangesHashPath. The new hash is persisted only once every output has been
// written, so a run that stops half-way is retried by the next invocation
// instead of being mistaken for an up-to-date build.
func main() {
	var writeRangesDir string
	var force bool
	flag.StringVar(&writeRangesDir, "write-ranges", "", "optionally store the ranges in a directory")
	flag.BoolVar(&force, "force", false, "force to re compute tree")
	flag.Parse()

	// Fetch the ranges, then sort them so the hash does not depend on fetch order.
	sourceRanges := source.GetAllIPRanges(source.AllSources)
	sortRanges(sourceRanges)

	hash := computeRangesHash(sourceRanges)
	log.Info("Hash of ip ranges: %s", hash)

	// A missing hash file simply means there is no previous build: prevHash
	// stays empty, never matches the new hash, and the trees are generated.
	prevHash, err := os.ReadFile(ipRangesHashPath)
	if err != nil && !os.IsNotExist(err) {
		log.Fatal("Failed to read hash", err)
	}

	if hash == string(prevHash) && !force {
		log.Info("Ip ranges have not changed (same hash), skipping")
		return
	}

	// Build one tree per address family.
	count4 := 0
	ipv4Tree := tree.NewIPv4Tree()
	count6 := 0
	ipv6Tree := tree.NewIPv6Tree()
	for _, r := range sourceRanges {
		switch r.Cat {
		case source.CatIPv4:
			count4++
			ipv4Tree.Add(r)
		case source.CatIPv6:
			count6++
			ipv6Tree.Add(r)
		}
	}

	log.Info("Added %d IPv4 ranges to tree", count4)
	log.Info("Added %d IPv6 ranges to tree", count6)

	writeTree(ipv4Tree, ipv4TreePath)
	writeTree(ipv6Tree, ipv6TreePath)

	if writeRangesDir != "" {
		writeRangesToDir(ipv4Tree, ipv6Tree, writeRangesDir)
		log.Info("Wrote ranges to %s", writeRangesDir)
	}

	// Record the new hash last, once the generated files are in place.
	err = os.WriteFile(ipRangesHashPath, []byte(hash), 0644) // nolint: mnd
	if err != nil {
		log.Fatal("Failed to write hash", err)
	}
}

func byteCountSI(b int64) string {
const unit = 1000
if b < unit {
Expand All @@ -29,8 +92,7 @@ func byteCountSI(b int64) string {
div *= unit
exp++
}
return fmt.Sprintf("%.1f %cB",
float64(b)/float64(div), "kMGTPE"[exp])
return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "kMGTPE"[exp])
}

func writeTree(tree tree.Tree, path string) {
Expand All @@ -51,62 +113,57 @@ func writeTree(tree tree.Tree, path string) {
log.Info("Wrote tree to %s (size: %s)", f.Name(), size)
}

// Fetches ip range sources & generates the ip range data file & tree data file
func main() {
// Fetch ranges
ranges := source.GetAllIPRanges(source.AllSources)

// Sort ranges per string repr
rangesStr := make([]string, len(ranges))
for i, r := range ranges {
rangesStr[i] = r.String()
}
sort.Strings(rangesStr)

// Compute the hash of the rangesStr
func computeRangesHash(sortedRanges []*source.IPRange) string {
h := sha256.New()
for _, r := range rangesStr {
h.Write([]byte(r))
for _, r := range sortedRanges {
h.Write([]byte(r.String()))
}
hash := h.Sum(nil)
log.Info("Hash of ip ranges: %x", hash)
return hex.EncodeToString(hash)
}

// Compare to previous hash
prevHash, err := os.ReadFile(ipRangesHashPath)
if err != nil {
log.Fatal("Failed to read hash", err)
}
// sortRanges orders ranges in place by their String() representation, from
// the greatest string down to the smallest (descending order).
func sortRanges(ranges []*source.IPRange) {
	// "a sorts before b" exactly when b's text is strictly smaller than a's.
	descending := func(a, b int) bool {
		return ranges[b].String() < ranges[a].String()
	}
	sort.Slice(ranges, descending)
}

if slices.Equal(hash, prevHash) {
log.Info("Ip ranges have not changed (ip ranges hashes are the same), skipping")
return
// Write the ranges per provider under the given directory
func writeRangesToDir(ipv4tree tree.Tree, ipv6tree tree.Tree, rangesDir string) {
// Extract ranges from trees
v4ranges := ipv4tree.GetAllRanges()
v6ranges := ipv6tree.GetAllRanges()

sortRanges(v4ranges)
sortRanges(v6ranges)

// Check if ranges dir exists, if not create it
if _, err := os.Stat(rangesDir); os.IsNotExist(err) {
err = os.Mkdir(rangesDir, os.ModePerm)
if err != nil {
log.Fatal("Failed to create ranges dir", err)
}
}

// Write new hash to disk
err = os.WriteFile(ipRangesHashPath, hash, 0644) // nolint: mnd
if err != nil {
log.Fatal("Failed to write hash", err)
// Map ranges per provider, first v4 then v6
rangesPerProvider := make(map[string][]*source.IPRange)
for _, r := range append(v4ranges, v6ranges...) {
providerKey := strings.ToLower(r.Provider.String())
rangesPerProvider[providerKey] = append(rangesPerProvider[providerKey], r)
}

// Build tree
count4 := 0
ipv4Tree := tree.NewIPv4Tree()
count6 := 0
ipv6Tree := tree.NewIPv6Tree()
for _, r := range ranges {
if r.Cat == source.CatIPv4 {
count4++
ipv4Tree.Add(r)
// Write ranges to files
for provider, ranges := range rangesPerProvider {
fileContents := strings.Builder{}
for _, r := range ranges {
fileContents.WriteString(r.Network.String())
fileContents.WriteString("\n")
}
if r.Cat == source.CatIPv6 {
count6++
ipv6Tree.Add(r)

filePath := fmt.Sprintf("%s/%s.txt", rangesDir, provider)
err := os.WriteFile(filePath, []byte(fileContents.String()), os.ModePerm)
if err != nil {
log.Fatal("Failed to write ranges file", err)
}
}

log.Info("Added %d IPv4 ranges to tree", count4)
log.Info("Added %d IPv6 ranges to tree", count6)

writeTree(ipv4Tree, ipv4TreePath)
writeTree(ipv6Tree, ipv6TreePath)
}
81 changes: 0 additions & 81 deletions internal/source/data.go

This file was deleted.

9 changes: 6 additions & 3 deletions internal/source/source_alibaba.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ type Alibaba struct{}

const alibabaFileURL = "https://raw.githubusercontent.com/devanshbatham/ip2cloud/main/data/aliyun.txt"

// GetProvider reports the provider this source describes: provider.Alibaba.
func (Alibaba) GetProvider() provider.Provider {
	return provider.Alibaba
}

func (a Alibaba) GetIPRanges() []*IPRange {
log.Info("Using static Alibaba ip ranges")

Expand All @@ -20,9 +24,8 @@ func (a Alibaba) GetIPRanges() []*IPRange {
for _, cdir := range alibabaRanges {
network, cat := ParseCIDR(cdir)
ranges = append(ranges, &IPRange{
Network: network,
Cat: cat,
Provider: provider.Alibaba,
Network: network,
Cat: cat,
})
}
return ranges
Expand Down
9 changes: 6 additions & 3 deletions internal/source/source_aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ type awsJSON struct {
} `json:"prefixes"`
}

// GetProvider reports the provider this source describes: provider.Aws.
func (Aws) GetProvider() provider.Provider {
	return provider.Aws
}

func (a Aws) GetIPRanges() []*IPRange {
log.Info("Fetching AWS ip ranges from %s", awsFileURL)

Expand All @@ -30,9 +34,8 @@ func (a Aws) GetIPRanges() []*IPRange {
for _, prefix := range awsJSON.Prefixes {
network, cat := ParseCIDR(prefix.IPPrefix)
ranges = append(ranges, &IPRange{
Network: network,
Cat: cat,
Provider: provider.Aws,
Network: network,
Cat: cat,
})
}

Expand Down
9 changes: 6 additions & 3 deletions internal/source/source_azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ import (

type Azure struct{}

// GetProvider reports the provider this source describes: provider.Azure.
func (Azure) GetProvider() provider.Provider {
	return provider.Azure
}

type azureJSON struct {
Values []struct {
Properties struct {
Expand Down Expand Up @@ -90,9 +94,8 @@ func (a Azure) GetIPRanges() []*IPRange {
for _, prefix := range prefixes {
network, cat := ParseCIDR(prefix)
ranges = append(ranges, &IPRange{
Network: network,
Cat: cat,
Provider: provider.Azure,
Network: network,
Cat: cat,
})
}

Expand Down
9 changes: 6 additions & 3 deletions internal/source/source_cloudflare.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ var cloudflareFileUrls = []string{
"https://www.cloudflare.com/ips-v6/#",
}

// GetProvider reports the provider this source describes: provider.Cloudflare.
func (Cloudflare) GetProvider() provider.Provider {
	return provider.Cloudflare
}

func (a Cloudflare) GetIPRanges() []*IPRange {
ranges := make([]*IPRange, 0)
for _, cloudflareFileURL := range cloudflareFileUrls {
Expand All @@ -31,9 +35,8 @@ func (a Cloudflare) GetIPRanges() []*IPRange {
}
network, cat := ParseCIDR(ip)
ranges = append(ranges, &IPRange{
Network: network,
Cat: cat,
Provider: provider.Cloudflare,
Network: network,
Cat: cat,
})
}
}
Expand Down
Loading

0 comments on commit 5dd4b84

Please sign in to comment.