Skip to content

Commit

Permalink
fix: correctly stream process json data normalization to utf8 for bot… (
Browse files Browse the repository at this point in the history
#764)

* fix: correctly stream process json data normalization to utf8 for both metatag validation and writing to disk

* test: update expected errors in negative tests

* chore: restore go mod files for reverted package

* chore: correct module name

* chore: remove duplicate import

* doc: format inline comment

Co-authored-by: Ulises Rangel <[email protected]>

---------

Co-authored-by: Ulises Rangel <[email protected]>
  • Loading branch information
ddlees and urangel authored Aug 13, 2024
1 parent b8e8f9b commit 81c33af
Show file tree
Hide file tree
Showing 12 changed files with 896 additions and 61 deletions.
13 changes: 6 additions & 7 deletions cmd/api/src/api/v2/file_uploads_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@ import (
"net/http"
"testing"

"github.com/specterops/bloodhound/mediatypes"
"github.com/specterops/bloodhound/src/services/fileupload"

"github.com/specterops/bloodhound/headers"
"github.com/specterops/bloodhound/mediatypes"
"github.com/specterops/bloodhound/src/api/v2/integration"
"github.com/specterops/bloodhound/src/services/fileupload"
"github.com/specterops/bloodhound/src/test/fixtures/fixtures"
"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -170,7 +169,7 @@ func Test_FileUploadWorkFlowVersion5(t *testing.T) {
"v5/ingest/sessions.json",
})

//Assert that we created stuff we expected
// Assert that we created stuff we expected
testCtx.AssertIngest(fixtures.IngestAssertions)
}

Expand All @@ -189,7 +188,7 @@ func Test_FileUploadWorkFlowVersion6(t *testing.T) {
"v6/ingest/sessions.json",
})

//Assert that we created stuff we expected
// Assert that we created stuff we expected
testCtx.AssertIngest(fixtures.IngestAssertions)
testCtx.AssertIngest(fixtures.IngestAssertionsv6)
testCtx.AssertIngest(fixtures.PropertyAssertions)
Expand Down Expand Up @@ -240,7 +239,7 @@ func Test_CompressedFileUploadWorkFlowVersion5(t *testing.T) {
"v5/ingest/sessions.json",
})

//Assert that we created stuff we expected
// Assert that we created stuff we expected
testCtx.AssertIngest(fixtures.IngestAssertions)
testCtx.AssertIngest(fixtures.PropertyAssertions)
}
Expand All @@ -260,7 +259,7 @@ func Test_CompressedFileUploadWorkFlowVersion6(t *testing.T) {
"v6/ingest/sessions.json",
})

//Assert that we created stuff we expected
// Assert that we created stuff we expected
testCtx.AssertIngest(fixtures.IngestAssertions)
testCtx.AssertIngest(fixtures.IngestAssertionsv6)
testCtx.AssertIngest(fixtures.PropertyAssertions)
Expand Down
4 changes: 2 additions & 2 deletions cmd/api/src/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ require (
github.com/pquerna/otp v1.4.0
github.com/prometheus/client_golang v1.16.0
github.com/russellhaering/goxmldsig v1.4.0
github.com/stretchr/testify v1.8.4
github.com/stretchr/testify v1.9.0
github.com/unrolled/secure v1.13.0
github.com/zenazn/goji v1.0.1
go.uber.org/mock v0.2.0
Expand Down Expand Up @@ -79,7 +79,7 @@ require (
github.com/prometheus/procfs v0.11.0 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/text v0.17.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
8 changes: 3 additions & 5 deletions cmd/api/src/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,7 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.4/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/unrolled/secure v1.13.0 h1:sdr3Phw2+f8Px8HE5sd1EHdj1aV3yUwed/uZXChLFsk=
github.com/unrolled/secure v1.13.0/go.mod h1:BmF5hyM6tXczk3MpQkFf1hpKSRqCyhqcbiQtiAF7+40=
github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q=
Expand Down Expand Up @@ -250,7 +249,7 @@ golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
Expand All @@ -274,8 +273,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
Expand Down
18 changes: 6 additions & 12 deletions cmd/api/src/services/fileupload/file_upload.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package fileupload

import (
"bufio"
"context"
"errors"
"fmt"
Expand All @@ -27,6 +26,7 @@ import (
"os"
"time"

"github.com/specterops/bloodhound/bomenc"
"github.com/specterops/bloodhound/headers"
"github.com/specterops/bloodhound/mediatypes"
"github.com/specterops/bloodhound/src/model/ingest"
Expand Down Expand Up @@ -120,18 +120,12 @@ func WriteAndValidateZip(src io.Reader, dst io.Writer) error {
}

func WriteAndValidateJSON(src io.Reader, dst io.Writer) error {
tr := io.TeeReader(src, dst)
bufReader := bufio.NewReader(tr)
if b, err := bufReader.Peek(3); err != nil {
normalizedContent, err := bomenc.NormalizeToUTF8(src)
if err != nil {
return err
} else {
if b[0] == UTF8BOM1 && b[1] == UTF8BOM2 && b[2] == UTF8BMO3 {
if _, err := bufReader.Discard(3); err != nil {
return err
}
}
}
_, err := ValidateMetaTag(bufReader, true)
tr := io.TeeReader(normalizedContent, dst)
_, err = ValidateMetaTag(tr, true)
return err
}

Expand All @@ -147,7 +141,7 @@ func SaveIngestFile(location string, request *http.Request) (string, model.FileT
} else if utils.HeaderMatches(request.Header, headers.ContentType.String(), ingest.AllowedZipFileUploadTypes...) {
return tempFile.Name(), model.FileTypeZip, WriteAndValidateFile(fileData, tempFile, WriteAndValidateZip)
} else {
//We should never get here since this is checked a level above
// We should never get here since this is checked a level above
return "", model.FileTypeJson, fmt.Errorf("invalid content type for ingest file")
}
}
Expand Down
119 changes: 85 additions & 34 deletions cmd/api/src/services/fileupload/file_upload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package fileupload

import (
"bytes"
"errors"
"io"
"os"
"strings"
Expand All @@ -27,42 +28,9 @@ import (
"github.com/stretchr/testify/assert"
)

func TestWriteAndValidateJSON(t *testing.T) {
t.Run("trigger invalid json on bad json", func(t *testing.T) {
var (
writer = bytes.Buffer{}
badJSON = strings.NewReader("{[]}")
)
err := WriteAndValidateJSON(badJSON, &writer)
assert.ErrorIs(t, err, ErrInvalidJSON)
})

t.Run("succeed on good json", func(t *testing.T) {
var (
writer = bytes.Buffer{}
goodJSON = strings.NewReader(`{"meta": {"methods": 0, "type": "sessions", "count": 0, "version": 5}, "data": []}`)
)
err := WriteAndValidateJSON(goodJSON, &writer)
assert.Nil(t, err)
})

t.Run("succeed on utf-8 BOM json", func(t *testing.T) {
var (
writer = bytes.Buffer{}
)

file, err := os.Open("../../test/fixtures/fixtures/utf8bomjson.json")
assert.Nil(t, err)
err = WriteAndValidateJSON(io.Reader(file), &writer)
assert.Nil(t, err)
})
}

func TestWriteAndValidateZip(t *testing.T) {
t.Run("valid zip file is ok", func(t *testing.T) {
var (
writer = bytes.Buffer{}
)
writer := bytes.Buffer{}

file, err := os.Open("../../test/fixtures/fixtures/goodzip.zip")
assert.Nil(t, err)
Expand All @@ -81,3 +49,86 @@ func TestWriteAndValidateZip(t *testing.T) {
assert.Equal(t, err, ingest.ErrInvalidZipFile)
})
}

func TestWriteAndValidateJSON(t *testing.T) {
tests := []struct {
name string
input []byte
expectedOutput []byte
expectedError error
}{
{
name: "UTF-8 without BOM",
input: []byte(`{"meta": {"type": "domains", "version": 4, "count": 1}, "data": [{"domain": "example.com"}]}`),
expectedOutput: []byte(`{"meta": {"type": "domains", "version": 4, "count": 1}, "data": [{"domain": "example.com"}]}`),
expectedError: nil,
},
{
name: "UTF-8 with BOM",
input: append([]byte{0xEF, 0xBB, 0xBF}, []byte(`{"meta": {"type": "domains", "version": 4, "count": 1}, "data": [{"domain": "example.com"}]}`)...),
expectedOutput: []byte(`{"meta": {"type": "domains", "version": 4, "count": 1}, "data": [{"domain": "example.com"}]}`),
expectedError: nil,
},
{
name: "UTF-16BE with BOM",
input: []byte{0xFE, 0xFF, 0x00, 0x7B, 0x00, 0x22, 0x00, 0x6D, 0x00, 0x65, 0x00, 0x74, 0x00, 0x61, 0x00, 0x22, 0x00, 0x3A, 0x00, 0x20, 0x00, 0x7B, 0x00, 0x22, 0x00, 0x74, 0x00, 0x79, 0x00, 0x70, 0x00, 0x65, 0x00, 0x22, 0x00, 0x3A, 0x00, 0x20, 0x00, 0x22, 0x00, 0x64, 0x00, 0x6F, 0x00, 0x6D, 0x00, 0x61, 0x00, 0x69, 0x00, 0x6E, 0x00, 0x73, 0x00, 0x22, 0x00, 0x2C, 0x00, 0x20, 0x00, 0x22, 0x00, 0x76, 0x00, 0x65, 0x00, 0x72, 0x00, 0x73, 0x00, 0x69, 0x00, 0x6F, 0x00, 0x6E, 0x00, 0x22, 0x00, 0x3A, 0x00, 0x20, 0x00, 0x34, 0x00, 0x2C, 0x00, 0x20, 0x00, 0x22, 0x00, 0x63, 0x00, 0x6F, 0x00, 0x75, 0x00, 0x6E, 0x00, 0x74, 0x00, 0x22, 0x00, 0x3A, 0x00, 0x20, 0x00, 0x31, 0x00, 0x7D, 0x00, 0x2C, 0x00, 0x20, 0x00, 0x22, 0x00, 0x64, 0x00, 0x61, 0x00, 0x74, 0x00, 0x61, 0x00, 0x22, 0x00, 0x3A, 0x00, 0x20, 0x00, 0x5B, 0x00, 0x7B, 0x00, 0x22, 0x00, 0x64, 0x00, 0x6F, 0x00, 0x6D, 0x00, 0x61, 0x00, 0x69, 0x00, 0x6E, 0x00, 0x22, 0x00, 0x3A, 0x00, 0x20, 0x00, 0x22, 0x00, 0x65, 0x00, 0x78, 0x00, 0x61, 0x00, 0x6D, 0x00, 0x70, 0x00, 0x6C, 0x00, 0x65, 0x00, 0x2E, 0x00, 0x63, 0x00, 0x6F, 0x00, 0x6D, 0x00, 0x22, 0x00, 0x7D, 0x00, 0x5D, 0x00, 0x7D},
expectedOutput: []byte{0x7b, 0x22, 0x6d, 0x65, 0x74, 0x61, 0x22, 0x3a, 0x20, 0x7b, 0x22, 0x74, 0x79, 0x70, 0x65, 0x22, 0x3a, 0x20, 0x22, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x73, 0x22, 0x2c, 0x20, 0x22, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x3a, 0x20, 0x34, 0x2c, 0x20, 0x22, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x22, 0x3a, 0x20, 0x31, 0x7d, 0x2c, 0x20, 0x22, 0x64, 0x61, 0x74, 0x61, 0x22, 0x3a, 0x20, 0x5b, 0x7b, 0x22, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x22, 0x3a, 0x20, 0x22, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x22, 0x7d, 0x5d, 0x7d},
expectedError: nil,
},
{
name: "Missing meta tag",
input: []byte(`{"data": [{"domain": "example.com"}]}`),
expectedOutput: []byte(`{"data": [{"domain": "example.com"}]}`),
expectedError: ingest.ErrMetaTagNotFound,
},
{
name: "Missing data tag",
input: []byte(`{"meta": {"type": "domains", "version": 4, "count": 1}}`),
expectedOutput: []byte(`{"meta": {"type": "domains", "version": 4, "count": 1}}`),
expectedError: ingest.ErrDataTagNotFound,
},
// NOTE: this test discovers a bug where invalid JSON files are not being invalidated due to the current
// implemenation of ValidateMetaTag of decoding each token.
// {
// name: "Invalid JSON",
// input: []byte(`{"meta": {"type": "domains", "version": 4, "count": 1}, "data": [{"domain": "example.com"`),
// expectedOutput: []byte(`{"meta": {"type": "domains", "version": 4, "count": 1}, "data": [{"domain": "example.com"`),
// expectedError: ErrInvalidJSON,
// },
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
src := bytes.NewReader(tt.input)
dst := &bytes.Buffer{}

err := WriteAndValidateJSON(src, dst)
if tt.expectedError != nil {
assert.Error(t, err)
assert.ErrorIs(t, err, tt.expectedError)
} else {
assert.NoError(t, err)
}
assert.Equal(t, tt.expectedOutput, dst.Bytes())
})
}
}

func TestWriteAndValidateJSON_NormalizationError(t *testing.T) {
src := &ErrorReader{err: errors.New("read error")}
dst := &bytes.Buffer{}

err := WriteAndValidateJSON(src, dst)

assert.Error(t, err)
assert.ErrorIs(t, err, ErrInvalidJSON)
}

// ErrorReader is a mock reader that always returns an error
type ErrorReader struct {
err error
}

func (er *ErrorReader) Read(p []byte) (n int, err error) {
return 0, er.err
}
3 changes: 2 additions & 1 deletion go.work
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@
//
// SPDX-License-Identifier: Apache-2.0

go 1.21
go 1.21.3

use (
./cmd/api/src
./packages/go/analysis
./packages/go/bomenc
./packages/go/cache
./packages/go/conftool
./packages/go/crypto
Expand Down
Loading

0 comments on commit 81c33af

Please sign in to comment.