From 07bbb442efc9d9794f1269830665fbb1260b91ca Mon Sep 17 00:00:00 2001 From: deranjer Date: Wed, 1 Jul 2020 14:33:01 -0400 Subject: [PATCH] switching everything over to []byte, abandoning merkletree for now --- client/clientcmd/commit.go | 29 +--------------- client/test/test.go | 27 --------------- client/test/test.go.old | 65 +++++++++++++++++++++++++++++++++++ client/test/test2.go | 40 +++++++++++++++++++++ common/database/db-ops.go | 4 +-- common/database/structures.go | 41 ++++++++++++++++------ common/engine/diff.go | 2 +- common/engine/filehashing.go | 14 +++++--- common/engine/filesystem.go | 28 +++++++++++++-- common/manager/manager.go | 44 ++++++++++++++++++++---- go.mod | 1 + go.sum | 2 ++ 12 files changed, 215 insertions(+), 82 deletions(-) delete mode 100644 client/test/test.go create mode 100644 client/test/test.go.old create mode 100644 client/test/test2.go diff --git a/client/clientcmd/commit.go b/client/clientcmd/commit.go index 1739585..eddea09 100644 --- a/client/clientcmd/commit.go +++ b/client/clientcmd/commit.go @@ -1,40 +1,13 @@ package clientcmd import ( - "fmt" - "os" - clientconfig "github.com/deranjer/gvc/client/clientconfig" - "github.com/deranjer/gvc/common/database" - "github.com/deranjer/gvc/common/engine" "github.com/deranjer/gvc/common/manager" ) // Commit commits the tracked files and changes to the repo func Commit(conf *clientconfig.Gvcconfig, commitMessage string, m *manager.Manager) error { - trackedFiles, err := m.FetchTrackedFiles() - if err != nil { - return err - } - var filesToDiff []database.File // Contains the list of files that have changed - for _, trackedFile := range trackedFiles { - currentFile, err := os.Stat(trackedFile.Path) - if err != nil { - fmt.Printf("unable to stat tracked file: %s error: %s\n", currentFile.Name(), err) - continue - } - currentFileHash, err := engine.UniqueFileHash(trackedFile.Path) - if err != nil { - fmt.Printf("unable to create hash for file: %s error: %s\n", currentFile.Name(), err) - 
continue - } - if currentFileHash == trackedFile.CurrentHash { - fmt.Printf("No changes found in file: %s when compared to file: %s\n", currentFile.Name(), trackedFile.Name) - continue - } - filesToDiff = append(filesToDiff, trackedFile) - } - m.BeginCommit(filesToDiff, conf.CurrentBranch) + m.BeginCommit(conf.CurrentBranch) return nil } diff --git a/client/test/test.go b/client/test/test.go deleted file mode 100644 index 5846266..0000000 --- a/client/test/test.go +++ /dev/null @@ -1,27 +0,0 @@ -package main - -import ( - "fmt" - - "github.com/imdario/mergo" -) - -type Foo struct { - Ignore []string - B int64 -} - -func main() { - src := Foo{ - Ignore: []string{"one", "two", "three"}, - B: 2, - } - dest := Foo{ - Ignore: []string{"one", "two", "four", "seven"}, - } - - mergo.Merge(&dest, src) - fmt.Println(dest) - // Will print - // {two 2} -} diff --git a/client/test/test.go.old b/client/test/test.go.old new file mode 100644 index 0000000..d49cd10 --- /dev/null +++ b/client/test/test.go.old @@ -0,0 +1,65 @@ +package main + +import ( + "crypto/sha256" + "log" + + "github.com/cbergoon/merkletree" +) + +//TestContent implements the Content interface provided by merkletree and represents the content stored in the tree. 
+type TestContent struct { + x string +} + +//CalculateHash hashes the values of a TestContent +func (t TestContent) CalculateHash() ([]byte, error) { + h := sha256.New() + if _, err := h.Write([]byte(t.x)); err != nil { + return nil, err + } + + return h.Sum(nil), nil +} + +//Equals tests for equality of two Contents +func (t TestContent) Equals(other merkletree.Content) (bool, error) { + return t.x == other.(TestContent).x, nil +} + +func main() { + //Build list of Content to build tree + var list []merkletree.Content + list = append(list, TestContent{x: "Hello"}) + list = append(list, TestContent{x: "Hi"}) + list = append(list, TestContent{x: "Hey"}) + list = append(list, TestContent{x: "Hola"}) + + //Create a new Merkle Tree from the list of Content + t, err := merkletree.NewTree(list) + if err != nil { + log.Fatal(err) + } + + //Get the Merkle Root of the tree + mr := t.MerkleRoot() + log.Println(mr) + + //Verify the entire tree (hashes for each node) is valid + vt, err := t.VerifyTree() + if err != nil { + log.Fatal(err) + } + log.Println("Verify Tree: ", vt) + + //Verify a specific content in the tree + vc, err := t.VerifyContent(list[0]) + if err != nil { + log.Fatal(err) + } + + log.Println("Verify Content: ", vc) + + //String representation + log.Println(t) +} diff --git a/client/test/test2.go b/client/test/test2.go new file mode 100644 index 0000000..9d43561 --- /dev/null +++ b/client/test/test2.go @@ -0,0 +1,40 @@ +package main + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" +) + +func main() { + sha1 := []byte("32254b975eb8013394f7f7f3cd90e09aebf4b4489e69150a3260be3a5a7a0562") + sha2 := []byte("22254b975eb8013395f7f7f3cd90e09aebf4b4489e69150a3260be3a5a7a0562") + bytes1 := sha256.Sum256(sha1) + bytes2 := sha256.Sum256(sha2) + var shaList [][32]byte + shaList = append(shaList, bytes1) + shaList = append(shaList, bytes2) + + //var hashList [][]byte + hasher := sha256.New() + for _, file := range shaList { + hasher.Write(file[:]) + } + 
commitMessage := "This is a commit message2!" + //time := time.Now() + //hasher.Write([]byte(commitMessage + time.String())) + hasher.Write([]byte(commitMessage)) + //commitMeta := []byte(commitMessage + time.String()) //TODO add author and other things + //commitMetaHash := sha256.Sum256(commitMeta) + fmt.Println("Hashbytes: ", hasher.Sum(nil)) + + fullHash := hex.EncodeToString(hasher.Sum(nil)) + fmt.Println("Hasher: ", fullHash) + // if _, err := hasher.Write([]byte(commitMessage + time.String())); err != nil { // Create a hash of the message and time + // return err + // } + //hashList = append(hashList, commitMetaHash) // add that to the tree + //testhash := hashList[:] + //commitHash := sha256.Sum256(hashList[:]) + +} diff --git a/common/database/db-ops.go b/common/database/db-ops.go index df9862e..71d5617 100644 --- a/common/database/db-ops.go +++ b/common/database/db-ops.go @@ -106,12 +106,12 @@ func (db *DB) FindFileByID(ID int) (File, error) { } // UpdateFileData updates the current base file that diffs will compare to -func (db *DB) UpdateFileData(filePath, basePath string, hash [16]byte) error { +func (db *DB) UpdateFileData(filePath, basePath string, hash []byte) error { if file, err := db.FindFileByPath(filePath); err != nil { db.Err(err).Msg("Error updating the file base") return err } else { - err := db.Update(&File{ID: file.ID, CurrentBase: basePath, CurrentHash: hash}) + err := db.Update(&File{ID: file.ID, CurrentBase: basePath, Hash: hash}) return err } } diff --git a/common/database/structures.go b/common/database/structures.go index 44a06d6..53c0cce 100644 --- a/common/database/structures.go +++ b/common/database/structures.go @@ -1,16 +1,21 @@ package database -import "time" +import ( + "crypto/sha256" + "fmt" + "io/ioutil" + "os" + "time" +) // Commit stores all the necessary information for a commit type Commit struct { - CommitHash string // The hash of the commit (generated by hashing commit author name, time, the previous commit, and 
more? TODO: Not sure what else) + CommitHash []byte // The hash of the commit (generated by hashing commit author name, time, the previous commit, and more? TODO: Not sure what else) TrackedFiles []File // All of the tracked files for this commit Date string Version string //User can tag a commit with a version number Branch string //Branch this commit belongs to Number string // The commit number - } // CommitMeta stores the meta information about the commit @@ -29,28 +34,44 @@ type File struct { Name string //BkpLocation string //TODO: Needed? CurrentBase string - CurrentHash [16]byte `storm:"index,unique"` + Hash []byte `storm:"index,unique"` // with []byte we can't use sha256.Sum256 directly since it returns [32]byte, so hashing is done manually via sha256.New(). CreatedAt time.Time Unique string Version float64 NoCompress bool // Whether or not to compress this file } +// CalculateHash creates a hash for the file +func (f *File) CalculateHash() error { + file, err := os.Open(f.Path) + if err != nil { + return fmt.Errorf("unable to open file: %s err: %s", f.Path, err) + } + fileContents, err := ioutil.ReadAll(file) + if err != nil { + return fmt.Errorf("unable to read contents of file: %s err: %s", f.Path, err) + } + hash := sha256.New() + hash.Write(fileContents) + f.Hash = hash.Sum(nil) + return nil +} + type FileIndex struct { ID int `storm:"id,increment"` FileID int `storm:"index"` - FileHash [16]byte `storm:"index,unique"` + FileHash [32]byte `storm:"index,unique"` Index []byte Length int64 } // DiffObject store the information for each diff that is made type DiffObject struct { - ID int `storm:"id,increment"` - Target string `storm:"index"` - DiffObject string `storm:"index"` - TargetHash [16]byte `storm:"index"` - DiffObjectHash [16]byte `storm:"index"` + ID int `storm:"id,increment"` + Target string `storm:"index"` + DiffObject string `storm:"index"` + TargetHash []byte `storm:"index"` + DiffObjectHash []byte `storm:"index"` //Watching string //name of the file being watched DiffPath 
string //path of the diff/patch //path would be .gvc/hashofcommit/ //Label string //store a comment if the user wants to (user written) diff --git a/common/engine/diff.go b/common/engine/diff.go index 07b752f..ed6bc07 100644 --- a/common/engine/diff.go +++ b/common/engine/diff.go @@ -24,7 +24,7 @@ import ( // it might be nice to inform the user when diffs build up func manageFileDiffing(ctx context.Context, target, diffobject, commitHashPath string, diffChannel chan database.DiffObject, wg *sync.WaitGroup) error { - var targetHash, diffobjectHash [16]byte + var targetHash, diffobjectHash []byte var err error if targetHash, err = UniqueFileHash(target); err != nil { return err diff --git a/common/engine/filehashing.go b/common/engine/filehashing.go index 69982f1..bc3a5ab 100644 --- a/common/engine/filehashing.go +++ b/common/engine/filehashing.go @@ -1,14 +1,18 @@ package engine import ( - "github.com/kalafut/imohash" + "crypto/sha256" + "io/ioutil" ) -// UniqueFileHash creats a fast hash of a file. 
It's not bullet proof (could cause a collision, but in practice unlikely) but its fast -func UniqueFileHash(src string) ([16]byte, error) { - hash, err := imohash.SumFile(src) +// UniqueFileHash uses SHA256 to create a hash of the file +func UniqueFileHash(src string) ([]byte, error) { + file, err := ioutil.ReadFile(src) if err != nil { - return [16]byte{}, err + return []byte{}, err } + hasher := sha256.New() + hasher.Write(file) + hash := hasher.Sum(nil) return hash, nil } diff --git a/common/engine/filesystem.go b/common/engine/filesystem.go index 9f8773c..c606dd1 100644 --- a/common/engine/filesystem.go +++ b/common/engine/filesystem.go @@ -3,12 +3,15 @@ package engine import ( "bytes" "compress/gzip" + "crypto/sha256" "encoding/binary" + "encoding/hex" "fmt" "os" "strings" + "time" - clientconfig "github.com/deranjer/gvc/client/clientconfig" + "github.com/deranjer/gvc/common/database" ) // CompressIntArray compresses an array of integers into a buffer @@ -89,8 +92,29 @@ func InitiateDirectory(directory string) { } // CreateInitialCommit copies the files over and compresses them if they are not in the NoCompress struct -func CreateInitialCommit(conf *clientconfig.Gvcconfig) { +func CreateInitialCommit(fileList []database.File, commitMessage string) error { // ONLY HAPPENS FOR MASTER I THINK, SO NO BRANCH NEEDED //Need to deduplicate so we aren't storing duplicates of files, storing all the files in one folder won't work, will need something like git + //For initial commit no changes are made to files, so don't store anything, just save the list so you can send to server + var initialCommit database.Commit + initialCommit.Branch = "master" + + //var hashList [][]byte + hasher := sha256.New() + for _, file := range fileList { + var err error + err = file.CalculateHash() + if err != nil { + return fmt.Errorf("unable to calculate hash for file: %s with error: %s", file.Path, err) + } + hasher.Write(file.Hash[:]) + } + time := time.Now() // Adding the metadata to 
the hash + hasher.Write([]byte(commitMessage + time.String())) + hashBytes := hasher.Sum(nil) // Getting the hash bytes + fullHash := hex.EncodeToString(hashBytes) + fmt.Println("Commit hash: ", fullHash) + initialCommit.CommitHash = hashBytes + return nil } func IsDirectory(path string) (bool, error) { diff --git a/common/manager/manager.go b/common/manager/manager.go index 7e200b4..f6ff0a1 100644 --- a/common/manager/manager.go +++ b/common/manager/manager.go @@ -1,8 +1,10 @@ package manager import ( - "encoding/base64" + "bytes" + "encoding/hex" "fmt" + "os" "path/filepath" "strconv" "strings" @@ -110,7 +112,7 @@ func (m *Manager) AddFileToRepo(relFilePath string) error { relFilePath = strings.TrimSpace(relFilePath) //purging any odd spaces TODO: Make sure not needed var tmpFile database.File filename := filepath.Base(relFilePath) - var hash [16]byte + var hash []byte //check that the file actually exists (currently done by client/server) // if filename, err = engine.VerifySrcFile(relFilePath); err != nil { // //there was no source file or it was not recognisable as a file @@ -127,11 +129,11 @@ func (m *Manager) AddFileToRepo(relFilePath string) error { } tmpFile = database.File{} - tmpFile.CurrentHash = hash + tmpFile.Hash = hash tmpFile.Name = filename tmpFile.Path = relFilePath tmpFile.CreatedAt = time.Now() - tmpFile.Unique = base64.URLEncoding.EncodeToString([]byte(filename)) + "_" + base64.URLEncoding.EncodeToString((tmpFile.CurrentHash[:])) + "_" + strconv.FormatInt(tmpFile.CreatedAt.Unix(), 10) + "_" + filename + tmpFile.Unique = hex.EncodeToString([]byte(filename)) + "_" + hex.EncodeToString((tmpFile.Hash)) + "_" + strconv.FormatInt(tmpFile.CreatedAt.Unix(), 10) + "_" + filename //tmpFile.BkpLocation = filepath.Join(m.SyncFolder, tmpFile.Unique) //tmpFile.CurrentBase = tmpFile.BkpLocation //tmpFile.Ignore = false //we can have files in the database that are ignored. 
TODO: This was initially added so that 'All Files' would show up as a file (its a hack as it adds a dummy to the database) @@ -144,7 +146,7 @@ func (m *Manager) AddFileToRepo(relFilePath string) error { return err } - m.Info().Msgf("added file: %s at path: %s with hash: %s at time: %s", filename, relFilePath, tmpFile.CurrentHash, tmpFile.CreatedAt.String) + m.Info().Msgf("added file: %s at path: %s with hash: %s at time: %s", filename, relFilePath, tmpFile.Hash, tmpFile.CreatedAt.String) return nil } @@ -163,13 +165,41 @@ func (m *Manager) prepareDatabaseForFile(tmpFile database.File) (int, error) { } -func (m *Manager) BeginCommit(fileList []database.File, branch string) error { +func (m *Manager) BeginCommit(branch string, commitMessage string) error { + trackedFiles, err := m.FetchTrackedFiles() + if err != nil { + return err + } + var filesToDiff []database.File // Contains the list of files that have changed + for _, trackedFile := range trackedFiles { + currentFile, err := os.Stat(trackedFile.Path) + if err != nil { + fmt.Printf("unable to stat tracked file: %s error: %s\n", currentFile.Name(), err) + continue + } + currentFileHash, err := engine.UniqueFileHash(trackedFile.Path) + if err != nil { + fmt.Printf("unable to create hash for file: %s error: %s\n", currentFile.Name(), err) + continue + } + result := bytes.Compare(currentFileHash, trackedFile.Hash) // Compare the hashes of the two files + if result == 0 { //If they are equal + fmt.Printf("No changes found in file: %s when compared to file: %s\n", currentFile.Name(), trackedFile.Name) + continue + } + filesToDiff = append(filesToDiff, trackedFile) + } diffChannel := make(chan database.DiffObject) diffContext := context.Background() m.WaitGroup.Add(2) commit, err := m.dB.FetchLastCommitOnBranch(branch) if err != nil { - m.Err(err).Msgf("unable to fetch last commit on branch, assuming first commit on branch", err) + m.Info().Msgf("unable to fetch last commit on branch, assuming first commit on branch", 
err) + err := engine.CreateInitialCommit(filesToDiff, commitMessage) + if err != nil { + m.Err(err).Msgf("unable to create initial commit: %s", err) + return err + } } return nil } diff --git a/go.mod b/go.mod index 63fb75a..1ec69e1 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/amlwwalker/fdelta v0.0.0-20200513211915-3b53ff25eff6 github.com/apsdehal/go-logger v0.0.0-20190515212710-b0d6ccfee0e6 github.com/asdine/storm v2.1.2+incompatible + github.com/cbergoon/merkletree v0.2.0 // indirect github.com/deranjer/clir v1.0.5 github.com/deranjer/store v0.0.0-20200526205429-464dd59c6031 github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect diff --git a/go.sum b/go.sum index 43d0edb..1caa5d7 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,8 @@ github.com/apsdehal/go-logger v0.0.0-20190515212710-b0d6ccfee0e6 h1:qISSdUEX4sjD github.com/apsdehal/go-logger v0.0.0-20190515212710-b0d6ccfee0e6/go.mod h1:U3/8D6R9+bVpX0ORZjV+3mU9pQ86m7h1lESgJbXNvXA= github.com/asdine/storm v2.1.2+incompatible h1:dczuIkyqwY2LrtXPz8ixMrU/OFgZp71kbKTHGrXYt/Q= github.com/asdine/storm v2.1.2+incompatible/go.mod h1:RarYDc9hq1UPLImuiXK3BIWPJLdIygvV3PsInK0FbVQ= +github.com/cbergoon/merkletree v0.2.0 h1:Bttqr3OuoiZEo4ed1L7fTasHka9II+BF9fhBfbNEEoQ= +github.com/cbergoon/merkletree v0.2.0/go.mod h1:5c15eckUgiucMGDOCanvalj/yJnD+KAZj1qyJtRW5aM= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=