Skip to content

Commit

Permalink
added inspect verb
Browse files Browse the repository at this point in the history
  • Loading branch information
scott-cotton committed Sep 16, 2021
1 parent 370aee0 commit 01713a4
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 14 deletions.
3 changes: 2 additions & 1 deletion cmd/dupi/dupi.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ var scMap = map[string]SubCmd{
"index": newIndexCmd(),
"extract": newExtractCmd(),
"blot": newBlotCmd(),
"unblot": newUnblotCmd()}
"unblot": newUnblotCmd(),
"inspect": newInspectCmd()}

// gFlags is the flag set named "dupi". Because it is created with
// flag.ExitOnError, a parse failure terminates the process instead of
// returning an error to the caller.
// NOTE(review): presumably this holds the flags that precede the
// sub-command name — confirm against main.
var gFlags = flag.NewFlagSet("dupi", flag.ExitOnError)

Expand Down
50 changes: 48 additions & 2 deletions cmd/dupi/inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,57 @@
package main

import (
"encoding/json"
"flag"
"fmt"
"log"
"os"

"github.com/go-air/dupi"
)

// inspectCmd implements the "inspect" sub-command, which reports summary
// statistics about an index.
type inspectCmd struct {
	// subCmd supplies the sub-command name and its flag set.
	subCmd
	// index is not assigned anywhere in the code visible here; Run opens
	// and closes its own index handle.
	index *dupi.Index
	// json points at the value of the -json flag; when true, Run emits the
	// statistics as indented JSON instead of human-readable text.
	json *bool
}

// newInspectCmd builds the "inspect" sub-command together with its own flag
// set, on which the boolean -json flag (default false) is registered.
func newInspectCmd() *inspectCmd {
	fs := flag.NewFlagSet("inspect", flag.ExitOnError)
	return &inspectCmd{
		subCmd: subCmd{name: "inspect", flags: fs},
		json:   fs.Bool("json", false, "output json."),
	}
}

// Usage returns the one-line help text for the inspect sub-command.
func (in *inspectCmd) Usage() string {
	const usage = "inspect the root index."
	return usage
}

// Run parses args, opens the index at the configured root, gathers its
// statistics and writes them to standard output: indented JSON when the
// -json flag is set, the Stats text form otherwise.
//
// Every failure is returned to the caller. Calling log.Fatal here would end
// the process through os.Exit, which skips deferred calls and would leave the
// index unclosed.
func (in *inspectCmd) Run(args []string) error {
	// The flag set is built with flag.ExitOnError, so Parse normally exits on
	// bad input itself; the error is still propagated in case that changes.
	if err := in.flags.Parse(args); err != nil {
		return err
	}
	idx, err := dupi.OpenIndex(getIndexRoot())
	if err != nil {
		return err
	}
	// NOTE(review): assumes (*dupi.Index).Close returns an error, as
	// io.Closer-style methods do — confirm. A close failure is logged
	// rather than returned so it cannot mask the primary result.
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			log.Printf("closing index: %v", cerr)
		}
	}()

	st, err := idx.Stats()
	if err != nil {
		return fmt.Errorf("reading index stats: %w", err)
	}
	if !*in.json {
		_, err = fmt.Print(st)
		return err
	}
	d, err := json.MarshalIndent(st, "", "\t")
	if err != nil {
		return fmt.Errorf("encoding index stats as json: %w", err)
	}
	_, err = os.Stdout.Write(d)
	return err
}
16 changes: 13 additions & 3 deletions dmd/t.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ type T struct {
file *os.File
}

// rcdSize is the size in bytes of one fixed-width record in the dmd file:
// three big-endian uint32 values (fid, start, end), as decoded by Lookup.
const rcdSize = 12

func New(root string) (*T, error) {
res := &T{path: filepath.Join(root, "dmd")}
var err error
Expand All @@ -37,20 +39,28 @@ func New(root string) (*T, error) {
return res, nil
}

// NumDocs reports how many whole records the underlying file holds, computed
// as the file size divided by rcdSize. Integer division discards any trailing
// partial record. A Stat failure is returned with a zero count.
func (t *T) NumDocs() (uint64, error) {
	info, err := t.file.Stat()
	if err != nil {
		return 0, err
	}
	size := uint64(info.Size())
	return size / rcdSize, nil
}

// Lookup reads the record for document id did and returns its three fields.
//
// Records are rcdSize bytes each and stored back to back, so the record for
// did starts at byte offset did*rcdSize. Each record holds three big-endian
// uint32 values in the order fid, start, end.
//
// On a seek or read failure (for example when did is past the last record)
// the error is returned and the other results are zero. Lookup moves the
// file's read offset.
func (t *T) Lookup(did uint32) (fid, start, end uint32, err error) {
	f := t.file
	if _, err = f.Seek(int64(did)*rcdSize, io.SeekStart); err != nil {
		return 0, 0, 0, err
	}
	var buf [rcdSize]byte
	if _, err = io.ReadFull(f, buf[:]); err != nil {
		return 0, 0, 0, err
	}
	fid = binary.BigEndian.Uint32(buf[0:4])
	start = binary.BigEndian.Uint32(buf[4:8])
	end = binary.BigEndian.Uint32(buf[8:rcdSize])
	return fid, start, end, nil
}

Expand Down
30 changes: 28 additions & 2 deletions index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package dupi
import (
"fmt"
"log"
"math"
"os"

"github.com/go-air/dupi/blotter"
Expand Down Expand Up @@ -95,10 +96,36 @@ func (x *Index) Root() string {
return x.config.IndexRoot
}

// Stats gathers summary statistics about the index.
//
// It reports the index root, the document count from the dmd store, the
// number of entries in the path table, and, across all shards, the total
// number of blots and posts. BlotMean is posts per blot. BlotSigma is the
// population standard deviation of the per-blot post counts: the square root
// of the mean squared deviation from BlotMean.
//
// If the index has no blots (no shards), BlotMean and BlotSigma are left at
// zero rather than computed as 0/0, which would yield NaN and make the
// result unencodable as JSON.
//
// An error from the dmd store is returned with a nil *Stats.
func (x *Index) Stats() (*Stats, error) {
	numDocs, err := x.dmd.NumDocs()
	if err != nil {
		return nil, err
	}
	st := &Stats{
		Root:     x.config.IndexRoot,
		NumDocs:  numDocs,
		NumPaths: uint64(len(x.fnames.d)),
	}
	for i := range x.shards {
		shrd := &x.shards[i]
		st.NumBlots += shrd.NumBlots()
		st.NumPosts += shrd.NumPosts()
	}
	if st.NumBlots == 0 {
		return st, nil
	}
	st.BlotMean = float64(st.NumPosts) / float64(st.NumBlots)

	// The squared deviations need the global mean, hence a second pass.
	var sos float64
	for i := range x.shards {
		shrd := &x.shards[i]
		sos += shrd.SosDiffs(st.BlotMean)
	}
	st.BlotSigma = math.Sqrt(sos / float64(st.NumBlots))
	return st, nil
}

// TokenFunc builds the tokenizer function described by the index's token
// configuration.
//
// It panics if the configuration is rejected by token.FromConfig. The
// original author's comment treats that as impossible because the tokenizer
// was already created in the constructor.
func (x *Index) TokenFunc() token.TokenizerFunc {
	tf, err := token.FromConfig(&x.config.TokenConfig)
	if err != nil {
		panic(err) // should be impossible, tf created in ctor
	}
	return tf
}
Expand Down Expand Up @@ -159,7 +186,6 @@ func (x *Index) JoinBlot(shard uint32, sblot uint16) uint32 {
blot := nsh * uint32(sblot)
blot += shard
return blot

}

func (x *Index) FindBlot(theBlot uint32, doc *Doc) (start, end uint32, err error) {
Expand Down
32 changes: 26 additions & 6 deletions internal/shard/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,11 @@ import (
)

// Index is one shard of the index. Its per-blot tables each have 1<<16
// slots.
type Index struct {
	id   uint32
	path string
	// NOTE(review): heads and perm are not used by the code visible here;
	// heads presumably locates each blot's posts in postFile and perm is a
	// permutation of blot ids — confirm.
	heads [1 << 16]int64
	// counts holds the number of posts recorded for each blot; Count returns
	// one entry and NumPosts sums them all.
	counts   [1 << 16]uint32
	perm     [1 << 16]uint16
	postFile *os.File
}

Expand Down Expand Up @@ -91,6 +90,27 @@ func (x *Index) Count(blot uint32) uint32 {
return x.counts[blot]
}

// NumPosts returns the total number of posts in this shard, i.e. the sum of
// every per-blot count, accumulated in a uint64.
func (x *Index) NumPosts() uint64 {
	var sum uint64
	for i := range x.counts {
		sum += uint64(x.counts[i])
	}
	return sum
}

// NumBlots returns the number of blot slots in a shard, which is the fixed
// length of the counts table (1 << 16).
func (x *Index) NumBlots() uint64 {
	return uint64(len(x.counts))
}

// SosDiffs returns the sum, over every blot in this shard, of the squared
// difference between avg and that blot's post count.
func (x *Index) SosDiffs(avg float64) float64 {
	var sum float64
	for i := range x.counts {
		diff := avg - float64(x.counts[i])
		sum += diff * diff
	}
	return sum
}

func (x *Index) readIix() error {
f, err := os.Open(fmt.Sprintf("%s.iix", x.path))
if err != nil {
Expand Down
28 changes: 28 additions & 0 deletions stats.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package dupi

import "fmt"

// Stats is a summary of an index, as filled in by (*Index).Stats.
type Stats struct {
	// Root is the index root path.
	Root string

	// NumDocs is the document count, NumPaths the number of nodes in the
	// path tree, NumPosts the total number of posts over all shards, and
	// NumBlots the total number of blots over all shards.
	NumDocs, NumPaths, NumPosts, NumBlots uint64

	// BlotMean is NumPosts divided by NumBlots; BlotSigma is the standard
	// deviation of the per-blot post counts around that mean.
	BlotMean, BlotSigma float64
}

// stFmt is the fmt layout used by (*Stats).String. Its verbs are filled, in
// order, with Root, NumDocs, NumPaths, NumPosts, NumBlots, BlotMean and
// BlotSigma; the two floating-point values are printed with two decimals.
const stFmt = `dupi index at %s:
- %d docs
- %d nodes in path tree
- %d posts
- %d blots
- %.2f mean docs per blot
- %.2f sigma (std deviation)
`

// String renders the statistics as a multi-line, human-readable report using
// the stFmt layout. It makes *Stats satisfy fmt.Stringer.
func (st *Stats) String() string {
	// Order must match the verbs in stFmt.
	fields := []interface{}{
		st.Root,
		st.NumDocs,
		st.NumPaths,
		st.NumPosts,
		st.NumBlots,
		st.BlotMean,
		st.BlotSigma,
	}
	return fmt.Sprintf(stFmt, fields...)
}

0 comments on commit 01713a4

Please sign in to comment.