2
0
Fork 0
mirror of https://github.com/ii64/sonic.git synced 2026-06-21 00:46:43 +08:00
sonic/ast/node.go
2021-06-02 20:12:04 +08:00

808 lines
20 KiB
Go

/*
* Copyright 2021 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ast
import (
`encoding/json`
`fmt`
`strconv`
`unsafe`
`github.com/bytedance/sonic/internal/native`
`github.com/bytedance/sonic/internal/rt`
)
const (
_MAP_THRESHOLD = 5
_CAP_BITS = 32
_LEN_MASK = 1<<_CAP_BITS - 1
_APPEND_EXTRA_SIZE = 5
_NODE_SIZE = unsafe.Sizeof(Node{})
_PAIR_SIZE = unsafe.Sizeof(Pair{})
)
const (
V_RAW native.ValueType = 1 << 4
V_ARRAY_RAW = V_RAW | native.V_ARRAY
V_OBJECT_RAW = V_RAW | native.V_OBJECT
MASK_RAW = V_RAW - 1
)
type Node struct {
v int64
t native.ValueType
p unsafe.Pointer
m map[string]unsafe.Pointer
}
/** Node Type Accessor **/
func (self *Node) Type() native.ValueType {
return self.t & MASK_RAW
}
/** Simple Value Methods **/
// Raw returns underlying json string of an raw node,
// which usually created by Search() api
func (self *Node) Raw() string {
if self.t != V_RAW {
panic("value cannot be represented as raw json")
}
return addr2str(self.p, self.v)
}
// Bool returns bool value represented by this node
//
// If node type is not native.V_TRUE or native.V_FALSE, or V_RAW (must be a bool json value)
// it will panic
func (self *Node) Bool() bool {
switch self.t {
case native.V_TRUE : return true
case native.V_FALSE : return false
case V_RAW :
n := self.parseRaw()
return n.Bool()
default : panic("value cannot be represented as a boolean")
}
}
// Int64 as above.
func (self *Node) Int64() int64 {
switch self.t {
case native.V_TRUE : return 1
case native.V_FALSE : return 0
case native.V_DOUBLE : return int64(i64tof(self.v))
case native.V_INTEGER : return self.v
case V_RAW :
n := self.parseRaw()
return n.Int64()
default : panic("value cannot be represented as an integer")
}
}
// Number as above.
func (self *Node) Number() json.Number {
switch self.t {
case native.V_DOUBLE : return json.Number(strconv.FormatFloat(i64tof(self.v), 'g', -1, 64))
case native.V_INTEGER : return json.Number(strconv.FormatInt(self.v, 10))
case V_RAW :
n := self.parseRaw()
return n.Number()
default : panic("value cannot be represented as a json.Number")
}
}
// String as above.
func (self *Node) String() string {
switch self.t {
case native.V_NULL : return "null"
case native.V_TRUE : return "true"
case native.V_FALSE : return "false"
case native.V_STRING : return addr2str(self.p, self.v)
case native.V_DOUBLE : return strconv.FormatFloat(i64tof(self.v), 'g', -1, 64)
case native.V_INTEGER : return strconv.FormatInt(self.v, 10)
case V_RAW :
n := self.parseRaw()
return n.String()
default : panic("value cannot be represented as a simple string")
}
}
// Float64 as above.
func (self *Node) Float64() float64 {
switch self.t {
case native.V_TRUE : return 1.0
case native.V_FALSE : return 0.0
case native.V_DOUBLE : return i64tof(self.v)
case native.V_INTEGER : return float64(self.v)
case V_RAW :
n := self.parseRaw()
return n.Float64()
default : panic("value cannot be represented as an integer")
}
}
// IsRaw returns true if the node is type of below three:
//
// 1. V_RAW (never parsed)
// 2. native.V_Object_RAW (partially parsed)
// 3. native.V_Array_RAW (partially parsed)
func (self *Node) IsRaw() bool {
return self.t&V_RAW != 0
}
/** Sequencial Value Methods **/
// Len returns children count of a array|object|string node
// For partially loaded node, it also works but only counts the parsed children
func (self *Node) Len() int {
if self.t == native.V_ARRAY || self.t == native.V_OBJECT || self.t == V_ARRAY_RAW || self.t == V_OBJECT_RAW {
return int(self.v & _LEN_MASK)
} else if self.t == native.V_STRING {
return int(self.v)
} else {
panic("value does not have a length")
}
}
// Cap returns malloc capacity of a array|object node for children
func (self *Node) Cap() int {
if self.t == native.V_ARRAY || self.t == native.V_OBJECT || self.t == V_ARRAY_RAW || self.t == V_OBJECT_RAW {
return int(self.v >> _CAP_BITS)
} else {
panic("value does not have a capacity")
}
}
// Set sets the given node for the key under object node
func (self *Node) Set(key string, node Node) {
p := self.Get(key)
if p == nil {
l := self.Len()
c := self.Cap()
if l == c {
// TODO: maybe change append_extra_size in future
c += _APPEND_EXTRA_SIZE
mem := unsafe_NewArray(_PAIR_TYPE, c)
memmove(mem, self.p, _PAIR_SIZE*uintptr(l))
self.p = mem
}
v := self.pairAt(l)
v.Key = key
v.Value = node
if self.m != nil {
self.m[key] = unsafe.Pointer(&v.Value)
}
self.setCapAndLen(c, l+1)
} else {
*p = node
}
}
// SetByIndex sets the given node for the index under array node
//
// The index must within parent array's range
func (self *Node) SetByIndex(index int, node Node) {
p := self.Index(index)
if p == nil {
panic("index to nil node")
} else {
*p = node
}
}
// Add appends the given node under array node
func (self *Node) Add(node Node) {
self.must(native.V_ARRAY, "an array")
self.loadAllIndex()
l := self.Len()
c := self.Cap()
if l == c {
// TODO: maybe change append_extra_size in future
c += _APPEND_EXTRA_SIZE
mem := unsafe_NewArray(_NODE_TYPE, c)
memmove(mem, self.p, _NODE_SIZE*uintptr(l))
self.p = mem
}
v := self.nodeAt(l)
*v = node
self.setCapAndLen(c, l+1)
}
// GetByPath load given path on demands,
// which only ensure nodes before this path got parsed
func (self *Node) GetByPath(path ...interface{}) *Node {
var s = self
for _, p := range path {
switch p.(type) {
case int:
s = s.Index(p.(int))
case string:
s = s.Get(p.(string))
default:
panic("path must be either int or string")
}
}
return s
}
// Get loads given key of an object node on demands
func (self *Node) Get(key string) *Node {
self.must(native.V_OBJECT, "an object")
return self.loadKey(key)
}
// Index loads given index of an array node on demands
func (self *Node) Index(idx int) *Node {
self.must(native.V_ARRAY, "an array")
return self.loadIndex(idx)
}
func (self *Node) Values() ListIterator {
self.must(native.V_ARRAY, "an array")
self.loadAllIndex()
return ListIterator{Iterator{p: self}}
}
func (self *Node) Properties() ObjectIterator {
self.must(native.V_OBJECT, "an object")
self.loadAllKey()
return ObjectIterator{Iterator{p: self}}
}
/** Generic Value Converters **/
// Map loads all keys of an object node
func (self *Node) Map() map[string]interface{} {
self.must(native.V_OBJECT, "an object")
self.loadAllKey()
return self.toGenericObject()
}
// Array loads all indexes of an array node
func (self *Node) Array() []interface{} {
self.must(native.V_ARRAY, "an array")
self.loadAllIndex()
return self.toGenericArray()
}
// Interface loads all children under all pathes from this node,
// and converts itself as generic go type
func (self *Node) Interface() interface{} {
switch self.t {
case native.V_EOF : panic("invalid value")
case native.V_NULL : return nil
case native.V_TRUE : return true
case native.V_FALSE : return false
case native.V_ARRAY : return self.toGenericArray()
case native.V_OBJECT : return self.toGenericObject()
case native.V_STRING : return addr2str(self.p, self.v)
case native.V_DOUBLE : return i64tof(self.v)
case native.V_INTEGER : return self.v
case V_ARRAY_RAW:
self.loadAllIndex()
return self.toGenericArray()
case V_OBJECT_RAW:
self.loadAllKey()
return self.toGenericObject()
case V_RAW :
n := self.parseRaw()
return n.Interface()
default : panic("not gonna happen")
}
}
/** Internal Helper Methods **/
var (
_NODE_TYPE = rt.UnpackEface(Node{}).Type
_PAIR_TYPE = rt.UnpackEface(Pair{}).Type
)
func (self *Node) setCapAndLen(cap int, len int) {
if self.t == native.V_ARRAY || self.t == native.V_OBJECT || self.t == V_ARRAY_RAW || self.t == V_OBJECT_RAW {
self.v = int64(len&_LEN_MASK | cap<<_CAP_BITS)
} else {
panic("value does not have a length")
}
}
func (self *Node) unsafe_next() *Node {
return (*Node)(unsafe.Pointer(uintptr(unsafe.Pointer(self)) + _NODE_SIZE))
}
func (self *Pair) unsafe_next() *Pair {
return (*Pair)(unsafe.Pointer(uintptr(unsafe.Pointer(self)) + _PAIR_SIZE))
}
func (self *Node) must(t native.ValueType, s string) {
if self.t == V_RAW {
*self = self.parseRaw()
}
if self.t&MASK_RAW != t {
panic("value cannot be represented as " + s)
}
}
func (self *Node) bound(i int) {
if i < 0 || i >= self.Len() {
panic("list index out of range")
}
}
func (self *Node) nodeAt(i int) *Node {
return (*Node)(unsafe.Pointer(uintptr(self.p) + uintptr(i)*_NODE_SIZE))
}
func (self *Node) pairAt(i int) *Pair {
return (*Pair)(unsafe.Pointer(uintptr(self.p) + uintptr(i)*_PAIR_SIZE))
}
func (self *Node) findKey(key string) *Node {
if self.mapped() {
if n, ok := (self.m)[key]; !ok {
return nil
} else {
return (*Node)(n)
}
}
nb := self.Len()
if nb <= 0 {
return nil
}
var p *Pair
if !self.IsRaw() {
p = (*Pair)(self.p)
} else {
s := (*parseObjectStack)(self.p)
p = &s.v[0]
}
if p.Key == key {
return &p.Value
}
for i := 1; i < nb; i++ {
p = p.unsafe_next()
if p.Key == key {
return &p.Value
}
}
/* not found */
return nil
}
func (self *Node) mapped() bool {
if self.m != nil && len(self.m) == self.Len() {
return true
}
if self.Len() > _MAP_THRESHOLD {
self.appendMap()
return true
}
return false
}
func (self *Node) appendMap() {
nb := self.Len()
nm := len(self.m)
if nb <= nm {
return
}
if self.m == nil {
self.m = make(map[string]unsafe.Pointer, nb)
}
var p *Pair
if !self.IsRaw() {
p = self.pairAt(nm)
} else {
s := (*parseObjectStack)(self.p)
p = &s.v[nm]
}
(self.m)[p.Key] = unsafe.Pointer(&p.Value)
for i := nm + 1; i < nb; i++ {
p = p.unsafe_next()
(self.m)[p.Key] = unsafe.Pointer(&p.Value)
}
}
func (self *Node) loadAllIndex() {
if !self.IsRaw() {
return
}
var err native.ParsingError
stack := (*parseArrayStack)(self.p)
parser := &stack.parser
old := parser.noLazy
parser.noLazy = true
*self, err = parser.decodeArray(stack.v)
if err != 0 {
panic(fmt.Sprintf("%s at position %d", err, stack.parser.Pos()))
}
parser.noLazy = old
}
func (self *Node) loadAllKey() {
if !self.IsRaw() {
return
}
var err native.ParsingError
stack := (*parseObjectStack)(self.p)
parser := &stack.parser
old := parser.noLazy
parser.noLazy = true
*self, err = parser.decodeObject(stack.v)
if err != 0 {
panic(fmt.Sprintf("%s at position %d", err, stack.parser.Pos()))
}
parser.noLazy = old
}
func (self *Node) loadIndex(index int) *Node {
if !self.IsRaw() {
self.bound(index)
return self.nodeAt(index)
}
nb := self.Len()
if nb > index {
return self.nodeAt(index)
}
// lazy load
for last, err := self.loadNextNode(); last != nil; {
if err != 0 {
panic(fmt.Sprintf("%s at index %d", err, nb-1))
}
if self.Len() > index {
return last
}
last, err = self.loadNextNode()
}
return nil
}
func (self *Node) loadNextNode() (*Node, native.ParsingError) {
stack := (*parseArrayStack)(self.p)
ret := stack.v
parser := &stack.parser
sp := parser.p
ns := len(parser.s)
/* check for EOF */
if parser.p = parser.lspace(sp); parser.p >= ns {
return nil, native.ERR_EOF
}
/* check for empty array */
if parser.s[parser.p] == ']' {
parser.p++
self.setArray(ret)
return nil, 0
}
var val Node
var err native.ParsingError
/* decode the value */
parser.noLazy = true
if val, err = parser.Parse(); err != 0 {
return nil, err
}
parser.noLazy = false
/* add the value to result */
ret = append(ret, val)
parser.p = parser.lspace(parser.p)
/* check for EOF */
if parser.p >= ns {
return &ret[len(ret)-1], native.ERR_EOF
}
/* check for the next character */
switch parser.s[parser.p] {
case ',':
parser.p++
self.setRawArray(parser, ret)
return &ret[len(ret)-1], 0
case ']':
parser.p++
self.setArray(ret)
return &ret[len(ret)-1], 0
default:
return &ret[len(ret)-1], native.ERR_INVALID_CHAR
}
}
func (self *Node) loadKey(key string) *Node {
node := self.findKey(key)
if node != nil || !self.IsRaw() {
return node
}
// lazy load
for last, err := self.loadNextPair(); last != nil; {
if err != 0 {
panic(fmt.Sprintf("%s at key %s", err, last.Key))
}
if last.Key == key {
return &last.Value
}
last, err = self.loadNextPair()
}
return nil
}
func (self *Node) loadNextPair() (*Pair, native.ParsingError) {
stack := (*parseObjectStack)(self.p)
ret := stack.v
parser := &stack.parser
sp := parser.p
ns := len(parser.s)
/* check for EOF */
if parser.p = parser.lspace(sp); parser.p >= ns {
return nil, native.ERR_EOF
}
/* check for empty object */
if parser.s[parser.p] == '}' {
parser.p++
self.setObject(ret)
return nil, 0
}
/* decode one pair */
var val Node
var njs native.JsonState
var err native.ParsingError
/* decode the key */
if njs = parser.decodeValue(); njs.Vt != native.V_STRING {
return nil, native.ERR_INVALID_CHAR
}
/* extract the key */
idx := parser.p - 1
key := parser.s[njs.Iv:idx]
/* check for escape sequence */
if njs.Ep != -1 {
if key, err = UnquoteString(key); err != 0 {
return nil, err
}
}
/* expect a ':' delimiter */
if err = parser.delim(); err != 0 {
return nil, err
}
/* decode the value */
parser.noLazy = true
if val, err = parser.Parse(); err != 0 {
return nil, err
}
parser.noLazy = false
/* add the value to result */
ret = append(ret, Pair{Key: key, Value: val})
parser.p = parser.lspace(parser.p)
/* check for EOF */
if parser.p >= ns {
return &ret[len(ret)-1], native.ERR_EOF
}
/* check for the next character */
switch parser.s[parser.p] {
case ',':
parser.p++
self.setRawObject(parser, ret)
return &ret[len(ret)-1], 0
case '}':
parser.p++
self.setObject(ret)
return &ret[len(ret)-1], 0
default:
return &ret[len(ret)-1], native.ERR_INVALID_CHAR
}
}
func (self *Node) toGenericArray() []interface{} {
nb := self.Len()
ret := make([]interface{}, nb)
if nb == 0 {
return ret
}
/* convert each item */
var p = (*Node)(self.p)
ret[0] = p.Interface()
for i := 1; i < nb; i++ {
p = p.unsafe_next()
ret[i] = p.Interface()
}
/* all done */
return ret
}
func (self *Node) toGenericObject() map[string]interface{} {
nb := self.Len()
ret := make(map[string]interface{}, nb)
if nb == 0 {
return ret
}
/* convert each item */
var p = (*Pair)(self.p)
ret[p.Key] = p.Value.Interface()
for i := 1; i < nb; i++ {
p = p.unsafe_next()
ret[p.Key] = p.Value.Interface()
}
/* all done */
return ret
}
/** Internal Factory Methods **/
var (
nullNode = Node{t: native.V_NULL}
trueNode = Node{t: native.V_TRUE}
falseNode = Node{t: native.V_FALSE}
emptyArrayNode = Node{t: native.V_ARRAY}
emptyObjectNode = Node{t: native.V_OBJECT}
)
func newInt64(v int64) Node {
return Node{
v: v,
t: native.V_INTEGER,
}
}
func newBytes(v []byte) Node {
return Node{
t: native.V_STRING,
p: mem2ptr(v),
v: int64(len(v)),
}
}
func newString(v string) Node {
return Node{
t: native.V_STRING,
p: str2ptr(v),
v: int64(len(v)),
}
}
func newFloat64(v float64) Node {
return Node{
t: native.V_DOUBLE,
v: f64toi(v),
}
}
func newArray(v []Node) Node {
return Node{
t: native.V_ARRAY,
v: int64(len(v)&_LEN_MASK | cap(v)<<_CAP_BITS),
p: *(*unsafe.Pointer)(unsafe.Pointer(&v)),
}
}
func (self *Node) setArray(v []Node) {
self.t = native.V_ARRAY
self.setCapAndLen(cap(v), len(v))
self.p = *(*unsafe.Pointer)(unsafe.Pointer(&v))
}
func newObject(v []Pair) Node {
return Node{
t: native.V_OBJECT,
v: int64(len(v)&_LEN_MASK | cap(v)<<_CAP_BITS),
p: *(*unsafe.Pointer)(unsafe.Pointer(&v)),
}
}
func (self *Node) setObject(v []Pair) {
self.t = native.V_OBJECT
self.setCapAndLen(cap(v), len(v))
self.p = *(*unsafe.Pointer)(unsafe.Pointer(&v))
}
type parseObjectStack struct {
parser Parser
v []Pair
}
type parseArrayStack struct {
parser Parser
v []Node
}
func newRawArray(p *Parser, v []Node) Node {
s := new(parseArrayStack)
s.parser = *p
s.v = v
return Node{
t: V_ARRAY_RAW,
v: int64(len(v)&_LEN_MASK | cap(v)<<_CAP_BITS),
p: unsafe.Pointer(s),
}
}
func (self *Node) setRawArray(p *Parser, v []Node) {
s := new(parseArrayStack)
s.parser = *p
s.v = v
self.t = V_ARRAY_RAW
self.setCapAndLen(cap(v), len(v))
self.p = (unsafe.Pointer)(s)
}
func newRawObject(p *Parser, v []Pair) Node {
s := new(parseObjectStack)
s.parser = *p
s.v = v
return Node{
t: V_OBJECT_RAW,
v: int64(len(v)&_LEN_MASK | cap(v)<<_CAP_BITS),
p: unsafe.Pointer(s),
}
}
func (self *Node) setRawObject(p *Parser, v []Pair) {
s := new(parseObjectStack)
s.parser = *p
s.v = v
self.t = V_OBJECT_RAW
self.setCapAndLen(cap(v), len(v))
self.p = (unsafe.Pointer)(s)
}
func newRawNode(str string) Node {
return Node{
t: V_RAW,
p: str2ptr(str),
v: int64(len(str)),
}
}
func (self *Node) parseRaw() Node {
raw := self.Raw()
n, e := NewParser(raw).Parse()
if e != 0 {
panic(fmt.Sprintf("%v, raw json: %s", e.Error(), raw))
}
return n
}