AST.hxx File Reference

#include <string>
#include <vector>
#include <stdint.h>
#include "libsherpa/EnumSet.hxx"
#include "libsherpa/INOstream.hxx"
#include "FQName.hxx"
#include "LitValue.hxx"
#include "Environment.hxx"
#include "debug.hxx"
#include "shared_ptr.hxx"
#include <libsherpa/LexLoc.hxx>
#include <libsherpa/LToken.hxx>

Go to the source code of this file.

Data Structures

struct  EnvSet
 Set of environments associated with a given AST node. More...
class  AST

Defines

#define AST_SMART_PTR   boost::shared_ptr
#define AST_SUPERCLASS   boost::enable_shared_from_this<AST>
#define AST_LOCATION_TYPE   sherpa::LexLoc
#define AST_TOKEN_TYPE   sherpa::LToken
#define AST_SMART_PTR   boost::shared_ptr
#define AST_SUPERCLASS   boost::enable_shared_from_this<AST>
#define AST_LOCATION_TYPE   sherpa::LexLoc
#define AST_TOKEN_TYPE   sherpa::LToken

Typedefs

typedef sherpa::EnumSet< AstFlagValues > AstFlags
typedef sherpa::EnumSet< PrintFlagValues > PrintFlags
typedef sherpa::EnumSet< PrettyPrintFlagValues > PrettyPrintFlags

Enumerations

enum  IdentType {
  id_unresolved, id_tvar, id_union, id_struct,
  id_object, id_typeclass, id_tcmethod, id_field,
  id_method, id_interface, id_value, id_ucon,
  id_ucon0, id_block, idc_type, idc_FIRST_CATEGORY = idc_type,
  idc_value, idc_uctor, idc_ctor, idc_apply,
  idc_usesel_lhs
}
 Different classifications of identifiers that we might encounter. More...
enum  AstFlagValues {
  NO_FLAGS = 0, ID_IS_GLOBAL = 0x00000001u, ID_IS_GENSYM = 0x00000002u, SELF_TAIL = 0x00000004u,
  LB_IS_DUMMY = 0x00000008u, LB_POSTPONED = LB_IS_DUMMY, ID_IS_PRIVATE = 0x00000010u, DEF_IS_ENTRYPT = 0x00000020u,
  LB_REC_BIND = 0x00000040u, DEF_IS_EXTERNAL = 0x00000080u, ENUM_UN = 0x00000100u, SINGLE_LEG_UN = 0x00000200u,
  CARDELLI_UN = 0x00000400u, NULLABLE_UN = 0x00000800u, ID_FOR_USWITCH = 0x00001000u, ID_IS_CAPTURED = 0x00002000u,
  ID_IS_CLOSED = 0x00004000u, ID_NEEDS_HEAPIFY = 0x00008000u, ID_IS_MUTATED = 0x00010000u, ID_MUT_CLOSED = 0x00020000u,
  PROCLAIM_IS_INTERNAL = 0x00040000u, DEF_IS_TRIVIAL_INIT = 0x00080000u, IDENT_MANGLED = 0x00100000u, LOCAL_NOGEN_VAR = 0x00200000u,
  LBS_PROCESSED = 0x00400000u, ID_OBSERV_DEF = 0x00800000u, TVAR_POLY_SPECIAL = 0x01000000u, UNION_IS_REPR = 0x02000000u,
  FLD_IS_DISCM = 0x04000000u, LAM_NEEDS_TRANS = 0x08000000u, INNER_REF_NDX = 0x10000000u, ARG_BYREF = 0x20000000u,
  MASK_FLAGS_FROM_USE = (DEF_IS_ENTRYPT|ID_IS_CAPTURED)
}
enum  PrintFlagValues { pf_NONE, pf_IMPLIED, pf_PARENWRAP }
enum  PrettyPrintFlagValues {
  pp_NONE, pp_ShowTypes, pp_LitValues, pp_FinalNewline,
  pp_Raw, pp_InLayoutBlock
}
enum  AstType {
  at_Null, at_AnyGroup, at_ident, at_ifident,
  at_usesel, at_boolLiteral, at_charLiteral, at_intLiteral,
  at_floatLiteral, at_stringLiteral, at_module, at_interface,
  at_defunion, at_declunion, at_defstruct, at_declstruct,
  at_defobject, at_defrepr, at_declrepr, at_reprctrs,
  at_reprctr, at_reprrepr, at_boxedCat, at_unboxedCat,
  at_opaqueCat, at_oc_closed, at_oc_open, at_defexception,
  at_deftypeclass, at_tcdecls, at_tyfn, at_tcapp,
  at_method_decls, at_method_decl, at_qualType, at_constraints,
  at_definstance, at_tcmethods, at_tcmethod_binding, at_proclaim,
  at_define, at_recdef, at_importAs, at_provide,
  at_import, at_ifsel, at_declares, at_declare,
  at_tvlist, at_constructors, at_constructor, at_fields,
  at_field, at_fill, at_methdecl, at_bitfieldType,
  at_byRefType, at_arrayRefType, at_boxedType, at_unboxedType,
  at_fn, at_methType, at_primaryType, at_fnargVec,
  at_arrayType, at_vectorType, at_mutableType, at_constType,
  at_typeapp, at_exceptionType, at_fieldType, at_dummyType,
  at_identPattern, at_typeAnnotation, at_mixfix, at_unit,
  at_suspend, at_sizeof, at_bitsizeof, at_MakeVector,
  at_vector, at_array, at_begin, at_select,
  at_fqCtr, at_sel_ctr, at_array_nth, at_array_ref_nth,
  at_vector_nth, at_nth, at_lambda, at_argVec,
  at_apply, at_struct_apply, at_object_apply, at_ucon_apply,
  at_if, at_when, at_unless, at_and,
  at_or, at_cond, at_cond_legs, at_cond_leg,
  at_condelse, at_setbang, at_deref, at_dup,
  at_inner_ref, at_allocREF, at_copyREF, at_mkClosure,
  at_setClosure, at_mkArrayRef, at_labeledBlock, at_return_from,
  at_uswitch, at_usw_legs, at_usw_leg, at_otherwise,
  at_try, at_throw, at_let, at_letbindings,
  at_letbinding, at_letrec, at_loop, at_loopbindings,
  at_loopbinding, at_looptest, at_localFrame, at_frameBindings,
  at_letStar, at_identList, at_container, at_docString,
  at_letGather, agt_var, agt_uselhs, agt_literal,
  agt_tvar, agt_CompilationUnit, agt_definition, agt_type_definition,
  agt_tc_definition, agt_value_definition, agt_if_definition, agt_category,
  agt_openclosed, agt_fielditem, agt_qtype, agt_type,
  agt_expr, agt_expr_or_define, agt__AnonGroup0, agt_eform,
  agt_ucon, agt_ow
}
enum  { at_NUM_ASTTYPE = agt_ow }

Functions

std::string identTypeToString (IdentType id)
 Produces a printable string corresponding to the values in the above enumeration.


Define Documentation

#define AST_LOCATION_TYPE   sherpa::LexLoc

Definition at line 762 of file AST.hxx.

#define AST_LOCATION_TYPE   sherpa::LexLoc

Definition at line 762 of file AST.hxx.

#define AST_SMART_PTR   boost::shared_ptr

Definition at line 56 of file AST.hxx.

#define AST_SMART_PTR   boost::shared_ptr

Definition at line 56 of file AST.hxx.

#define AST_SUPERCLASS   boost::enable_shared_from_this<AST>

Definition at line 57 of file AST.hxx.

#define AST_SUPERCLASS   boost::enable_shared_from_this<AST>

Definition at line 57 of file AST.hxx.

#define AST_TOKEN_TYPE   sherpa::LToken

Definition at line 767 of file AST.hxx.

#define AST_TOKEN_TYPE   sherpa::LToken

Definition at line 767 of file AST.hxx.


Typedef Documentation

typedef sherpa::EnumSet<AstFlagValues> AstFlags

Definition at line 478 of file AST.hxx.

typedef sherpa::EnumSet<PrettyPrintFlagValues> PrettyPrintFlags

Definition at line 524 of file AST.hxx.

typedef sherpa::EnumSet<PrintFlagValues> PrintFlags

Definition at line 494 of file AST.hxx.


Enumeration Type Documentation

anonymous enum

Enumerator:
at_NUM_ASTTYPE 

Definition at line 752 of file AST.hxx.

enum AstFlagValues

Enumerator:
NO_FLAGS 
ID_IS_GLOBAL  Identifier is bound in type-level scope.
ID_IS_GENSYM  Identifier was internally generated by the compiler.
SELF_TAIL  Set in the tail recursion analysis pass to indicate that a use-occurrence of an identifier is a reference to the function currently being defined. Consulted in the SSA pass to add an LB_IS_DUMMY marker to the emitted let binding. Consulted in the gen-c pass to determine when looping rather than recursion should be used.
LB_IS_DUMMY  The SSA pass constructs some dummy let bindings. This is used to mark them so that no assignment for them will later be emitted in the code generator.

The SSA pass uses a trick to convert expression style code into statement style code. For example, in the expression (if e1 e2 e3), if E1, E2 and E3 are SSA converted forms of e1, e2 and e3 respectively, the SSA converter will produce

(let* ((temp ((if E1 (let* ((temp E2)) temp) (let* ((temp E3)) temp))))) ... rest of the code will use temp as the value of if-expr ...

Here, from the BitC resolver's point of view, the inner temp is different from the outer temp. So, it will resolve and type check correctly. The outer temp will have the result of the appropriate inner temp.

From the C code generator's point of view, we will ignore the outer let* binding, since we cannot bind the result of if-statements. This outer let* binding is therefore marked LB_IS_DUMMY. The code generator will only declare one local variable temp, and assign the result of the correct branch to it. The rest of the code after the if statement is fine since it just knows to use the name temp, which now stores the correct value.

LB_POSTPONED  Do not emit an assignment for this let binding. This is a special case similar to LB_IS_DUMMY. It is generated for forms such as array, vector, etc., which should be ignored by the code generator, as the initialization will follow in a loop.
ID_IS_PRIVATE  Indicates a global identifier that is not exported from its defining unit of compilation. These are emitted by the back end with a static marker to enable later optimization by the C compiler.
DEF_IS_ENTRYPT  Marks a definition that is an external program entry point, and therefore a seed for polyinstantiation.
LB_REC_BIND  Marks a let binding as a member of a letrec. This is consulted in Symtab.cxx and TypeInfer.cxx to determine whether the bound identifier should be bound early or late. If it were someday useful, this could be eliminated by introducing a distinct at_letrecbinding.
DEF_IS_EXTERNAL  Marks a BitC identifier as already having an external name, which should be used during code generation in place of the BitC name.

This flag is not redundant in light of the externalName field. DEF_IS_EXTERNAL is marked for all external definitions, whether or not they have an external name; only some external definitions have an external-name.

ENUM_UN  Used to mark a union consisting exclusively of constant legs, which is really an enum declaration. Marking is performed in TypeInfer, and is consulted in Type-size.cxx.

Bug:
Regrettably our current handling of enumerations is inadequate, because it doesn't allow us to specify the actual enumeration values. We are therefore going to have to introduce a defenum at some point.
SINGLE_LEG_UN  Marks a union having only a single leg.

Bug:
It seems to me that a union having only one leg should be syntactically rejected, in which case this flag should never arise. Is there some USEFUL counter-example?
Note:
If we reject single legged unions, this goes away, it is just maintained for completeness.
CARDELLI_UN  Marks a union that is subject to one of the required Cardelli optimizations.
NULLABLE_UN  Marks the NULLABLE union, which has a special representation known to the code generator.
ID_FOR_USWITCH  Marks the place-holder identifier that is introduced in the at_usw_leg AST to replicate the (switch ...) temporary identifier.

This flag ensures that an identifier that stores a de-constructed value in a switch statement can only appear on the left of a select (.) operator. The specification enforces this rule so that the de-constructed value does not escape as a whole (as a return value, by assignment or closure).

ID_IS_CAPTURED  Identifier is closed over by something.
ID_IS_CLOSED  Marks identifiers that are closed over by some lambda.

Only use occurrences that actually lie within an enclosing lambda are marked with this flag.

ID_NEEDS_HEAPIFY  ID must be moved to the heap due to capture.
ID_IS_MUTATED  Mark if an identifier is shallowly mutated in the local context.

The flag is set if the identifier is a target of a set!. This information is used in heuristic type inference.

ID_MUT_CLOSED  Mark if the identifier's ID_IS_MUTATED flag can still be updated.

This flag ensures the global identifiers' mutability is not affected by usage beyond its definition. In the case of global definitions, the identifier's mutability is marked closed, at the end of its definition.

PROCLAIM_IS_INTERNAL  Marked on proclamations generated by the compiler (ex: during closure conversion). The symbol resolver warns about local proclamations in source modules without definitions. The flag indicates that such warnings must not be produced for compiler generated proclamations.
DEF_IS_TRIVIAL_INIT  Initialization for this def can be implemented directly by the C compiler.

Set in the SSA pass to identify trivial initializers so that the code generator can avoid adding code in the per-UoC init procedure.

IDENT_MANGLED  Identifier has already been mangled.

Used in the instantiator to indicate identifiers that have already been mangled and should not be mangled a second time.

Bug:
I don't understand why this is needed. The assignment of a mangled name is, in effect, the assignment of an externalName. I would think that it would make sense to place the mangled name into the externalName field and check that, removing this flag. That works equally well when a pre-existing external name has been assigned, because we must not mangle those. Is this merely a different choice of implementation approach? Would the approach that I am outlining work?
Note:
The instantiator can be made to work by using the approach you are suggesting. However, using this flag keeps the name mangling on the 's' field more regular. This is because, the 's' field of all definitions are mangled with a canonical encoding of their type. If we employ the rule that having an external name signifies that the identifier has already been mangled, the 's' field of definitions for which the programmer provided an external name will not be mangled at all. However, this still does not lead to an error because of name collision. We currently have a rule that definitions with programmer specified external names cannot be polymorphic, and will therefore be instantiated at most once.
LOCAL_NOGEN_VAR  Local, non-generalizable variable.

Marked for variables defined at non-generalizable boundaries. ex: Lambda parameters, identifiers defined at switch/catch, etc.

Local generalizable variables must be handled by creating a new let-binding with all concrete instantiations. Non-generalizable locals like lambda-parameters, identifier at switch, catch, do, etc. can be trivially handled by performing a name change to a canonical one.

LBS_PROCESSED  Used internally to track variable scoping.

This flag is used in determining the outermost let-binding at which a type variable is used (scoped). The actual pointer to the let-binding is present in the tvarLB field.

This flag is set on at_letbinding once we are done processing it in the symbol resolver. When we encounter the use of a type variable, if we have finished processing the let-binding named by its tvarLB field, then we know that the scope of the variable is actually bigger, and thus update it.

ID_OBSERV_DEF  Identifier is observably defined.
TVAR_POLY_SPECIAL  Type variable that was temporarily created by the polyinstantiator.

The symbol resolver accepts type variables as defining occurrences within expressions only at certain positions (when called with NEW_TV_OK). For example, this is legal within value definitions, but type variables not identified as arguments to type definitions are legal within the type definition itself. This flag marks type variables created by the polyinstantiator that must not be subject to the NEW_TV_OK check by the resolver, since the Instantiator makes some quasi RandTs in which this restriction might temporarily not hold.

UNION_IS_REPR  Indicates that this DEFUNION is actually a DEFREPR that was converted to a union by the reprSimp pass.
FLD_IS_DISCM  Indicates a field that is a union discriminator (tag) field.
LAM_NEEDS_TRANS  Marked on top-level at_define.

Indicates that this is a hoisted lambda for a function that has a captured closure, and we therefore need to emit a transition function.

INNER_REF_NDX  Marks whether the inner_ref is indexing or selecting.

When set, it is an indexing inner_ref that is of the form (inner_ref (dup (array ..))) or (inner_ref (vector ... ))

When clear, it is a selecting inner_ref that is of the form (inner_ref (dup (structv ... ))) or (inner_ref (structr ... ))

These two types of inner-ref must be handled differently in most passes. For example, the index argument must be independently resolved and type checked but the field argument must not.

ARG_BYREF  Parameter is by-reference.

Set in the parser to indicate that a parameter identifier is by-reference. Consulted in gen-c.cxx to determine how the corresponding C parameter should be emitted.

In theory, the by-ref-ness is a part of the type, and must be obtainable from the identifier's type. However, the implementation does not encode by-ref this way. The by-ref is noted

  • On the components of a function type
  • As an AST flag on the identifier AST.

Encoding by-ref in this way lends itself to cleaner implementation. If by-ref were to be a type constructor, we must look beyond the by-ref constructor at every r-value usage of the identifier.

MASK_FLAGS_FROM_USE  Set of ast flag values that must be masked from definitions when copying to use cases.

Definition at line 195 of file AST.hxx.

enum AstType

Enumerator:
at_Null 
at_AnyGroup 
at_ident 
at_ifident 
at_usesel 
at_boolLiteral 
at_charLiteral 
at_intLiteral 
at_floatLiteral 
at_stringLiteral 
at_module 
at_interface 
at_defunion 
at_declunion 
at_defstruct 
at_declstruct 
at_defobject 
at_defrepr 
at_declrepr 
at_reprctrs 
at_reprctr 
at_reprrepr 
at_boxedCat 
at_unboxedCat 
at_opaqueCat 
at_oc_closed 
at_oc_open 
at_defexception 
at_deftypeclass 
at_tcdecls 
at_tyfn 
at_tcapp 
at_method_decls 
at_method_decl 
at_qualType 
at_constraints 
at_definstance 
at_tcmethods 
at_tcmethod_binding 
at_proclaim 
at_define 
at_recdef 
at_importAs 
at_provide 
at_import 
at_ifsel 
at_declares 
at_declare 
at_tvlist 
at_constructors 
at_constructor 
at_fields 
at_field 
at_fill 
at_methdecl 
at_bitfieldType 
at_byRefType 
at_arrayRefType 
at_boxedType 
at_unboxedType 
at_fn 
at_methType 
at_primaryType 
at_fnargVec 
at_arrayType 
at_vectorType 
at_mutableType 
at_constType 
at_typeapp 
at_exceptionType 
at_fieldType 
at_dummyType 
at_identPattern 
at_typeAnnotation 
at_mixfix 
at_unit 
at_suspend 
at_sizeof 
at_bitsizeof 
at_MakeVector 
at_vector 
at_array 
at_begin 
at_select 
at_fqCtr 
at_sel_ctr 
at_array_nth 
at_array_ref_nth 
at_vector_nth 
at_nth 
at_lambda 
at_argVec 
at_apply 
at_struct_apply 
at_object_apply 
at_ucon_apply 
at_if 
at_when 
at_unless 
at_and 
at_or 
at_cond 
at_cond_legs 
at_cond_leg 
at_condelse 
at_setbang 
at_deref 
at_dup 
at_inner_ref 
at_allocREF 
at_copyREF 
at_mkClosure 
at_setClosure 
at_mkArrayRef 
at_labeledBlock 
at_return_from 
at_uswitch 
at_usw_legs 
at_usw_leg 
at_otherwise 
at_try 
at_throw 
at_let 
at_letbindings 
at_letbinding 
at_letrec 
at_loop 
at_loopbindings 
at_loopbinding 
at_looptest 
at_localFrame 
at_frameBindings 
at_letStar 
at_identList 
at_container 
at_docString 
at_letGather 
agt_var 
agt_uselhs 
agt_literal 
agt_tvar 
agt_CompilationUnit 
agt_definition 
agt_type_definition 
agt_tc_definition 
agt_value_definition 
agt_if_definition 
agt_category 
agt_openclosed 
agt_fielditem 
agt_qtype 
agt_type 
agt_expr 
agt_expr_or_define 
agt__AnonGroup0 
agt_eform 
agt_ucon 
agt_ow 

Definition at line 591 of file AST.hxx.

enum IdentType

Different classifications of identifiers that we might encounter.

An IdentType is assigned by the resolver to each identifier AST node. There are two types of constants in this enumeration. Constants whose names are of the form id_name are identifier classifications. Each of these describes a particular type of identifier. All identifier AST nodes will be assigned such a concrete classification.

Constants whose names are of the form idc_name are identifier categories. These describe sets of classifications that are legal in a given occurrence context. For example, a normal use occurrence of an identifier in an expression might validly resolve to id_value (normal values), id_ucon0 (union constructors having no elements, which are enumerands), or id_tcmethod (use-references to type class method identifiers). The identifier categories are primarily used in the resolver's recursive descent pass, where they specify what identifiers are legal in what positions.

Enumerator:
id_unresolved  An identifier whose classification is not yet decided.

This is the constructor-time default value that is assigned to the identType field of the AST. It should not occur on any identifier AST after the symbol resolution pass, except in temporary ASTs that are introduced for expediency in various later passes and then resolved.

id_tvar  Type variables.
id_union  Union/repr name.
id_struct  Structure type name.
id_object  Object type name.
id_typeclass  Type class name.
id_tcmethod  Type class method name.
id_field  Structure or union constructor field name.
id_method  Structure or capsule method name.
id_interface  Locally bound name that references the exported subset of an imported interface.

This is the local name defined in this module, not the fully qualified name.

id_value  An identifier defined at define, let, lambda, do, switch, case.
id_ucon  Union constructor having one or more arguments.
id_ucon0  Union constructor taking no arguments.
id_block  Label used in block/labeled escape.
idc_type  Type, which is one of id_tvar, id_union, or id_struct.
idc_FIRST_CATEGORY 
idc_value  Value, which is one of id_value, id_ucon0, or id_tcmethod.
idc_uctor  Union constructor, which is one of id_ucon or id_ucon0.
idc_ctor  Constructor, which is one of id_struct, id_ucon, or id_ucon0.
idc_apply  An entity (value or constructor) that can be applied, which is one of id_value, id_tcmethod, id_method, id_struct, or id_ucon.
idc_usesel_lhs  Any identifier that can legally appear to the left of the dot in an at_usesel ast.

This can either be a local name denoting an interface (id_interface) or a local name denoting a structure type (id_struct).

id_interface id_struct id_method

Definition at line 89 of file AST.hxx.

enum PrettyPrintFlagValues

Enumerator:
pp_NONE 
pp_ShowTypes  Show types during pretty printing.
pp_LitValues  Show the computed literal value as well as the original.

This is mainly for debugging.

pp_FinalNewline  Emit a final newline.

Note this one does not recurse!

pp_Raw  Show everything - even stuff we added in.

This is used in the back end.

pp_InLayoutBlock  Parent context is a layout brace context, so no braces are needed for begin blocks.

Note this one does not recurse!

Definition at line 496 of file AST.hxx.

enum PrintFlagValues

Enumerator:
pf_NONE 
pf_IMPLIED  This AST was inserted by the parser, and did not originate as user input.

This typically appears where an at_begin node was introduced to convert a block into a single expression.

pf_PARENWRAP  In the block syntax, indicates an expression tree that must be pretty-printed within parenthesis to preserve operator precedence.

Definition at line 480 of file AST.hxx.


Function Documentation

std::string identTypeToString ( IdentType  id  ) 

Produces a printable string corresponding to the values in the above enumeration.

Definition at line 633 of file ASTimpl.cxx.

References id_block, id_field, id_interface, id_method, id_object, id_struct, id_tcmethod, id_tvar, id_typeclass, id_ucon, id_ucon0, id_union, id_unresolved, id_value, idc_apply, idc_ctor, idc_type, idc_uctor, idc_usesel_lhs, and idc_value.

Referenced by resolve(), XMLd(), and XMLp().


Generated on Sat Feb 4 23:59:29 2012 for BitC Compiler by  doxygen 1.4.7