Discussion Session 8: Type System

Compiler must terminate!

Whether interpreter terminates depends on the content of the program.

Discussion

  • Why does your compilation program terminate?

Type Theory

Background

Type theory was invented by Bertrand Russell as a solution to his own well-known paradox, which cast doubt on the foundations of mathematics upon its discovery. The problem posed by the paradox is essentially

Given a set of all sets that do not contain themselves, does that set contain itself?

Type theory resolves this issue by classifying the members of sets according to some type, and in such a system a set definition like this one is not permissible.

Nowadays, type systems are at the heart of the development of many modern programming languages. What is the benefit of a type system in a programming language? In the words of Benjamin Pierce [1]: A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute.

Discussion

  • What are erroneous behaviors? (Hint: For our machine language, what program can cause our “machine” to crash?)

Robin Milner [2] provided the following slogan to describe type safety:

Well-typed programs cannot “go wrong”.

Type Inference / Judgement / Derivation

All these names are referring to the same concept: How to assign types to each part of the abstract syntax. The corresponding rules are normally closely related to the syntax of the language.

Example

Let’s add one more production rule to the language.

expression ::=  if expression then expression else expression

What type inference rule shall we add?

Type Checking Algorithm

Let’s implement the type checking algorithm for our language with indexed string type.

Sample Program:

# function foo(string[6] x) {
#   return x + x;
# }
# x = "abc"
# y = "def"
# z = foo(x + y)

program = {"Function": [ {"Variable": ["foo"]} \
                       , {"String": [{"Number": [6]}]} \
                       , {"Variable": ["x"]} \
                       , {"Add": [ {"Variable": ["x"]} \
                                 , {"Variable": ["x"]}]} \
                       , {"Assign": [ {"Variable": ["x"]} \
                                    , {"String": ["abc"]} \
                                    , {"Assign": [ {"Variable": ["y"]} \
                                                 , {"String": ["def"]} \
                                                 , {"Assign": [ {"Variable": ["z"]} \
                                                              , {"Apply": [ {"Variable": ["foo"]} \
                                                                          , {"Add": [ {"Variable": ["x"]} \
                                                                                    , {"Variable": ["y"]} ]} ]} \
                                                              , "End" ]} ]} ]} ]}

Solution: check_sol.py

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
Node = dict
Leaf = str

def isStringType(ty):
    if ty == None:
        return False
    if ty == "Void":
        return False
    for label in ty:
        if label == "String":
            return True
        else: # Arrow
            return False 

def tyExpr(env, e):
   if type(e) == Node:
        for label in e:
            children = e[label]

            if label == "String":
                return {"String": [{"Number": [len(children[0])]}]}

            elif label == 'Variable':
                x = children[0]
                return env[x]

            elif label == "Concat":
                (e1, e2) = children
                tyE1 = tyExpr(env, e1)
                tyE2 = tyExpr(env, e2)
                if isStringType(tyE1) and isStringType(tyE2):
                    index1 = tyE1["String"][0]
                    index2 = tyE2["String"][0]
                    index = {"Add": [index1, index2]}
                    return {"String": [index]}

            elif label == 'Apply':
                 [f, eArg] = children
                 f = f['Variable'][0] # Unpack.
                 tyArg = tyExpr(env, eArg)
                 tyPara = env[f]['Arrow'][0]

                 if tyArg == tyPara:
                     return env[f]['Arrow'][1]
                 else:
                     print("not match")
                     print("tyArg is", tyArg)
                     print("tyPara is", tyPara)

                 if isStringType(tyArg) == False:
                     return None

                 tv0 = evalIndex(tyArg["String"][0])
                 tv1 = evalIndex(tyPara["String"][0])

                 if tv0 == tv1:
                     print("tv0 == tv1 ==", tv0)
                     return env[f]['Arrow'][1]
                 else:
                     print("not match")
                     print("tyArg evaluates to", tv0)
                     print("tyPara evaluates to", tv1)

        
def tyProg(env, s):
   if type(s) == Leaf:
        if s == 'End':
            return 'Void'
   elif type(s) == Node:
        for label in s:
            if label == 'Assign':
                [x,e,p] = s[label]
                x = x['Variable'][0] # Unpack.
                tExpr = tyExpr(env, e)
                env[x] = tExpr  # add to environment
                tProg = tyProg(env, p)
                if isStringType(tExpr) and tProg == 'Void': # checking
                    return 'Void'
            if label == 'Function':
                [f, tyArg, x, e, p] = s[label]
                if not isStringType(tyArg): # checking
                    print("type error, tyAry is", tyAry)
                    return None
                name = f['Variable'][0]
                x = x['Variable'][0]
                envF = env.copy()
                envF[x] = tyArg
                tBody = tyExpr(envF, e)
                if not isStringType(tBody): # checking
                    print("type error, tBody is", tBody)
                    return None
                env[name] = {'Arrow':[tyArg,tBody]}
                tProg = tyProg(env, p)
                if tProg == 'Void':
                    return tProg

def evalIndex(ti):
    for label in ti:
        if label == "Number":
            return ti[label][0]
        elif label == "Add":
            ti0 = ti[label][0]
            ti1 = ti[label][1]
            return evalIndex(ti0) + evalIndex(ti1)

def main():
    # function foo(string[6] x) {
    #   return x + x;
    # }
    # x = "abc"
    # y = "def"
    # z = foo(x + y)

    program = {"Function": [ {"Variable": ["foo"]} \
                           , {"String": [{"Number": [6]}]} \
                           , {"Variable": ["x"]} \
                           , {"Concat": [ {"Variable": ["x"]} \
                                        , {"Variable": ["x"]}]} \
                           , {"Assign": [ {"Variable": ["x"]} \
                                        , {"String": ["abc"]} \
                                        , {"Assign": [ {"Variable": ["y"]} \
                                                     , {"String": ["def"]} \
                                                     , {"Assign": [ {"Variable": ["z"]} \
                                                                  , {"Apply": [ {"Variable": ["foo"]} \
                                                                              , {"Concat": [ {"Variable": ["x"]} \
                                                                                           , {"Variable": ["y"]} ]} ]} \
                                                                  , "End" ]} ]} ]} ]}
    print(program)
    print()
    tyProg({}, program)


if __name__ == "__main__":
    main()

        

Bibliography

[1]Pierce, Benjamin C. (2002). Types and Programming Languages. MIT Press. ISBN 978-0-262-16209-8.
[2]Milner, Robin (1978), A Theory of Type Polymorphism in Programming, Jcss 17: 348–375.