%
% Elimination of common subexpressions
%

\begin{onlystandalone}
\documentstyle[pagesize, 12pt, literate]{article}
\begin{document}
\title{Elimination of common subexpressions}
\author{Olaf Chitil}
\maketitle
\end{onlystandalone}

\section{Exports and Imports}

\begin{code}
#include "HsVersions.h"

module CommonSubsElim (elimComSubs) where


-- various types and type synonyms:
import Pretty(PrettyRep)
import PprStyle(PprStyle)

import Bag(Bag)
import Util(Ord3(..))  -- class Ord3 
       -- class needed for Id's, TyVar's, Literal's, Unique's,..
import Outputable(Outputable(..)) 
       -- class needed for Id's, BinderInfo's, ...
import Name(NamedThing(..), Name, OrigName, RdrName) 
       -- class needed for Id's and TyVar's,.. and types
import UniqFM(Uniquable(..), UniqFM) 
       -- class needed for Id's, TyVar's, ...
import Literal(Literal)
import PrimOp(PrimOp)
import Unique(Unique)
import CostCentre(CostCentre)
import BinderInfo(BinderInfo)
import Type(SYN_IE(Type), GenType)
import Kind(Kind)
import TyVar(SYN_IE(TyVar), GenTyVar(..)) -- for instance of Ord
import Usage(SYN_IE(Usage), GenUsage, SYN_IE(UVar))

-- what is actually needed:
import CoreSyn
-- import FiniteMap (FiniteMap, emtpyFM, addToFM, lookupFM) -- doesn't work
import FiniteMap 
import Id(SYN_IE(Id), GenId, SYN_IE(IdEnv), nullIdEnv, addOneToIdEnv, 
          lookupIdEnv)
import CoreCmpModAlpha
\end{code}

Precedence of infix operators:

\begin{code}
infixr 9 `thenCSE`, `thenCSE_`
\end{code}


\subsection{Some instances of Eq and Ord}


Aim: equality and comparision for expressions modula alpha conversion:

\begin{code}
instance Eq (GenCoreExpr Id Id TyVar UVar) where
  x == y = case (cmpModAlphaExpr emptyAlpha x y) of { _EQ -> True; _ -> False }

instance Ord (GenCoreExpr Id Id TyVar UVar) where
  _tagCmp = cmpModAlphaExpr emptyAlpha
\end{code}


Problem: no instance of Ord for type variables, just instance of Ord3

\begin{code}
instance Ord (GenTyVar a) where
    _tagCmp (TyVar u1 _ _ _) (TyVar u2 _ _ _) = u1 `_tagCmp` u2  
\end{code}



\subsection{Data type @Substitution@ for everything that shall be replaced}


(Storage for all expressions which have a name and thus can be replaced)

\begin{code}
data Substitution = Substitution (FiniteMap CoreExpr Id) (IdEnv Id)

emptySubstitution :: Substitution
emptySubstitution = Substitution emptyFM nullIdEnv

addExprToSubstitution :: Substitution -> CoreExpr -> Id -> Substitution
addExprToSubstitution (Substitution namedExprs renamedIds) expr id
  = Substitution (addToFM namedExprs expr id) renamedIds

addIdToSubstitution :: Substitution -> Id -> Id -> Substitution
addIdToSubstitution (Substitution namedExprs renamedIds) oldId newId
  = Substitution namedExprs (addOneToIdEnv renamedIds oldId newId)

lookupExprInSubstitution :: Substitution -> CoreExpr -> Maybe Id
lookupExprInSubstitution (Substitution namedExprs renamedIds) expr
  = lookupFM namedExprs expr

lookupIdInSubstitution :: Substitution -> Id -> Maybe Id
lookupIdInSubstitution (Substitution namedExprs renamedIds) id
  = lookupIdEnv renamedIds id
\end{code}



\subsection{Common Subexpression Elimination Monad}

Keeps track of statistical data: number of eliminated subexpressions, replaced variables, and deleted bindings.

\begin{code}
data CSEM result = CSEM Int Int Int result

returnCSEM :: a -> CSEM a
returnCSEM result = CSEM 0 0 0 result

thenCSEM :: CSEM a -> (a -> CSEM b) -> CSEM b
thenCSEM (CSEM cE1 cI1 cB1 val1) step = CSEM (cE1+cE2) (cI1+cI2) (cB1+cB2) val2
  where
  CSEM cE2 cI2 cB2 val2 = step val1

thenCSEM_ :: CSEM a -> CSEM b -> CSEM b
thenCSEM_ (CSEM cE1 cI1 cB1 _) (CSEM cE2 cI2 cB2 val) 
  = CSEM (cE1+cE2) (cI1+cI2) (cB1+cB2) val

recordReplaceExpr :: CSEM ()
recordReplaceExpr = CSEM 1 0 0 ()

recordReplaceId :: CSEM ()
recordReplaceId = CSEM 0 1 0 ()

recordDeleteBinding :: CSEM ()
recordDeleteBinding = CSEM 0 0 1 ()

breakCSEM :: CSEM a -> (a, String)
breakCSEM (CSEM cE cI cB value) 
  = (value, "Eliminated subexpressions: " ++ show cE ++ "\n" 
         ++ "Replaced variables: " ++ show cI ++ "\n"
         ++ "Deleted bindings: " ++ show cB ++ "\n")
\end{code}


\subsection{Main function}

\begin{code}
elimComSubs :: [CoreBinding] -> ([CoreBinding], String)

elimComSubs bindings = (bindings', "Statistics:\n" ++ statistics ++ "\n")
  where 
  ((bindings', substitution'), statistics) 
    = breakCSEM (elimComSubsBindings emptySubstitution bindings)
\end{code}


\subsection{Try substitution}


\begin{code}
maybeSubstituteExpr :: Substitution -> CoreExpr -> CSEM CoreExpr

maybeSubstituteExpr substitution expr
  = case lookupExprInSubstitution substitution expr of
      Just id -> recordReplaceExpr `thenCSEM_` returnCSEM (Var id)
      Nothing -> returnCSEM (expr)
\end{code}


\begin{code}
maybeSubstituteId :: Substitution -> Id -> CSEM Id

maybeSubstituteId substitution id
  = case lookupIdInSubstitution substitution id of
      Just id' -> recordReplaceId `thenCSEM_` returnCSEM id'
      Nothing  -> returnCSEM (id)
\end{code}


\subsection{Top level bindings (a program)}

\begin{code}
elimComSubsBindings :: Substitution -> [CoreBinding] 
                    -> CSEM ([CoreBinding], Substitution)

elimComSubsBindings substitution []
  = returnCSEM ([], substitution)
elimComSubsBindings substitution (binding : bindings)
  = elimComSubsBinding substitution binding `thenCSEM` \result ->
    case result of
      (Nothing, substitution1) 
        -> elimComSubsBindings substitution1 bindings
      (Just procBinding, substitution1)
        -> elimComSubsBindings substitution1 bindings `thenCSEM` 
           \(procBindings, substitution2) ->
           returnCSEM (procBinding:procBindings, substitution2)      
\end{code}


\subsection{Binding}

Bindings of variables to expressions.

\begin{code}
elimComSubsBinding :: Substitution -> CoreBinding 
		    -> CSEM (Maybe CoreBinding, Substitution)

elimComSubsBinding substitution binding@(NonRec valBdr expr)
  = elimComSubsExpr substitution expr `thenCSEM` \procExpr ->
    case procExpr of
      Var id -> recordDeleteBinding `thenCSEM_`
                returnCSEM ( Nothing -- delete this binding
                           , addIdToSubstitution substitution valBdr id)
      _      -> returnCSEM ( Just (NonRec valBdr procExpr)
                           , addExprToSubstitution substitution procExpr valBdr)
elimComSubsBinding substitution (Rec bindings)
  = elimComSubsPairs substitution bindings `thenCSEM` \procBindings ->
    returnCSEM ( Just (Rec procBindings)
               , extendSubstitutionForBindingPairs substitution procBindings)
\end{code}


\begin{code}
extendSubstitutionForBindingPairs :: Substitution -> [(Id, CoreExpr)]
                                  -> Substitution

extendSubstitutionForBindingPairs substitution bindingPairs
  = foldl add substitution bindingPairs
    where
    add :: Substitution -> (Id, CoreExpr) -> Substitution
    add substitution (valBdr, expr) 
      = addExprToSubstitution substitution expr valBdr
\end{code}



\subsection{Pairs of bindings}

\begin{code}
elimComSubsPairs :: Substitution -> [(Id, CoreExpr)] -> CSEM [(Id, CoreExpr)]

elimComSubsPairs substitution []
  = returnCSEM []
elimComSubsPairs substitution ((fst, expr) : pairs)
  = elimComSubsExpr substitution expr `thenCSEM` \procExpr ->
    elimComSubsPairs substitution pairs `thenCSEM` \procPairs ->
    returnCSEM ((fst, procExpr) : procPairs)
\end{code}


\subsection{Core Expressions}

\begin{code}
elimComSubsExpr :: Substitution -> CoreExpr -> CSEM CoreExpr

elimComSubsExpr substitution expr@(Var id) 
  -- replace variable which no longer exists (binding was removed)
  = maybeSubstituteId substitution id `thenCSEM` \procId ->
    returnCSEM (Var procId)
elimComSubsExpr substitution expr@(Lit _) = returnCSEM expr
elimComSubsExpr substitution (Con conVar args)
  -- this transformation is already done by the simplifier: 
  -- reuse of constructors
  = elimComSubsArgs substitution args `thenCSEM` \procArgs ->
    maybeSubstituteExpr substitution (Con conVar procArgs)
elimComSubsExpr substitution (Prim primOp args) 
  = elimComSubsArgs substitution args `thenCSEM` \procArgs ->
    maybeSubstituteExpr substitution (Prim primOp procArgs)
elimComSubsExpr substitution (Lam binder expr)
  = elimComSubsExpr substitution expr `thenCSEM` \procExpr ->
    maybeSubstituteExpr substitution (Lam binder procExpr)
elimComSubsExpr substitution (App fun arg)
  = elimComSubsExpr substitution fun `thenCSEM` \procFun ->
    elimComSubsArg substitution arg `thenCSEM` \procArg ->
    maybeSubstituteExpr substitution (App procFun procArg)
elimComSubsExpr substitution (Case expr alts)
  = elimComSubsExpr substitution expr `thenCSEM` \procExpr ->
    elimComSubsCaseAlts substitution alts `thenCSEM` \procAlts ->
    maybeSubstituteExpr substitution (Case procExpr procAlts) 
elimComSubsExpr substitution (Let binder expr)
  = elimComSubsBinding substitution binder `thenCSEM` \result ->
    case result of
      (Nothing, substitution') ->  elimComSubsExpr substitution' expr
      (Just procBinder, substitution') ->
        elimComSubsExpr substitution' expr `thenCSEM` \procExpr ->
        maybeSubstituteExpr substitution (Let procBinder procExpr)
elimComSubsExpr substitution (SCC costCentre expr)
  = elimComSubsExpr substitution expr `thenCSEM` \procExpr ->
    maybeSubstituteExpr substitution (SCC costCentre procExpr)
elimComSubsExpr substitution (Coerce coercion typeExpr expr)
  = elimComSubsExpr substitution expr `thenCSEM` \procExpr ->
    maybeSubstituteExpr substitution (Coerce coercion typeExpr procExpr)
\end{code}


\subsection{Case alternatives}

\begin{code}
elimComSubsCaseAlts :: Substitution -> CoreCaseAlts -> CSEM CoreCaseAlts

elimComSubsCaseAlts substitution (AlgAlts alts deflt) 
  = elimComSubsTriples substitution alts `thenCSEM` \procAlts ->
    elimComSubsCaseDefault substitution deflt `thenCSEM` \procDeflt ->
    returnCSEM (AlgAlts procAlts procDeflt)
    where
    elimComSubsTriples :: Substitution -> [(a, b, CoreExpr)] 
                       -> CSEM [(a, b, CoreExpr)]
    elimComSubsTriples substitution []
      = returnCSEM []
    elimComSubsTriples substitution ((fst, snd, expr) : triples)
      = elimComSubsExpr substitution expr `thenCSEM` \procExpr ->
        elimComSubsTriples substitution triples `thenCSEM` \procTriples ->
        returnCSEM ((fst, snd, procExpr) : procTriples)
  
elimComSubsCaseAlts substitution (PrimAlts alts deflt) 
  = elimComSubsPairs substitution alts `thenCSEM` \procAlts ->
    elimComSubsCaseDefault substitution deflt `thenCSEM` \procDeflt ->
    returnCSEM (PrimAlts procAlts procDeflt)
    where
    elimComSubsPairs :: Substitution -> [(a, CoreExpr)] 
                      -> CSEM [(a, CoreExpr)]
    elimComSubsPairs substitution []
      = returnCSEM []
    elimComSubsPairs substitution ((fst, expr) : pairs)
      = elimComSubsExpr substitution expr `thenCSEM` \procExpr ->
        elimComSubsPairs substitution pairs `thenCSEM` \procPairs ->
        returnCSEM ((fst, procExpr) : procPairs)



elimComSubsCaseDefault :: Substitution -> CoreCaseDefault 
                       -> CSEM CoreCaseDefault

elimComSubsCaseDefault substitution NoDefault
  = returnCSEM NoDefault
elimComSubsCaseDefault substitution (BindDefault valBdr expr)
  = elimComSubsExpr substitution expr `thenCSEM` \procExpr ->
    returnCSEM (BindDefault valBdr procExpr)
\end{code}


\subsection{Arguments}

\begin{code}
elimComSubsArgs :: Substitution -> [CoreArg] -> CSEM [CoreArg]

elimComSubsArgs substitution [] = returnCSEM []
elimComSubsArgs substitution (x:xs) 
  = elimComSubsArg substitution x `thenCSEM` \procX ->
    elimComSubsArgs substitution xs `thenCSEM` \procXs ->
    returnCSEM (procX:procXs)


elimComSubsArg :: Substitution -> CoreArg -> CSEM CoreArg

elimComSubsArg substitution (VarArg id) 
  = maybeSubstituteId substitution id `thenCSEM` \procId ->
    returnCSEM (VarArg procId)
elimComSubsArg _ arg -- nothing to do for literates, types, and usages
  = returnCSEM arg
\end{code}


\begin{onlystandalone}
\end{document}
\end{onlystandalone}
