SummerSchools/2015/LectureMaterials: Protests.scala

File Protests.scala, 18.2 KB (added by Chris.Fahlbusch, 2 years ago)
Line 
1package com.lmco.atl.protests
2
3import scala.collection.DefaultMap
4import scala.collection.mutable.HashMap
5import scala.io.Source._
6import com.cra.figaro.algorithm.sampling.Importance
7import com.cra.figaro.language.Apply
8import com.cra.figaro.language.Constant
9import com.cra.figaro.language.Element
10import com.cra.figaro.language.Flip
11import com.cra.figaro.library.atomic.continuous.Uniform
12import com.cra.figaro.library.atomic.continuous.Normal
13import com.cra.figaro.library.compound.If
14import com.cra.figaro.language.Universe
15import com.cra.figaro.library.atomic.continuous.Exponential
16import org.apache.commons.math3.distribution.NormalDistribution
17import com.cra.figaro.algorithm.sampling.MetropolisHastings
18import com.cra.figaro.algorithm.sampling.ProposalScheme
19import com.cra.figaro.library.atomic.continuous.Normal
20import akka.util.Timeout
21import java.util.concurrent.TimeUnit
22import com.cra.figaro.language._
23import annotation.tailrec
24import scala.math.{ tan, log, pow, Pi }
25import com.cra.figaro.util.{ random, bound }
26
27
28object SimpleProtests
29{
30    // The following code was borrowed from Michael Howard of CRA to add a Cauchy distribution
31    /**
32     * A InverseGamma distribution in which both the alpha and beta parameters are constants.
33     * Theta defaults to 1.
34     * InverseGamma(k, theta) is (1 / Gamma(k, 1/theta))
35     */
36    class AtomicNormal(name: Name[Double], mu: Double, scale: Double, collection: ElementCollection)
37      extends Element[Double](name, collection) with Atomic[Double] {
38      type Randomness = Double
39
40      def generateRandomness() = {
41        tan(random.nextDouble()*Pi)   
42      }
43
44      def generateValue(rand: Randomness) = mu + scale*rand
45
46      /**
47       * Density of a value.
48       */
49      def density(x: Double) = {
50        (1.0/Pi/scale)*(1.0 / (1 + pow((x-mu)/scale, 2.0))) 
51      }
52
53      override def toString =
54        "Cauchy(" + mu + ", " + scale + ")"
55    }
56
57    object Cauchy extends Creatable {
58      /**
59       * Create a Normal element in which both mu and scale parameters are constants.
60       */
61      def apply(mu: Double, scale: Double)(implicit name: Name[Double], collection: ElementCollection) =
62        new AtomicNormal(name, mu, scale, collection)
63
64      type ResultType = Double
65
66      def create(args: List[Element[_]]) = apply(args(0).asInstanceOf[Double], args(1).asInstanceOf[Double])
67    }
68    // End of Cauchy code
69   
70    /**
71     * This is the abstract model for a single week.
72     * We'll add the details in the individual implementations.
73     */
74    abstract class WeeklyModel
75    {
76        // Has there been an event to trigger a protest? (0.0 or 1.0)
77        val trigger : Element[Double]
78       
79        // The disposition of the people towards protest
80        val sentiment : Element[Double]
81       
82        // The intensity of the government's actions
83        val govtall : Element[Double]
84       
85        // A count of hostile actions by the government
86        val hostile : Element[Double]
87       
88        // The intensity of the people's actions
89        val alltgov : Element[Double]
90       
91        // Number of protests
92        val protests : Element[Double]
93       
94        // Number of weeks of consecutive high protest
95        val consecutiveHighProtest : Element[Double]
96    }
97
98    /**
99     * These are the parameter's we're going to try to learn.
100     * Most of them are coefficients of a linear combination
101     */
102    abstract class Parameters
103    {
104        // For Sendiment
105        val sent2sent : Element[Double]
106        val govtall2sent : Element[Double]
107        val hostile2sent : Element[Double]
108        val alltgov2sent : Element[Double]
109        val consecutiveHighProtest2sent : Element[Double]
110       
111        // Protests
112        val sent2protests : Element[Double]
113       
114        // Government
115        val protests2govtall : Element[Double]
116        val alltgov2govtall : Element[Double]
117        val protests2hostile : Element[Double]
118       
119        // People
120        val govtall2alltgov : Element[Double]
121        val hostile2alltgov : Element[Double]
122       
123        // Threshold - this is the number of protests we consider "High"
124        val highProtestThreshold : Element[Double]
125       
126        // Triggers - How much a trigger affects sentiment
127        val trigger2sent : Element[Double]
128       
129        /**
130         * Produce a list. We'll need this for training.
131         */
132        def toList() : List[Element[Double]]
133    }
134
135    /**
136     * The parameters for the Prior distribution.
137     * They'll all have distributions to sample from.
138     */
139    class PriorParameters extends Parameters
140    {
141        // Sentiment
142        val sent2sent = Cauchy( 1.0, 0.3 )
143        val govtall2sent = Cauchy( -1.0, 2.0 )
144        val hostile2sent = Cauchy( 1.0, 2.0 )
145        val alltgov2sent = Cauchy( -1.0, 2.0 )
146        val consecutiveHighProtest2sent = Cauchy( 30.0, 10.0 )
147       
148        // protests
149        val sent2protests = Cauchy( 30.0, 10.0 )
150       
151        // Government
152        val protests2govtall = Cauchy( -30.0, 10.0 )
153        val alltgov2govtall = Cauchy( 1.0, 2.0 )
154        val protests2hostile = Cauchy( 1.0, 2.0 )
155       
156        // People
157        val govtall2alltgov = Cauchy( 1.0, 3.0 )
158        val hostile2alltgov = Cauchy( 5.0, 5.0 )
159       
160        // Threshold
161        val highProtestThreshold = Cauchy( 30.0, 10.0 )
162       
163        // Triggers
164        val trigger2sent = Cauchy( 10.0, 3.0 )
165       
166        /**
167         * This is just a list of the parameters
168         */
169        def toList() = List(       
170        // Sentiment
171         sent2sent ,
172         govtall2sent ,
173         hostile2sent ,
174         alltgov2sent ,
175         consecutiveHighProtest2sent ,
176       
177        // protests
178         sent2protests ,
179       
180        // Government
181         protests2govtall ,
182         alltgov2govtall ,
183         protests2hostile ,
184       
185        // People
186         govtall2alltgov ,
187         hostile2alltgov ,
188       
189        // Threshold
190         highProtestThreshold ,
191       
192        // Triggers
193         trigger2sent
194     )
195    }
196
197    /**
198     * These are the Learned parameters.
199     * They're just constants, fat-fingered in.
200     */
201    class LearnedParameters extends Parameters
202    {
203          // Sentiment
204        val sent2sent = Constant(1.3677075485827346)
205        val govtall2sent = Constant(43.816705101176794)
206        val hostile2sent = Constant(1.3705215838506093)
207        val alltgov2sent = Constant(-0.30241081905766826)
208        val consecutiveHighProtest2sent = Constant(42.377031937327615)
209       
210        // protests
211        val sent2protests = Constant(30.869314384095745)
212       
213        // Government
214        val protests2govtall = Constant(-19.542948727635462)
215        val alltgov2govtall = Constant(2.574871223029797)
216        val protests2hostile = Constant(-5.9124152162747405)
217       
218        // People
219        val govtall2alltgov = Constant(29.32142628155293)
220        val hostile2alltgov = Constant(3.272390543018787)
221       
222        // Threshold
223        val highProtestThreshold = Constant(23.695876311150947)
224       
225        // Triggers
226        val trigger2sent = Constant(12.330984097687368)
227
228        def toList() = List()
229    }
230
231    /**
232     * This is the model for the first week.
233     * We don't have a previous week, so they're all just distributions.
234     */
235    class FirstWeeklyModel extends WeeklyModel
236    {
237          val trigger = Constant(0.0)
238          val sentiment = Normal( 0.0, 15.0 )
239          val govtall = Normal( -257.8, 273.477  )
240          val hostile = Exponential( 0.04 )
241          val alltgov = Normal( -117.2, 175.0 )
242          val protests = Exponential( 0.04 )
243          val consecutiveHighProtest = Normal( 30.0, 10.0 )
244    }
245
246    /**
247     * This is the model for weeks that have a previous week. This is really the heart of the model.
248     *
249     * @param previous Model of the previous week
250     * @param params Model parameters
251     */
252    class MiddleWeeklyModel( previous : WeeklyModel, params : Parameters ) extends WeeklyModel
253    {
254        /**
255         * This is a linear combination of two lists, with a little error thrown in.
256         * If the lists are (a,b,c) and (d,e,f), then this will produce
257         * a*d + b*e + c*f + Normal(0,3)
258         *
259         * @param prevs Values from previous week
260         * @param parms Linear coefficients
261         *
262         * @return A linear combination of prevs and parms, plus Normal(0,3)
263         */
264        def linear( prevs : List[Element[Double]], parms : List[Element[Double]] ) =
265        {
266            var mean = prevs.zip(parms).map( pair => Apply( pair._1, pair._2,
267                (a:Double, b:Double)=>(a*b)) ).foldLeft(Constant(0.0): Element[Double])((a,b) => (a++b))
268            Chain( mean, (m:Double) => Normal(m, 3.0))
269        }
270       
271        /**
272         * This adjusts some base value by value*scale, and adds some error.
273         * It returns base + value*scale + Normal(0,3)
274         *
275         * @param base A base value
276         * @param value An adjustment to the base
277         * @param scale A scaling factor for the adjustment
278         *
279         * @return base+value*scale+Normal(0,3)
280         */
281        def adjust( base : Element[Double], value : Element[Double], scale : Element[Double] ) =
282        {
283           var mean = Apply( base, value, scale, (b:Double, v:Double, s:Double ) => (b+v*s) )     
284           Chain( mean, (m:Double) => Normal(m, 3.0))
285        }
286       
287        // Triggers are very unlikely
288        val trigger = Select( 0.00001 -> 1.0, 0.99999 -> 0.0 );
289       
290        // The number of consecutive hight protests.
291        // If the previous week's protests were high, add one to the previous count.
292        // Otherwise, it's 0
293        val consecutiveHighProtest = If( Apply(previous.protests,  params.highProtestThreshold, (a:Double, b:Double)=>(a>b)),
294            previous.consecutiveHighProtest++Constant(1.0) , Constant(0.0) )
295           
296        // Sentiment
297        val sentiment = linear(
298            List(previous.sentiment, previous.govtall, previous.alltgov, previous.hostile, consecutiveHighProtest, trigger ),
299            List(params.sent2sent, params.govtall2sent, params.alltgov2sent, params.hostile2sent, params.consecutiveHighProtest2sent, params.trigger2sent )
300        )
301     
302        // Government
303        val govtall = linear(
304            List( previous.protests, previous.alltgov ),
305            List( params.protests2govtall, params.alltgov2govtall )
306        )
307       
308        // People
309        val alltgov = linear(
310            List( previous.govtall, previous.hostile ),
311            List( params.govtall2alltgov, params.hostile2alltgov )
312        )
313       
314        // Hostile events
315        val hostile = adjust( previous.hostile, previous.protests, params.protests2hostile )
316       
317        // Protests, plus some error
318        val pmean = Apply( sentiment, params.sent2protests, (a:Double, b:Double) => (a*b))   
319        val protests = Chain( pmean, (x:Double) => Normal( x, 3.0 ) )
320    }
321
322    /**
323     * This is the overall model - all of the weeks together.
324     *
325     * @param params The model parameters, wither Prior or Learned
326     * @param size The number of weeks
327     * @param training True if Training, False if Predicting
328     */
329    class OverallModel( params : Parameters, size : Int, training : Boolean )
330    {
331        // Save these things
332        val length = size
333        val parms = params
334       
335        // Build an array of Weekly models
336        val weeks : Array[WeeklyModel] = new Array[WeeklyModel](size)
337        weeks(0) = new FirstWeeklyModel
338        weeks(1) = new MiddleWeeklyModel( weeks(0), params )
339       
340        // For Training, we'll need them all.
341        // For predicting, we'll just set the parameters for week 0 and predict for week 1
342        if( training ) for( i <- 2 until size )
343        {
344            weeks(i) = new MiddleWeeklyModel( weeks(i-1), params )
345        }   
346
347        // The rest of the code in this class is all about observing values.
348        // This is a little tricky, for a couple fo reasons.
349        // First, since we're working in Doubles, we can't use observe.
350        // In a continuous, real-numbered distribution, the probability
351        // of any particular point value is 0.
352        // So, we've got to assert the data via constraints.
353        //
354        // Some of our numbers and differences get huge, so we'll use
355        // a trick. We'll use the log of the pdf of a Normal(0,20) at
356        // the different between the actual and the observer.
357        val dist = new NormalDistribution( 0.0, 20.0 )
358       
359        /**
360         * This function encapsulates our constraint strategy.
361         *
362         * @param observed Data value observed
363         * @param actual Value from our Bayesian process
364         *
365         * @return A value, which is bigger when observed is closer to actual
366         */
367        def constrainValue(observed: Double, actual: Double) =
368        {
369             dist.logDensity( observed-actual )   
370        }
371       
372        /**
373         * Observe a week's data
374         *
375         * @param i Week number
376         * @param values A HashMap holding observed values
377         * @param istrigger True if there was a trigger event this week
378         */
379        def populate( i : Int, values : HashMap[Symbol,Double], istrigger : Boolean )
380        {
381            println( "Populating row " + i );
382            weeks(i).govtall.removeConstraints()
383            weeks(i).alltgov.removeConstraints()
384            weeks(i).hostile.removeConstraints()
385            weeks(i).protests.removeConstraints()
386            weeks(i).trigger.removeConstraints()
387
388            weeks(i).govtall.setLogConstraint(x => constrainValue(values('govtall),x))
389            weeks(i).alltgov.setLogConstraint(x => constrainValue(values('alltgov),x))
390            weeks(i).hostile.setLogConstraint(x => constrainValue(values('hostile),x))
391            weeks(i).protests.setLogConstraint(x => constrainValue(values('protests),x))
392            weeks(i).trigger.setLogConstraint(x => constrainValue(if(istrigger) 1.0 else 0.0 ,x))
393        }
394    }
395
396    /**
397     * Parse a row of data.
398     *
399     * @param line A line of data from the data file.
400     *
401     * @return A HashMap mapping data types (as symbols) to values
402     */
403    def parserow( line : String ) =
404    {
405        val map = new HashMap[Symbol,Double]
406
407        val tokens = line.split(',')
408
409        map( 'govtall ) =  tokens(3).toDouble
410        map( 'alltgov ) =  tokens(4).toDouble
411        map( 'hostile ) =  tokens(2).toDouble
412        map( 'protests ) =  tokens(5).toDouble
413
414        println( "Map: " + map );
415        map
416    }
417
418    /**
419     * Learn the parameters of a model.
420     *
421     * @param Model The Model to learn parameters for
422     */
423    def train( model : OverallModel )
424    {
425        // First, populate the model with observations
426        val lines = fromFile("/home/peval/Desktop/PakData.csv").getLines.toList   
427        for( i <- 1 until model.length )
428        {
429          println( "Populating row " + i )
430          model.populate( i-1, parserow( lines(i) ), i==19 || i==168 || i==280 || i==610 || i==710 ) 
431        }
432
433        // We'll use the Algorithm of Last Resort, Metropolis-Hastings.
434        // We'll use it in an open-ended fashion, just letting it run unbounded
435        // and checking the learned parameters every 5 minutes.
436        val alg = MetropolisHastings( ProposalScheme.default, model.parms.toList:_* )
437
438        // It can take a long, time, so we're upping the timeout
439        alg.messageTimeout = new Timeout( 2, TimeUnit.MINUTES )
440        alg.start()
441        println( "alg started" )
442        while(true)
443        {
444          // Wait 5 minutes. Then, stop the algorithm print the current values of the parameters, then resume.
445          Thread.sleep( 300000 )
446          alg.stop()
447          println( "------------------------------" )
448          model.parms.toList.foreach { p => println( "= Constant(" + alg.expectation(p, (x:Double)=>x) + ")" ) }
449          alg.resume()
450        }   
451        // No need for alg.kill() - this is an infinite loop.
452    }
453
454    /**
455     * Predict a week's protests
456     *
457     * @param model The Model
458     * @param i The week to predict
459     * @param values The week's observed values
460     */
461    def predict( model : OverallModel, i : Int, values : HashMap[Symbol,Double] )
462    {
463        // Run M-H 50000 times, predict protests
464        // We'll reuse week(0) and week(1), always observing week(0) and predicting week(1)
465        val alg = MetropolisHastings( 50000, ProposalScheme.default, model.weeks(1).protests )     
466        alg.start()
467        alg.stop()
468        println( "predicted: " + alg.expectation(model.weeks(1).protests, (x:Double)=>x) + ", actual: " + values.get('protests) )
469        alg.kill()
470
471        // Prepare for the next prediction by observing the data values in week(0)
472        // The weeks with trigger events are hard-coded.
473        model.populate( 0, values, i==19 || i==168 || i==280 || i==610 || i==710 )
474    }
475
476    /**
477     * The main!
478     *
479     * @param args Command-line args, not used.
480     */
481    def main(args: Array[String])
482    {
483        // This is for training. We'll use the first 2 years (104 weeks)
484        // train() goes into an infinite loop, so for prediction,
485        // comment these 3 lines out.
486        val params = new PriorParameters
487        val model = new OverallModel( params, 104, true )     
488        train( model )   
489
490        // This is for prediction.
491        val learned = new LearnedParameters
492        val learnedmodel = new OverallModel( learned, 2, false )
493
494        // Read and populate the first line
495        // (which will be week 104, since weeks 0-103 were used for learning)
496        val lines = fromFile("/home/peval/Desktop/PakData.csv").getLines.toList   
497        learnedmodel.populate(0, parserow(lines(104)), false )
498
499        // And, predict a bunch more weeks.
500        for( i <- 1 until 100 )
501        {
502          predict( learnedmodel, i, parserow(lines(i+104)))
503        }
504    }
505}