# SummerSchools/2015/LectureMaterials: Protests.scala

File Protests.scala, 18.2 KB (added by Chris.Fahlbusch, 2 years ago) |
---|

Line | |
---|---|

1 | package com.lmco.atl.protests |

2 | |

3 | import scala.collection.DefaultMap |

4 | import scala.collection.mutable.HashMap |

5 | import scala.io.Source._ |

6 | import com.cra.figaro.algorithm.sampling.Importance |

7 | import com.cra.figaro.language.Apply |

8 | import com.cra.figaro.language.Constant |

9 | import com.cra.figaro.language.Element |

10 | import com.cra.figaro.language.Flip |

11 | import com.cra.figaro.library.atomic.continuous.Uniform |

12 | import com.cra.figaro.library.atomic.continuous.Normal |

13 | import com.cra.figaro.library.compound.If |

14 | import com.cra.figaro.language.Universe |

15 | import com.cra.figaro.library.atomic.continuous.Exponential |

16 | import org.apache.commons.math3.distribution.NormalDistribution |

17 | import com.cra.figaro.algorithm.sampling.MetropolisHastings |

18 | import com.cra.figaro.algorithm.sampling.ProposalScheme |

19 | import com.cra.figaro.library.atomic.continuous.Normal |

20 | import akka.util.Timeout |

21 | import java.util.concurrent.TimeUnit |

22 | import com.cra.figaro.language._ |

23 | import annotation.tailrec |

24 | import scala.math.{ tan, log, pow, Pi } |

25 | import com.cra.figaro.util.{ random, bound } |

26 | |

27 | |

28 | object SimpleProtests |

29 | { |

30 | // The following code was borrowed from Michael Howard of CRA to add a Cauchy distribution |

31 | /** |

32 | * A InverseGamma distribution in which both the alpha and beta parameters are constants. |

33 | * Theta defaults to 1. |

34 | * InverseGamma(k, theta) is (1 / Gamma(k, 1/theta)) |

35 | */ |

36 | class AtomicNormal(name: Name[Double], mu: Double, scale: Double, collection: ElementCollection) |

37 | extends Element[Double](name, collection) with Atomic[Double] { |

38 | type Randomness = Double |

39 | |

40 | def generateRandomness() = { |

41 | tan(random.nextDouble()*Pi) |

42 | } |

43 | |

44 | def generateValue(rand: Randomness) = mu + scale*rand |

45 | |

46 | /** |

47 | * Density of a value. |

48 | */ |

49 | def density(x: Double) = { |

50 | (1.0/Pi/scale)*(1.0 / (1 + pow((x-mu)/scale, 2.0))) |

51 | } |

52 | |

53 | override def toString = |

54 | "Cauchy(" + mu + ", " + scale + ")" |

55 | } |

56 | |

57 | object Cauchy extends Creatable { |

58 | /** |

59 | * Create a Normal element in which both mu and scale parameters are constants. |

60 | */ |

61 | def apply(mu: Double, scale: Double)(implicit name: Name[Double], collection: ElementCollection) = |

62 | new AtomicNormal(name, mu, scale, collection) |

63 | |

64 | type ResultType = Double |

65 | |

66 | def create(args: List[Element[_]]) = apply(args(0).asInstanceOf[Double], args(1).asInstanceOf[Double]) |

67 | } |

68 | // End of Cauchy code |

69 | |

70 | /** |

71 | * This is the abstract model for a single week. |

72 | * We'll add the details in the individual implementations. |

73 | */ |

74 | abstract class WeeklyModel |

75 | { |

76 | // Has there been an event to trigger a protest? (0.0 or 1.0) |

77 | val trigger : Element[Double] |

78 | |

79 | // The disposition of the people towards protest |

80 | val sentiment : Element[Double] |

81 | |

82 | // The intensity of the government's actions |

83 | val govtall : Element[Double] |

84 | |

85 | // A count of hostile actions by the government |

86 | val hostile : Element[Double] |

87 | |

88 | // The intensity of the people's actions |

89 | val alltgov : Element[Double] |

90 | |

91 | // Number of protests |

92 | val protests : Element[Double] |

93 | |

94 | // Number of weeks of consecutive high protest |

95 | val consecutiveHighProtest : Element[Double] |

96 | } |

97 | |

98 | /** |

99 | * These are the parameter's we're going to try to learn. |

100 | * Most of them are coefficients of a linear combination |

101 | */ |

102 | abstract class Parameters |

103 | { |

104 | // For Sendiment |

105 | val sent2sent : Element[Double] |

106 | val govtall2sent : Element[Double] |

107 | val hostile2sent : Element[Double] |

108 | val alltgov2sent : Element[Double] |

109 | val consecutiveHighProtest2sent : Element[Double] |

110 | |

111 | // Protests |

112 | val sent2protests : Element[Double] |

113 | |

114 | // Government |

115 | val protests2govtall : Element[Double] |

116 | val alltgov2govtall : Element[Double] |

117 | val protests2hostile : Element[Double] |

118 | |

119 | // People |

120 | val govtall2alltgov : Element[Double] |

121 | val hostile2alltgov : Element[Double] |

122 | |

123 | // Threshold - this is the number of protests we consider "High" |

124 | val highProtestThreshold : Element[Double] |

125 | |

126 | // Triggers - How much a trigger affects sentiment |

127 | val trigger2sent : Element[Double] |

128 | |

129 | /** |

130 | * Produce a list. We'll need this for training. |

131 | */ |

132 | def toList() : List[Element[Double]] |

133 | } |

134 | |

135 | /** |

136 | * The parameters for the Prior distribution. |

137 | * They'll all have distributions to sample from. |

138 | */ |

139 | class PriorParameters extends Parameters |

140 | { |

141 | // Sentiment |

142 | val sent2sent = Cauchy( 1.0, 0.3 ) |

143 | val govtall2sent = Cauchy( -1.0, 2.0 ) |

144 | val hostile2sent = Cauchy( 1.0, 2.0 ) |

145 | val alltgov2sent = Cauchy( -1.0, 2.0 ) |

146 | val consecutiveHighProtest2sent = Cauchy( 30.0, 10.0 ) |

147 | |

148 | // protests |

149 | val sent2protests = Cauchy( 30.0, 10.0 ) |

150 | |

151 | // Government |

152 | val protests2govtall = Cauchy( -30.0, 10.0 ) |

153 | val alltgov2govtall = Cauchy( 1.0, 2.0 ) |

154 | val protests2hostile = Cauchy( 1.0, 2.0 ) |

155 | |

156 | // People |

157 | val govtall2alltgov = Cauchy( 1.0, 3.0 ) |

158 | val hostile2alltgov = Cauchy( 5.0, 5.0 ) |

159 | |

160 | // Threshold |

161 | val highProtestThreshold = Cauchy( 30.0, 10.0 ) |

162 | |

163 | // Triggers |

164 | val trigger2sent = Cauchy( 10.0, 3.0 ) |

165 | |

166 | /** |

167 | * This is just a list of the parameters |

168 | */ |

169 | def toList() = List( |

170 | // Sentiment |

171 | sent2sent , |

172 | govtall2sent , |

173 | hostile2sent , |

174 | alltgov2sent , |

175 | consecutiveHighProtest2sent , |

176 | |

177 | // protests |

178 | sent2protests , |

179 | |

180 | // Government |

181 | protests2govtall , |

182 | alltgov2govtall , |

183 | protests2hostile , |

184 | |

185 | // People |

186 | govtall2alltgov , |

187 | hostile2alltgov , |

188 | |

189 | // Threshold |

190 | highProtestThreshold , |

191 | |

192 | // Triggers |

193 | trigger2sent |

194 | ) |

195 | } |

196 | |

197 | /** |

198 | * These are the Learned parameters. |

199 | * They're just constants, fat-fingered in. |

200 | */ |

201 | class LearnedParameters extends Parameters |

202 | { |

203 | // Sentiment |

204 | val sent2sent = Constant(1.3677075485827346) |

205 | val govtall2sent = Constant(43.816705101176794) |

206 | val hostile2sent = Constant(1.3705215838506093) |

207 | val alltgov2sent = Constant(-0.30241081905766826) |

208 | val consecutiveHighProtest2sent = Constant(42.377031937327615) |

209 | |

210 | // protests |

211 | val sent2protests = Constant(30.869314384095745) |

212 | |

213 | // Government |

214 | val protests2govtall = Constant(-19.542948727635462) |

215 | val alltgov2govtall = Constant(2.574871223029797) |

216 | val protests2hostile = Constant(-5.9124152162747405) |

217 | |

218 | // People |

219 | val govtall2alltgov = Constant(29.32142628155293) |

220 | val hostile2alltgov = Constant(3.272390543018787) |

221 | |

222 | // Threshold |

223 | val highProtestThreshold = Constant(23.695876311150947) |

224 | |

225 | // Triggers |

226 | val trigger2sent = Constant(12.330984097687368) |

227 | |

228 | def toList() = List() |

229 | } |

230 | |

231 | /** |

232 | * This is the model for the first week. |

233 | * We don't have a previous week, so they're all just distributions. |

234 | */ |

235 | class FirstWeeklyModel extends WeeklyModel |

236 | { |

237 | val trigger = Constant(0.0) |

238 | val sentiment = Normal( 0.0, 15.0 ) |

239 | val govtall = Normal( -257.8, 273.477 ) |

240 | val hostile = Exponential( 0.04 ) |

241 | val alltgov = Normal( -117.2, 175.0 ) |

242 | val protests = Exponential( 0.04 ) |

243 | val consecutiveHighProtest = Normal( 30.0, 10.0 ) |

244 | } |

245 | |

246 | /** |

247 | * This is the model for weeks that have a previous week. This is really the heart of the model. |

248 | * |

249 | * @param previous Model of the previous week |

250 | * @param params Model parameters |

251 | */ |

252 | class MiddleWeeklyModel( previous : WeeklyModel, params : Parameters ) extends WeeklyModel |

253 | { |

254 | /** |

255 | * This is a linear combination of two lists, with a little error thrown in. |

256 | * If the lists are (a,b,c) and (d,e,f), then this will produce |

257 | * a*d + b*e + c*f + Normal(0,3) |

258 | * |

259 | * @param prevs Values from previous week |

260 | * @param parms Linear coefficients |

261 | * |

262 | * @return A linear combination of prevs and parms, plus Normal(0,3) |

263 | */ |

264 | def linear( prevs : List[Element[Double]], parms : List[Element[Double]] ) = |

265 | { |

266 | var mean = prevs.zip(parms).map( pair => Apply( pair._1, pair._2, |

267 | (a:Double, b:Double)=>(a*b)) ).foldLeft(Constant(0.0): Element[Double])((a,b) => (a++b)) |

268 | Chain( mean, (m:Double) => Normal(m, 3.0)) |

269 | } |

270 | |

271 | /** |

272 | * This adjusts some base value by value*scale, and adds some error. |

273 | * It returns base + value*scale + Normal(0,3) |

274 | * |

275 | * @param base A base value |

276 | * @param value An adjustment to the base |

277 | * @param scale A scaling factor for the adjustment |

278 | * |

279 | * @return base+value*scale+Normal(0,3) |

280 | */ |

281 | def adjust( base : Element[Double], value : Element[Double], scale : Element[Double] ) = |

282 | { |

283 | var mean = Apply( base, value, scale, (b:Double, v:Double, s:Double ) => (b+v*s) ) |

284 | Chain( mean, (m:Double) => Normal(m, 3.0)) |

285 | } |

286 | |

287 | // Triggers are very unlikely |

288 | val trigger = Select( 0.00001 -> 1.0, 0.99999 -> 0.0 ); |

289 | |

290 | // The number of consecutive hight protests. |

291 | // If the previous week's protests were high, add one to the previous count. |

292 | // Otherwise, it's 0 |

293 | val consecutiveHighProtest = If( Apply(previous.protests, params.highProtestThreshold, (a:Double, b:Double)=>(a>b)), |

294 | previous.consecutiveHighProtest++Constant(1.0) , Constant(0.0) ) |

295 | |

296 | // Sentiment |

297 | val sentiment = linear( |

298 | List(previous.sentiment, previous.govtall, previous.alltgov, previous.hostile, consecutiveHighProtest, trigger ), |

299 | List(params.sent2sent, params.govtall2sent, params.alltgov2sent, params.hostile2sent, params.consecutiveHighProtest2sent, params.trigger2sent ) |

300 | ) |

301 | |

302 | // Government |

303 | val govtall = linear( |

304 | List( previous.protests, previous.alltgov ), |

305 | List( params.protests2govtall, params.alltgov2govtall ) |

306 | ) |

307 | |

308 | // People |

309 | val alltgov = linear( |

310 | List( previous.govtall, previous.hostile ), |

311 | List( params.govtall2alltgov, params.hostile2alltgov ) |

312 | ) |

313 | |

314 | // Hostile events |

315 | val hostile = adjust( previous.hostile, previous.protests, params.protests2hostile ) |

316 | |

317 | // Protests, plus some error |

318 | val pmean = Apply( sentiment, params.sent2protests, (a:Double, b:Double) => (a*b)) |

319 | val protests = Chain( pmean, (x:Double) => Normal( x, 3.0 ) ) |

320 | } |

321 | |

322 | /** |

323 | * This is the overall model - all of the weeks together. |

324 | * |

325 | * @param params The model parameters, wither Prior or Learned |

326 | * @param size The number of weeks |

327 | * @param training True if Training, False if Predicting |

328 | */ |

329 | class OverallModel( params : Parameters, size : Int, training : Boolean ) |

330 | { |

331 | // Save these things |

332 | val length = size |

333 | val parms = params |

334 | |

335 | // Build an array of Weekly models |

336 | val weeks : Array[WeeklyModel] = new Array[WeeklyModel](size) |

337 | weeks(0) = new FirstWeeklyModel |

338 | weeks(1) = new MiddleWeeklyModel( weeks(0), params ) |

339 | |

340 | // For Training, we'll need them all. |

341 | // For predicting, we'll just set the parameters for week 0 and predict for week 1 |

342 | if( training ) for( i <- 2 until size ) |

343 | { |

344 | weeks(i) = new MiddleWeeklyModel( weeks(i-1), params ) |

345 | } |

346 | |

347 | // The rest of the code in this class is all about observing values. |

348 | // This is a little tricky, for a couple fo reasons. |

349 | // First, since we're working in Doubles, we can't use observe. |

350 | // In a continuous, real-numbered distribution, the probability |

351 | // of any particular point value is 0. |

352 | // So, we've got to assert the data via constraints. |

353 | // |

354 | // Some of our numbers and differences get huge, so we'll use |

355 | // a trick. We'll use the log of the pdf of a Normal(0,20) at |

356 | // the different between the actual and the observer. |

357 | val dist = new NormalDistribution( 0.0, 20.0 ) |

358 | |

359 | /** |

360 | * This function encapsulates our constraint strategy. |

361 | * |

362 | * @param observed Data value observed |

363 | * @param actual Value from our Bayesian process |

364 | * |

365 | * @return A value, which is bigger when observed is closer to actual |

366 | */ |

367 | def constrainValue(observed: Double, actual: Double) = |

368 | { |

369 | dist.logDensity( observed-actual ) |

370 | } |

371 | |

372 | /** |

373 | * Observe a week's data |

374 | * |

375 | * @param i Week number |

376 | * @param values A HashMap holding observed values |

377 | * @param istrigger True if there was a trigger event this week |

378 | */ |

379 | def populate( i : Int, values : HashMap[Symbol,Double], istrigger : Boolean ) |

380 | { |

381 | println( "Populating row " + i ); |

382 | weeks(i).govtall.removeConstraints() |

383 | weeks(i).alltgov.removeConstraints() |

384 | weeks(i).hostile.removeConstraints() |

385 | weeks(i).protests.removeConstraints() |

386 | weeks(i).trigger.removeConstraints() |

387 | |

388 | weeks(i).govtall.setLogConstraint(x => constrainValue(values('govtall),x)) |

389 | weeks(i).alltgov.setLogConstraint(x => constrainValue(values('alltgov),x)) |

390 | weeks(i).hostile.setLogConstraint(x => constrainValue(values('hostile),x)) |

391 | weeks(i).protests.setLogConstraint(x => constrainValue(values('protests),x)) |

392 | weeks(i).trigger.setLogConstraint(x => constrainValue(if(istrigger) 1.0 else 0.0 ,x)) |

393 | } |

394 | } |

395 | |

396 | /** |

397 | * Parse a row of data. |

398 | * |

399 | * @param line A line of data from the data file. |

400 | * |

401 | * @return A HashMap mapping data types (as symbols) to values |

402 | */ |

403 | def parserow( line : String ) = |

404 | { |

405 | val map = new HashMap[Symbol,Double] |

406 | |

407 | val tokens = line.split(',') |

408 | |

409 | map( 'govtall ) = tokens(3).toDouble |

410 | map( 'alltgov ) = tokens(4).toDouble |

411 | map( 'hostile ) = tokens(2).toDouble |

412 | map( 'protests ) = tokens(5).toDouble |

413 | |

414 | println( "Map: " + map ); |

415 | map |

416 | } |

417 | |

418 | /** |

419 | * Learn the parameters of a model. |

420 | * |

421 | * @param Model The Model to learn parameters for |

422 | */ |

423 | def train( model : OverallModel ) |

424 | { |

425 | // First, populate the model with observations |

426 | val lines = fromFile("/home/peval/Desktop/PakData.csv").getLines.toList |

427 | for( i <- 1 until model.length ) |

428 | { |

429 | println( "Populating row " + i ) |

430 | model.populate( i-1, parserow( lines(i) ), i==19 || i==168 || i==280 || i==610 || i==710 ) |

431 | } |

432 | |

433 | // We'll use the Algorithm of Last Resort, Metropolis-Hastings. |

434 | // We'll use it in an open-ended fashion, just letting it run unbounded |

435 | // and checking the learned parameters every 5 minutes. |

436 | val alg = MetropolisHastings( ProposalScheme.default, model.parms.toList:_* ) |

437 | |

438 | // It can take a long, time, so we're upping the timeout |

439 | alg.messageTimeout = new Timeout( 2, TimeUnit.MINUTES ) |

440 | alg.start() |

441 | println( "alg started" ) |

442 | while(true) |

443 | { |

444 | // Wait 5 minutes. Then, stop the algorithm print the current values of the parameters, then resume. |

445 | Thread.sleep( 300000 ) |

446 | alg.stop() |

447 | println( "------------------------------" ) |

448 | model.parms.toList.foreach { p => println( "= Constant(" + alg.expectation(p, (x:Double)=>x) + ")" ) } |

449 | alg.resume() |

450 | } |

451 | // No need for alg.kill() - this is an infinite loop. |

452 | } |

453 | |

454 | /** |

455 | * Predict a week's protests |

456 | * |

457 | * @param model The Model |

458 | * @param i The week to predict |

459 | * @param values The week's observed values |

460 | */ |

461 | def predict( model : OverallModel, i : Int, values : HashMap[Symbol,Double] ) |

462 | { |

463 | // Run M-H 50000 times, predict protests |

464 | // We'll reuse week(0) and week(1), always observing week(0) and predicting week(1) |

465 | val alg = MetropolisHastings( 50000, ProposalScheme.default, model.weeks(1).protests ) |

466 | alg.start() |

467 | alg.stop() |

468 | println( "predicted: " + alg.expectation(model.weeks(1).protests, (x:Double)=>x) + ", actual: " + values.get('protests) ) |

469 | alg.kill() |

470 | |

471 | // Prepare for the next prediction by observing the data values in week(0) |

472 | // The weeks with trigger events are hard-coded. |

473 | model.populate( 0, values, i==19 || i==168 || i==280 || i==610 || i==710 ) |

474 | } |

475 | |

476 | /** |

477 | * The main! |

478 | * |

479 | * @param args Command-line args, not used. |

480 | */ |

481 | def main(args: Array[String]) |

482 | { |

483 | // This is for training. We'll use the first 2 years (104 weeks) |

484 | // train() goes into an infinite loop, so for prediction, |

485 | // comment these 3 lines out. |

486 | val params = new PriorParameters |

487 | val model = new OverallModel( params, 104, true ) |

488 | train( model ) |

489 | |

490 | // This is for prediction. |

491 | val learned = new LearnedParameters |

492 | val learnedmodel = new OverallModel( learned, 2, false ) |

493 | |

494 | // Read and populate the first line |

495 | // (which will be week 104, since weeks 0-103 were used for learning) |

496 | val lines = fromFile("/home/peval/Desktop/PakData.csv").getLines.toList |

497 | learnedmodel.populate(0, parserow(lines(104)), false ) |

498 | |

499 | // And, predict a bunch more weeks. |

500 | for( i <- 1 until 100 ) |

501 | { |

502 | predict( learnedmodel, i, parserow(lines(i+104))) |

503 | } |

504 | } |

505 | } |