{VERSION 5 0 "IBM INTEL NT" "5.0" } {USTYLETAB {CSTYLE "Maple Input" -1 0 "Courier" 0 1 255 0 0 1 0 1 0 0 1 0 0 0 0 1 }{CSTYLE "" -1 256 "" 0 1 0 128 0 1 0 0 0 0 0 0 0 0 0 0 } {CSTYLE "" -1 257 "" 0 1 0 128 0 1 0 0 0 0 0 0 0 0 0 0 }{CSTYLE "" -1 258 "" 0 1 0 128 128 1 0 0 0 0 0 0 0 0 0 0 }{CSTYLE "" -1 259 "" 0 1 128 128 128 1 0 0 0 0 0 0 0 0 0 0 }{CSTYLE "" -1 260 "" 0 1 0 0 128 1 0 0 0 0 0 0 0 0 0 0 }{CSTYLE "" -1 261 "" 0 1 0 0 128 1 0 0 0 0 0 0 0 0 0 0 }{CSTYLE "" -1 262 "" 0 1 0 0 128 1 0 0 0 0 0 0 0 0 0 0 } {CSTYLE "" -1 263 "" 0 1 0 0 128 1 0 0 0 0 0 0 0 0 0 0 }{CSTYLE "" -1 264 "" 0 1 0 0 128 1 0 0 0 0 0 0 0 0 0 0 }{CSTYLE "" -1 265 "" 0 1 0 0 128 1 0 0 0 0 0 0 0 0 0 0 }{CSTYLE "" -1 266 "" 0 1 0 0 128 1 0 0 0 0 0 0 0 0 0 0 }{CSTYLE "" -1 267 "" 0 1 0 0 128 1 0 0 0 0 0 0 0 0 0 0 }{PSTYLE "Normal" -1 0 1 {CSTYLE "" -1 -1 "Times" 1 12 0 0 0 1 2 2 2 2 2 2 1 1 1 1 }1 1 0 0 0 0 1 0 1 0 2 2 0 1 }{PSTYLE "Heading 1" -1 3 1 {CSTYLE "" -1 -1 "Times" 1 18 0 0 0 1 2 1 2 2 2 2 1 1 1 1 }1 1 0 0 8 4 1 0 1 0 2 2 0 1 }{PSTYLE "Heading 2" -1 4 1 {CSTYLE "" -1 -1 "Time s" 1 14 0 0 0 1 2 1 2 2 2 2 1 1 1 1 }1 1 0 0 8 2 1 0 1 0 2 2 0 1 } {PSTYLE "Normal" -1 256 1 {CSTYLE "" -1 -1 "Times" 1 12 0 0 0 1 1 2 2 2 2 2 1 1 1 1 }1 1 0 0 0 0 1 0 1 0 2 2 0 1 }} {SECT 0 {PARA 0 "" 0 "" {TEXT -1 0 "" }{TEXT 257 34 "High School Modul es > Precalculus\n" }}{PARA 3 "" 0 "" {TEXT -1 4 " " }{TEXT 256 19 "Averages & Boxplots" }}{PARA 0 "" 0 "" {TEXT -1 130 "\nAn graphical e xploration of the mean, median, mode, quartiles, deciles, and box plot s - including range and interquartile range.\n" }}{PARA 0 "" 0 "" {TEXT 258 153 "[Directions : Execute the Code Resource section first. \+ Although there will be no output immediately, these definitions are us ed later in this worksheet.]" }}{PARA 0 "" 0 "" {TEXT -1 0 "" }}{SECT 1 {PARA 4 "" 0 "" {TEXT -1 1 " " }{TEXT 260 7 "0. 
Code" }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 9 "restart; " }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 25 "with(plots): with(stats):" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 508 "#============================================== ========\n\ndata := [3,3,4,8,8,8, 10,13,15, 16,16,18,21,23,24]:\n\ntw o_extremes_data :=[3,3,3,3,3,3,3,3,3,3,25,25,25,25,25,25,25,25,25,25]: \n\nevenly_distributed_data :=\n[2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24,25,26]:\n\nbig_data_set :=\n[42,42,42,42,42,43 ,43,43,43,43,43,45,45,45,45,45,45,45,45,\n46,46,46,47,47,47,47,48,48,4 8,48,49,49,49,51,51,51,51,52,52,55,\n55,55,58,58,58,59,61,61,62,62,62, 63,63,63,64,64,65,65,65,66,68,69,\n69,69,70,71,71 ]:\n\n" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 573 "#================================= =====================\n\nh := 2: h2 := .8: h3 := .45: \+ \nCG := 'color = COLOR(RGB, .05,.4,.05)':\nCBr := 'color = COLOR(RGB , .3,.2,0)':\nCGSS := 'color = COLOR(RGB, .05,.4,.05), symbolsize = 1 5, symbol=BOX':\n\nCGF := 'color = COLOR(RGB, .05,.4,.05), style=pa tchnogrid, filled = true':\nCGrF := 'color = green, style=patchnogrid , filled = true':\nCBF := 'color = black, style=patchnogrid, filled \+ = true':\nCRF := 'color = red, style=patchnogrid, filled = true': \nCCF := 'color = coral, style=patchnogrid, filled = true':" }}} {EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 554 "#========================== ============================\n\nGetAverages := proc(dataset)\n globa l q1,q2,q3,me,mo,mn,mx, dec;\n local i,MM;\n q1 := evalf( describe [quartile[1]](dataset));\n q2 := evalf( describe[median](dataset) ); \n q3 := evalf( describe[quartile[3]](dataset));\n me := evalf( de scribe[mean](dataset));\n mo := describe[mode](dataset);\n MM := d escribe[range](dataset);\n mn := op(1,MM);\n mx := op(2,MM);\n d ec||0 := mn; dec||10:= mx;\n for i from 1 to 9 do\n dec||i := e valf( describe[decile[i]](dataset)); od;\nend proc:\n" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 2385 
"#================================ ======================\nGetPlots := proc(dataset)\n global q1,q2,q3 ,me,mo,mn,mx,dec,\n MN,MN2,MD,MD2,MO,MO2,DP,LQ,RQ,Ln,LE,RE,L R,LIQR,Tends,PP;\n local i,h,h2,h3,y;\n\n #--------- Constants \+ -----------------------------\n h := 2; h2 := .8; h3 := .5; \n\n #--------- Plots on Original Data ---------------------------- -\n # base line, Left end, right end, mean for original data, stand \+ dev\n\n Ln := plot( [[mn,0],[mx+.5,0]], color = blue, thickness = \+ 4): \n LE := plot( [[mn,-h3],[mn+h3,-h3],[mn+h3,h3], [mn,h3],[mn,- h3]],CGrF):\n RE := plot( [[mx+.5,-h3],[mx-h3+.5,-h3],[mx-h3+.5,h3 ],\n [mx+.5, h3],[mx+.5, -h3]], CGrF):\n #------- ----- MEAN ------------------\n MN := plot( [[me, h2],[me-h2,0],[m e+h2,0],[me, h2]], CBF):\n MN2 := plot( [[me , +h+h2] ,[me,0]], color = black, linestyle = 2):\n\n #------------ MEDIAN -- ----------------\n MD := plot( [[q2 ,-h2],[q2-h2,0],[q2+h2,0],[q2,- h2]], \n color = white, style=patchnogrid, filled = true): \n MD2 := plot( [[q2, h],[q2,-h]], color = white, linestyle = 2):\n \n #------------ MODE ------------------\n MO := plot( [[mo ,-h2] ,[mo-h2,0],[mo+h2,0],[mo,-h2]], \n CBr, style=patchnogrid, filled = true):\n MO2 := plot( [[mo, h],[mo,-h]], CBr, linestyle = \+ 2):\n\n #------------ BOXES ------------------\n LQ := plot( [[q1, -h],[q2,-h],[q2,h],[q1,h],[q1,-h]], CRF):\n RQ := plot( [[q2,-h],[q 3,-h],[q3,h],[q2,h],[q2,-h]], CCF):\n\n #------------ DECILES ----- ------------- \n DP := plot( [seq( [[dec||i ,-h],[dec||i,-1.4*h]],i \+ = 0..10)], \n color = black):\n\n #------------ RANGES - ----------------- \n LR := plot( [[mn,5-h2],[mn,5],[mx,5],[mx,5-h2]] , \n color = blue, thickness = 1):\n LIQR := plot( [[ q1,4-h2],[q1,4],[q3,4],[q3,4-h2]], \n color = red, thic kness = 1):\n \n #------------ TEXT ------------------\n Tends := plots[textplot]( \{[mn, -h/2, mn],[mx, -h/2, mx]\},\n ali gn=\{BOTTOM,RIGHT\},font=[TIMES,ROMAN,12],color = black):\n\n \n #-- ---------- Original Distribution 
Points ------------------\n PP||1 : = pointplot( [dataset[1],-3-h3], CGSS ):\n y := h3;\n\n for i from 2 to nops(dataset) do \n if (dataset[i]=dataset[i-1]) then y := y+h3; else y:= h3; fi;\n PP||i := pointplot( [dataset[i],-3-y ] , CGSS ): \n od:\n\nend proc:\n\n\n" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 222 "#=================================================== \nBasicDataPlot := proc(dataset)\n display([ Ln,LE,RE,Tends, \n \+ seq(PP||i, i = 1..nops(dataset))\n ], scaling = con strained, axes = none );\nend proc:" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 226 "#=================================================== \nMeanPlot := proc(dataset)\n display([ MN, MN2, Ln,LE,RE,Tends, \n seq(PP||i, i = 1..nops(dataset))\n ], scaling = constrained, axes = none );\nend proc:" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 239 "#=================================================== \nMeanModePlot := proc(dataset)\n display([ MN, MN2,MO,MO2,Ln,LE,RE, LR,Tends, \n seq(PP||i, i = 1..nops(dataset))\n \+ ], scaling = constrained, axes = none );\nend proc:" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 244 "#======================================= ============\nBoxPlot := proc(dataset)\n display([ MN,MN2,MD,MD2,LQ, RQ,Ln,LE,RE,LR,LIQR,Tends, \n seq(PP||i, i = 1..nops(data set))\n ], scaling = constrained, axes = none );\nend proc: " }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 237 "#===================== ==============================\nDecPlot := proc(dataset)\n display([ MD,MD2,DP,LQ,RQ,Ln,LE,RE,LIQR,Tends, \n seq(PP||i, i = 1 ..nops(dataset))\n ], scaling = constrained, axes = none ); \nend proc:" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 270 "#========== =========================================\nCompleteDescriptionPlot := \+ proc(dataset)\n display([ MN,MN2,MD,MD2,MO,MO2,DP,LQ,RQ,Ln,LE,RE,LR, LIQR,Tends, \n seq(PP||i, i = 1..nops(dataset))\n \+ ], scaling = constrained, axes = none );\nend proc:" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 167 "PricePlot := proc(dataset)\n dis play([ 
MN2,MD2,DP,LQ,RQ,Ln,LE,RE,LR,LIQR,Tends, \n seq(PP ||i, i = 1..nops(dataset))\n ], axes = none );\nend proc:" } }}}{SECT 1 {PARA 4 "" 0 "" {TEXT -1 1 " " }{TEXT 261 21 "1. Data Distr ibutions" }}{PARA 0 "" 0 "" {TEXT -1 226 "\nAny collection of data val ues can be expressed graphically, by drawing one cell for each occurre nces of a particular data value at its location on the x-axis, stackin g them if there are multiple occurrences at the same value." }}{PARA 0 "" 0 "" {TEXT -1 0 "" }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 40 "Ge tAverages(data):\nGetPlots(data):\ndata;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 20 "BasicDataPlot(data);" }}}{PARA 0 "" 0 "" {TEXT -1 586 "\nThere is a box for each data value. The minimum is 3 and maximu m is 24. There are some gaps between the green boxes, where no data ex ists. And the boxes are stacked up when there are multiple data entrie s sharing the same value. This is a visual representation of the origi nal data distribution.\n\n\nThese distributions can be quite different . The next distribution is evenly distributed. Each data value has a f requency of 1, and every data value in the range is covered so there a re no gaps. This has an artificial look to it, but its good to conside r a wide variety of distributions.\n" }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 97 "GetAverages(evenly_distributed_data):\nGetPlots(evenl y_distributed_data):\nevenly_distributed_data;" }}}{EXCHG {PARA 0 "> \+ " 0 "" {MPLTEXT 1 0 39 "BasicDataPlot(evenly_distributed_data);" }}} {PARA 0 "" 0 "" {TEXT -1 113 "\n This distribution is quite different. It only consists of two distinct values, each repeated a number of ti mes." }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 79 "GetAverages(two_extr emes_data):\nGetPlots(two_extremes_data):\ntwo_extremes_data;" }}} {EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 33 "BasicDataPlot(two_extremes_d ata);" }}}{PARA 0 "" 0 "" {TEXT -1 40 "\nHere is a run of the mill dis tribution." 
}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 64 "GetAverages(bi g_data_set):\nGetPlots(big_data_set):\nbig_data_set;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 28 "BasicDataPlot(big_data_set);" }}}}{SECT 1 {PARA 4 "" 0 "" {TEXT -1 1 " " }{TEXT 262 11 "2. The Mean" }}{PARA 0 "" 0 "" {TEXT -1 94 "\nThe mean is computed by adding up all the val ues and dividing by the number of data values. \n" }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 40 "GetAverages(data):\nGetPlots(data):\ndata;" } }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 215 "( data[1] + data[2] + \+ data[3] + data[4] + data[5] + \n data[6] + data[7] + data[8] + data[9] + data[10] + \n data[11] + data[12] + data[13] + data[14] \+ + data[15] )/nops(data);\n`the mean ` = evalf(%,15);" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 3 "me;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 15 "MeanPlot(data);" }}}{PARA 0 "" 0 "" {TEXT -1 151 "\nT he mean is indicated by the small black triangle facing upward. This i s the balancing point for the data. \n\nLet's look at some other distri butions.\n " }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 72 "GetAverages(e venly_distributed_data):\nGetPlots(evenly_distributed_data):" }}} {EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 34 "MeanPlot(evenly_distributed_ data);" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 50 "GetAverages(big_d ata_set):\nGetPlots(big_data_set):" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 23 "MeanPlot(big_data_set);" }}}}{SECT 1 {PARA 4 "" 0 "" {TEXT -1 1 " " }{TEXT 263 11 "3. The Mode" }}{PARA 0 "" 0 "" {TEXT -1 42 "\nThe mode is the most common data value. \n" }}{EXCHG {PARA 0 "> \+ " 0 "" {MPLTEXT 1 0 40 "GetAverages(data):\nGetPlots(data):\ndata;" }} }{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 3 "mo;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 19 "MeanModePlot(data);" }}}{PARA 0 "" 0 "" {TEXT -1 357 "\nThe mode is indicated by the brown triangle pointing downwar d. The mean is still the black triangle pointing upward. We can see why the mode is 8. 
There are clearly three boxes stacked at 8 - more tha n any other value. Note that the mean and mode are quite different - a nd pretty much unrelated in general.\n\nSometimes there is a tie among several values.\n " }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 64 "GetAv erages(big_data_set):\nGetPlots(big_data_set):\nbig_data_set;" }}} {EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 3 "mo;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 27 "MeanModePlot(big_data_set);" }}}}{SECT 1 {PARA 4 "" 0 "" {TEXT -1 1 " " }{TEXT 264 13 "4. The Median" }}{PARA 0 "" 0 "" {TEXT -1 249 "\nThe median, in theory, divides the data into two ha lves. Half of the data should be below the median, and half above. Of \+ course, there are complications when the median actually falls on a da ta value, but we won't go into every detail here and now." }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 40 "GetAverages(data):\nGetPlots(data): \ndata;" }}}{PARA 0 "" 0 "" {TEXT -1 122 "\nLuckily this data is alrea dy sorted for us. There are 15 data values. The middle value, the 8th \+ of 15, is the number 13." }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 19 "`the median ` = q2;" }}}{PARA 0 "" 0 "" {TEXT -1 107 "\nWe'll postpon e plotting this just for a moment, because medians are a part of the b ox plot that follows.\n " }}}{SECT 1 {PARA 4 "" 0 "" {TEXT -1 1 " " } {TEXT 265 27 "5. Quartiles & the Box Plot" }}{PARA 0 "" 0 "" {TEXT -1 196 "\nMedians divide the data into two halves. Quartiles divide the d ata into four quarters. 
Again, there are complications when the quarti les actually fall on data values, but this is the rough idea.\n" }} {EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 34 "GetAverages(data):\nGetPlots (data):" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 11 "q1; q2; q3;" }}} {PARA 0 "" 0 "" {TEXT -1 599 "\nThus, the data should be distributed i n this way :\n\n - 1/4 of the data is less than or equal to Q1\n - 1/2 of the data is less than or equal to Q2 = medi an\n - 3/4 of the data is less than or equal to Q3\n\nThis lends itself to what is called a box plot. Such a plot depends on the three quartiles, and the minimum value and maximum value. We draw a b ox from the first quartile to the second, and from the second to the t hird. Then we draw \"whiskers\" (lines) from the minimum to quartile 1 , and from quartile 3 to the maximum. A picture will demonstrate the c oncept.\n" }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 14 "BoxPlot(data);" }}}{PARA 0 "" 0 "" {TEXT -1 551 "\nThe green data boxes at the b ottom are the original data, just as they were before. Also the black \+ triangle indicates the mean just as before. The red box is the box fro m Q1 to Q2, and the orange box is the box from Q2 to Q3. The median i s indicated by a white triangle facing downward. As you can see, it is exactly at the place where the red and orange boxes meet. \n\nThe blu e line above is the range of the data - the difference in the largest \+ and smallest values. The red line is called the interquartile range. I t is the distance from Q1 to Q3.\n" }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 51 "`range` = mx - mn;\n`interquartile range` = q3 - q1; " }}}{PARA 0 "" 0 "" {TEXT -1 154 "\nIf we take the ratio of the inter quartile range to the range, we get an idea of how closely bunched the middle 50% of the data is compared to the rest. 
" }}{EXCHG {PARA 0 " > " 0 "" {MPLTEXT 1 0 20 "(q3 - q1)/(mx - mn);" }}}{EXCHG {PARA 0 "> \+ " 0 "" {MPLTEXT 1 0 0 "" }}}{PARA 0 "" 0 "" {TEXT -1 193 " \nHere is a nother example ...with evenly distributed data, which will yield symme tric results. Note the mean and median agree - which is usually not th e case for more randomly distributed data." }}{EXCHG {PARA 0 "> " 0 " " {MPLTEXT 1 0 72 "GetAverages(evenly_distributed_data):\nGetPlots(eve nly_distributed_data):" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 33 "B oxPlot(evenly_distributed_data);" }}}{PARA 0 "" 0 "" {TEXT -1 110 "\nH ere is another example with less orderly data. Note that the red box and orange box are not the same width. " }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 50 "GetAverages(big_data_set):\nGetPlots(big_data_set):" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 22 "BoxPlot(big_data_set);" } }}{PARA 0 "" 0 "" {TEXT -1 2 "\n " }}}{SECT 1 {PARA 4 "" 0 "" {TEXT -1 1 " " }{TEXT 266 10 "6. Deciles" }}{PARA 0 "" 0 "" {TEXT -1 209 "\n Deciles are somewhat similar to quartiles. Quartiles attempt to break \+ the data set into four quarters with equal numbers of data elements. D eciles attempt to break the data into ten equal sized subgroupings.\n " }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 40 "GetAverages(data):\nGetP lots(data):\ndata;" }}}{PARA 0 "" 0 "" {TEXT -1 183 "\nIf we include t he minimum value and the maximum value, we get these values for the de ciles. There is approximately 10% of the data in between any two consecutive numbers that follow." }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 39 "for i from 0 to 10 do\n dec||i; od;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 14 "DecPlot(data);" }}}{PARA 0 "" 0 "" {TEXT -1 204 "\nThe black lines indicate the 9 deciles along with min and max - to \+ create ten regions which contain roughly the same number of data value s. Since this is a small distribution, there are some rough edges." }} {PARA 0 "" 0 "" {TEXT -1 137 "\nLet's look at another distribution. 
Thi s data is evenly distributed, so we would expect to find the deciles a nd quartiles of equal size." }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 97 "GetAverages(evenly_distributed_data):\nGetPlots(evenly_distributed _data):\nevenly_distributed_data;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 39 "for i from 0 to 10 do\n dec||i; od;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 33 "DecPlot(evenly_distributed_data);" }}}{PARA 0 "" 0 "" {TEXT -1 56 "\nA larger and less ordered distributi on is more typical." }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 64 "GetAv erages(big_data_set):\nGetPlots(big_data_set):\nbig_data_set;" }}} {EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 39 "for i from 0 to 10 do\n \+ dec||i; od;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 22 "DecPlot(big _data_set);" }}}{PARA 0 "" 0 "" {TEXT -1 295 "\nThe fact that the data is more concentrated at the left (the smaller numbers) is evident in \+ several ways. The red box for Q1 to Q2 is smaller than the orange box from Q2 to Q3. In the same way, you'll see the decile lines are close ly packed on the left, but more spread out on the right edge.\n " }}} {SECT 1 {PARA 4 "" 0 "" {TEXT -1 1 " " }{TEXT 267 25 "7. Comparing Des criptions" }}{PARA 0 "" 0 "" {TEXT -1 86 "\nLet's see all of this infor mation in one diagram and compare the different averages.\n" }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 40 "GetAverages(data):\nGetPlots(data): \ndata;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 42 "`mean` = me; \n` median` = q2; \n`mode` = mo;" }}}{PARA 0 "" 0 "" {TEXT -1 224 "\nThe m ode is fairly independent since it is simply a measure of the most comm on data value. The mean and median are more representative of \"averag es\" of the data set. You can see in this example, they are not too fa r apart. " }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 30 "CompleteDescrip tionPlot(data);" }}}{PARA 0 "" 0 "" {TEXT -1 95 "\nHere is another exa mple, with the big data set which is concentrated near the smaller num bers." 
}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 64 "GetAverages(big_dat a_set):\nGetPlots(big_data_set):\nbig_data_set;" }}{PARA 0 "> " 0 "" {MPLTEXT 1 0 42 "`mean` = me; \n`median` = q2; \n`mode` = mo;" }}} {EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 38 "CompleteDescriptionPlot(big_ data_set);" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 0 "" }}}{PARA 0 " " 0 "" {TEXT -1 135 "\n\nLet's compare these two data sets - which are \+ identical except for one value. Which of the mean, median, and mode wi ll be different?\n" }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 94 "data2A :=\n[40,40,50,50,50,50,60,60,60,70,70]:\n\ndata2B :=\n[10,40,50,50,50 ,50, 60,60,60,70,70]:\n" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 46 " data2A;\nGetAverages(data2A):\nGetPlots(data2A):" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 42 "`mean` = me; \n`median` = q2; \n`mode` = mo; " }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 0 "" }}}{EXCHG {PARA 0 "> \+ " 0 "" {MPLTEXT 1 0 46 "data2B;\nGetAverages(data2B):\nGetPlots(data2B ):" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 42 "`mean` = me; \n`media n` = q2; \n`mode` = mo;" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 32 " CompleteDescriptionPlot(data2B);" }}}{PARA 0 "" 0 "" {TEXT -1 338 "\nT he mode and median are the same. However, the mean is different. The m ean is affected by even a single extreme data value, whereas the medi an is not. This is why the median is often used for statistics on inco mes and housing prices - because there are small numbers of extreme va lues which can skew the mean. Here is a little example.\n\n" }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 238 "`Housing prices in thousands of do llars`;\nhousing_prices := [ 100, 120, 135, 150, 155, 170, 195, \n \+ 205, 220, 235, 247, 255, 275, 289, 1850\n \+ ];\nGetAverages( housing_prices):\nGetPlots( housing_prices): " }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 29 "`mean` = me; \n`median` = q2; " }}}{PARA 0 "" 0 "" {TEXT -1 69 "\nNote that the mean is great er than all but one of the data values! 
\n" }}{EXCHG {PARA 0 "> " 0 " " {MPLTEXT 1 0 22 "14/15: % = evalf(%,2);" }}}{PARA 0 "" 0 "" {TEXT -1 494 "\nAnother way of looking at it is that 93% of the data lies be low the mean, and only 7% lies above the mean! That doesn't seem to be a good representative average for this data set. If someone said how \+ many members of the community can afford an \"average priced\" house, \+ and you were to use the mean, then it might be that 93% of the people \+ can NOT afford an average priced house - if the average used were the \+ mean. This is why the median, 205, is much more indicative of an avera ge home price.\n" }}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 26 "PricePlo t(housing_prices);" }}}{EXCHG {PARA 0 "> " 0 "" {MPLTEXT 1 0 0 "" }}} {PARA 0 "" 0 "" {TEXT -1 0 "" }{TEXT -1 208 "\nClearly most of the dat a is on the far left - as are the 9 deciles, the 3 quartiles, and the \+ median. The mean is sticking out above all of the data values. And the single expensive home is on the far right.\n" }}}{PARA 0 "" 0 "" {TEXT 259 73 "\n \251 2002 Waterloo Maple Inc & Gregory Moore, all rights reserved." }}}{MARK "14" 0 }{VIEWOPTS 1 1 0 1 1 1803 1 1 1 1 }{PAGENUMBERS 0 1 2 33 1 1 }