In the pursuit of knowledge, data is a collection of discrete units of information in a conceptual model that in their most basic forms convey quantity, quality, fact, statistics, or other basic units of meaning.
Data usually is organized into structures such as tables that provide additional context and meaning, and which may themselves be used as data in larger structures.
Data may be used as variables in a computational process.
Data may represent abstract ideas or concrete measurements.
Data is commonly used in scientific research, finance, and in virtually every other form of human organizational activity.
library(HistData)
*Galton’s data on the heights of parents and their children, by child
# Show the first ten parent/child height rows of the Galton table.
head(Galton, n = 10L)
## parent child
## 1 70.5 61.7
## 2 68.5 61.7
## 3 65.5 61.7
## 4 64.5 61.7
## 5 64.0 61.7
## 6 67.5 62.2
## 7 67.5 62.2
## 8 67.5 62.2
## 9 66.5 62.2
## 10 66.5 62.2
# Size of the Galton table as c(rows, columns).
c(nrow(Galton), ncol(Galton))
## [1] 928 2
# Show the first ten rows of the per-family table.
head(GaltonFamilies, n = 10L)
## family father mother midparentHeight children childNum gender childHeight
## 1 001 78.5 67.0 75.43 4 1 male 73.2
## 2 001 78.5 67.0 75.43 4 2 female 69.2
## 3 001 78.5 67.0 75.43 4 3 female 69.0
## 4 001 78.5 67.0 75.43 4 4 female 69.0
## 5 002 75.5 66.5 73.66 4 1 male 73.5
## 6 002 75.5 66.5 73.66 4 2 male 72.5
## 7 002 75.5 66.5 73.66 4 3 female 65.5
## 8 002 75.5 66.5 73.66 4 4 female 65.5
## 9 003 75.0 64.0 72.06 2 1 male 71.0
## 10 003 75.0 64.0 72.06 2 2 female 68.0
# Size of the GaltonFamilies table as c(rows, columns).
c(nrow(GaltonFamilies), ncol(GaltonFamilies))
## [1] 934 8
*Darwin’s Heights of Cross- and Self-fertilized Zea Mays Pairs
# Print the whole ZeaMays table (columns: pair, pot, cross, self, diff).
ZeaMays
## pair pot cross self diff
## 1 1 1 23.500 17.375 6.125
## 2 2 1 12.000 20.375 -8.375
## 3 3 1 21.000 20.000 1.000
## 4 4 2 22.000 20.000 2.000
## 5 5 2 19.125 18.375 0.750
## 6 6 2 21.500 18.625 2.875
## 7 7 3 22.125 18.625 3.500
## 8 8 3 20.375 15.250 5.125
## 9 9 3 18.250 16.500 1.750
## 10 10 3 21.625 18.000 3.625
## 11 11 3 23.250 16.250 7.000
## 12 12 4 21.000 18.000 3.000
## 13 13 4 22.125 12.750 9.375
## 14 14 4 23.000 15.500 7.500
## 15 15 4 12.000 18.000 -6.000
*Halley’s Life Table
# Print the whole HalleyLifeTable table (columns: age, deaths, number, ratio).
HalleyLifeTable
## age deaths number ratio
## 1 1 238 1000 0.8550000
## 2 2 145 855 0.9333333
## 3 3 57 798 0.9523810
## 4 4 38 760 0.9631579
## 5 5 28 732 0.9699454
## 6 6 22 710 0.9746479
## 7 7 18 692 0.9826590
## 8 8 12 680 0.9852941
## 9 9 10 670 0.9865672
## 10 10 9 661 0.9878971
## 11 11 8 653 0.9892802
## 12 12 7 646 0.9907121
## 13 13 6 640 0.9906250
## 14 14 6 634 0.9905363
## 15 15 6 628 0.9904459
## 16 16 6 622 0.9903537
## 17 17 6 616 0.9902597
## 18 18 6 610 0.9901639
## 19 19 6 604 0.9900662
## 20 20 6 598 0.9899666
## 21 21 6 592 0.9898649
## 22 22 6 586 0.9880546
## 23 23 7 579 0.9896373
## 24 24 6 573 0.9895288
## 25 25 6 567 0.9876543
## 26 26 7 560 0.9875000
## 27 27 7 553 0.9873418
## 28 28 7 546 0.9871795
## 29 29 7 539 0.9851577
## 30 30 8 531 0.9849341
## 31 31 8 523 0.9847036
## 32 32 8 515 0.9844660
## 33 33 8 507 0.9842209
## 34 34 8 499 0.9819639
## 35 35 9 490 0.9816327
## 36 36 9 481 0.9812890
## 37 37 9 472 0.9809322
## 38 38 9 463 0.9805616
## 39 39 9 454 0.9801762
## 40 40 9 445 0.9797753
## 41 41 9 436 0.9793578
## 42 42 9 427 0.9765808
## 43 43 10 417 0.9760192
## 44 44 10 407 0.9754300
## 45 45 10 397 0.9748111
## 46 46 10 387 0.9741602
## 47 47 10 377 0.9734748
## 48 48 10 367 0.9727520
## 49 49 10 357 0.9691877
## 50 50 11 346 0.9682081
## 51 51 11 335 0.9671642
## 52 52 11 324 0.9660494
## 53 53 11 313 0.9648562
## 54 54 11 302 0.9668874
## 55 55 10 292 0.9657534
## 56 56 10 282 0.9645390
## 57 57 10 272 0.9632353
## 58 58 10 262 0.9618321
## 59 59 10 252 0.9603175
## 60 60 10 242 0.9586777
## 61 61 10 232 0.9568966
## 62 62 10 222 0.9549550
## 63 63 10 212 0.9528302
## 64 64 10 202 0.9504950
## 65 65 10 192 0.9479167
## 66 66 10 182 0.9450549
## 67 67 10 172 0.9418605
## 68 68 10 162 0.9382716
## 69 69 10 152 0.9342105
## 70 70 10 142 0.9225352
## 71 71 11 131 0.9160305
## 72 72 11 120 0.9083333
## 73 73 11 109 0.8990826
## 74 74 11 98 0.8979592
## 75 75 10 88 0.8863636
## 76 76 10 78 0.8717949
## 77 77 10 68 0.8529412
## 78 78 10 58 0.8620690
## 79 79 8 50 0.8200000
## 80 80 9 41 0.8292683
## 81 81 7 34 0.8235294
## 82 82 6 28 0.8214286
## 83 83 5 23 0.8695652
## 84 84 3 20 NA
# Restore the objects saved in BANK1.Rdata into the workspace and print the
# BANK1 table. load() invisibly returns the names of the objects it created,
# so verify that the file really supplied BANK1 rather than printing a stale
# object of that name or failing with an unrelated "object not found" error.
loaded_objects <- load("BANK1.Rdata")
stopifnot("BANK1" %in% loaded_objects)
BANK1
## X Employee EducLev JobGrade YrHired YrBorn Gender YrsPrior PCJob Salary
## 1 1 1 3 1 92 69 Male 1 No 32.00
## 2 2 2 1 1 81 57 Female 1 No 39.10
## 3 3 3 1 1 83 60 Female 0 No 33.20
## 4 4 4 2 1 87 55 Female 7 No 30.60
## 5 5 5 3 1 92 67 Male 0 No 29.00
## 6 6 6 3 1 92 71 Female 0 No 30.50
## 7 7 7 3 1 91 68 Female 0 No 30.00
## 8 8 8 3 1 87 62 Male 2 No 27.00
## 9 9 9 1 1 91 33 Female 0 No 34.00
## 10 10 10 3 1 86 64 Female 0 No 29.50
## 11 11 11 3 1 86 61 Female 2 No 26.80
## 12 12 12 2 1 87 58 Female 8 No 31.30
## 13 13 13 2 1 86 58 Female 0 No 31.20
## 14 14 14 2 1 85 37 Female 6 No 34.70
## 15 15 15 3 1 91 62 Female 0 No 30.00
## 16 16 16 3 1 92 68 Female 0 No 31.00
## 17 17 17 3 1 89 65 Female 0 No 27.00
## 18 18 18 2 1 87 58 Female 9 No 29.60
## 19 19 19 3 1 90 51 Female 6 No 32.60
## 20 20 20 2 1 91 66 Female 3 No 29.60
## 21 21 21 3 1 91 59 Female 2 No 29.50
## 22 22 22 2 1 92 67 Male 3 No 31.00
## 23 23 23 1 1 90 50 Female 0 No 28.50
## 24 24 24 2 1 92 62 Male 4 No 26.70
## 25 25 25 3 1 92 71 Male 1 No 30.75
## 26 26 26 3 1 92 68 Male 1 No 29.50
## 27 27 27 2 1 79 35 Female 6 No 42.20
## 28 28 28 1 1 82 47 Female 0 No 37.60
## 29 29 29 1 1 83 55 Female 6 No 34.00
## 30 30 30 2 1 91 62 Female 7 No 33.00
## 31 31 31 1 1 88 60 Female 4 No 28.76
## 32 32 32 1 1 84 51 Female 0 No 35.40
## 33 33 33 3 1 92 52 Male 8 No 31.00
## 34 34 34 2 1 77 49 Female 2 No 38.80
## 35 35 35 2 1 81 53 Female 0 No 34.30
## 36 36 36 1 1 76 48 Female 0 No 35.00
## 37 37 37 3 1 92 70 Female 2 Yes 34.60
## 38 38 38 2 1 93 65 Female 4 No 28.50
## 39 39 39 1 1 84 55 Female 0 No 29.50
## 40 40 40 3 1 92 69 Male 2 No 30.50
## 41 41 41 3 1 90 63 Female 1 No 34.20
## 42 42 42 1 1 80 44 Female 0 No 43.60
## 43 43 43 5 1 88 60 Female 0 Yes 33.50
## 44 44 44 3 1 83 58 Female 1 No 33.00
## 45 45 45 1 1 77 51 Female 0 No 45.30
## 46 46 46 1 1 78 42 Male 3 No 38.80
## 47 47 47 1 1 85 55 Female 0 No 29.90
## 48 48 48 3 1 90 44 Male 10 No 31.20
## 49 49 49 1 1 80 53 Female 0 No 34.00
## 50 50 50 2 1 93 42 Female 0 No 30.45
## 51 51 51 1 1 92 37 Male 3 No 35.50
## 52 52 52 1 1 91 51 Female 10 Yes 34.00
## 53 53 53 2 1 88 64 Female 0 No 29.10
## 54 54 54 1 1 87 31 Female 0 No 29.65
## 55 55 55 3 1 80 48 Female 1 No 29.20
## 56 56 56 3 1 86 58 Female 0 Yes 29.80
## 57 57 57 2 1 79 49 Female 0 No 33.50
## 58 58 58 1 1 87 40 Female 0 No 34.00
## 59 59 59 1 1 86 56 Female 0 No 29.60
## 60 60 60 3 1 77 44 Female 0 No 34.00
## 61 61 61 2 2 92 58 Female 8 No 37.25
## 62 62 62 2 2 89 65 Male 3 No 33.00
## 63 63 63 3 2 91 69 Female 0 No 28.60
## 64 64 64 5 2 90 54 Female 1 Yes 36.00
## 65 65 65 3 2 91 61 Female 4 Yes 37.30
## 66 66 66 2 2 88 38 Male 4 No 29.90
## 67 67 67 1 2 84 42 Female 8 No 31.50
## 68 68 68 3 2 90 63 Female 4 Yes 41.40
## 69 69 69 1 2 78 51 Female 5 No 32.74
## 70 70 70 3 2 92 70 Male 1 No 33.50
## 71 71 71 1 2 90 64 Female 9 No 32.00
## 72 72 72 1 2 86 45 Female 0 No 30.80
## 73 73 73 5 2 92 48 Female 3 Yes 42.00
## 74 74 74 3 2 91 60 Male 0 No 34.00
## 75 75 75 2 2 79 52 Female 0 No 32.50
## 76 76 76 2 2 86 49 Female 10 No 31.70
## 77 77 77 5 2 92 60 Male 0 No 36.50
## 78 78 78 3 2 91 73 Male 0 No 33.00
## 79 79 79 2 2 87 37 Female 0 No 31.20
## 80 80 80 5 2 87 55 Female 0 No 34.00
## 81 81 81 3 2 89 65 Female 0 No 33.00
## 82 82 82 5 2 91 66 Female 4 No 33.90
## 83 83 83 1 2 92 64 Female 9 Yes 39.00
## 84 84 84 2 2 83 43 Male 18 No 34.92
## 85 85 85 5 2 92 62 Male 5 No 39.00
## 86 86 86 1 2 87 46 Female 0 No 34.00
## 87 87 87 2 2 89 61 Female 7 No 31.90
## 88 88 88 5 2 92 69 Male 1 No 37.00
## 89 89 89 5 2 91 67 Male 0 No 34.00
## 90 90 90 5 2 92 60 Female 2 No 36.40
## 91 91 91 1 2 80 48 Female 1 Yes 38.20
## 92 92 92 1 2 80 44 Female 0 No 35.30
## 93 93 93 3 2 92 69 Male 2 No 34.50
## 94 94 94 3 2 83 62 Female 0 No 30.50
## 95 95 95 4 2 93 68 Male 2 No 30.00
## 96 96 96 5 2 87 61 Female 0 Yes 37.30
## 97 97 97 4 2 90 66 Female 0 No 40.20
## 98 98 98 3 2 90 68 Male 0 No 35.50
## 99 99 99 1 2 84 52 Female 0 No 35.00
## 100 100 100 3 2 91 59 Female 3 No 38.00
## 101 101 101 1 2 86 57 Female 0 No 35.30
## 102 102 102 2 2 81 35 Female 0 No 34.10
## 103 103 103 3 3 91 52 Female 5 Yes 43.20
## 104 104 104 2 3 80 47 Female 5 No 36.10
## 105 105 105 5 3 88 63 Female 3 No 34.60
## 106 106 106 3 3 90 64 Male 0 No 36.00
## 107 107 107 5 3 88 66 Female 2 No 36.20
## 108 108 108 3 3 88 60 Female 0 No 37.50
## 109 109 109 3 3 91 58 Female 12 No 41.00
## 110 110 110 2 3 85 52 Female 0 No 35.60
## 111 111 111 3 3 90 62 Female 5 No 39.80
## 112 112 112 4 3 84 37 Female 4 Yes 41.30
## 113 113 113 3 3 86 51 Female 7 No 42.50
## 114 114 114 3 3 91 58 Female 8 Yes 45.80
## 115 115 115 5 3 90 47 Female 6 No 34.90
## 116 116 116 5 3 91 69 Male 0 No 41.50
## 117 117 117 3 3 90 70 Female 0 No 38.00
## 118 118 118 4 3 89 57 Female 0 No 35.00
## 119 119 119 3 3 89 54 Female 0 No 40.00
## 120 120 120 3 3 90 66 Male 0 No 36.00
## 121 121 121 2 3 86 36 Female 0 No 33.70
## 122 122 122 2 3 90 66 Male 4 No 36.30
## 123 123 123 3 3 92 68 Female 2 Yes 38.00
## 124 124 124 5 3 91 65 Female 0 No 39.50
## 125 125 125 2 3 88 61 Female 5 No 36.30
## 126 126 126 3 3 87 60 Female 2 No 32.50
## 127 127 127 2 3 83 45 Female 6 No 37.00
## 128 128 128 5 3 92 62 Female 1 No 32.60
## 129 129 129 3 3 91 69 Female 0 No 36.00
## 130 130 130 5 3 92 59 Female 0 No 35.00
## 131 131 131 5 3 92 62 Female 5 Yes 43.60
## 132 132 132 3 3 87 48 Female 0 No 33.80
## 133 133 133 1 3 74 44 Female 0 No 35.30
## 134 134 134 1 3 79 53 Female 6 No 42.40
## 135 135 135 5 3 90 64 Male 0 No 39.50
## 136 136 136 2 3 70 33 Female 10 No 43.50
## 137 137 137 5 3 89 49 Male 1 No 42.00
## 138 138 138 3 3 74 35 Female 9 No 40.30
## 139 139 139 4 3 89 52 Male 5 No 44.00
## 140 140 140 1 3 70 42 Female 2 No 40.66
## 141 141 141 3 3 82 57 Female 1 No 39.70
## 142 142 142 5 3 89 56 Female 5 No 45.00
## 143 143 143 5 3 88 60 Female 0 No 43.90
## 144 144 144 4 3 87 55 Female 3 No 38.00
## 145 145 145 5 3 90 63 Female 3 No 39.02
## 146 146 146 5 4 90 62 Male 3 No 44.50
## 147 147 147 5 4 91 65 Male 1 No 41.00
## 148 148 148 5 4 89 58 Male 3 No 44.00
## 149 149 149 5 4 89 65 Male 0 No 44.00
## 150 150 150 5 4 90 63 Female 4 No 42.50
## 151 151 151 5 4 88 58 Female 3 No 40.26
## 152 152 152 5 4 90 66 Male 1 No 44.50
## 153 153 153 1 4 82 45 Female 9 No 35.50
## 154 154 154 5 4 89 66 Male 0 No 42.50
## 155 155 155 5 4 88 63 Female 0 No 44.00
## 156 156 156 5 4 89 64 Male 2 No 45.00
## 157 157 157 2 4 80 48 Female 4 No 44.40
## 158 158 158 3 4 78 51 Female 0 No 38.00
## 159 159 159 5 4 91 68 Male 0 No 41.80
## 160 160 160 1 4 72 40 Male 0 No 45.50
## 161 161 161 3 4 90 43 Male 4 No 42.50
## 162 162 162 5 4 92 45 Female 12 No 44.00
## 163 163 163 3 4 76 36 Female 8 Yes 54.30
## 164 164 164 3 4 69 48 Female 0 No 44.80
## 165 165 165 3 4 89 52 Male 4 No 47.00
## 166 166 166 5 4 80 54 Female 0 No 43.80
## 167 167 167 1 4 83 56 Female 4 Yes 48.00
## 168 168 168 5 4 86 56 Female 0 No 42.70
## 169 169 169 3 4 81 55 Female 1 Yes 48.50
## 170 170 170 3 4 79 46 Female 0 No 42.00
## 171 171 171 2 4 79 42 Female 1 No 45.50
## 172 172 172 3 4 84 58 Female 0 No 44.50
## 173 173 173 2 4 82 55 Female 2 No 51.20
## 174 174 174 5 5 88 61 Male 0 No 47.50
## 175 175 175 5 5 87 58 Female 0 No 44.50
## 176 176 176 5 5 87 64 Male 0 No 47.00
## 177 177 177 5 5 89 54 Male 10 No 47.00
## 178 178 178 3 5 78 49 Female 4 No 43.10
## 179 179 179 5 5 87 58 Male 2 No 49.00
## 180 180 180 5 5 87 62 Male 0 No 48.50
## 181 181 181 3 5 87 60 Female 5 No 45.00
## 182 182 182 5 5 79 46 Female 5 No 52.50
## 183 183 183 5 5 89 62 Male 2 No 47.50
## 184 184 184 5 5 88 64 Male 0 No 48.00
## 185 185 185 5 5 87 46 Male 4 No 46.50
## 186 186 186 5 5 83 55 Female 2 No 61.50
## 187 187 187 5 5 86 58 Female 2 No 50.00
## 188 188 188 5 5 83 49 Female 2 No 61.80
## 189 189 189 4 5 79 52 Female 0 No 43.00
## 190 190 190 5 5 84 59 Male 1 No 47.00
## 191 191 191 5 5 86 58 Female 6 No 58.50
## 192 192 192 5 5 79 55 Male 7 No 55.00
## 193 193 193 3 5 71 41 Male 3 No 57.00
## 194 194 194 5 5 78 38 Male 1 No 57.00
## 195 195 195 5 6 81 46 Male 0 No 60.00
## 196 196 196 3 6 82 54 Male 0 No 60.00
## 197 197 197 5 6 76 36 Male 4 No 59.00
## 198 198 198 5 6 83 44 Male 0 No 60.00
## 199 199 199 5 6 75 50 Male 0 No 65.00
## 200 200 200 5 6 75 39 Male 1 No 52.00
## 201 201 201 5 6 73 38 Male 0 No 58.00
## 202 202 202 4 6 74 42 Male 0 No 60.00
## 203 203 203 5 6 56 30 Male 0 No 74.00
## 204 204 204 3 6 61 35 Male 0 No 95.00
## 205 205 205 5 6 59 34 Male 0 No 97.00
## 206 206 206 5 6 63 33 Male 0 No 88.00
## 207 207 207 5 6 60 36 Male 0 No 94.00
## 208 208 208 5 6 62 33 Female 0 No 30.00
Sample size of the data is not too large.
Dimension of the data is relatively small.
Sampling frequency is relatively low.
Data is relatively homogeneous.
“Little Data”!!!
This dataset is a snapshot of most of the new news content published online over one week. It covers the 7-day period from August 24 through August 30 in each of the years 2017 and 2018.
Year 2017: 1,398,431 ; Year 2018: 1,912,872
It includes approximately 3.3 million articles, with 20,000 news sources and 20+ languages.
This dataset has just four fields (as per the column metadata):
publish_time - earliest known time of the url appearing online in yyyyMMddHHmm format, IST timezone
feed_code - unique identifier for the publisher or domain
source_url - url of the article
headline_text - Headline of the article (UTF8, Any possible languages)
(https://www.kaggle.com/datasets/therohk/global-news-week)
This is a public domain speech dataset consisting of 13,100 short audio clips of a single speaker reading passages from 7 non-fiction books. A transcription is provided for each clip. Clips vary in length from 1 to 10 seconds and have a total length of approximately 24 hours.
The texts were published between 1884 and 1964, and are in the public domain. The audio was recorded in 2016-17 by the LibriVox project and is also in the public domain.
(https://keithito.com/LJ-Speech-Dataset/)
Sample size of the data is large, or even huge
Dimension of the data is relatively large, and sometimes it is much larger than the sample size.
Sampling frequency varies, from low frequency to high frequency.
Data is relatively heterogeneous.
“Big Data”!!!
# Read the 2020 gross pay trend indicator table, then list its attributes
# (column names, class and row names).
Pay_trend <- read.csv(
  file = "HKData/2020_Gross_Pay_Trend_Indicators_(EN).csv"
)
attributes(Pay_trend)
## $names
## [1] "Year"
## [2] "No..of.surveyed.companies"
## [3] "No..of.surveyed.employees"
## [4] "Lower.Salary.Band....Basic.Pay.Indicator......a."
## [5] "Lower.Salary.Band...Additional.Pay.Indicator.......b."
## [6] "Lower.Salary.Band....Gross.Pay.Trend.Indicator......a.....b."
## [7] "Middle.Salary.Band...Basic.Pay.Indicator.......c."
## [8] "Middle.Salary.Band...Additional.Pay.Indicator.......d."
## [9] "Middle.Salary.Band...Gross.Pay.Trend.Indicator......c.....d."
## [10] "Upper.Salary.Band....Basic.Pay.Indicator......e."
## [11] "Upper.Salary.Band...Additional.Pay.Indicator.......f."
## [12] "Upper.Salary.Band....Gross.Pay.Trend.Indicator.......e....f."
##
## $class
## [1] "data.frame"
##
## $row.names
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Draw the basic pay indicator of the three salary bands against the survey
# year on a single panel. Columns are selected by position: 1 is "Year" and
# 4 / 7 / 10 are the lower / middle / upper band "Basic.Pay.Indicator" columns
# reported by attributes(Pay_trend).
par(mfrow = c(1, 1))
plot(
  Pay_trend[, c(1, 4)],
  type = "b", ylim = c(1, 7),
  main = "Payment Trend in Hong Kong", ylab = "Basic Salary",
  col = 2, pch = 1
)
lines(Pay_trend[, c(1, 7)], type = "b", col = 4, pch = 2)
lines(Pay_trend[, c(1, 10)], type = "b", col = 7, pch = 3)
# Legend text repaired: "salaryof" lacked a space, the first label carried a
# trailing blank, and "low" did not match the "Lower" band column names.
# lty = 1 adds the connecting line that type = "b" draws to each legend key.
legend(
  2016, 7,
  legend = c(
    "salary of lower band",
    "salary of middle band",
    "salary of upper band"
  ),
  cex = 0.8, col = c(2, 4, 7), pch = 1:3, lty = 1
)
https://chp-dashboard.geodata.gov.hk/covid-19/en.html
# Read the Hong Kong COVID-19 "latest situation" table and list its columns.
coronvirus_case <- read.csv(
  file = "HKData/Coronvirus/latest_situation_of_reported_cases_covid_19_eng.csv"
)
colnames(coronvirus_case)
## [1] "As.of.date"
## [2] "As.of.time"
## [3] "Number.of.confirmed.cases"
## [4] "Number.of.ruled.out.cases"
## [5] "Number.of.cases.still.hospitalised.for.investigation"
## [6] "Number.of.cases.fulfilling.the.reporting.criteria"
## [7] "Number.of.death.cases"
## [8] "Number.of.discharge.cases"
## [9] "Number.of.probable.cases"
## [10] "Number.of.hospitalised.cases.in.critical.condition"
## [11] "Number.of.cases.tested.positive.for.SARS.CoV.2.virus.by.nucleic.acid.tests"
## [12] "Number.of.cases.tested.positive.for.SARS.CoV.2.virus.by.rapid.antigen.tests"
# Size of the case table as c(rows, columns).
dim(coronvirus_case)
## [1] 951 12
# New confirmed cases per report: first differences of column 3
# ("Number.of.confirmed.cases", plotted as a running total in the left panel)
# over rows 1 to 725.
# NOTE(review): the table has 951 rows, yet only the first 725 are differenced,
# while the left panel plots every row under an axis label that names a fixed
# date range. Confirm which window is intended.
number_case <- diff(coronvirus_case[1:725, 3])
# Parse the report dates once through the as.Date() generic (day/month/year)
# instead of calling the as.Date.character method directly.
report_date <- as.Date(coronvirus_case[, 1], format = "%d/%m/%Y")
par(mfrow = c(1, 2), oma = c(0, 0, 4, 0))
plot(
  report_date, coronvirus_case[, 3],
  type = "b",
  xlab = "Time: 8 Jan. 2020 to 31 Dec. 2021",
  ylab = "Total number of confirmed cases",
  pch = 1, col = "blue"
)
# Axis labels repaired: a stray backtick followed the date range and "Number"
# was misspelled.
plot(
  report_date[2:725], number_case,
  type = "b",
  xlab = "Time: 8 Jan. 2020 to 31 Dec. 2021",
  ylab = "Number of confirmed cases every day",
  pch = 1, col = "blue"
)
mtext("Hong Kong Coronavirus Confirmed Cases", outer = TRUE, cex = 2)
# Hang Seng Index, daily series: read the CSV, drop rows containing NA, then
# show the column names and the table size.
X_HSI_day <- na.omit(read.csv(file = "HKData/Yahoo Finance/^HSI_day.csv"))
colnames(X_HSI_day)
## [1] "Date" "Open" "High" "Low" "Close" "Adj.Close"
## [7] "Volume"
c(nrow(X_HSI_day), ncol(X_HSI_day))
## [1] 3704 7
# Hang Seng Index, weekly series: same steps.
X_HSI_week <- na.omit(read.csv(file = "HKData/Yahoo Finance/^HSI_week.csv"))
colnames(X_HSI_week)
## [1] "Date" "Open" "High" "Low" "Close" "Adj.Close"
## [7] "Volume"
c(nrow(X_HSI_week), ncol(X_HSI_week))
## [1] 783 7
# Hang Seng Index, monthly series: same steps.
X_HSI_month <- na.omit(read.csv(file = "HKData/Yahoo Finance/^HSI_month.csv"))
colnames(X_HSI_month)
## [1] "Date" "Open" "High" "Low" "Close" "Adj.Close"
## [7] "Volume"
c(nrow(X_HSI_month), ncol(X_HSI_month))
## [1] 180 7
# Log returns of the Hang Seng Index at daily, weekly and monthly frequency,
# each drawn next to its close-price series on a 3 x 2 panel grid.

# Coerce every Close column to numeric exactly once and reuse the result for
# both the return and the price panel. as.numeric() warns on the daily column,
# and na.omit() on the data frame left those rows in place, so the affected
# closes (and the returns touching them) are NA.
# NOTE(review): presumably the daily CSV holds text placeholders for missing
# quotes; confirm and consider declaring them through read.csv(na.strings = ).
close_day <- as.numeric(X_HSI_day$Close)
## Warning: NAs introduced by coercion
close_week <- as.numeric(X_HSI_week$Close)
close_month <- as.numeric(X_HSI_month$Close)

Days <- length(close_day)
weeks <- length(close_week)
months <- length(close_month)

# Return at t is log(P_t / P_{t-1}). Negative indexing drops the first / last
# element and, unlike 2:n and 1:(n - 1), stays well-defined when a series has
# fewer than two observations.
ratio_days <- log(close_day[-1] / close_day[-Days])
ratio_weeks <- log(close_week[-1] / close_week[-weeks])
ratio_months <- log(close_month[-1] / close_month[-months])

# Parse the date columns once (month/day/year text).
date_day <- as.Date(X_HSI_day$Date, "%m/%d/%Y")
date_week <- as.Date(X_HSI_week$Date, "%m/%d/%Y")
date_month <- as.Date(X_HSI_month$Date, "%m/%d/%Y")

par(mfrow = c(3, 2), oma = c(0, 0, 4, 0))
plot(
  date_day, close_day,
  type = "l", col = "blue",
  xlab = "Days: 3 Jan. 2006 to 30 Dec. 2020",
  ylab = "Hang Seng Index"
)
plot(
  date_day[-1], ratio_days,
  type = "l", col = "blue",
  xlab = "Days: 4 Jan. 2006 to 30 Dec. 2020",
  ylab = "Hang Seng Index return ratio"
)
plot(
  date_week, close_week,
  type = "l", col = "blue",
  xlab = "Weeks: 3 Jan. 2006 to 30 Dec. 2020",
  ylab = "Hang Seng Index"
)
plot(
  date_week[-1], ratio_weeks,
  type = "l", col = "blue",
  xlab = "Weeks: 4 Jan. 2006 to 30 Dec. 2020",
  ylab = "Hang Seng Index return ratio"
)
plot(
  date_month, close_month,
  type = "l", col = "blue",
  xlab = "Months: 3 Jan. 2006 to 30 Dec. 2020",
  ylab = "Hang Seng Index"
)
plot(
  date_month[-1], ratio_months,
  type = "l", col = "blue",
  xlab = "Months: 4 Jan. 2006 to 30 Dec. 2020",
  ylab = "Hang Seng Index return ratio"
)
mtext("Hang Seng Index 3/1/2006 to 30/12/2020", outer = TRUE, cex = 2)
# HSBC (0005.HK), daily series: read the CSV, drop rows containing NA, then
# show the column names and the table size.
X0005_HK_HSBC_day <- na.omit(
  read.csv(file = "HKData/Yahoo Finance/0005.HK_HSBC_day.csv")
)
colnames(X0005_HK_HSBC_day)
## [1] "Date" "Open" "High" "Low" "Close" "Adj.Close"
## [7] "Volume"
c(nrow(X0005_HK_HSBC_day), ncol(X0005_HK_HSBC_day))
## [1] 3708 7
# HSBC (0005.HK), weekly series: same steps.
X0005_HK_HSBC_week <- na.omit(
  read.csv(file = "HKData/Yahoo Finance/0005.HK_HSBC_week.csv")
)
colnames(X0005_HK_HSBC_week)
## [1] "Date" "Open" "High" "Low" "Close" "Adj.Close"
## [7] "Volume"
c(nrow(X0005_HK_HSBC_week), ncol(X0005_HK_HSBC_week))
## [1] 784 7
# Log returns of HSBC (0005.HK) at daily and weekly frequency, each drawn next
# to its close-price series on a 2 x 2 panel grid.

# Coerce every Close column to numeric exactly once and reuse the result for
# both the return and the price panel. as.numeric() warns on the daily column,
# and na.omit() on the data frame left those rows in place, so the affected
# closes (and the returns touching them) are NA.
# NOTE(review): presumably the daily CSV holds text placeholders for missing
# quotes; confirm and consider declaring them through read.csv(na.strings = ).
close_day_HSBC <- as.numeric(X0005_HK_HSBC_day$Close)
## Warning: NAs introduced by coercion
close_week_HSBC <- as.numeric(X0005_HK_HSBC_week$Close)

Days_HSBC <- length(close_day_HSBC)
weeks_HSBC <- length(close_week_HSBC)

# Return at t is log(P_t / P_{t-1}). Negative indexing drops the first / last
# element and, unlike 2:n and 1:(n - 1), stays well-defined when a series has
# fewer than two observations.
HSBC_ratio_days <- log(close_day_HSBC[-1] / close_day_HSBC[-Days_HSBC])
HSBC_ratio_weeks <- log(close_week_HSBC[-1] / close_week_HSBC[-weeks_HSBC])

# Parse the date columns once (month/day/year text).
date_day_HSBC <- as.Date(X0005_HK_HSBC_day$Date, "%m/%d/%Y")
date_week_HSBC <- as.Date(X0005_HK_HSBC_week$Date, "%m/%d/%Y")

par(mfrow = c(2, 2), oma = c(0, 0, 4, 0))
plot(
  date_day_HSBC, close_day_HSBC,
  type = "l", col = "blue",
  xlab = "Days: 3 Jan. 2006 to 30 Dec. 2020",
  ylab = "HSBC 0005 Stock daily close price"
)
plot(
  date_day_HSBC[-1], HSBC_ratio_days,
  type = "l", col = "blue",
  xlab = "Days: 4 Jan. 2006 to 30 Dec. 2020",
  ylab = "Stock HK0005 HSBC daily return ratio"
)
# Weekly panels: the axis label said "Days:" and "weekly" was misspelled.
plot(
  date_week_HSBC, close_week_HSBC,
  type = "l", col = "blue",
  xlab = "Weeks: 3 Jan. 2006 to 30 Dec. 2020",
  ylab = "HSBC 0005 Stock weekly close price"
)
plot(
  date_week_HSBC[-1], HSBC_ratio_weeks,
  type = "l", col = "blue",
  xlab = "Weeks: 4 Jan. 2006 to 30 Dec. 2020",
  ylab = "Stock HK0005 HSBC weekly return ratio"
)
mtext(
  "Close price of Stock HK0005 HSBC 3/1/2006 to 30/12/2020",
  outer = TRUE, cex = 2
)