scores分析文文档格式.docx

资源描述

scores分析文文档格式.docx

《scores分析文文档格式.docx》由会员分享，可在线阅读，更多相关《scores分析文文档格式.docx（10页珍藏版）》请在冰点文库上搜索。

scores分析文文档格式.docx

scores <- read.table("scores.txt", header=TRUE, row.names="num")

head（scores）

str（scores）

#显示对象的结构

names（scores）

#显示每一列的名称

attach（scores）

#给出数据的概略信息

summary（scores）

summary（scores$math）

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
   3.00   84.00  100.00   93.98  111.00  120.00

#1st Qu. 即第一四分位数（下四分位数）

#选择某行

child <- scores['239', ]

sum（child）#求孩子的总分

[1]647.45

scores.class4 <- scores[class==4, ]

#挑出4班的

#求每个班的平均数学成绩

aver <- tapply(math, class, mean)

#画条曲线看看每个班的数学平均成绩

plot(aver, type='b', ylim=c(80,100), main="各班数学成绩平均分", xlab="班级", ylab="数学平均分")

#生成数据的二维列联表

table（math,class）

class

math

12345678910

000000100

100000000

101000000

000101001

……………

#求4班每一科的平均成绩

subjects <- c('chn', 'math', 'eng', 'phy', 'chem', 'politics', 'bio', 'history', 'geo', 'pe')

sapply（scores[class==4,subjects],mean）

     chn     math      eng      phy     chem politics      bio  history      geo       pe
83.10938 97.29688 85.60156 54.30469 34.67969 42.41406 41.79688 36.77344 44.24219 54.31250

#求各班各科的平均成绩

aggregate（scores[subjects],by=list（class）,mean）

   Group.1      chn     math      eng      phy     chem politics      bio  history      geo       pe
1        1 82.98387 92.82258 92.45161 56.04516 34.95161 42.57258 42.29839 37.03226 43.44355 54.12903
2        2 81.57759 93.17241 85.01724 54.39483 34.60776 43.13793 42.05172 38.59483 43.60345 54.68966
3        3 82.62069 88.58621 82.46552 51.59483 32.33190 41.99138 41.59483 35.49138 42.97414 54.55172
4        4 83.10938 97.29688 85.60156 54.30469 34.67969 42.41406 41.79688 36.77344 44.24219 54.31250
5        5 84.74107 97.89286 83.66964 56.10000 33.91518 42.05357 42.57143 37.77679 43.96429 54.00000
6        6 83.14407 92.40678 78.57627 51.74068 33.36864 40.64407 41.55932 34.46610 43.37288 53.22034
7        7 83.01724 90.29310 87.00862 51.75172 33.98276 41.63793 42.51724 37.46552 44.22414 53.72414
8        8 83.65833 98.65000 86.91667 56.02333 36.07917 41.70000 42.40833 37.84167 44.81667 52.93333
9        9 83.20968 94.35484 86.48387 54.29516 36.11694 41.94355 42.72581 36.07258 44.30645 53.48387
10      10 84.33871 94.08065 86.66774 55.08548 36.01210 41.86290 42.22581 36.78226 44.14516 53.61290

#看看数学成绩的分布图

hist（math）

默认是按频数形成的直方图，设置freq参数可以画密度分布图。

hist（math,freq=FALSE）

lines(density(math), col='blue')

rug（jitter（math））

#轴须图，在轴旁边出现一些小线段，jitter是加噪函数

#核密度图

plot(density(chn), col='blue', lwd=2)
lines(density(math), col='red', lwd=2)
text(locator(2), c("语文", "数学"))

#用鼠标拾取点，加上文本标注

#箱线图

boxplot（math）

boxplot.stats（math）#这个函数可以看到画出箱线图的具体的数据值

$stats
[1]  44  84 100 111 120

$n
[1] 599
#有效样本点个数

$conf
[1]  98.25696 101.74304

$out
#离群值
 [1] 38 42 35 40 43 36 41 40 36 18 26 36 42 32 41 29 18 24 10 20 34 19 10  3
[25] 35 20 35 18 22  9

#并列箱线图，看各班的数据分布情况

boxplot（math~class,data=scores）

lines(tapply(math, class, mean), col='red', type='b')  #加上平均值

可以看出2班没有拖后腿的，4班有6个拖后腿的

#看看各科成绩的相关性

#可以看出：

数学和物理的相关性达88%，物理和化学成绩的相关性达86%。

cor（scores[,subjects]）

               chn      math       eng       phy      chem  politics       bio   history       geo        pe
chn      1.0000000 0.6588126 0.7326778 0.6578172 0.6271155 0.7257003 0.6902282 0.6971145 0.6438662 0.2712453
math     0.6588126 1.0000000 0.8079255 0.8860467 0.8304643 0.7090681 0.7951987 0.7732791 0.7723853 0.3300249
eng      0.7326778 0.8079255 1.0000000 0.8170998 0.7868710 0.7498946 0.7731044 0.7948219 0.7265406 0.3159347
phy      0.6578172 0.8860467 0.8170998 1.0000000 0.8615512 0.7081717 0.8077105 0.8100599 0.7814152 0.3251233
chem     0.6271155 0.8304643 0.7868710 0.8615512 1.0000000 0.6441334 0.7578770 0.7993298 0.7264814 0.2769066
politics 0.7257003 0.7090681 0.7498946 0.7081717 0.6441334 1.0000000 0.7071181 0.7192860 0.6906930 0.3033607
bio      0.6902282 0.7951987 0.7731044 0.8077105 0.7578770 0.7071181 1.0000000 0.7771735 0.8382525 0.2428081
history  0.6971145 0.7732791 0.7948219 0.8100599 0.7993298 0.7192860 0.7771735 1.0000000 0.7731044 0.2708434
geo      0.6438662 0.7723853 0.7265406 0.7814152 0.7264814 0.6906930 0.8382525 0.7731044 1.0000000 0.2605251
pe       0.2712453 0.3300249 0.3159347 0.3251233 0.2769066 0.3033607 0.2428081 0.2708434 0.2605251 1.0000000

#画个图出来看看

pairs（scores[,subjects]）

#详细看看数学和物理的线性相关性

cor_phy_math <- lm(phy~math, scores)

plot（math,phy）

abline（cor_phy_math）

cor_phy_math

#也就是说拟合公式为：

phy=0.5258*math+4.7374，为什么是0.52？

因为数学最高分为120，物理最高分为70

Call:
lm(formula = phy ~ math, data = scores)

Coefficients:
(Intercept)         math
     4.7374       0.5258

展开阅读全文