-
Notifications
You must be signed in to change notification settings - Fork 0
/
pep_clivage_final.Rmd
278 lines (186 loc) · 6.69 KB
/
pep_clivage_final.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
---
title: "Peptidomique_clivages_final"
author: "Marion Hardy"
date: "2023-04-13"
output:
html_document:
toc: true
theme: spacelab
highlight: monochrome
editor_options:
chunk_output_type: console
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(message = FALSE, cache = TRUE, echo = FALSE, warning = F, cache.lazy = F)
library(tidyverse)
library(readxl)
```
```{r data loading}
data = read_xlsx("./data/Peptidomics36_peptides_site clivage.xlsm")
prot = read_xlsx("./data/BVDE_929_data.xlsx", sheet = "PeptideGroups")
list = read_xlsx("./data/Peptidomics36_peptides_site clivage.xlsm", sheet = "enriched_pep")
```
## Proteomic data
First, we will filter those proteomics data.
```{r, include=TRUE, echo=TRUE}
prot =
prot %>%
filter(Contaminant != "TRUE")
# Reduce the number of rows to keep only the ones of interest
prot = prot[,c(3:5,9,10:11,16:23,29:44)]
colnames(prot)[11:14] = c("F1_H2O2","F1_ctrl","F2_H2O2","F2_ctrl")
# Filter out peptides which appear in <4 conditions
cols = sapply(prot[15:30], grepl, pattern = "Not.*{2,}")
prot = prot[rowSums(cols) >= 3, ]
# Add the "oxidated" and "not oxidated" annotations
prot =
prot %>%
mutate(state = case_when(
grepl("xidation", Modifications) ~ "Oxidated",
!grepl("xidation", Modifications) ~ "Not oxidated"))
```
```{r, include=F}
# If we wanted to keep only the enriched in H2O2 and ctrl.
prot_f =
prot %>%
filter(`Abundance Ratio: (F1, H2O2) / (F1, Non R)` != 1 |
`Abundance Ratio: (F2, H2O2) / (F2, Non R)` != 1,
`Abundance Ratio Adj. P-Value: (F2, H2O2) / (F2, Non R)`<= 0.05|
`Abundance Ratio Adj. P-Value: (F1, H2O2) / (F1, Non R)`<= 0.05)
length(unique(prot_f$`Annotated Sequence`))
table(prot_f$`Abundance Ratio: (F2, H2O2) / (F2, Non R)`>1)
```
# Peptidomic data
Peptidomics experiment: Eluate peptides ( = epitopes) from IgG and analyze through MS.
Sophie has around 140 enriched peptides in her peptidomics analysis.
It's based on Nathalie's Log2Fc on the peptide abundances (from the peptidomic, not the proteomic data) and is filtered on pval < 0.05.
```{r}
head(data)[1:9] %>%
knitr::kable()
```
data\$\`Amino acid before\` I'm guessing this is the aa before the cleavage
data\$\`Amino acid after\` this is the aa following the cleaved sequence
## Check the aa at Cter and/or Nter
```{r, include=TRUE, echo=TRUE}
list =
data %>%
filter(Sequence%in%list$Enriched_pep)
pep =
list %>%
filter(`Amino acid before`%in%c("M","C")|
`Amino acid after`%in%c("M","C")|
`First amino acid`%in%c("M","C")|
`Last amino acid`%in%c("M","C"))
nrow(pep)
pep %>%
select(Sequence, `Amino acid before`,`Amino acid after`,`First amino acid`, `Last amino acid`,Proteins, `Oxidation (M) site IDs`) %>%
knitr::kable()
```
21/140 enriched peptides contain
- M or C in Nter
- and/or M or C in Cter
Just by curiosity, how many are found to be oxidized?
```{r, echo=TRUE, include=TRUE}
pepox =
pep %>%
filter(!is.na(`Oxidation (M) site IDs`))
pepox %>%
select(Sequence, `Amino acid before`,`Amino acid after`,`First amino acid`, `Last amino acid`,Proteins, `Oxidation (M) site IDs`) %>%
knitr::kable()
```
Only four of these peptide is found oxidized in the peptidomics data.
## Check if the proteins of those 21 peptides can be found in the proteomics data.
```{r, include=TRUE, echo=TRUE}
library(stringr)
pep =
pep %>%
mutate(Uniprot = str_match(pep$Proteins, "(^tr|sp)\\|(.*?)\\|")[,3])
common =
prot %>%
filter(`Master Protein Accessions`%in%pep$Uniprot)
dim(common)
length(unique(common$`Annotated Sequence`))
length(unique(common$`Master Protein Accessions`))
common %>%
select(`Annotated Sequence`,Modifications, `Master Protein Accessions`, `Abundance Ratio: (F2, H2O2) / (F2, Non R)`,
`Abundance Ratio Adj. P-Value: (F2, H2O2) / (F2, Non R)`,state) %>%
head(50) %>%
knitr::kable()
```
There are 190 (200 total) unique peptides from 11/21 proteins that were significantly enriched in H2O2 or control whose peptides can also be found to have a cystein or a methionine next to the cleave site.
## Which are those proteins?
```{r}
uni_pep = unique(common$`Master Protein Accessions`)
pep %>%
filter(Uniprot%in%uni_pep) %>%
select(Sequence, `Amino acid before`,`Amino acid after`,`First amino acid`, `Last amino acid`,Proteins, `Oxidation (M) site IDs`) %>%
knitr::kable()
```
## In which state are they found?
```{r, echo=TRUE, include=TRUE}
table(common$state)
```
## In which condition are they found?
```{r, echo=TRUE, include=TRUE}
table(common$state, common$`Abundance Ratio: (F2, H2O2) / (F2, Non R)`>1)
```
- 83 non oxidated enriched in control condition
- 9 oxidated encriched in control condition
- 54 non oxidated found in H2O2
- 17 oxidated found in H2O2
## To which oxidated protein correspond those oxidated peptides found in the proteomics data
```{r, include=TRUE}
H2O2enriched =
common %>%
filter(`Abundance Ratio: (F2, H2O2) / (F2, Non R)`>1,
state == "Oxidated") %>%
select(`Master Protein Accessions`)
ctrlenriched =
common %>%
filter(`Abundance Ratio: (F2, H2O2) / (F2, Non R)`<1,
state == "Oxidated") %>%
select(`Master Protein Accessions`)
uni_pepH2O2 = unique(H2O2enriched$`Master Protein Accessions`)
uni_pepctrl = unique(ctrlenriched$`Master Protein Accessions`)
```
### Oxidated in H2O2
```{r}
pep %>%
filter(Uniprot%in%uni_pepH2O2) %>%
select(Sequence, `Amino acid before`,`Amino acid after`,`First amino acid`, `Last amino acid`,Proteins, `Oxidation (M) site IDs`) %>%
knitr::kable()
```
### Oxidated in control condition
```{r}
pep %>%
filter(Uniprot%in%uni_pepctrl) %>%
select(Sequence, `Amino acid before`,`Amino acid after`,`First amino acid`, `Last amino acid`,Proteins, `Oxidation (M) site IDs`) %>%
knitr::kable()
```
## Taking a look at those peptides in the proteomics data:
### Oxidated in H2O2
```{r}
aa =
pep %>%
filter(Uniprot%in%uni_pepH2O2)
prot %>%
filter(`Master Protein Accessions`%in% aa$Uniprot,
!is.na(`Abundance Ratio Adj. P-Value: (F2, H2O2) / (F2, Non R)`)) %>%
group_by(`Master Protein Accessions`, state) %>%
summarise(`Master Protein Accessions`, state, meanH2O2_ctrl = mean(`Abundance Ratio: (F2, H2O2) / (F2, Non R)`)) %>%
distinct() %>%
knitr::kable()
```
### Oxidated in ctrl
```{r}
bb =
pep %>%
filter(Uniprot%in%uni_pepctrl)
prot %>%
filter(`Master Protein Accessions`%in% bb$Uniprot,
!is.na(`Abundance Ratio Adj. P-Value: (F2, H2O2) / (F2, Non R)`)) %>%
group_by(`Master Protein Accessions`, state) %>%
summarise(`Master Protein Accessions`, state, meanH2O2_ctrl = mean(`Abundance Ratio: (F2, H2O2) / (F2, Non R)`)) %>%
distinct() %>%
knitr::kable()
```