RStudio AI Blog: BERT from R
Model
Layer (type)                 Output Shape        Param #    Connected to
Input-Token (InputLayer)     (None, 50)          0
Input-Segment (InputLayer)   (None, 50)          0
Embedding-Token (TokenEmbedd [(None, 50, 768), ( 23440896   Input-Token[0][0]
Embedding-Segment (Embedding (None, 50, 768)     1536       Input-Segment[0][0]
Embedding-Token-Segment (Add (None, 50, 768)     0          Embedding-Token[0][0]
                                                            Embedding-Segment[0][0]
Embedding-Position (Position (None, 50, 768)     38400      Embedding-Token-Segment[0][0]
Embedding-Dropout (Dropout)  (None, 50, 768)     0          Embedding-Position[0][0]
Embedding-Norm (LayerNormali (None, 50, 768)     1536       Embedding-Dropout[0][0]
Encoder-1-MultiHeadSelfAtten (None, 50, 768)     2362368    Embedding-Norm[0][0]
Encoder-1-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-1-MultiHeadSelfAttenti
Encoder-1-MultiHeadSelfAtten (None, 50, 768)     0          Embedding-Norm[0][0]
                                                            Encoder-1-MultiHeadSelfAttenti
Encoder-1-MultiHeadSelfAtten (None, 50, 768)     1536       Encoder-1-MultiHeadSelfAttenti
Encoder-1-FeedForward (FeedF (None, 50, 768)     4722432    Encoder-1-MultiHeadSelfAttenti
Encoder-1-FeedForward-Dropou (None, 50, 768)     0          Encoder-1-FeedForward[0][0]
Encoder-1-FeedForward-Add (A (None, 50, 768)     0          Encoder-1-MultiHeadSelfAttenti
                                                            Encoder-1-FeedForward-Dropout[
Encoder-1-FeedForward-Norm ( (None, 50, 768)     1536       Encoder-1-FeedForward-Add[0][0
Encoder-2-MultiHeadSelfAtten (None, 50, 768)     2362368    Encoder-1-FeedForward-Norm[0][
Encoder-2-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-2-MultiHeadSelfAttenti
Encoder-2-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-1-FeedForward-Norm[0][
                                                            Encoder-2-MultiHeadSelfAttenti
Encoder-2-MultiHeadSelfAtten (None, 50, 768)     1536       Encoder-2-MultiHeadSelfAttenti
Encoder-2-FeedForward (FeedF (None, 50, 768)     4722432    Encoder-2-MultiHeadSelfAttenti
Encoder-2-FeedForward-Dropou (None, 50, 768)     0          Encoder-2-FeedForward[0][0]
Encoder-2-FeedForward-Add (A (None, 50, 768)     0          Encoder-2-MultiHeadSelfAttenti
                                                            Encoder-2-FeedForward-Dropout[
Encoder-2-FeedForward-Norm ( (None, 50, 768)     1536       Encoder-2-FeedForward-Add[0][0
Encoder-3-MultiHeadSelfAtten (None, 50, 768)     2362368    Encoder-2-FeedForward-Norm[0][
Encoder-3-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-3-MultiHeadSelfAttenti
Encoder-3-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-2-FeedForward-Norm[0][
                                                            Encoder-3-MultiHeadSelfAttenti
Encoder-3-MultiHeadSelfAtten (None, 50, 768)     1536       Encoder-3-MultiHeadSelfAttenti
Encoder-3-FeedForward (FeedF (None, 50, 768)     4722432    Encoder-3-MultiHeadSelfAttenti
Encoder-3-FeedForward-Dropou (None, 50, 768)     0          Encoder-3-FeedForward[0][0]
Encoder-3-FeedForward-Add (A (None, 50, 768)     0          Encoder-3-MultiHeadSelfAttenti
                                                            Encoder-3-FeedForward-Dropout[
Encoder-3-FeedForward-Norm ( (None, 50, 768)     1536       Encoder-3-FeedForward-Add[0][0
Encoder-4-MultiHeadSelfAtten (None, 50, 768)     2362368    Encoder-3-FeedForward-Norm[0][
Encoder-4-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-4-MultiHeadSelfAttenti
Encoder-4-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-3-FeedForward-Norm[0][
                                                            Encoder-4-MultiHeadSelfAttenti
Encoder-4-MultiHeadSelfAtten (None, 50, 768)     1536       Encoder-4-MultiHeadSelfAttenti
Encoder-4-FeedForward (FeedF (None, 50, 768)     4722432    Encoder-4-MultiHeadSelfAttenti
Encoder-4-FeedForward-Dropou (None, 50, 768)     0          Encoder-4-FeedForward[0][0]
Encoder-4-FeedForward-Add (A (None, 50, 768)     0          Encoder-4-MultiHeadSelfAttenti
                                                            Encoder-4-FeedForward-Dropout[
Encoder-4-FeedForward-Norm ( (None, 50, 768)     1536       Encoder-4-FeedForward-Add[0][0
Encoder-5-MultiHeadSelfAtten (None, 50, 768)     2362368    Encoder-4-FeedForward-Norm[0][
Encoder-5-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-5-MultiHeadSelfAttenti
Encoder-5-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-4-FeedForward-Norm[0][
                                                            Encoder-5-MultiHeadSelfAttenti
Encoder-5-MultiHeadSelfAtten (None, 50, 768)     1536       Encoder-5-MultiHeadSelfAttenti
Encoder-5-FeedForward (FeedF (None, 50, 768)     4722432    Encoder-5-MultiHeadSelfAttenti
Encoder-5-FeedForward-Dropou (None, 50, 768)     0          Encoder-5-FeedForward[0][0]
Encoder-5-FeedForward-Add (A (None, 50, 768)     0          Encoder-5-MultiHeadSelfAttenti
                                                            Encoder-5-FeedForward-Dropout[
Encoder-5-FeedForward-Norm ( (None, 50, 768)     1536       Encoder-5-FeedForward-Add[0][0
Encoder-6-MultiHeadSelfAtten (None, 50, 768)     2362368    Encoder-5-FeedForward-Norm[0][
Encoder-6-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-6-MultiHeadSelfAttenti
Encoder-6-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-5-FeedForward-Norm[0][
                                                            Encoder-6-MultiHeadSelfAttenti
Encoder-6-MultiHeadSelfAtten (None, 50, 768)     1536       Encoder-6-MultiHeadSelfAttenti
Encoder-6-FeedForward (FeedF (None, 50, 768)     4722432    Encoder-6-MultiHeadSelfAttenti
Encoder-6-FeedForward-Dropou (None, 50, 768)     0          Encoder-6-FeedForward[0][0]
Encoder-6-FeedForward-Add (A (None, 50, 768)     0          Encoder-6-MultiHeadSelfAttenti
                                                            Encoder-6-FeedForward-Dropout[
Encoder-6-FeedForward-Norm ( (None, 50, 768)     1536       Encoder-6-FeedForward-Add[0][0
Encoder-7-MultiHeadSelfAtten (None, 50, 768)     2362368    Encoder-6-FeedForward-Norm[0][
Encoder-7-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-7-MultiHeadSelfAttenti
Encoder-7-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-6-FeedForward-Norm[0][
                                                            Encoder-7-MultiHeadSelfAttenti
Encoder-7-MultiHeadSelfAtten (None, 50, 768)     1536       Encoder-7-MultiHeadSelfAttenti
Encoder-7-FeedForward (FeedF (None, 50, 768)     4722432    Encoder-7-MultiHeadSelfAttenti
Encoder-7-FeedForward-Dropou (None, 50, 768)     0          Encoder-7-FeedForward[0][0]
Encoder-7-FeedForward-Add (A (None, 50, 768)     0          Encoder-7-MultiHeadSelfAttenti
                                                            Encoder-7-FeedForward-Dropout[
Encoder-7-FeedForward-Norm ( (None, 50, 768)     1536       Encoder-7-FeedForward-Add[0][0
Encoder-8-MultiHeadSelfAtten (None, 50, 768)     2362368    Encoder-7-FeedForward-Norm[0][
Encoder-8-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-8-MultiHeadSelfAttenti
Encoder-8-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-7-FeedForward-Norm[0][
                                                            Encoder-8-MultiHeadSelfAttenti
Encoder-8-MultiHeadSelfAtten (None, 50, 768)     1536       Encoder-8-MultiHeadSelfAttenti
Encoder-8-FeedForward (FeedF (None, 50, 768)     4722432    Encoder-8-MultiHeadSelfAttenti
Encoder-8-FeedForward-Dropou (None, 50, 768)     0          Encoder-8-FeedForward[0][0]
Encoder-8-FeedForward-Add (A (None, 50, 768)     0          Encoder-8-MultiHeadSelfAttenti
                                                            Encoder-8-FeedForward-Dropout[
Encoder-8-FeedForward-Norm ( (None, 50, 768)     1536       Encoder-8-FeedForward-Add[0][0
Encoder-9-MultiHeadSelfAtten (None, 50, 768)     2362368    Encoder-8-FeedForward-Norm[0][
Encoder-9-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-9-MultiHeadSelfAttenti
Encoder-9-MultiHeadSelfAtten (None, 50, 768)     0          Encoder-8-FeedForward-Norm[0][
                                                            Encoder-9-MultiHeadSelfAttenti
Encoder-9-MultiHeadSelfAtten (None, 50, 768)     1536       Encoder-9-MultiHeadSelfAttenti
Encoder-9-FeedForward (FeedF (None, 50, 768)     4722432    Encoder-9-MultiHeadSelfAttenti
Encoder-9-FeedForward-Dropou (None, 50, 768)     0          Encoder-9-FeedForward[0][0]
Encoder-9-FeedForward-Add (A (None, 50, 768)     0          Encoder-9-MultiHeadSelfAttenti
                                                            Encoder-9-FeedForward-Dropout[
Encoder-9-FeedForward-Norm ( (None, 50, 768)     1536       Encoder-9-FeedForward-Add[0][0
Encoder-10-MultiHeadSelfAtte (None, 50, 768)     2362368    Encoder-9-FeedForward-Norm[0][
Encoder-10-MultiHeadSelfAtte (None, 50, 768)     0          Encoder-10-MultiHeadSelfAttent
Encoder-10-MultiHeadSelfAtte (None, 50, 768)     0          Encoder-9-FeedForward-Norm[0][
                                                            Encoder-10-MultiHeadSelfAttent
Encoder-10-MultiHeadSelfAtte (None, 50, 768)     1536       Encoder-10-MultiHeadSelfAttent
Encoder-10-FeedForward (Feed (None, 50, 768)     4722432    Encoder-10-MultiHeadSelfAttent
Encoder-10-FeedForward-Dropo (None, 50, 768)     0          Encoder-10-FeedForward[0][0]
Encoder-10-FeedForward-Add ( (None, 50, 768)     0          Encoder-10-MultiHeadSelfAttent
                                                            Encoder-10-FeedForward-Dropout
Encoder-10-FeedForward-Norm  (None, 50, 768)     1536       Encoder-10-FeedForward-Add[0][
Encoder-11-MultiHeadSelfAtte (None, 50, 768)     2362368    Encoder-10-FeedForward-Norm[0]
Encoder-11-MultiHeadSelfAtte (None, 50, 768)     0          Encoder-11-MultiHeadSelfAttent
Encoder-11-MultiHeadSelfAtte (None, 50, 768)     0          Encoder-10-FeedForward-Norm[0]
                                                            Encoder-11-MultiHeadSelfAttent
Encoder-11-MultiHeadSelfAtte (None, 50, 768)     1536       Encoder-11-MultiHeadSelfAttent
Encoder-11-FeedForward (Feed (None, 50, 768)     4722432    Encoder-11-MultiHeadSelfAttent
Encoder-11-FeedForward-Dropo (None, 50, 768)     0          Encoder-11-FeedForward[0][0]
Encoder-11-FeedForward-Add ( (None, 50, 768)     0          Encoder-11-MultiHeadSelfAttent
                                                            Encoder-11-FeedForward-Dropout
Encoder-11-FeedForward-Norm  (None, 50, 768)     1536       Encoder-11-FeedForward-Add[0][
Encoder-12-MultiHeadSelfAtte (None, 50, 768)     2362368    Encoder-11-FeedForward-Norm[0]
Encoder-12-MultiHeadSelfAtte (None, 50, 768)     0          Encoder-12-MultiHeadSelfAttent
Encoder-12-MultiHeadSelfAtte (None, 50, 768)     0          Encoder-11-FeedForward-Norm[0]
                                                            Encoder-12-MultiHeadSelfAttent
Encoder-12-MultiHeadSelfAtte (None, 50, 768)     1536       Encoder-12-MultiHeadSelfAttent
Encoder-12-FeedForward (Feed (None, 50, 768)     4722432    Encoder-12-MultiHeadSelfAttent
Encoder-12-FeedForward-Dropo (None, 50, 768)     0          Encoder-12-FeedForward[0][0]
Encoder-12-FeedForward-Add ( (None, 50, 768)     0          Encoder-12-MultiHeadSelfAttent
                                                            Encoder-12-FeedForward-Dropout
Encoder-12-FeedForward-Norm  (None, 50, 768)     1536       Encoder-12-FeedForward-Add[0][
Extract (Extract)            (None, 768)         0          Encoder-12-FeedForward-Norm[0]
NSP-Dense (Dense)            (None, 768)         590592     Extract[0][0]
output (Dense)               (None, 1)           769        NSP-Dense[0][0]
Total params: 109,128,193
Trainable params: 109,128,193
Non-trainable params: 0