RStudio AI Blog: BERT from R

Model

Layer (type)                 Output Shape        Param #     Connected to

Input-Token (InputLayer)     (None, 50)          0
Input-Segment (InputLayer)   (None, 50)          0

Embedding-Token (TokenEmbedd [(None, 50, 768), ( 23440896    Input-Token[0][0]
Embedding-Segment (Embedding (None, 50, 768)     1536        Input-Segment[0][0]
Embedding-Token-Segment (Add (None, 50, 768)     0           Embedding-Token[0][0]
                                                             Embedding-Segment[0][0]
Embedding-Position (Position (None, 50, 768)     38400       Embedding-Token-Segment[0][0]
Embedding-Dropout (Dropout)  (None, 50, 768)     0           Embedding-Position[0][0]
Embedding-Norm (LayerNormali (None, 50, 768)     1536        Embedding-Dropout[0][0]

Encoder-1-MultiHeadSelfAtten (None, 50, 768)     2362368     Embedding-Norm[0][0]
Encoder-1-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-1-MultiHeadSelfAttenti
Encoder-1-MultiHeadSelfAtten (None, 50, 768)     0           Embedding-Norm[0][0]
                                                             Encoder-1-MultiHeadSelfAttenti
Encoder-1-MultiHeadSelfAtten (None, 50, 768)     1536        Encoder-1-MultiHeadSelfAttenti
Encoder-1-FeedForward (FeedF (None, 50, 768)     4722432     Encoder-1-MultiHeadSelfAttenti
Encoder-1-FeedForward-Dropou (None, 50, 768)     0           Encoder-1-FeedForward[0][0]
Encoder-1-FeedForward-Add (A (None, 50, 768)     0           Encoder-1-MultiHeadSelfAttenti
                                                             Encoder-1-FeedForward-Dropout[
Encoder-1-FeedForward-Norm ( (None, 50, 768)     1536        Encoder-1-FeedForward-Add[0][0

Encoder-2-MultiHeadSelfAtten (None, 50, 768)     2362368     Encoder-1-FeedForward-Norm[0][
Encoder-2-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-2-MultiHeadSelfAttenti
Encoder-2-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-1-FeedForward-Norm[0][
                                                             Encoder-2-MultiHeadSelfAttenti
Encoder-2-MultiHeadSelfAtten (None, 50, 768)     1536        Encoder-2-MultiHeadSelfAttenti
Encoder-2-FeedForward (FeedF (None, 50, 768)     4722432     Encoder-2-MultiHeadSelfAttenti
Encoder-2-FeedForward-Dropou (None, 50, 768)     0           Encoder-2-FeedForward[0][0]
Encoder-2-FeedForward-Add (A (None, 50, 768)     0           Encoder-2-MultiHeadSelfAttenti
                                                             Encoder-2-FeedForward-Dropout[
Encoder-2-FeedForward-Norm ( (None, 50, 768)     1536        Encoder-2-FeedForward-Add[0][0

Encoder-3-MultiHeadSelfAtten (None, 50, 768)     2362368     Encoder-2-FeedForward-Norm[0][
Encoder-3-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-3-MultiHeadSelfAttenti
Encoder-3-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-2-FeedForward-Norm[0][
                                                             Encoder-3-MultiHeadSelfAttenti
Encoder-3-MultiHeadSelfAtten (None, 50, 768)     1536        Encoder-3-MultiHeadSelfAttenti
Encoder-3-FeedForward (FeedF (None, 50, 768)     4722432     Encoder-3-MultiHeadSelfAttenti
Encoder-3-FeedForward-Dropou (None, 50, 768)     0           Encoder-3-FeedForward[0][0]
Encoder-3-FeedForward-Add (A (None, 50, 768)     0           Encoder-3-MultiHeadSelfAttenti
                                                             Encoder-3-FeedForward-Dropout[
Encoder-3-FeedForward-Norm ( (None, 50, 768)     1536        Encoder-3-FeedForward-Add[0][0

Encoder-4-MultiHeadSelfAtten (None, 50, 768)     2362368     Encoder-3-FeedForward-Norm[0][
Encoder-4-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-4-MultiHeadSelfAttenti
Encoder-4-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-3-FeedForward-Norm[0][
                                                             Encoder-4-MultiHeadSelfAttenti
Encoder-4-MultiHeadSelfAtten (None, 50, 768)     1536        Encoder-4-MultiHeadSelfAttenti
Encoder-4-FeedForward (FeedF (None, 50, 768)     4722432     Encoder-4-MultiHeadSelfAttenti
Encoder-4-FeedForward-Dropou (None, 50, 768)     0           Encoder-4-FeedForward[0][0]
Encoder-4-FeedForward-Add (A (None, 50, 768)     0           Encoder-4-MultiHeadSelfAttenti
                                                             Encoder-4-FeedForward-Dropout[
Encoder-4-FeedForward-Norm ( (None, 50, 768)     1536        Encoder-4-FeedForward-Add[0][0

Encoder-5-MultiHeadSelfAtten (None, 50, 768)     2362368     Encoder-4-FeedForward-Norm[0][
Encoder-5-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-5-MultiHeadSelfAttenti
Encoder-5-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-4-FeedForward-Norm[0][
                                                             Encoder-5-MultiHeadSelfAttenti
Encoder-5-MultiHeadSelfAtten (None, 50, 768)     1536        Encoder-5-MultiHeadSelfAttenti
Encoder-5-FeedForward (FeedF (None, 50, 768)     4722432     Encoder-5-MultiHeadSelfAttenti
Encoder-5-FeedForward-Dropou (None, 50, 768)     0           Encoder-5-FeedForward[0][0]
Encoder-5-FeedForward-Add (A (None, 50, 768)     0           Encoder-5-MultiHeadSelfAttenti
                                                             Encoder-5-FeedForward-Dropout[
Encoder-5-FeedForward-Norm ( (None, 50, 768)     1536        Encoder-5-FeedForward-Add[0][0

Encoder-6-MultiHeadSelfAtten (None, 50, 768)     2362368     Encoder-5-FeedForward-Norm[0][
Encoder-6-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-6-MultiHeadSelfAttenti
Encoder-6-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-5-FeedForward-Norm[0][
                                                             Encoder-6-MultiHeadSelfAttenti
Encoder-6-MultiHeadSelfAtten (None, 50, 768)     1536        Encoder-6-MultiHeadSelfAttenti
Encoder-6-FeedForward (FeedF (None, 50, 768)     4722432     Encoder-6-MultiHeadSelfAttenti
Encoder-6-FeedForward-Dropou (None, 50, 768)     0           Encoder-6-FeedForward[0][0]
Encoder-6-FeedForward-Add (A (None, 50, 768)     0           Encoder-6-MultiHeadSelfAttenti
                                                             Encoder-6-FeedForward-Dropout[
Encoder-6-FeedForward-Norm ( (None, 50, 768)     1536        Encoder-6-FeedForward-Add[0][0

Encoder-7-MultiHeadSelfAtten (None, 50, 768)     2362368     Encoder-6-FeedForward-Norm[0][
Encoder-7-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-7-MultiHeadSelfAttenti
Encoder-7-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-6-FeedForward-Norm[0][
                                                             Encoder-7-MultiHeadSelfAttenti
Encoder-7-MultiHeadSelfAtten (None, 50, 768)     1536        Encoder-7-MultiHeadSelfAttenti
Encoder-7-FeedForward (FeedF (None, 50, 768)     4722432     Encoder-7-MultiHeadSelfAttenti
Encoder-7-FeedForward-Dropou (None, 50, 768)     0           Encoder-7-FeedForward[0][0]
Encoder-7-FeedForward-Add (A (None, 50, 768)     0           Encoder-7-MultiHeadSelfAttenti
                                                             Encoder-7-FeedForward-Dropout[
Encoder-7-FeedForward-Norm ( (None, 50, 768)     1536        Encoder-7-FeedForward-Add[0][0

Encoder-8-MultiHeadSelfAtten (None, 50, 768)     2362368     Encoder-7-FeedForward-Norm[0][
Encoder-8-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-8-MultiHeadSelfAttenti
Encoder-8-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-7-FeedForward-Norm[0][
                                                             Encoder-8-MultiHeadSelfAttenti
Encoder-8-MultiHeadSelfAtten (None, 50, 768)     1536        Encoder-8-MultiHeadSelfAttenti
Encoder-8-FeedForward (FeedF (None, 50, 768)     4722432     Encoder-8-MultiHeadSelfAttenti
Encoder-8-FeedForward-Dropou (None, 50, 768)     0           Encoder-8-FeedForward[0][0]
Encoder-8-FeedForward-Add (A (None, 50, 768)     0           Encoder-8-MultiHeadSelfAttenti
                                                             Encoder-8-FeedForward-Dropout[
Encoder-8-FeedForward-Norm ( (None, 50, 768)     1536        Encoder-8-FeedForward-Add[0][0

Encoder-9-MultiHeadSelfAtten (None, 50, 768)     2362368     Encoder-8-FeedForward-Norm[0][
Encoder-9-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-9-MultiHeadSelfAttenti
Encoder-9-MultiHeadSelfAtten (None, 50, 768)     0           Encoder-8-FeedForward-Norm[0][
                                                             Encoder-9-MultiHeadSelfAttenti
Encoder-9-MultiHeadSelfAtten (None, 50, 768)     1536        Encoder-9-MultiHeadSelfAttenti
Encoder-9-FeedForward (FeedF (None, 50, 768)     4722432     Encoder-9-MultiHeadSelfAttenti
Encoder-9-FeedForward-Dropou (None, 50, 768)     0           Encoder-9-FeedForward[0][0]
Encoder-9-FeedForward-Add (A (None, 50, 768)     0           Encoder-9-MultiHeadSelfAttenti
                                                             Encoder-9-FeedForward-Dropout[
Encoder-9-FeedForward-Norm ( (None, 50, 768)     1536        Encoder-9-FeedForward-Add[0][0

Encoder-10-MultiHeadSelfAtte (None, 50, 768)     2362368     Encoder-9-FeedForward-Norm[0][
Encoder-10-MultiHeadSelfAtte (None, 50, 768)     0           Encoder-10-MultiHeadSelfAttent
Encoder-10-MultiHeadSelfAtte (None, 50, 768)     0           Encoder-9-FeedForward-Norm[0][
                                                             Encoder-10-MultiHeadSelfAttent
Encoder-10-MultiHeadSelfAtte (None, 50, 768)     1536        Encoder-10-MultiHeadSelfAttent
Encoder-10-FeedForward (Feed (None, 50, 768)     4722432     Encoder-10-MultiHeadSelfAttent
Encoder-10-FeedForward-Dropo (None, 50, 768)     0           Encoder-10-FeedForward[0][0]
Encoder-10-FeedForward-Add ( (None, 50, 768)     0           Encoder-10-MultiHeadSelfAttent
                                                             Encoder-10-FeedForward-Dropout
Encoder-10-FeedForward-Norm  (None, 50, 768)     1536        Encoder-10-FeedForward-Add[0][

Encoder-11-MultiHeadSelfAtte (None, 50, 768)     2362368     Encoder-10-FeedForward-Norm[0]
Encoder-11-MultiHeadSelfAtte (None, 50, 768)     0           Encoder-11-MultiHeadSelfAttent
Encoder-11-MultiHeadSelfAtte (None, 50, 768)     0           Encoder-10-FeedForward-Norm[0]
                                                             Encoder-11-MultiHeadSelfAttent
Encoder-11-MultiHeadSelfAtte (None, 50, 768)     1536        Encoder-11-MultiHeadSelfAttent
Encoder-11-FeedForward (Feed (None, 50, 768)     4722432     Encoder-11-MultiHeadSelfAttent
Encoder-11-FeedForward-Dropo (None, 50, 768)     0           Encoder-11-FeedForward[0][0]
Encoder-11-FeedForward-Add ( (None, 50, 768)     0           Encoder-11-MultiHeadSelfAttent
                                                             Encoder-11-FeedForward-Dropout
Encoder-11-FeedForward-Norm  (None, 50, 768)     1536        Encoder-11-FeedForward-Add[0][

Encoder-12-MultiHeadSelfAtte (None, 50, 768)     2362368     Encoder-11-FeedForward-Norm[0]
Encoder-12-MultiHeadSelfAtte (None, 50, 768)     0           Encoder-12-MultiHeadSelfAttent
Encoder-12-MultiHeadSelfAtte (None, 50, 768)     0           Encoder-11-FeedForward-Norm[0]
                                                             Encoder-12-MultiHeadSelfAttent
Encoder-12-MultiHeadSelfAtte (None, 50, 768)     1536        Encoder-12-MultiHeadSelfAttent
Encoder-12-FeedForward (Feed (None, 50, 768)     4722432     Encoder-12-MultiHeadSelfAttent
Encoder-12-FeedForward-Dropo (None, 50, 768)     0           Encoder-12-FeedForward[0][0]
Encoder-12-FeedForward-Add ( (None, 50, 768)     0           Encoder-12-MultiHeadSelfAttent
                                                             Encoder-12-FeedForward-Dropout
Encoder-12-FeedForward-Norm  (None, 50, 768)     1536        Encoder-12-FeedForward-Add[0][

Extract (Extract)            (None, 768)         0           Encoder-12-FeedForward-Norm[0]
NSP-Dense (Dense)            (None, 768)         590592      Extract[0][0]
output (Dense)               (None, 1)           769         NSP-Dense[0][0]

Total params: 109,128,193
Trainable params: 109,128,193
Non-trainable params: 0