import torch

import moco.loader
import moco.builder

from moco import clipvit
import clip

# Path of the MoCo pre-training checkpoint to convert.
CHECKPOINT_PATH = 'checkpoint_0073.pth.tar'


def extract_query_encoder_weights(state_dict: dict,
                                  wrapper_prefix: str = 'module.',
                                  keep_prefix: str = 'encoder_q.',
                                  skip_prefix: str = 'encoder_q.fc') -> dict:
    """Return the query-encoder entries of a checkpoint state dict.

    Only the ``wrapper_prefix`` is stripped from the keys; the
    ``encoder_q.`` part is kept so the result lines up with the key names
    of the MoCo wrapper model (``encoder_q.model.visual...``). The input
    mapping is not modified.

    Args:
        state_dict: mapping of parameter name to value, as stored under
            ``checkpoint['state_dict']``.
        wrapper_prefix: leading key prefix to drop (presumably added by
            ``DistributedDataParallel`` during training -- confirm).
        keep_prefix: after stripping, only keys starting with this prefix
            are retained.
        skip_prefix: after stripping, keys starting with this prefix are
            dropped (the embedding layer).

    Returns:
        A new dict holding the retained entries under their stripped keys.
    """
    selected = {}
    for name, value in state_dict.items():
        if not name.startswith(wrapper_prefix):
            continue
        stripped = name[len(wrapper_prefix):]
        # retain only encoder_q up to before the embedding layer
        if stripped.startswith(keep_prefix) and not stripped.startswith(skip_prefix):
            selected[stripped] = value
    return selected


def main():
    """Build the MoCo model and load the pre-trained query-encoder weights.

    Returns:
        The MoCo model after the checkpoint weights have been loaded.

    Raises:
        RuntimeError: if the checkpoint has no usable query-encoder keys,
            or none of them match a parameter of the model.
    """
    encoder = clipvit.ClipViTEmbedder
    # NOTE(review): the positional arguments presumably follow the reference
    # MoCo signature (dim, K, m, T, mlp) -- confirm against moco.builder.
    model = moco.builder.MoCo(encoder,
                              512, 65536, 0.999, 0.07, False)

    # NOTE: torch.load unpickles the file; only use checkpoints you trust.
    checkpoint = torch.load(CHECKPOINT_PATH, map_location="cpu")

    # rename moco pre-trained keys
    encoder_weights = extract_query_encoder_weights(checkpoint['state_dict'])
    if not encoder_weights:
        raise RuntimeError(
            f'no query-encoder weights found in {CHECKPOINT_PATH}')

    # strict=False because only encoder_q is restored; the remaining model
    # entries are expected to show up as missing keys.
    result = model.load_state_dict(encoder_weights, strict=False)
    unexpected = list(result.unexpected_keys)
    if len(unexpected) == len(encoder_weights):
        # strict=False would otherwise hide a complete name mismatch.
        raise RuntimeError(
            'none of the checkpoint keys matched the model; '
            f'first unexpected key: {unexpected[0]}')
    if unexpected:
        print(f'warning: {len(unexpected)} checkpoint keys were not used: '
              f'{unexpected}')
    print(f'loaded {len(encoder_weights) - len(unexpected)} tensors '
          f'from {CHECKPOINT_PATH}')

    print(model)
    return model


if __name__ == "__main__":
    main()

