[
  {
    "model": "GFSA",
    "task": "Last read",
    "example_count": 100,
    "total_runtime_hours": 24.001161011389005,
    "total_iterations": 11092,
    "early_stopping_runtime_hours": 16.445079668820448,
    "early_stopping_iterations": 7600,
    "batch_size": 128,
    "learning_rate": 0.00708730109428682,
    "gradient_clipping_global_norm": 214.48347086347096,
    "focal_loss_gamma": 0.5074656112807019,
    "num_noninitial_gfsa_states": 7,
    "epsilon_backtrack_is_stop": 0.06702947875218991
  },
  {
    "model": "GFSA",
    "task": "Last write",
    "example_count": 100000,
    "total_runtime_hours": 24.000898977222256,
    "total_iterations": 11090,
    "early_stopping_runtime_hours": 23.80612161852523,
    "early_stopping_iterations": 11000,
    "batch_size": 128,
    "learning_rate": 0.00708730109428682,
    "gradient_clipping_global_norm": 214.48347086347096,
    "focal_loss_gamma": 0.5074656112807019,
    "num_noninitial_gfsa_states": 7,
    "epsilon_backtrack_is_stop": 0.06702947875218991
  },
  {
    "model": "GFSA",
    "task": "Last write",
    "example_count": 100,
    "total_runtime_hours": 13.605348123889325,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 10.402649175525777,
    "early_stopping_iterations": 382300,
    "batch_size": 8,
    "learning_rate": 0.00010384469993571317,
    "gradient_clipping_global_norm": 3204.7061293199913,
    "focal_loss_gamma": 0.28505972555567693,
    "num_noninitial_gfsa_states": 1,
    "epsilon_backtrack_is_stop": 0.018454532646119123
  },
  {
    "model": "GFSA",
    "task": "Next control flow",
    "example_count": 100000,
    "total_runtime_hours": 24.000120970277816,
    "total_iterations": 22359,
    "early_stopping_runtime_hours": 3.220196024456972,
    "early_stopping_iterations": 3000,
    "batch_size": 128,
    "learning_rate": 0.004339256363451823,
    "gradient_clipping_global_norm": 2.5045643331382963,
    "focal_loss_gamma": 4.175908243654459,
    "num_noninitial_gfsa_states": 7,
    "epsilon_backtrack_is_stop": 0.0010917445328478628
  },
  {
    "model": "GFSA",
    "task": "Next control flow",
    "example_count": 100,
    "total_runtime_hours": 24.000069430556092,
    "total_iterations": 257536,
    "early_stopping_runtime_hours": 0.40072183520514104,
    "early_stopping_iterations": 4300,
    "batch_size": 32,
    "learning_rate": 0.0019508208104119368,
    "gradient_clipping_global_norm": 2.8897487355741074,
    "focal_loss_gamma": 1.9234488493059088,
    "num_noninitial_gfsa_states": 3,
    "epsilon_backtrack_is_stop": 0.0030745269466729043
  },
  {
    "model": "GFSA",
    "task": "Last read",
    "example_count": 100000,
    "total_runtime_hours": 24.00027181444461,
    "total_iterations": 39470,
    "early_stopping_runtime_hours": 20.248519164454155,
    "early_stopping_iterations": 33300,
    "batch_size": 32,
    "learning_rate": 0.009632891665138211,
    "gradient_clipping_global_norm": 95.91604241180926,
    "focal_loss_gamma": 1.2042422953492853,
    "num_noninitial_gfsa_states": 7,
    "epsilon_backtrack_is_stop": 0.01717690477102147
  },
  {
    "model": "GGNN dot-product",
    "task": "Last read",
    "example_count": 100,
    "total_runtime_hours": 24.000005156944304,
    "total_iterations": 188841,
    "early_stopping_runtime_hours": 23.994794422985922,
    "early_stopping_iterations": 188800,
    "batch_size": 128,
    "learning_rate": 0.006861634138801846,
    "gradient_clipping_global_norm": 477.4342035504629,
    "focal_loss_gamma": 4.400615204652571,
    "position_embeddings": true,
    "edge_embedding_vectors": 0,
    "hidden_dim": 512,
    "num_ggnn_iterations": 5
  },
  {
    "model": "GGNN dot-product",
    "task": "Last write",
    "example_count": 100000,
    "total_runtime_hours": 5.916162806110937,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 5.887765224641605,
    "early_stopping_iterations": 497600,
    "batch_size": 8,
    "learning_rate": 0.0007607460105524131,
    "gradient_clipping_global_norm": 3.390671955012954,
    "focal_loss_gamma": 2.93125444062951,
    "position_embeddings": true,
    "edge_embedding_vectors": 0,
    "hidden_dim": 128,
    "num_ggnn_iterations": 11
  },
  {
    "model": "GGNN dot-product",
    "task": "Last write",
    "example_count": 100,
    "total_runtime_hours": 24.0000719072225,
    "total_iterations": 189978,
    "early_stopping_runtime_hours": 19.947632642466143,
    "early_stopping_iterations": 157900,
    "batch_size": 128,
    "learning_rate": 0.006861634138801846,
    "gradient_clipping_global_norm": 477.4342035504629,
    "focal_loss_gamma": 4.400615204652571,
    "position_embeddings": true,
    "edge_embedding_vectors": 0,
    "hidden_dim": 512,
    "num_ggnn_iterations": 5
  },
  {
    "model": "GGNN dot-product",
    "task": "Last read",
    "example_count": 100000,
    "total_runtime_hours": 24.000183689722256,
    "total_iterations": 104409,
    "early_stopping_runtime_hours": 23.032673483226255,
    "early_stopping_iterations": 100200,
    "batch_size": 128,
    "learning_rate": 0.0005364487553473555,
    "gradient_clipping_global_norm": 2.9885432901853597,
    "focal_loss_gamma": 4.308820861466848,
    "position_embeddings": true,
    "edge_embedding_vectors": 0,
    "hidden_dim": 512,
    "num_ggnn_iterations": 10
  },
  {
    "model": "GGNN dot-product",
    "task": "Next control flow",
    "example_count": 100000,
    "total_runtime_hours": 5.815444095000159,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 4.111518975165112,
    "early_stopping_iterations": 353500,
    "batch_size": 8,
    "learning_rate": 0.0007607460105524131,
    "gradient_clipping_global_norm": 3.390671955012954,
    "focal_loss_gamma": 2.93125444062951,
    "position_embeddings": true,
    "edge_embedding_vectors": 0,
    "hidden_dim": 128,
    "num_ggnn_iterations": 11
  },
  {
    "model": "GGNN dot-product",
    "task": "Next control flow",
    "example_count": 100,
    "total_runtime_hours": 24.000428855833096,
    "total_iterations": 346402,
    "early_stopping_runtime_hours": 23.62615181159198,
    "early_stopping_iterations": 341000,
    "batch_size": 8,
    "learning_rate": 9.890894868194445e-05,
    "gradient_clipping_global_norm": 17.08267782641385,
    "focal_loss_gamma": 4.4529469440247595,
    "position_embeddings": false,
    "edge_embedding_vectors": 1,
    "hidden_dim": 256,
    "num_ggnn_iterations": 12
  },
  {
    "model": "GGNN nodewise",
    "task": "Last read",
    "example_count": 100,
    "total_runtime_hours": 24.00009323638909,
    "total_iterations": 185854,
    "early_stopping_runtime_hours": 7.851354658885236,
    "early_stopping_iterations": 60800,
    "batch_size": 8,
    "learning_rate": 0.0009205305103317118,
    "gradient_clipping_global_norm": 26.466177348485694,
    "focal_loss_gamma": 2.508256168633116,
    "position_embeddings": false,
    "edge_embedding_vectors": 1,
    "hidden_dim": 16,
    "num_ggnn_iterations": 12
  },
  {
    "model": "GGNN nodewise",
    "task": "Last write",
    "example_count": 100000,
    "total_runtime_hours": 24.0002944127776,
    "total_iterations": 44570,
    "early_stopping_runtime_hours": 23.531812112146756,
    "early_stopping_iterations": 43700,
    "batch_size": 32,
    "learning_rate": 0.0034535867024370805,
    "gradient_clipping_global_norm": 4.126813971997859,
    "focal_loss_gamma": 1.705949724514646,
    "position_embeddings": true,
    "edge_embedding_vectors": 0,
    "hidden_dim": 32,
    "num_ggnn_iterations": 9
  },
  {
    "model": "GGNN nodewise",
    "task": "Last write",
    "example_count": 100,
    "total_runtime_hours": 24.00001111722251,
    "total_iterations": 282960,
    "early_stopping_runtime_hours": 21.170493267100433,
    "early_stopping_iterations": 249600,
    "batch_size": 8,
    "learning_rate": 0.0078381360054057,
    "gradient_clipping_global_norm": 7549.918825103126,
    "focal_loss_gamma": 2.3153017620955447,
    "position_embeddings": false,
    "edge_embedding_vectors": 0,
    "hidden_dim": 16,
    "num_ggnn_iterations": 11
  },
  {
    "model": "GGNN nodewise",
    "task": "Next control flow",
    "example_count": 100000,
    "total_runtime_hours": 24.000963966111414,
    "total_iterations": 184802,
    "early_stopping_runtime_hours": 18.987570112041475,
    "early_stopping_iterations": 146200,
    "batch_size": 8,
    "learning_rate": 0.0009205305103317118,
    "gradient_clipping_global_norm": 26.466177348485694,
    "focal_loss_gamma": 2.508256168633116,
    "position_embeddings": false,
    "edge_embedding_vectors": 1,
    "hidden_dim": 16,
    "num_ggnn_iterations": 12
  },
  {
    "model": "GGNN nodewise",
    "task": "Last read",
    "example_count": 100000,
    "total_runtime_hours": 24.000027439166022,
    "total_iterations": 202581,
    "early_stopping_runtime_hours": 23.528393331153325,
    "early_stopping_iterations": 198600,
    "batch_size": 8,
    "learning_rate": 0.005675627039066312,
    "gradient_clipping_global_norm": 33.72758221957592,
    "focal_loss_gamma": 0.26782419483617537,
    "position_embeddings": true,
    "edge_embedding_vectors": 0,
    "hidden_dim": 32,
    "num_ggnn_iterations": 8
  },
  {
    "model": "GGNN nodewise",
    "task": "Next control flow",
    "example_count": 100,
    "total_runtime_hours": 24.000009190277694,
    "total_iterations": 286194,
    "early_stopping_runtime_hours": 0.9224515576603795,
    "early_stopping_iterations": 11000,
    "batch_size": 8,
    "learning_rate": 0.0078381360054057,
    "gradient_clipping_global_norm": 7549.918825103126,
    "focal_loss_gamma": 2.3153017620955447,
    "position_embeddings": false,
    "edge_embedding_vectors": 0,
    "hidden_dim": 16,
    "num_ggnn_iterations": 11
  },
  {
    "model": "GREAT dot-product",
    "task": "Last read",
    "example_count": 100,
    "total_runtime_hours": 9.324681796389003,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 0.673242025699286,
    "early_stopping_iterations": 36100,
    "batch_size": 8,
    "learning_rate": 0.00038607742227550544,
    "gradient_clipping_global_norm": 1.3427736710377893,
    "focal_loss_gamma": 3.850710668791298,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 256,
    "self_attention_num_heads": 8,
    "num_transformer_blocks": 11
  },
  {
    "model": "GREAT dot-product",
    "task": "Last write",
    "example_count": 100000,
    "total_runtime_hours": 15.415242803055822,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 11.808075987140759,
    "early_stopping_iterations": 383000,
    "batch_size": 32,
    "learning_rate": 0.0001618485895154401,
    "gradient_clipping_global_norm": 143.48898703358634,
    "focal_loss_gamma": 4.755773455300122,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 256,
    "self_attention_num_heads": 8,
    "num_transformer_blocks": 10
  },
  {
    "model": "GREAT dot-product",
    "task": "Last write",
    "example_count": 100,
    "total_runtime_hours": 24.00003616666735,
    "total_iterations": 383965,
    "early_stopping_runtime_hours": 19.13927330416455,
    "early_stopping_iterations": 306200,
    "batch_size": 128,
    "learning_rate": 0.001407227700934535,
    "gradient_clipping_global_norm": 6254.111259415992,
    "focal_loss_gamma": 1.2112037409404715,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 128,
    "self_attention_num_heads": 16,
    "num_transformer_blocks": 8
  },
  {
    "model": "GREAT dot-product",
    "task": "Next control flow",
    "example_count": 100000,
    "total_runtime_hours": 24.000077421111055,
    "total_iterations": 162622,
    "early_stopping_runtime_hours": 19.539854697120337,
    "early_stopping_iterations": 132400,
    "batch_size": 32,
    "learning_rate": 0.0007823667372818319,
    "gradient_clipping_global_norm": 3450.8760010008264,
    "focal_loss_gamma": 1.501318142627962,
    "position_embeddings": true,
    "edge_embedding_vectors": true,
    "hidden_dim": 128,
    "self_attention_num_heads": 16,
    "num_transformer_blocks": 12
  },
  {
    "model": "GREAT dot-product",
    "task": "Next control flow",
    "example_count": 100,
    "total_runtime_hours": 13.385535668611436,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 10.531739464063477,
    "early_stopping_iterations": 393400,
    "batch_size": 32,
    "learning_rate": 0.0001618485895154401,
    "gradient_clipping_global_norm": 143.48898703358634,
    "focal_loss_gamma": 4.755773455300122,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 256,
    "self_attention_num_heads": 8,
    "num_transformer_blocks": 10
  },
  {
    "model": "GREAT dot-product",
    "task": "Last read",
    "example_count": 100000,
    "total_runtime_hours": 12.753805890833695,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 11.687587718359998,
    "early_stopping_iterations": 458200,
    "batch_size": 32,
    "learning_rate": 0.00019982792226978148,
    "gradient_clipping_global_norm": 4101.626386852094,
    "focal_loss_gamma": 4.7185585104533345,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 512,
    "self_attention_num_heads": 4,
    "num_transformer_blocks": 10
  },
  {
    "model": "GREAT nodewise",
    "task": "Last read",
    "example_count": 100,
    "total_runtime_hours": 24.01161154694472,
    "total_iterations": 43902,
    "early_stopping_runtime_hours": 11.376281722391923,
    "early_stopping_iterations": 20800,
    "batch_size": 8,
    "learning_rate": 0.0014350366245941821,
    "gradient_clipping_global_norm": 4.866965923645439,
    "focal_loss_gamma": 4.399864421796455,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 64,
    "self_attention_num_heads": 8,
    "num_transformer_blocks": 12
  },
  {
    "model": "GREAT nodewise",
    "task": "Last write",
    "example_count": 100000,
    "total_runtime_hours": 24.000015794999587,
    "total_iterations": 112456,
    "early_stopping_runtime_hours": 23.326471921404412,
    "early_stopping_iterations": 109300,
    "batch_size": 8,
    "learning_rate": 0.00039429308644436717,
    "gradient_clipping_global_norm": 307.32251074165004,
    "focal_loss_gamma": 3.4648118372703025,
    "position_embeddings": false,
    "edge_embedding_vectors": true,
    "hidden_dim": 64,
    "self_attention_num_heads": 2,
    "num_transformer_blocks": 12
  },
  {
    "model": "GREAT nodewise",
    "task": "Last write",
    "example_count": 100,
    "total_runtime_hours": 24.004897312499804,
    "total_iterations": 43802,
    "early_stopping_runtime_hours": 6.850399899690597,
    "early_stopping_iterations": 12500,
    "batch_size": 8,
    "learning_rate": 0.0014350366245941821,
    "gradient_clipping_global_norm": 4.866965923645439,
    "focal_loss_gamma": 4.399864421796455,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 64,
    "self_attention_num_heads": 8,
    "num_transformer_blocks": 12
  },
  {
    "model": "GREAT nodewise",
    "task": "Next control flow",
    "example_count": 100000,
    "total_runtime_hours": 24.00001596138965,
    "total_iterations": 111618,
    "early_stopping_runtime_hours": 22.36199949815015,
    "early_stopping_iterations": 104000,
    "batch_size": 8,
    "learning_rate": 0.00039429308644436717,
    "gradient_clipping_global_norm": 307.32251074165004,
    "focal_loss_gamma": 3.4648118372703025,
    "position_embeddings": false,
    "edge_embedding_vectors": true,
    "hidden_dim": 64,
    "self_attention_num_heads": 2,
    "num_transformer_blocks": 12
  },
  {
    "model": "GREAT nodewise",
    "task": "Next control flow",
    "example_count": 100,
    "total_runtime_hours": 24.00001211944483,
    "total_iterations": 227885,
    "early_stopping_runtime_hours": 0.20010102914603936,
    "early_stopping_iterations": 1900,
    "batch_size": 8,
    "learning_rate": 0.0033002960515334033,
    "gradient_clipping_global_norm": 832.410414675304,
    "focal_loss_gamma": 3.775433518905526,
    "position_embeddings": false,
    "edge_embedding_vectors": false,
    "hidden_dim": 64,
    "self_attention_num_heads": 2,
    "num_transformer_blocks": 7
  },
  {
    "model": "GREAT nodewise",
    "task": "Last read",
    "example_count": 100000,
    "total_runtime_hours": 24.000049997499858,
    "total_iterations": 111667,
    "early_stopping_runtime_hours": 22.56714382707053,
    "early_stopping_iterations": 105000,
    "batch_size": 8,
    "learning_rate": 0.00039429308644436717,
    "gradient_clipping_global_norm": 307.32251074165004,
    "focal_loss_gamma": 3.4648118372703025,
    "position_embeddings": false,
    "edge_embedding_vectors": true,
    "hidden_dim": 64,
    "self_attention_num_heads": 2,
    "num_transformer_blocks": 12
  },
  {
    "model": "NRI encoder",
    "task": "Last read",
    "example_count": 100,
    "total_runtime_hours": 24.00000652638885,
    "total_iterations": 176021,
    "early_stopping_runtime_hours": 11.33046933231213,
    "early_stopping_iterations": 83100,
    "batch_size": 8,
    "learning_rate": 0.00034813505814098455,
    "gradient_clipping_global_norm": 1.0237963219441952,
    "focal_loss_gamma": 2.7903563351095024,
    "position_embeddings": true,
    "edge_embedding_vectors": true,
    "non_adjacent_nodes_communicate": 0,
    "node_mlp_hidden_dim": 128,
    "intermediate_pairwise_mlp_hidden_dim": 64,
    "edge_embedding_dim": 32,
    "mlp_depth": 4,
    "num_nri_blocks": 6
  },
  {
    "model": "NRI encoder",
    "task": "Last write",
    "example_count": 100000,
    "total_runtime_hours": 24.000049860556366,
    "total_iterations": 226302,
    "early_stopping_runtime_hours": 23.734704769699402,
    "early_stopping_iterations": 223800,
    "batch_size": 8,
    "learning_rate": 8.678758081665105e-05,
    "gradient_clipping_global_norm": 3.152803100101206,
    "focal_loss_gamma": 2.618027872195009,
    "position_embeddings": true,
    "edge_embedding_vectors": true,
    "non_adjacent_nodes_communicate": 1,
    "node_mlp_hidden_dim": 512,
    "intermediate_pairwise_mlp_hidden_dim": 32,
    "edge_embedding_dim": 16,
    "mlp_depth": 4,
    "num_nri_blocks": 12
  },
  {
    "model": "NRI encoder",
    "task": "Last write",
    "example_count": 100,
    "total_runtime_hours": 24.000042543888576,
    "total_iterations": 173052,
    "early_stopping_runtime_hours": 4.035788306561944,
    "early_stopping_iterations": 29100,
    "batch_size": 8,
    "learning_rate": 0.00034813505814098455,
    "gradient_clipping_global_norm": 1.0237963219441952,
    "focal_loss_gamma": 2.7903563351095024,
    "position_embeddings": true,
    "edge_embedding_vectors": true,
    "non_adjacent_nodes_communicate": 0,
    "node_mlp_hidden_dim": 128,
    "intermediate_pairwise_mlp_hidden_dim": 64,
    "edge_embedding_dim": 32,
    "mlp_depth": 4,
    "num_nri_blocks": 6
  },
  {
    "model": "NRI encoder",
    "task": "Next control flow",
    "example_count": 100000,
    "total_runtime_hours": 22.150049171388588,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 15.735394931354453,
    "early_stopping_iterations": 355200,
    "batch_size": 32,
    "learning_rate": 0.0007474176403056443,
    "gradient_clipping_global_norm": 1038.7682755461644,
    "focal_loss_gamma": 1.3734775125616188,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "non_adjacent_nodes_communicate": 0,
    "node_mlp_hidden_dim": 128,
    "intermediate_pairwise_mlp_hidden_dim": 16,
    "edge_embedding_dim": 64,
    "mlp_depth": 1,
    "num_nri_blocks": 10
  },
  {
    "model": "NRI encoder",
    "task": "Next control flow",
    "example_count": 100,
    "total_runtime_hours": 24.00000931416625,
    "total_iterations": 98955,
    "early_stopping_runtime_hours": 0.5335760748943029,
    "early_stopping_iterations": 2200,
    "batch_size": 32,
    "learning_rate": 0.0005244208199380201,
    "gradient_clipping_global_norm": 644.1675604470992,
    "focal_loss_gamma": 4.072031333093574,
    "position_embeddings": false,
    "edge_embedding_vectors": true,
    "non_adjacent_nodes_communicate": 0,
    "node_mlp_hidden_dim": 256,
    "intermediate_pairwise_mlp_hidden_dim": 32,
    "edge_embedding_dim": 32,
    "mlp_depth": 1,
    "num_nri_blocks": 11
  },
  {
    "model": "NRI encoder",
    "task": "Last read",
    "example_count": 100000,
    "total_runtime_hours": 24.000003346944347,
    "total_iterations": 224433,
    "early_stopping_runtime_hours": 23.975087221508968,
    "early_stopping_iterations": 224200,
    "batch_size": 8,
    "learning_rate": 8.678758081665105e-05,
    "gradient_clipping_global_norm": 3.152803100101206,
    "focal_loss_gamma": 2.618027872195009,
    "position_embeddings": true,
    "edge_embedding_vectors": true,
    "non_adjacent_nodes_communicate": 1,
    "node_mlp_hidden_dim": 512,
    "intermediate_pairwise_mlp_hidden_dim": 32,
    "edge_embedding_dim": 16,
    "mlp_depth": 4,
    "num_nri_blocks": 12
  },
  {
    "model": "RAT dot-product",
    "task": "Last read",
    "example_count": 100000,
    "total_runtime_hours": 9.329037731388969,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 9.019313678706855,
    "early_stopping_iterations": 483400,
    "batch_size": 8,
    "learning_rate": 0.00038607742227550544,
    "gradient_clipping_global_norm": 1.3427736710377893,
    "focal_loss_gamma": 3.850710668791298,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 256,
    "self_attention_num_heads": 8,
    "num_transformer_blocks": 11
  },
  {
    "model": "RAT dot-product",
    "task": "Last write",
    "example_count": 100000,
    "total_runtime_hours": 13.122546433055236,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 11.73155651115138,
    "early_stopping_iterations": 447000,
    "batch_size": 32,
    "learning_rate": 0.00019982792226978148,
    "gradient_clipping_global_norm": 4101.626386852094,
    "focal_loss_gamma": 4.7185585104533345,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 512,
    "self_attention_num_heads": 4,
    "num_transformer_blocks": 10
  },
  {
    "model": "RAT dot-product",
    "task": "Last write",
    "example_count": 100,
    "total_runtime_hours": 24.000004796111217,
    "total_iterations": 393649,
    "early_stopping_runtime_hours": 15.998011575031521,
    "early_stopping_iterations": 262400,
    "batch_size": 128,
    "learning_rate": 0.001407227700934535,
    "gradient_clipping_global_norm": 6254.111259415992,
    "focal_loss_gamma": 1.2112037409404715,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 128,
    "self_attention_num_heads": 16,
    "num_transformer_blocks": 8
  },
  {
    "model": "RAT dot-product",
    "task": "Next control flow",
    "example_count": 100000,
    "total_runtime_hours": 24.00002736611144,
    "total_iterations": 359666,
    "early_stopping_runtime_hours": 5.591861041299813,
    "early_stopping_iterations": 83800,
    "batch_size": 128,
    "learning_rate": 0.001407227700934535,
    "gradient_clipping_global_norm": 6254.111259415992,
    "focal_loss_gamma": 1.2112037409404715,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 128,
    "self_attention_num_heads": 16,
    "num_transformer_blocks": 8
  },
  {
    "model": "RAT dot-product",
    "task": "Next control flow",
    "example_count": 100,
    "total_runtime_hours": 12.713485717499978,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 9.024032162281486,
    "early_stopping_iterations": 354900,
    "batch_size": 32,
    "learning_rate": 0.0001618485895154401,
    "gradient_clipping_global_norm": 143.48898703358634,
    "focal_loss_gamma": 4.755773455300122,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 256,
    "self_attention_num_heads": 8,
    "num_transformer_blocks": 10
  },
  {
    "model": "RAT dot-product",
    "task": "Last read",
    "example_count": 100,
    "total_runtime_hours": 12.94303141166652,
    "total_iterations": 500000,
    "early_stopping_runtime_hours": 9.199906727412563,
    "early_stopping_iterations": 355400,
    "batch_size": 32,
    "learning_rate": 0.00047229549282045666,
    "gradient_clipping_global_norm": 2650.732349211069,
    "focal_loss_gamma": 1.3271823499083468,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 128,
    "self_attention_num_heads": 8,
    "num_transformer_blocks": 11
  },
  {
    "model": "RAT nodewise",
    "task": "Last write",
    "example_count": 100000,
    "total_runtime_hours": 24.00006503166703,
    "total_iterations": 110611,
    "early_stopping_runtime_hours": 23.86749196267436,
    "early_stopping_iterations": 110000,
    "batch_size": 8,
    "learning_rate": 0.00039429308644436717,
    "gradient_clipping_global_norm": 307.32251074165004,
    "focal_loss_gamma": 3.4648118372703025,
    "position_embeddings": false,
    "edge_embedding_vectors": true,
    "hidden_dim": 64,
    "self_attention_num_heads": 2,
    "num_transformer_blocks": 12
  },
  {
    "model": "RAT nodewise",
    "task": "Last read",
    "example_count": 100000,
    "total_runtime_hours": 24.002729125277686,
    "total_iterations": 111502,
    "early_stopping_runtime_hours": 22.538481277614515,
    "early_stopping_iterations": 104700,
    "batch_size": 8,
    "learning_rate": 0.00039429308644436717,
    "gradient_clipping_global_norm": 307.32251074165004,
    "focal_loss_gamma": 3.4648118372703025,
    "position_embeddings": false,
    "edge_embedding_vectors": true,
    "hidden_dim": 64,
    "self_attention_num_heads": 2,
    "num_transformer_blocks": 12
  },
  {
    "model": "RAT nodewise",
    "task": "Last write",
    "example_count": 100,
    "total_runtime_hours": 24.000304463611076,
    "total_iterations": 44013,
    "early_stopping_runtime_hours": 10.687886930833551,
    "early_stopping_iterations": 19600,
    "batch_size": 8,
    "learning_rate": 0.0014350366245941821,
    "gradient_clipping_global_norm": 4.866965923645439,
    "focal_loss_gamma": 4.399864421796455,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 64,
    "self_attention_num_heads": 8,
    "num_transformer_blocks": 12
  },
  {
    "model": "RAT nodewise",
    "task": "Next control flow",
    "example_count": 100000,
    "total_runtime_hours": 24.000018657777428,
    "total_iterations": 111662,
    "early_stopping_runtime_hours": 12.380228499292327,
    "early_stopping_iterations": 57600,
    "batch_size": 8,
    "learning_rate": 0.00039429308644436717,
    "gradient_clipping_global_norm": 307.32251074165004,
    "focal_loss_gamma": 3.4648118372703025,
    "position_embeddings": false,
    "edge_embedding_vectors": true,
    "hidden_dim": 64,
    "self_attention_num_heads": 2,
    "num_transformer_blocks": 12
  },
  {
    "model": "RAT nodewise",
    "task": "Next control flow",
    "example_count": 100,
    "total_runtime_hours": 24.00236034888893,
    "total_iterations": 112002,
    "early_stopping_runtime_hours": 0.7714906631667305,
    "early_stopping_iterations": 3600,
    "batch_size": 8,
    "learning_rate": 0.00039429308644436717,
    "gradient_clipping_global_norm": 307.32251074165004,
    "focal_loss_gamma": 3.4648118372703025,
    "position_embeddings": false,
    "edge_embedding_vectors": true,
    "hidden_dim": 64,
    "self_attention_num_heads": 2,
    "num_transformer_blocks": 12
  },
  {
    "model": "RAT nodewise",
    "task": "Last read",
    "example_count": 100,
    "total_runtime_hours": 24.000376787222102,
    "total_iterations": 43863,
    "early_stopping_runtime_hours": 18.65838744372874,
    "early_stopping_iterations": 34100,
    "batch_size": 8,
    "learning_rate": 0.0014350366245941821,
    "gradient_clipping_global_norm": 4.866965923645439,
    "focal_loss_gamma": 4.399864421796455,
    "position_embeddings": true,
    "edge_embedding_vectors": false,
    "hidden_dim": 64,
    "self_attention_num_heads": 8,
    "num_transformer_blocks": 12
  },
  {
    "model": "RL ablation",
    "task": "Last read",
    "example_count": 100000,
    "total_runtime_hours": 24.00017837916657,
    "total_iterations": 94196,
    "early_stopping_runtime_hours": 18.217469469090087,
    "early_stopping_iterations": 71500,
    "batch_size": 32,
    "learning_rate": 0.003079090145456217,
    "gradient_clipping_global_norm": 3306.5959488045933,
    "focal_loss_gamma": 0.9988745003980543,
    "num_noninitial_gfsa_states": 3,
    "epsilon_backtrack_is_stop": 0.01583127600080948
  },
  {
    "model": "RL ablation",
    "task": "Last write",
    "example_count": 100000,
    "total_runtime_hours": 24.000479268888803,
    "total_iterations": 46533,
    "early_stopping_runtime_hours": 23.98345874977606,
    "early_stopping_iterations": 46500,
    "batch_size": 32,
    "learning_rate": 0.000979117859628248,
    "gradient_clipping_global_norm": 6771.935382440451,
    "focal_loss_gamma": 2.0170464782134245,
    "num_noninitial_gfsa_states": 7,
    "epsilon_backtrack_is_stop": 0.01668339675626595
  },
  {
    "model": "RL ablation",
    "task": "Next control flow",
    "example_count": 100000,
    "total_runtime_hours": 24.000043529723037,
    "total_iterations": 335559,
    "early_stopping_runtime_hours": 6.458488464723016,
    "early_stopping_iterations": 90300,
    "batch_size": 8,
    "learning_rate": 0.0018924367847043698,
    "gradient_clipping_global_norm": 12.421475431183254,
    "focal_loss_gamma": 0.24583429046194816,
    "num_noninitial_gfsa_states": 1,
    "epsilon_backtrack_is_stop": 0.01848010322649033
  },
  {
    "model": "RL ablation",
    "task": "Next control flow",
    "example_count": 100,
    "total_runtime_hours": 24.000010766944506,
    "total_iterations": 122457,
    "early_stopping_runtime_hours": 19.99069957804241,
    "early_stopping_iterations": 102000,
    "batch_size": 32,
    "learning_rate": 0.004259328715678222,
    "gradient_clipping_global_norm": 1.6906269217163914,
    "focal_loss_gamma": 0.7753071424756763,
    "num_noninitial_gfsa_states": 3,
    "epsilon_backtrack_is_stop": 0.01938006259996452
  },
  {
    "model": "RL ablation",
    "task": "Last read",
    "example_count": 100,
    "total_runtime_hours": 24.000380687222265,
    "total_iterations": 45772,
    "early_stopping_runtime_hours": 18.876468250021883,
    "early_stopping_iterations": 36000,
    "batch_size": 32,
    "learning_rate": 0.001361140700599731,
    "gradient_clipping_global_norm": 175.34959559230438,
    "focal_loss_gamma": 4.557898024899243,
    "num_noninitial_gfsa_states": 7,
    "epsilon_backtrack_is_stop": 0.001418816234818704
  },
  {
    "model": "RL ablation",
    "task": "Last write",
    "example_count": 100,
    "total_runtime_hours": 24.000039262221758,
    "total_iterations": 150947,
    "early_stopping_runtime_hours": 23.944867489188898,
    "early_stopping_iterations": 150600,
    "batch_size": 8,
    "learning_rate": 0.008255888448858419,
    "gradient_clipping_global_norm": 3775.4751129885026,
    "focal_loss_gamma": 1.2016433696714341,
    "num_noninitial_gfsa_states": 7,
    "epsilon_backtrack_is_stop": 0.08188191139973396
  }
]