# Activations @ <https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity>

torch.nn.ReLU
torch.nn.ELU 
torch.nn.PReLU 
torch.nn.LeakyReLU
torch.nn.Sigmoid  
torch.nn.Tanh 
torch.nn.Softmax

# RReLU, SELU, CELU, GELU, ReLU6,
# Threshold, Hardshrink, Hardtanh, LogSigmoid, Softplus, Softshrink,
# Softsign, Tanhshrink, Softmin, Softmax2d, LogSoftmax, AdaptiveLogSoftmaxWithLoss

torch.nn.modules.activation.Softmax
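
All of these activations are nn.Module subclasses: instantiate one and call it like a layer (or place it inside nn.Sequential). A minimal sketch, with an illustrative tensor x that is not part of the notes above:

import torch
from torch import nn

x = torch.tensor([[-1.0, 0.5, 2.0]])

relu = nn.ReLU()
softmax = nn.Softmax(dim=1)  # normalize across the last dimension

print(relu(x))     # tensor([[0.0000, 0.5000, 2.0000]])
print(softmax(x))  # each row sums to 1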

# Optimizers @ <https://pytorch.org/docs/stable/optim.html>

torch.optim.SGD
torch.optim.Adam
torch.optim.RAdam # Rectified Adam (also available in TensorFlow Addons)
torch.optim.AdamW
torch.optim.Adagrad
torch.optim.Adadelta
torch.optim.RMSprop

# SparseAdam, Adamax, ASGD, LBFGS, Rprop

torch.optim.rmsprop.RMSprop
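
Every optimizer is constructed from an iterable of parameters (usually model.parameters()) plus its hyperparameters such as lr, momentum, or weight_decay. A minimal sketch; the nn.Linear model here is illustrative only:

import torch
from torch import nn

model = nn.Linear(3, 1)  # any nn.Module works; used here only for illustration

sgd = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
adam = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)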

torch.manual_seed(42) # "Manually" set the random "seed" so nn.Parameter initialization is reproducible

model = LinearRegressionModel() # Initialize the model
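
LinearRegressionModel is assumed to be defined earlier in the notebook. For context, a minimal sketch of one possible definition (a single weight and bias registered as nn.Parameter so the optimizer can update them; illustrative, not necessarily the exact class used here):

import torch
from torch import nn

class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(1))  # random init, reproducible thanks to manual_seed
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.weights * x + self.bias  # y = w * x + b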

loss_fn = nn.MSELoss() # Mean squared error

optimizer = torch.optim.Adam(params=model.parameters(), # "parameters" to optimize (apply gradient descent)
                             lr=0.01) # "l"earning "r"ate 
train_losses = []
test_losses = []

for epoch in range(100): # epochs == 100

    # Put the model in training mode (the default state)
    model.train() 
    
    y_predicted = model(X_train) # 1. Forward pass (feed the training data through the model)
    
    train_loss = loss_fn(y_predicted, y_train) # 2. Compute the training loss (cost function)
    
    optimizer.zero_grad() # 3. Clear the gradients left over from the previous step (make "grad" "zero")
    
    train_loss.backward() # 4. Back-propagation ("backward" pass computes the gradients)

    optimizer.step() # 5. Gradient descent (take a "step" to update the parameters)

    
    # Put the model in evaluation mode
    model.eval()

    with torch.inference_mode(): # Set "inference mode"
        
        y_predicted = model(X_test) # Forward pass (feed the test data through the model)
        
        test_loss = loss_fn(y_predicted, y_test.type(torch.float)) # Compute the test loss (y_test is type-cast to the same dtype)

        if epoch % 10 == 0:
            train_losses.append(train_loss.detach().numpy()) # detach(): creates a tensor cut off from the graph, so no gradients propagate through it (no backward(), <https://bit.ly/3g9dTNt>)
            test_losses.append(test_loss.detach().numpy())
            print(f"Epoch: {epoch} | Train MSE : {train_loss} | Test MSE : {test_loss}")

Epoch: 0 | Train MSE : 0.13457220792770386 | Test MSE : 0.11861804127693176
Epoch: 10 | Train MSE : 0.047810107469558716 | Test MSE : 0.03970817103981972
Epoch: 20 | Train MSE : 0.008682653307914734 | Test MSE : 0.0063242255710065365
Epoch: 30 | Train MSE : 0.0012723570689558983 | Test MSE : 0.0012832541251555085
Epoch: 40 | Train MSE : 0.002298719482496381 | Test MSE : 0.0024113906547427177
Epoch: 50 | Train MSE : 0.0017212443053722382 | Test MSE : 0.0016611091559752822
Epoch: 60 | Train MSE : 0.000906708708498627 | Test MSE : 0.0008533037616871297
Epoch: 70 | Train MSE : 0.0007432327256537974 | Test MSE : 0.0006821436109021306
Epoch: 80 | Train MSE : 0.000636931334156543 | Test MSE : 0.0005742007051594555
Epoch: 90 | Train MSE : 0.0004997773794457316 | Test MSE : 0.0004610447504092008
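
Step 3 in the loop (optimizer.zero_grad()) matters because autograd accumulates gradients into .grad across backward() calls instead of overwriting them. A standalone sketch with a made-up parameter w:

import torch

w = torch.ones(1, requires_grad=True)

(w * 2).sum().backward()
print(w.grad)      # tensor([2.])

(w * 2).sum().backward()
print(w.grad)      # tensor([4.]) accumulated, not replaced

w.grad.zero_()     # this is what optimizer.zero_grad() does for every parameter
print(w.grad)      # tensor([0.])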

import numpy as np
import matplotlib.pyplot as plt

x_values = np.arange(len(train_losses)) # x-axis values: one point per logged epoch (every 10th epoch)

plt.figure(figsize=(10, 7), dpi=100)

plt.plot(x_values, train_losses, label="Train loss")
plt.plot(x_values, test_losses, label="Test loss")

plt.title("Train loss vs Test loss")
plt.ylabel("Loss")
plt.xlabel("Epochs")
plt.legend()

plt.show()

[Plot: "Train loss vs Test loss" (train and test MSE curves decreasing over the logged epochs)]

# 1. Put the model in evaluation mode
model.eval()

# 2. Enter inference mode
with torch.inference_mode():
    
    # 3. The model and the data must live on the same device (GPU or CPU)
    y_predicted = model(X_test) # Forward pass (feed the test data through the model)
    
    # # When using a GPU:
    # model.to(device)
    # X_test = X_test.to(device)
    # y_predicted = model(X_test)
y_predicted

tensor([[0.5025],
        [0.8317],
        [0.7177],
        [0.9076],
        [0.5532],
        [0.9456],
        [0.6671],
        [0.6544],
        [0.7431],
        [0.5785],
        [0.4899],
        [0.3886],
        [0.8064],
        [0.4392],
        [0.3759]])
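
Inside torch.inference_mode() no autograd graph is recorded, so predictions made there cannot be back-propagated through. A quick illustrative check, reusing the model and X_test from above:

with torch.inference_mode():
    out = model(X_test)

print(out.requires_grad)  # False: no computation graph was built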