REST has been the default for APIs for over a decade, but it wasn't designed for microservices talking to each other millions of times per second. gRPC (officially a recursive acronym for "gRPC Remote Procedure Calls", though originally developed at Google) was built exactly for this — high-throughput, low-latency, strongly-typed communication between services. It's used by Google, Netflix, Slack, Square, and most serious microservice architectures.
What is gRPC?
gRPC is an open-source RPC (Remote Procedure Call) framework that uses HTTP/2 for transport and Protocol Buffers (protobuf) for serialization. Instead of sending JSON over HTTP/1.1 like REST, gRPC sends compact binary data over multiplexed HTTP/2 connections.
How gRPC Works
Step 1: Define Your Service (.proto)
Everything in gRPC starts with a .proto file — the contract between client and server:
// user_service.proto
syntax = "proto3";

package users;

// FIX: this import was missing. The User message below uses
// google.protobuf.Timestamp, and without importing the well-known type
// protoc fails with: "google.protobuf.Timestamp" is not defined.
import "google/protobuf/timestamp.proto";

// Service definition — like a REST controller
service UserService {
  // Unary RPC (request-response, like a normal REST call)
  rpc GetUser (GetUserRequest) returns (User);
  rpc CreateUser (CreateUserRequest) returns (User);
  rpc ListUsers (ListUsersRequest) returns (ListUsersResponse);

  // Server streaming (server sends multiple responses)
  rpc WatchUsers (WatchRequest) returns (stream UserEvent);

  // Client streaming (client sends multiple requests)
  rpc UploadUsers (stream CreateUserRequest) returns (UploadSummary);

  // Bidirectional streaming (both send multiple messages)
  rpc Chat (stream ChatMessage) returns (stream ChatMessage);
}

// Message definitions — like JSON schemas, but typed and compact
message User {
  string id = 1;
  string name = 2;
  string email = 3;
  int32 age = 4;
  Department department = 5;
  repeated string roles = 6; // Array of strings
  google.protobuf.Timestamp created_at = 7;
}

message GetUserRequest {
  string id = 1;
}

message CreateUserRequest {
  string name = 1;
  string email = 2;
  int32 age = 3;
  Department department = 4;
}

message ListUsersRequest {
  int32 page_size = 1;
  string page_token = 2;
  string filter = 3; // e.g., "department=ENGINEERING"
}

message ListUsersResponse {
  repeated User users = 1;
  string next_page_token = 2;
  int32 total_count = 3;
}

enum Department {
  UNKNOWN = 0; // proto3 enums must have a zero value; it is the default
  ENGINEERING = 1;
  MARKETING = 2;
  SALES = 3;
  PRODUCT = 4;
}

message UserEvent {
  string event_type = 1; // "created", "updated", "deleted"
  User user = 2;
}

message WatchRequest {
  repeated string departments = 1;
}

message UploadSummary {
  int32 created = 1;
  int32 failed = 2;
}

message ChatMessage {
  string sender = 1;
  string content = 2;
}
Step 2: Generate Code
# Install protobuf compiler and gRPC plugins

# Python
pip install grpcio grpcio-tools
python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. user_service.proto
# Generates: user_service_pb2.py (messages) + user_service_pb2_grpc.py (service stubs)

# Go
# NOTE: protoc-gen-go requires the .proto file to declare
# `option go_package = "...";` (or pass M-flags) or generation fails.
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
protoc --go_out=. --go-grpc_out=. user_service.proto

# Node.js / TypeScript (dynamic loading at runtime — no codegen step)
npm install @grpc/grpc-js @grpc/proto-loader
# Or use static code generation.
# FIX: the grpc_tools_node_protoc binary comes from the grpc-tools package,
# which the original forgot to install; grpc_tools_node_protoc_ts only adds
# the TypeScript output plugin.
npm install grpc-tools grpc_tools_node_protoc_ts
grpc_tools_node_protoc --js_out=. --grpc_out=. --ts_out=. user_service.proto
Step 3: Implement the Server (Python)
import grpc
from concurrent import futures
import user_service_pb2 as pb2
import user_service_pb2_grpc as pb2_grpc
# In-memory "database": maps user id (str) -> pb2.User message.
# Demo only — contents are lost when the process exits.
users_db = {}
class UserServicer(pb2_grpc.UserServiceServicer):
    """Implements the UserService RPCs against the in-memory users_db dict."""

    def GetUser(self, request, context):
        """Unary RPC: look up a user by id; NOT_FOUND if absent."""
        user = users_db.get(request.id)
        if not user:
            context.set_code(grpc.StatusCode.NOT_FOUND)
            context.set_details(f"User {request.id} not found")
            # Empty message; the status code carries the error to the client.
            return pb2.User()
        return user

    def CreateUser(self, request, context):
        """Unary RPC: store a new user under a server-generated UUID id."""
        import uuid
        user_id = str(uuid.uuid4())
        user = pb2.User(
            id=user_id,
            name=request.name,
            email=request.email,
            age=request.age,
            department=request.department,
        )
        users_db[user_id] = user
        return user

    def ListUsers(self, request, context):
        """Unary RPC: return one page of users.

        FIX: the original declared page_size/page_token in the .proto but
        ignored them and never set next_page_token — every call returned the
        whole table. Here page_token is the stringified offset of the page;
        an empty next_page_token signals the last page. page_size == 0 (the
        proto3 default) keeps the old return-everything behavior, so existing
        callers are unaffected. `filter` is still not implemented.
        """
        all_users = list(users_db.values())
        total = len(all_users)
        # Decode the opaque token; an empty token means "start at the top".
        try:
            offset = int(request.page_token) if request.page_token else 0
        except ValueError:
            context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
            context.set_details(f"Malformed page_token: {request.page_token!r}")
            return pb2.ListUsersResponse()
        if request.page_size > 0:
            page = all_users[offset:offset + request.page_size]
            next_offset = offset + request.page_size
            next_token = str(next_offset) if next_offset < total else ""
        else:
            page = all_users[offset:]
            next_token = ""
        return pb2.ListUsersResponse(
            users=page,
            next_page_token=next_token,
            total_count=total,
        )

    # Server streaming — push events to the client
    def WatchUsers(self, request, context):
        """Server-streaming RPC: yield one event per second while the client
        stays connected. Heartbeats stand in for real user events."""
        import time
        while context.is_active():
            # In production, listen to a message queue instead of sleeping.
            time.sleep(1)
            yield pb2.UserEvent(
                event_type="heartbeat",
                user=pb2.User(name="system"),
            )
def serve():
    """Start the gRPC server on port 50051 and block until it terminates."""
    thread_pool = futures.ThreadPoolExecutor(max_workers=10)
    grpc_server = grpc.server(thread_pool)
    # Register our servicer implementation with the generated service wiring.
    pb2_grpc.add_UserServiceServicer_to_server(UserServicer(), grpc_server)
    grpc_server.add_insecure_port("[::]:50051")
    grpc_server.start()
    print("gRPC server running on port 50051")
    grpc_server.wait_for_termination()


if __name__ == "__main__":
    serve()
Step 4: Use the Client
import grpc
import user_service_pb2 as pb2
import user_service_pb2_grpc as pb2_grpc

# Create a channel and stub (auto-generated client).
# insecure_channel = plaintext, no TLS — fine for localhost demos only.
channel = grpc.insecure_channel("localhost:50051")
stub = pb2_grpc.UserServiceStub(channel)

# Create a user — feels like calling a local function!
# Each stub call is a blocking unary RPC; gRPC errors raise grpc.RpcError.
user = stub.CreateUser(pb2.CreateUserRequest(
    name="Alice",
    email="alice@example.com",
    age=30,
    department=pb2.ENGINEERING,
))
print(f"Created user: {user.id} - {user.name}")

# Get a user (round-trips the id the server just generated)
user = stub.GetUser(pb2.GetUserRequest(id=user.id))
print(f"Got user: {user.name}, {user.email}")

# List all users
response = stub.ListUsers(pb2.ListUsersRequest(page_size=10))
for u in response.users:
    print(f" - {u.name} ({u.email})")

# Server streaming — watch for events.
# The stub returns an iterator; each `for` step blocks until the server
# yields the next message.
for event in stub.WatchUsers(pb2.WatchRequest(departments=["ENGINEERING"])):
    print(f"Event: {event.event_type} - {event.user.name}")
    break  # Stop after first event for demo
The 4 Types of gRPC Communication
| Pattern | Description | Use Case |
|---|---|---|
| Unary | Client sends 1 request, server returns 1 response | CRUD operations, auth |
| Server streaming | Client sends 1 request, server streams N responses | Real-time feeds, logs, events |
| Client streaming | Client streams N requests, server returns 1 response | File upload, batch inserts |
| Bidirectional | Both sides stream simultaneously | Chat, multiplayer, live collab |
Performance: Why gRPC is Faster
- Binary serialization: Protobuf is 3-10x smaller than JSON and 5-20x faster to serialize/deserialize.
- HTTP/2 multiplexing: Multiple RPCs share a single TCP connection, eliminating HTTP-level head-of-line blocking (TCP-level head-of-line blocking can still occur on lossy networks).
- Header compression (HPACK): HTTP/2 compresses headers, reducing overhead for frequent calls.
- Streaming: Long-lived connections for real-time data — no polling, no WebSocket hacks.
- Code generation: Generated stubs are optimized for each language — no reflection, no runtime parsing.
Interceptors (Middleware for gRPC)
# gRPC interceptors work like Express middleware or Django middleware
class AuthInterceptor(grpc.ServerInterceptor):
    """Rejects any RPC whose metadata lacks a 'Bearer ...' authorization value."""
    def intercept_service(self, continuation, handler_call_details):
        # Extract metadata (like HTTP headers)
        metadata = dict(handler_call_details.invocation_metadata)
        token = metadata.get("authorization", "")
        if not token.startswith("Bearer "):
            # Replace the real handler with one that aborts the call.
            # NOTE(review): this always builds a unary-unary handler, but the
            # service also defines streaming RPCs — presumably those would be
            # mishandled here; confirm before reuse.
            return grpc.unary_unary_rpc_method_handler(
                lambda req, ctx: self._unauthenticated(ctx)
            )
        # Validate token...
        # Token present: hand off to the next interceptor / the real handler.
        return continuation(handler_call_details)
    def _unauthenticated(self, context):
        # context.abort raises, ending the RPC with UNAUTHENTICATED status.
        context.abort(grpc.StatusCode.UNAUTHENTICATED, "Invalid token")
class LoggingInterceptor(grpc.ServerInterceptor):
    """Prints the fully-qualified method name of every incoming RPC,
    then passes the call through unchanged."""
    def intercept_service(self, continuation, handler_call_details):
        print(f"gRPC call: {handler_call_details.method}")
        return continuation(handler_call_details)
# Add interceptors to the server.
# Every RPC this server handles passes through the interceptors list;
# auth is listed before logging here.
server = grpc.server(
    futures.ThreadPoolExecutor(max_workers=10),
    interceptors=[AuthInterceptor(), LoggingInterceptor()],
)
When to Use gRPC vs REST
gRPC isn't a replacement for REST — it's a complement. Use REST for public APIs where simplicity and browser compatibility matter (browsers can't speak native gRPC directly; reaching them requires gRPC-Web plus a proxy). Use gRPC for internal service-to-service communication where performance, type safety, and streaming are critical. Many companies (including Google) use both: REST at the edge, gRPC between services.