Response metadata
Many model providers include some metadata in their chat generation responses. This metadata can be accessed via the AIMessage.response_metadata: Dict attribute. Depending on the model provider and model configuration, this can contain information like token counts, logprobs, and more.
Here's what the response metadata looks like for a few different providers:
OpenAI
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o-mini")
msg = llm.invoke("What's the oldest known example of cuneiform")
msg.response_metadata
API Reference: ChatOpenAI
{'token_usage': {'completion_tokens': 110,
  'prompt_tokens': 16,
  'total_tokens': 126,
  'completion_tokens_details': {'accepted_prediction_tokens': 0,
   'audio_tokens': 0,
   'reasoning_tokens': 0,
   'rejected_prediction_tokens': 0},
  'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}},
 'model_name': 'gpt-4o-mini-2024-07-18',
 'system_fingerprint': 'fp_b8bc95a0ac',
 'id': 'chatcmpl-BDrISvLar6AqcZngBmhajFZXVc2u9',
 'finish_reason': 'stop',
 'logprobs': None}
Anthropic
from langchain_anthropic import ChatAnthropic
llm = ChatAnthropic(model="claude-3-5-sonnet-latest")
msg = llm.invoke("What's the oldest known example of cuneiform")
msg.response_metadata
API Reference: ChatAnthropic
{'id': 'msg_01JHnvPqgERY7MZwrvfkmq52',
 'model': 'claude-3-5-sonnet-20241022',
 'stop_reason': 'end_turn',
 'stop_sequence': None,
 'usage': {'cache_creation_input_tokens': 0,
  'cache_read_input_tokens': 0,
  'input_tokens': 17,
  'output_tokens': 221},
 'model_name': 'claude-3-5-sonnet-20241022'}
Google VertexAI
from langchain_google_vertexai import ChatVertexAI
llm = ChatVertexAI(model="gemini-1.5-flash-001")
msg = llm.invoke("What's the oldest known example of cuneiform")
msg.response_metadata
API Reference: ChatVertexAI
{'is_blocked': False,
 'safety_ratings': [{'category': 'HARM_CATEGORY_HATE_SPEECH',
   'probability_label': 'NEGLIGIBLE',
   'probability_score': 0.046142578125,
   'blocked': False,
   'severity': 'HARM_SEVERITY_NEGLIGIBLE',
   'severity_score': 0.07275390625},
  {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT',
   'probability_label': 'NEGLIGIBLE',
   'probability_score': 0.05419921875,
   'blocked': False,
   'severity': 'HARM_SEVERITY_NEGLIGIBLE',
   'severity_score': 0.03955078125},
  {'category': 'HARM_CATEGORY_HARASSMENT',
   'probability_label': 'NEGLIGIBLE',
   'probability_score': 0.083984375,
   'blocked': False,
   'severity': 'HARM_SEVERITY_NEGLIGIBLE',
   'severity_score': 0.029296875},
  {'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
   'probability_label': 'NEGLIGIBLE',
   'probability_score': 0.054931640625,
   'blocked': False,
   'severity': 'HARM_SEVERITY_NEGLIGIBLE',
   'severity_score': 0.03466796875}],
 'usage_metadata': {'prompt_token_count': 10,
  'candidates_token_count': 193,
  'total_token_count': 203,
  'prompt_tokens_details': [{'modality': 1, 'token_count': 10}],
  'candidates_tokens_details': [{'modality': 1, 'token_count': 193}],
  'cached_content_token_count': 0,
  'cache_tokens_details': []},
 'finish_reason': 'STOP',
 'avg_logprobs': -0.5702065976790196,
 'model_name': 'gemini-1.5-flash-001'}
Bedrock (Anthropic)
from langchain_aws import ChatBedrockConverse
llm = ChatBedrockConverse(model="anthropic.claude-3-sonnet-20240229-v1:0")
msg = llm.invoke("What's the oldest known example of cuneiform")
msg.response_metadata
API Reference: ChatBedrockConverse
{'ResponseMetadata': {'RequestId': 'ea0ac2ad-3ad5-4a49-9647-274a0c73ac31',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Sat, 22 Mar 2025 11:27:46 GMT',
   'content-type': 'application/json',
   'content-length': '1660',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'ea0ac2ad-3ad5-4a49-9647-274a0c73ac31'},
  'RetryAttempts': 0},
 'stopReason': 'end_turn',
 'metrics': {'latencyMs': [11044]}}
MistralAI
from langchain_mistralai import ChatMistralAI
llm = ChatMistralAI(model="mistral-small-latest")
msg = llm.invoke([("human", "What's the oldest known example of cuneiform")])
msg.response_metadata
API Reference: ChatMistralAI
{'token_usage': {'prompt_tokens': 13,
  'total_tokens': 219,
  'completion_tokens': 206},
 'model_name': 'mistral-small-latest',
 'model': 'mistral-small-latest',
 'finish_reason': 'stop'}
Groq
from langchain_groq import ChatGroq
llm = ChatGroq(model="llama-3.1-8b-instant")
msg = llm.invoke("What's the oldest known example of cuneiform")
msg.response_metadata
API Reference: ChatGroq
{'token_usage': {'completion_tokens': 184,
  'prompt_tokens': 45,
  'total_tokens': 229,
  'completion_time': 0.245333333,
  'prompt_time': 0.002262803,
  'queue_time': 0.19315161,
  'total_time': 0.247596136},
 'model_name': 'llama-3.1-8b-instant',
 'system_fingerprint': 'fp_a56f6eea01',
 'finish_reason': 'stop',
 'logprobs': None}
FireworksAI
from langchain_fireworks import ChatFireworks
llm = ChatFireworks(model="accounts/fireworks/models/llama-v3p1-70b-instruct")
msg = llm.invoke("What's the oldest known example of cuneiform")
msg.response_metadata
API Reference: ChatFireworks
{'token_usage': {'prompt_tokens': 25,
  'total_tokens': 352,
  'completion_tokens': 327},
 'model_name': 'accounts/fireworks/models/llama-v3p1-70b-instruct',
 'system_fingerprint': '',
 'finish_reason': 'stop',
 'logprobs': None}