diff --git a/include/fmt/format-inl.h b/include/fmt/format-inl.h
index 98aaee26..cc102680 100644
--- a/include/fmt/format-inl.h
+++ b/include/fmt/format-inl.h
@@ -18,14 +18,11 @@
 #include <cmath>
 #include <cstdarg>
 #include <cstddef>  // for std::ptrdiff_t
+#include <cstring>  // for std::memmove
 #if !defined(FMT_STATIC_THOUSANDS_SEPARATOR)
 # include <locale>
 #endif
 
-#if defined(_WIN32) && defined(__MINGW32__)
-# include <cstring>
-#endif
-
 #if FMT_USE_WINDOWS_H
 # if !defined(FMT_HEADER_ONLY) && !defined(WIN32_LEAN_AND_MEAN)
 #  define WIN32_LEAN_AND_MEAN
@@ -367,20 +364,47 @@ FMT_FUNC fp get_cached_power(int min_exponent, int &pow10_exponent) {
   return fp(data::POW10_SIGNIFICANDS[index], data::POW10_EXPONENTS[index]);
 }
 
+// Writes exp as "[-]d{1,3}" to buffer; returns a pointer past the last char.
+FMT_FUNC char *write_exponent(char *buffer, int exp) {
+  FMT_ASSERT(-1000 < exp && exp < 1000, "exponent out of range");
+  if (exp < 0) {
+    *buffer++ = '-';
+    exp = -exp;  // Continue with the magnitude.
+  }
+  if (exp >= 100) {
+    *buffer++ = '0' + static_cast<char>(exp / 100);  // Hundreds digit.
+    exp %= 100;
+    const char *d = data::DIGITS + exp * 2;  // Presumably "00".."99" pairs.
+    *buffer++ = d[0];
+    *buffer++ = d[1];
+  } else if (exp >= 10) {
+    const char *d = data::DIGITS + exp * 2;
+    *buffer++ = d[0];
+    *buffer++ = d[1];
+  } else {
+    *buffer++ = '0' + static_cast<char>(exp);  // Single digit.
+  }
+  return buffer;
+}
+
 // Generates output using Grisu2 digit-gen algorithm.
 FMT_FUNC void grisu2_gen_digits(
     const fp &scaled_value, const fp &scaled_upper, uint64_t delta,
     char *buffer, size_t &size, int &dec_exp) {
   internal::fp one(1ull << -scaled_upper.e, scaled_upper.e);
-  uint32_t hi = static_cast<uint32_t>(scaled_upper.f >> -one.e);  // p1 in Grisu
-  uint64_t lo = scaled_upper.f & (one.f - 1);                     // p2 in Grisu
+  // hi (p1 in Grisu) contains the most significant digits of scaled_upper.
+  // hi = floor(scaled_upper / one).
+  uint32_t hi = static_cast<uint32_t>(scaled_upper.f >> -one.e);
+  // lo (p2 in Grisu) contains the least significant digits of scaled_upper.
+  // lo = scaled_upper mod 1.
+  uint64_t lo = scaled_upper.f & (one.f - 1);
   size = 0;
-  auto kappa = count_digits(hi); // TODO: more descriptive name
-  while (kappa > 0) {
+  auto exp = count_digits(hi);  // kappa in Grisu.
+  while (exp > 0) {
     uint32_t digit = 0;
     // This optimization by miloyip reduces the number of integer divisions by
     // one per iteration.
-    switch (kappa) {
+    switch (exp) {
     case 10: digit = hi / 1000000000; hi %= 1000000000; break;
     case  9: digit = hi /  100000000; hi %=  100000000; break;
     case  8: digit = hi /   10000000; hi %=   10000000; break;
@@ -396,10 +420,10 @@ FMT_FUNC void grisu2_gen_digits(
     }
     if (digit != 0 || size != 0)
       buffer[size++] = '0' + static_cast<char>(digit);
-    --kappa;
+    --exp;
     uint64_t remainder = (static_cast<uint64_t>(hi) << -one.e) + lo;
     if (remainder <= delta) {
-      dec_exp += kappa;
+      dec_exp += exp;
       // TODO: use scaled_value
       (void)scaled_value;
       return;
@@ -412,31 +436,48 @@ FMT_FUNC void grisu2_gen_digits(
     if (digit != 0 || size != 0)
       buffer[size++] = '0' + digit;
     lo &= one.f - 1;
-    --kappa;
+    --exp;
     if (lo < delta) {
-      dec_exp += kappa;
+      dec_exp += exp;
       return;
     }
   }
 }
 
+// Prettifies in place the Grisu2 output v = buffer * 10^exp (size digits).
+// The buffer must have room for size + 6 chars: '.', 'e' and "[-]d{1,3}".
+FMT_FUNC void grisu2_prettify(char *buffer, size_t &size, int exp) {
+  // 10^(full_exp - 1) <= v < 10^full_exp (the leading digit is nonzero).
+  int full_exp = static_cast<int>(size) + exp;
+  // Insert a decimal point after the first digit and add an exponent.
+  std::memmove(buffer + 2, buffer + 1, size - 1);  // Requires size >= 1.
+  buffer[1] = '.';
+  char *p = buffer + size + 1;  // Just past the digits and the point.
+  *p++ = 'e';
+  size = to_unsigned(write_exponent(p, full_exp - 1) - buffer);
+}
+
+// Formats value using Grisu2 algorithm. Grisu2 doesn't give any guarantees on
+// the shortness of the result. Stores the text in buffer, its length in size.
 FMT_FUNC void grisu2_format(double value, char *buffer, size_t &size) {
   fp fp_value(value);
-  fp lower, upper;
+  fp lower, upper;  // w^- and w^+ in the Grisu paper.
   fp_value.compute_boundaries(lower, upper);
   // Find a cached power of 10 close to 1 / upper.
-  int dec_exp = 0;  // K in Grisu paper.
-  const int min_exp = -60;
-  auto dec_pow = get_cached_power(
+  int dec_exp = 0;  // K in Grisu.
+  const int min_exp = -60;  // alpha in Grisu.
+  auto dec_pow = get_cached_power(  // \tilde{c}_{-k} in Grisu.
       min_exp - (upper.e + fp::significand_size), dec_exp);
+  dec_exp = -dec_exp;  // Presumably dec_pow ~ 10^dec_exp; undo that scaling.
   fp_value.normalize();
   fp scaled_value = fp_value * dec_pow;
-  fp scaled_lower = lower * dec_pow;
-  fp scaled_upper = upper * dec_pow;
-  ++scaled_lower.f;  // +1 ulp
-  --scaled_upper.f;  // -1 ulp
+  fp scaled_lower = lower * dec_pow;  // \tilde{M}^- in Grisu.
+  fp scaled_upper = upper * dec_pow;  // \tilde{M}^+ in Grisu.
+  ++scaled_lower.f;  // \tilde{M}^- + 1 ulp -> M^-_{\uparrow}.
+  --scaled_upper.f;  // \tilde{M}^+ - 1 ulp -> M^+_{\downarrow}.
   uint64_t delta = scaled_upper.f - scaled_lower.f;
   grisu2_gen_digits(scaled_value, scaled_upper, delta, buffer, size, dec_exp);
+  grisu2_prettify(buffer, size, dec_exp);  // Rewrites buffer as d.ddde[-]ddd.
 }
 }  // namespace internal
 
diff --git a/include/fmt/format.h b/include/fmt/format.h
index f4c0f6ee..33843994 100644
--- a/include/fmt/format.h
+++ b/include/fmt/format.h
@@ -2949,9 +2949,12 @@ void basic_writer<Range>::write_double(T value, const format_specs &spec) {
   basic_memory_buffer<char_type> buffer;
   if (internal::const_check(FMT_USE_GRISU && sizeof(T) <= sizeof(double) &&
       std::numeric_limits<double>::is_iec559)) {
-    char buf[100]; // TODO: max size
+    // The max size = 10 (hi) + 20 (lo) + 5 (exp).
+    enum { BUF_SIZE = 35 };
+    char buf[BUF_SIZE];
     size_t size = 0;
     internal::grisu2_format(static_cast<double>(value), buf, size);
+    FMT_ASSERT(size <= BUF_SIZE, "buffer overflow");
     buffer.append(buf, buf + size); // TODO: avoid extra copy
   } else {
     format_specs normalized_spec(spec);