Вывод из текста самого длинного и короткого слова

275

15 декабря 2016, 16:16

Вот такой вариант: из файла берём какой-то текст и выводим минимальное и максимальное количество символов между пробелами. Как сделать аналог? (Только с использованием строк, а не записывая всё в переменные.)

#include <iostream>
#include <fstream>
#include <cstdio>
#include <string>
#include <clocale>
using namespace std;
/**
 * Reads the first line of "text.txt", echoes it together with its length in
 * bytes, prints the length of every word, and finally prints the longest and
 * the shortest word of that line.
 *
 * A word is a maximal run of characters that contains none of the delimiters
 * ' ', ',' and '.'. When several words share the extreme length, the first
 * one encountered is reported.
 *
 * Only std::string is used for text handling, so there is no fixed-size
 * buffer to overflow and no manual index arithmetic.
 *
 * @return 0 on success, 1 if the file cannot be opened or the line has no words.
 */
int main(int argc, char*argv[])
{
    (void)argc;  // command-line arguments are not used
    (void)argv;
    setlocale(LC_ALL,"rus");

    std::ifstream text("text.txt");  // closed automatically when it goes out of scope
    if (!text.is_open())
    {
        std::cerr<<"Файл не найден."<<std::endl;
        return 1;  // a missing file is a failure, so report a non-zero status
    }

    std::string line;
    std::getline(text, line);  // on failure `line` stays empty and is reported as having no words
    std::cout<<line<<std::endl;
    std::cout<<"Кол-во символов: "<<line.size()<<std::endl;

    const std::string delimiters = " ,.";
    std::string longest;
    std::string shortest;
    bool found_word = false;

    // Walk the line word by word: skip delimiters, then take everything up to
    // the next delimiter (or the end of the line for the last word).
    std::string::size_type word_begin = line.find_first_not_of(delimiters);
    while (word_begin != std::string::npos)
    {
        std::string::size_type word_end = line.find_first_of(delimiters, word_begin);
        if (word_end == std::string::npos)
            word_end = line.size();

        const std::string word = line.substr(word_begin, word_end - word_begin);
        std::cout<<" "<<word.size();

        // Two independent checks: the very first word is both the longest and
        // the shortest seen so far.
        if (!found_word || word.size() > longest.size())
            longest = word;
        if (!found_word || word.size() < shortest.size())
            shortest = word;
        found_word = true;

        word_begin = line.find_first_not_of(delimiters, word_end);
    }
    std::cout<<std::endl;

    if (!found_word)
    {
        std::cerr<<"В строке нет слов."<<std::endl;
        return 1;
    }

    std::cout<<"Самое длинное слово: "<<longest<<std::endl;
    std::cout<<"Самое короткое слово: "<<shortest<<std::endl;
    return 0;
}

Answer 1

Чтобы напечатать самое короткое и самое длинное слово во входном тексте, можно использовать istream_iterator, чтобы разбить текст на слова, разделённые пробельными символами, и minmax_element со специальным сравнителем size_less, чтобы найти кратчайшее и длиннейшее слова:

/** Print the shortest and the longest whitespace-separated words in the input.
    To try:
      $ g++ -std=c++11 print-minmax-words.cpp && </usr/share/dict/words ./a.out
      A: 1, electroencephalograph's: 23
*/
#include <algorithm> // minmax
#include <iostream>
#include <iterator> // istream_iterator
#include <string>
#include <vector>
/// Strict "shorter than" ordering for strings.
/// @param a left-hand string
/// @param b right-hand string
/// @return true when `a` holds fewer bytes than `b`; contents are not compared.
bool size_less(const std::string& a, const std::string& b) {
    const std::string::size_type lhs_bytes = a.size();
    const std::string::size_type rhs_bytes = b.size();
    return rhs_bytes > lhs_bytes;
}
/**
 * Prints the shortest and the longest whitespace-separated word read from
 * standard input, each followed by its size in bytes.
 *
 * Exit status: 0 on success, 1 when the input contains no words, 2 on an
 * I/O error (failed write, or reading stopped before end of input).
 */
int main() {
  using namespace std;
  // Words are extracted with operator>> while skipws is set, so the locale
  // currently imbued in cin decides which characters count as whitespace.
  // NOTE (from the original author): Windows, OS X and Linux may behave
  // differently here (ANSI conversions of byte streams, limited locale sets).
  if (cin.getloc().name() != "C") {
      cerr << "warning: what whitespace is depends on locale: '"
           << cin.getloc().name() << "'\n";
  }
  // istream_iterator is a single-pass input iterator, while minmax_element
  // requires forward iterators. Collect the words into a vector first so the
  // algorithm runs on a range it is allowed to revisit.
  istream_iterator<string> first(cin), eof;
  const vector<string> words(first, eof);
  if (words.empty()) {
    cerr << "there are no words in the input\n";
    return 1;
  }
  // p.first -> first shortest word, p.second -> last longest word.
  const auto p = minmax_element(words.begin(), words.end(), size_less);
  if (!(cout << *p.first << ": " << p.first->size() << ", "
             << *p.second << ": " << p.second->size() << endl))
    return 2; // I/O error
  // Reading must have stopped because the input ended, not because of an error.
  return cin.eof() ? 0 : 2;
}

Ввод

00  01
020304
0506070809 1011 12 13 14 15 16 17 18 19 20 21 22 23 24     25     26 27 28

Вывод

00: 2, 1011 12 13 14 15 16 17 18 19 20 21 22 23 24     25     26 27 28　: 93

Входной файл содержит юникодные пробельные символы, пронумерованные двузначными числами. Программа, которая поддерживает Юникод, вернула бы наименьшую и наибольшую длину слова, равную 2, а не 2 и 93, как в примере выше, который распознаёт только стандартные пробельные символы ASCII.

Если нужно вывести только длины самого короткого и самого длинного слова (символы, разделённые пробелом), то не обязательно сами слова хранить.

Вот программа на C, которая читает стандартный ввод (stdin) по одному байту и разбивает его на слова по стандартным пробельным символам ' \t\n\r\v\f'. Реализация поддерживает слова длиной до UINTMAX_MAX байт (18446744073709551615 на моей машине -- примерно полвека работы со скоростью 100 гигабит в секунду):

/** Print min, max whitespace-separated word sizes in the input.
    To try:
      $ gcc print-minmax-word-size.c && </usr/share/dict/words ./a.out
*/
#include <inttypes.h> /* PRIuMAX */
#include <stdio.h>
#include <stdlib.h>
/* Fold the length of a finished word into the running minimum and maximum. */
static void record_word(uintmax_t len, uintmax_t *shortest, uintmax_t *longest) {
  if (len < *shortest)
    *shortest = len;
  if (len > *longest)
    *longest = len;
}

/** Read stdin byte by byte and print the smallest and the largest size (in
    bytes) of the whitespace-separated words found there.
    Exit status is like grep:
      0 -- success
      1 -- no words found
      2 or greater -- an error
*/
int main(void) {
  uintmax_t current = 0;             /* bytes in the word being read */
  uintmax_t shortest = UINTMAX_MAX;  /* sentinel: no word seen yet */
  uintmax_t longest = 0;
  int ch;

  for (ch = getchar(); ch != EOF; ch = getchar()) {
    /* the six characters treated as word separators */
    const int is_separator = ch == ' ' || ch == '\f' || ch == '\n' ||
                             ch == '\r' || ch == '\t' || ch == '\v';
    if (!is_separator) {
      ++current; /* still inside a word */
      continue;
    }
    if (current != 0) { /* a separator right after a word closes it */
      record_word(current, &shortest, &longest);
      current = 0;
    }
  }
  /* the input may end without a trailing separator */
  if (current != 0)
    record_word(current, &shortest, &longest);

  if (shortest == UINTMAX_MAX && longest == 0) {
    fputs("there are no words in the input\n", stderr);
    exit(1);
  }
  if (printf("%" PRIuMAX " %" PRIuMAX "\n", shortest, longest) < 0)
    exit(2);
  /* success only if reading stopped at end of file rather than on an error */
  return feof(stdin) ? 0 : 2;
}

Программа выводит длину слов в байтах. Длина в символах (буквах, видимых на экране) может отличаться в зависимости от кодировки входного текста и его нормализации (bytes -> Unicode codepoints -> user-perceived characters (grapheme clusters)).