Lernsoftware Programmierung / Code
 
StartSeite | LernsoftwareProgrammierung/ | Neues | TestSeite | ForumSeite | Teilnehmer | Kategorien | Index | Hilfe | Einstellungen | Ändern

Quellcodes zur LernsoftwareProgrammierung
Bemerkungen und Dialog   
Lernsoftware-Tools   
TextspezifischesWörterbuch   
Quellcode-Module für die Tools   
Worte aus ASCII-Text filtern (Schritt 1)   
Worte Sortieren & Doppelte und leichte Worte entfernen (Schritte 2,3,4)   
Schritt 5: Übersetzen mittels einer einfachen Wortliste   

Bemerkungen und Dialog    

Lernsoftware-Tools    

Hier sind alle geplanten und fertigen Tools aufgelistet.

TextspezifischesWörterbuch    

Startparameter und Hauptprogramm: Die erste Hälfte ist der Rahmen zur Bearbeitung der Start-Parameter und Dateien. Als Zweites enthält die Funktion TSW den zentralen Ablaufteil des Programms.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "tree.h"
#include "transl.h"

/* Central processing routine of the program; defined further down in this file. */
void TSW(void);

/* Reads the next word from sourceFile; returns NULL once no further word is available. */
char* GetSouceWord(FILE* sourceFile);



/////////////////////////////////////////////////////////////////////////////////////////////

    /* File names taken from the command line; NULL means "not given". */
    char* sourceName = NULL;
    char* destinName = NULL;
    char* filterName = NULL;
    char* translName = NULL;
    /* Streams used by TSW(): source and destination default to stdin/stdout,
     * the filter and translation streams stay NULL unless a name was given. */
    FILE* sourceFile = stdin;
    FILE* destinFile = stdout;
    FILE* filterFile = NULL;
    FILE* translFile = NULL;

/////////////////////////////////////////////////////////////////////////////////////////////
/*
 * Opens fileName with the given fopen() mode.
 * On failure the error is reported on stderr (purpose names the intended use,
 * e.g. "read") together with the errno value of this very fopen() call, and
 * the program terminates with exit status 1.
 */
static FILE* OpenFileOrExit(const char* program, const char* fileName,
                            const char* mode, const char* purpose)
{
    FILE* handle = fopen(fileName, mode);

    if (handle == NULL)
    {
        int openError = errno;  /* save before any other call can overwrite it */

        fprintf(stderr, "%s: Can't open %s to %s => error %d\n",
                program, fileName, purpose, openError);
        exit(1);
    }
    return handle;
}

/*
 * Program entry: evaluates up to four optional file names
 * (source, destination, filter list, translation list), opens the files,
 * runs TSW() and closes the files again.
 * Returns 0 on success; exits with status 1 on usage or file errors.
 */
int main(int argc, char *argv[])
{
    /* Step 0: process the start parameters */

    if (argc > 5)
    {   /* (the wiki does not render square brackets, hence the parentheses) */
        fprintf(stderr, "Utility to generate a table of translations for an ASCII text\n");
        fprintf(stderr, "Usage    : %s (<sourceName> (<destinationName> (<filterName> (<translateName>)))))\n", argv[0]);
        fprintf(stderr, "Parameter: <sourceName>     : file with the ASCII-text\n");
        fprintf(stderr, "                              If no filename is given, 'stdin'\n");
        fprintf(stderr, "                              will be read\n");
        fprintf(stderr, "           <destinationName>: Name of the table of translations.\n");
        fprintf(stderr, "                              Is no name given, the table will\n");
        fprintf(stderr, "                              written to 'stdout'\n");
        fprintf(stderr, "           <filterName>:      Name of a list of words not to be translated.\n");
        fprintf(stderr, "                              Is no name given, all words are translated\n");
        fprintf(stderr, "           <translateName>:   Name of a list of translations (e d d d).\n");
        fprintf(stderr, "                              Is no name given, nothing is translated\n");
        exit(1);
    }

    if (argc > 1) sourceName = argv[1];
    if (argc > 2) destinName = argv[2];
    if (argc > 3) filterName = argv[3];
    if (argc > 4) translName = argv[4];

    /* Open and check one file at a time: the reported error then belongs to
     * the failing file, and the destination is not created or truncated when
     * the source cannot be read. */
    if (sourceName != NULL) sourceFile = OpenFileOrExit(argv[0], sourceName, "r", "read");
    if (destinName != NULL) destinFile = OpenFileOrExit(argv[0], destinName, "w", "write");
    if (filterName != NULL) filterFile = OpenFileOrExit(argv[0], filterName, "r", "filter");
    if (translName != NULL) translFile = OpenFileOrExit(argv[0], translName, "r", "translate");

    TSW();  /* main function */

    if (sourceName != NULL) fclose(sourceFile);
    if (filterName != NULL) fclose(filterFile);
    if (translName != NULL) fclose(translFile);

    /* fclose() flushes buffered output, so a write error may only show up here. */
    if (destinName != NULL && fclose(destinFile) != 0)
    {
        int closeError = errno;

        fprintf(stderr, "%s: Can't write %s => error %d\n", argv[0], destinName, closeError);
        exit(1);
    }

    return 0;
}


/////////////////////////////////////////////////////////////////////////////////////////////


/*
 * Central processing sequence of the program:
 *   IN     - read all words from sourceFile into the word tree,
 *   FILTER - remove every word listed in filterFile (if one was given),
 *   OUT    - take the remaining words out of the tree one by one and write
 *            "<word> = <translation>" to destinFile.
 * Statistics about each phase are printed to stdout.
 */
void TSW(void)
{
	char* Word;
	long  Count, Left;

	//---------- IN ------------------
	// Step 1: filter the words out of the source text
	// Step 2: sort the word list alphabetically
	// Step 3: remove duplicate words

	Count = 0;
	while ((Word = GetSouceWord(sourceFile)) != NULL)
	{
		PutTreeWord(Word);
		Count++;
	}
	Left = Elements();
	/* Left, Count and Elements() are long, so %ld is the matching conversion. */
	printf("%ld of %ld Words were different.\n", Left, Count);

	//---------- FILTER --------------
	// Step 4: remove the "easy" words

	Count = 0;
	if (filterFile != NULL)
	{
		while ((Word = GetSouceWord(filterFile)) != NULL)
		{
			DelTreeWord(Word);
			Count++;
		}
	}
	printf("%ld left after %ld of %ld knowen Words were found.\n", Elements(), Left - Elements(), Count);
	Left = Elements();

	//---------- OUT -----------------
	// Step 5: look up the translation of the "difficult" words
	// Step 6: write the word pairs in print format

	InitTranslate(translFile);

	while ((Word = GetTreeWord()) != NULL)
	{
		fputs(Word,            destinFile);
		fputs(" = ",           destinFile);
		fputs(Translate(Word), destinFile);
	}

	printf("%ld Words were translated, %ld not.\n", Translated(), Left - Translated());
}

Quellcode-Module für die Tools    

Hier sind alle geplanten und fertigen Module aufgelistet.

Worte aus ASCII-Text filtern (Schritt 1)    

Der Original-Code ist ein Upload RalfEbert/spltwords.c aus http://www.wikiservice.at/fdw/wiki.cgi?UtilityPool, umgestellt auf zyklischen Aufruf.

#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

/* Size of the shared word buffer, including the terminating '\0'. */
enum { SOURCE_WORD_SIZE = 999 };

/* Buffer holding the word most recently returned by GetSouceWord(). */
static char word[SOURCE_WORD_SIZE];

/* Non-zero if ch is a lower case letter or one of the lower case German special letters. */
static int IsSourceLowerLetter(char ch)
{
    /* <ctype.h> functions require a value representable as unsigned char. */
    return islower((unsigned char)ch) ||
           ch == 'ä' || ch == 'ö' || ch == 'ü' || ch == 'ß';
}

/* Non-zero if ch is a letter of either case, including the German special letters. */
static int IsSourceLetter(char ch)
{
    return isalpha((unsigned char)ch) ||
           ch == 'Ä' || ch == 'Ö' || ch == 'Ü' ||
           IsSourceLowerLetter(ch);
}

/* Non-zero if ch is a sentence character that may directly follow a word. */
static int IsSentenceMark(char ch)
{
    return ch == '.' || ch == ',' || ch == ';' ||
           ch == ':' || ch == '?' || ch == '!';
}

/*
 * Returns the next word found in sourceFile, or NULL when the input is
 * exhausted.
 *
 * A word starts with a letter of either case, continues with lower case
 * letters only, may be followed by exactly one sentence character, and is
 * terminated by white-space or by the end of the input.  Words consisting of
 * a single letter are skipped.  Anything else is skipped up to the next
 * white-space.
 *
 * The returned pointer refers to a static buffer that is overwritten by the
 * next call.  Text that does not fit into the buffer is not treated as a
 * word.
 */
char* GetSouceWord(FILE* sourceFile)
{
    size_t position      = 0;
    char   lastCharacter = '\0';
    int    valid         = 0;

    for (;;)
    {
        /* Keep the int result so that EOF cannot be confused with a data byte. */
        int  input   = fgetc(sourceFile);
        int  atEnd   = (input == EOF);
        /* The end of the input terminates a pending word like white-space. */
        char current = atEnd ? ' ' : (char)input;

        if (position == 0)
        {
            if (atEnd)
            {
                return NULL;
            }
            /* the first character must be an upper or lower case letter */
            if (IsSourceLetter(current))
            {
                valid            = 1;
                word[position++] = current;
                lastCharacter    = current;
            }
            continue;
        }

        if (isspace((unsigned char)current))
        {
            /* drop a trailing sentence character (or other non-letter) */
            if (!IsSourceLetter(lastCharacter))
            {
                --position;
            }
            word[position] = '\0';

            if (valid && position > 1)
            {
                return word;
            }
            if (atEnd)
            {
                return NULL;
            }
            position = 0;
            continue;
        }

        /* A lower case letter or a sentence character is only acceptable
         * directly after a letter; every other character spoils the word. */
        if (!(IsSourceLowerLetter(current) || IsSentenceMark(current)) ||
            !IsSourceLetter(lastCharacter))
        {
            valid = 0;
        }

        if (position < SOURCE_WORD_SIZE - 1)
        {
            word[position++] = current;
        }
        else
        {
            /* Too long for the buffer: skip the rest up to the next white-space. */
            valid = 0;
        }
        lastCharacter = current;
    }
}

Worte Sortieren & Doppelte und leichte Worte entfernen (Schritte 2,3,4)    

Dieser Code für Binärsuche stand irgendwo im Internet für Integer und wurde für Strings erweitert. Kann jemand einem in Textschaufeleien Ungeübten sagen, wieso das free( T->Element ); nicht geht?

/* Type of the values stored in the tree: pointers to C strings. */
/* NOTE(review): this typedef sits outside the include guard below - confirm that is intended. */
typedef char* ElementType;

/* NOTE(review): identifiers starting with '_' plus an upper case letter are reserved in C. */
#ifndef _Tree_H
#define _Tree_H

/* The node layout is private to the implementation; clients only see pointers. */
struct TreeNode;
typedef struct TreeNode *Position;
typedef struct TreeNode *SearchTree;

/* User shell: operates on one tree kept inside the implementation. */
void  PutTreeWord(char* Word);
void  DelTreeWord(char* Word);
char* GetTreeWord(void      );

/* Binary tree primitives. */
SearchTree  MakeEmpty(                SearchTree T );
Position    Find     ( ElementType X, SearchTree T );
Position    FindMin  (                SearchTree T );
Position    FindMax  (                SearchTree T );
SearchTree  Insert   ( ElementType X, SearchTree T );
SearchTree  Delete   ( ElementType X, SearchTree T );
ElementType Retrieve ( Position    P               );
long        Elements ( void                        );

#endif  /* _Tree_H */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "tree.h"


/* One node of the binary search tree. */
struct TreeNode
{
    ElementType Element;    /* the stored string */
    SearchTree  Left;       /* left subtree, or NULL */
    SearchTree  Right;      /* right subtree, or NULL */
};

///////////////////////////////////////////////////

/* State of the single tree managed by the user-shell functions. */
static	SearchTree  Tree = NULL;	/* root of the tree */
static	Position    Posi = NULL;	/* node found by the last GetTreeWord() call */
static	ElementType Elem = 0;		/* string handed out by the last GetTreeWord() call */
static  long        elements = 0;;	/* counter maintained by Insert(), Delete() and MakeEmpty() */


///////////////////////////////////////////////////
// User-Shell:
///////////////////////////////////////////////////

///////////////////////////////////////////////////
/*
 * Adds Word to the module's tree.
 *
 * The word is copied into a local buffer first, because Insert() converts
 * its argument to lower case in place; the caller's string stays untouched.
 * Words that do not fit into the buffer (including the terminating '\0')
 * are reported on stderr and ignored.
 */
void PutTreeWord(char* Word)
{
	char CaseWord[99];

	/* ">=": a word of exactly sizeof(CaseWord) characters needs one more byte for '\0'. */
	if(strlen(Word) >= sizeof(CaseWord))
	{
	    fprintf(stderr, "Word to long: %s\n", Word);
		return;
	}

	strcpy( CaseWord, Word );
	Tree = Insert( CaseWord, Tree);
}


///////////////////////////////////////////////////
/*
 * Takes the smallest word out of the module's tree and returns it.
 * Returns NULL when the tree is empty.
 * The node is removed from the tree; the returned string itself is not freed.
 */
char* GetTreeWord(void)
{
	Posi = FindMin( Tree );
	if( Posi != NULL )
	{
		Elem = Retrieve( Posi );
		Tree = Delete( Elem, Tree );
		return Elem;
	}

	return NULL;
}


///////////////////////////////////////////////////
void DelTreeWord(char* Word)
{
	Tree = Delete  ( Word, Tree );
}



///////////////////////////////////////////////////
// Binary Tree:
///////////////////////////////////////////////////

///////////////////////////////////////////////////
/*
 * Releases every node of the tree T together with its stored string and
 * resets the element counter to 0.
 * Always returns NULL, i.e. the empty tree.
 */
SearchTree MakeEmpty( SearchTree T )
{
    if( T )
    {
        SearchTree lower  = T->Left;
        SearchTree higher = T->Right;

        /* children first, then the node's own storage */
        MakeEmpty( lower );
        MakeEmpty( higher );
        free( T->Element );
        free( T );
    }

    elements = 0;
    return NULL;
}



///////////////////////////////////////////////////
/*
 * Searches the tree T for the string X.
 * Returns the node holding X, or NULL if X is not in the tree.
 *
 * The strings are compared by content, ignoring case, in the same way as
 * Insert() and Delete() do it: both X and the visited elements are converted
 * to lower case in place.  (Comparing the char pointers themselves, as the
 * integer version of this code did, would only compare addresses.)
 */
Position Find( ElementType X, SearchTree T )
{
    while( T != NULL )
    {
        int order = strcmp( strlwr(X), strlwr(T->Element) );

        if( order < 0 )
            T = T->Left;
        else if( order > 0 )
            T = T->Right;
        else
            break;      /* found */
    }

    return T;
}



///////////////////////////////////////////////////
/*
 * Returns the leftmost node of the tree T, i.e. the node that has no left
 * child, or NULL if T is empty.
 */
Position FindMin( SearchTree T )
{
    if( T == NULL )
        return NULL;

    /* walk down the left edge instead of recursing */
    while( T->Left != NULL )
        T = T->Left;

    return T;
}



///////////////////////////////////////////////////
/*
 * Returns the rightmost node of the tree T, i.e. the node that has no right
 * child, or NULL if T is empty.
 */
Position FindMax( SearchTree T )
{
    if( T == NULL || T->Right == NULL )
        return T;

    return FindMax( T->Right );
}



///////////////////////////////////////////////////
/*
 * Inserts a private copy of the string X into the tree T and returns the
 * (possibly new) root of T.  A string that is already present is not
 * inserted again.
 *
 * Strings are compared ignoring case; for that purpose X and the elements
 * visited on the way down are converted to lower case in place.
 * The program is terminated with exit status 1 when memory runs out.
 */
SearchTree Insert( ElementType X, SearchTree T ) /* Insert and New */
{
    int order;

    if( T == NULL )
    {
        /* Create and return a one-node tree */
        T = malloc( sizeof *T );
        if( T == NULL )
        {
            printf( "Out of space!!!" );
            exit(1);
        }

        /* +1 for the terminating '\0' that strcpy() writes as well;
         * without it every insertion overruns its heap block by one byte. */
        T->Element = malloc( strlen( X ) + 1 );
        if( T->Element == NULL )
        {
            printf( "Out of space!!!!" );
            exit(1);
        }

        strcpy( T->Element, X );
        T->Left = T->Right = NULL;
        elements++;
        return T;
    }

    order = strcmp( strlwr(X), strlwr(T->Element) );
    if( order < 0 )
        T->Left = Insert( X, T->Left );
    else if( order > 0 )
        T->Right = Insert( X, T->Right );
    /* Else X is in the tree already; we'll do nothing */

    return T;  /* Do not forget this line!! */
}



///////////////////////////////////////////////////
/*
 * Removes the node holding the string X from the tree T and returns the
 * (possibly new) root of T.  Nothing happens if X is not in the tree.
 *
 * Strings are compared ignoring case; X and the visited elements are
 * converted to lower case in place.
 * Only the node is released.  The string it held is not freed here.
 */
SearchTree Delete( ElementType X, SearchTree T )
{
    Position doomed;
    int      order;

    if( T == NULL )
        return T;       /* element not found */

    order = strcmp( strlwr(X), strlwr(T->Element) );

    if( order < 0 )
    {
        T->Left = Delete( X, T->Left );
        return T;
    }
    if( order > 0 )
    {
        T->Right = Delete( X, T->Right );
        return T;
    }

    /* Found the element to be deleted */
    if( T->Left != NULL && T->Right != NULL )
    {
        /* Two children: take over the smallest string of the right subtree
         * and delete the node it came from.
         * NOTE(review): the string previously held by T is not freed here. */
        doomed     = FindMin( T->Right );
        T->Element = doomed->Element;
        T->Right   = Delete( T->Element, T->Right );
        return T;
    }

    /* One or zero children: the remaining child (or NULL) replaces the node */
    doomed = T;
    T = ( T->Left == NULL ) ? T->Right : T->Left;
    free( doomed );
    elements--;

    return T;
}


///////////////////////////////////////////////////
/*
 * Returns the string stored in the node P.
 * P must not be NULL.
 */
ElementType Retrieve( Position P )
{
    ElementType stored = P->Element;

    return stored;
}


///////////////////////////////////////////////////
/* Returns the current value of the module's element counter. */
long Elements( void )
{
    long current = elements;

    return current;
}

Schritt 5: Übersetzen mittels einer einfachen Wortliste    

/* Sets the stream from which Translate() reads the translation list. */
void  InitTranslate(FILE* translFile);
/* Returns the translation text for word, or " @\n" if none was found. */
char* Translate(char* word);
/* Returns how many Translate() calls found a translation so far. */
long  Translated(void);

#include <stdlib.h>
#include <stdio.h>
#include <string.h>


/* State of the translation module. */
FILE* file = NULL;      /* translation list set by InitTranslate() */
long lines = 0;         /* number of lines read from the list so far */
long found = 0;         /* number of successful translations */

char   line[1000];      /* the list line currently being examined */
size_t pos;             /* index of the separator within line */
char*  own = NULL;      /* word part of line; NULL means "read the next line" */
char*  other;           /* translation part of line (text after the separator) */

/* Returns the number of words for which Translate() found a translation. */
long Translated(void)
{
	long hits = found;

	return hits;
}

/*
 * Remembers translFile as the stream Translate() reads the translation
 * list from.  No other module state is changed.
 */
void InitTranslate(FILE* translFile)
{
	FILE* chosen = translFile;

	file = chosen;
}


/*
 * Looks up word in the translation list and returns its translation.
 *
 * Each list line has the form "<word><separator><translation>", where the
 * separator is the first ';' or ' ' of the line.  The list is read strictly
 * forward: entries smaller than word are skipped, and an entry greater than
 * word is kept for the next call.  The comparison ignores case; word and
 * the list entry are converted to lower case in place.
 *
 * Returns the text after the separator (as read by fgets) on success, and
 * " @\n" if no translation is available - also when no translation list was
 * set with InitTranslate().  The returned pointer refers to a static buffer
 * or a string literal and must not be freed.
 */
char* Translate(char* word)
{
	/* Without a translation list nothing can be translated. */
	if (file == NULL)
		return " @\n";

	/* An entry kept from the previous call must still be compared, even if
	 * reading it already set the end-of-file indicator. */
	while (own != NULL || !feof(file))
	{
		int order;

		if(own==NULL)
		{
		    if( fgets( line, sizeof line, file ) == NULL)
			{
				if(!feof(file))
					fprintf(stderr, "Translation buffer overrun\n");
				return " @\n";
			}

			lines++;
			pos = strcspn( line, "; " );

			/* No separator: the line has no translation part, and
			 * &line[pos+1] would point behind the string.  Skip it. */
			if (line[pos] == '\0')
				continue;

			line[pos] = 0;

			own   =  line;
			other = &line[pos+1];
		}

		order = strcmp(strlwr(own),strlwr(word));

		if (order == 0)
		{	found++;
			own = NULL;
			return other;
		}
		if (order > 0)
            return " @\n";	/* entry is kept for the next word */

		own = NULL;
	}
	return " @\n";
}


StartSeite | LernsoftwareProgrammierung/ | Neues | TestSeite | ForumSeite | Teilnehmer | Kategorien | Index | Hilfe | Einstellungen | Ändern
Text dieser Seite ändern (zuletzt geändert: 4. April 2005 9:35 (diff))
Suchbegriff: gesucht wird
im Titel
im Text