/*
 * Nucleotide statistics for a FASTA file.
 *
 * Reads a FASTA file, counts A/T/G/C occurrences over all records, writes a
 * summary to an output file and finally echoes that output file to stdout.
 * Several interchangeable printFileContent_* helpers demonstrate different
 * stdio techniques for dumping a stream.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LINE_LENGTH 1024 // Adjust line length as needed

// Paths used when no command-line arguments are given.
// A smaller sample containing only 2 chromosomes lives next to the default
// input as "chm13v2.0.fa_5M"; pass it as the first argument to use it.
#define DEFAULT_FASTA_PATH "/home/lukaskoz/tmp/lab7_wdi/chm13v2.0.fa"
#define DEFAULT_OUTPUT_PATH "/home/lukaskoz/tmp/lab7_wdi/genome_stats_short.txt"

/* Running totals gathered while scanning sequence lines. */
struct BaseCounts {
    long long length; // every alphabetic sequence character (A/T/G/C and any other letter)
    long long a;
    long long t;
    long long g;
    long long c;
};

/* Returns part/total as a percentage, or 0 when total is not positive. */
static double percentOf(long long part, long long total)
{
    if (total <= 0) {
        return 0.0; // avoids 0/0 -> NaN in the report for empty input
    }
    return ((double)part / (double)total) * 100.0;
}

/*
 * Writes the summary report to `output`.
 *
 *   uid      number printed in the "Total statistics for %d chromosomes" heading
 *   length   value printed on the "Length:" line
 *   *_count  per-nucleotide counts; percentages are relative to their sum
 *
 * When all four counts are zero every percentage is reported as 0.00.
 */
void printStats(int uid, long long length, long long A_count, long long T_count,
                long long G_count, long long C_count, FILE *output)
{
    long long total_count = A_count + T_count + G_count + C_count;

    fprintf(output, "Total statistics for %d chromosomes\n\n", uid);
    fprintf(output, "Length: %lld\n", length);
    fprintf(output, "Nucleotide frequencies:\n");
    fprintf(output, "A: % 10lld % 5.2f%%\n", A_count, percentOf(A_count, total_count));
    fprintf(output, "T: % 10lld % 5.2f%%\n", T_count, percentOf(T_count, total_count));
    fprintf(output, "G: % 10lld % 5.2f%%\n", G_count, percentOf(G_count, total_count));
    fprintf(output, "C: % 10lld % 5.2f%%\n", C_count, percentOf(C_count, total_count));
    fprintf(output, "GC content: %.2f%%\n\n", percentOf(G_count + C_count, total_count));
}

/* Dumps the whole stream to stdout: rewind() + fgetc()/putchar(), then a newline. */
void printFileContent_fgetc_putchar_rewind(FILE *file)
{
    int c;

    rewind(file); // reset the file position
    while ((c = fgetc(file)) != EOF) {
        putchar(c);
    }
    printf("\n");
}

/* Dumps the whole stream to stdout: fseek() to start + fgetc()/putchar(), then a newline. */
void printFileContent_fgetc_putchar_fseek(FILE *file)
{
    int c;

    fseek(file, 0, SEEK_SET); // Set the file position indicator to the beginning of the file
    while ((c = fgetc(file)) != EOF) {
        putchar(c);
    }
    printf("\n");
}

/* Dumps the whole stream to stdout: getc() + printf("%c"), then a newline. */
void printFileContent_getc_printf(FILE *f)
{
    int c;

    // Start from the beginning like every other printFileContent_* variant;
    // otherwise nothing is printed when the stream was already read to EOF.
    rewind(f);
    while ((c = getc(f)) != EOF) {
        printf("%c", c);
    }
    printf("\n");
}

/* Dumps the whole stream to stdout in MAX_LINE_LENGTH-byte blocks via fread()/fwrite(). */
void printFileContent_fread_buffer(FILE *file)
{
    char buffer[MAX_LINE_LENGTH];
    size_t bytesRead;

    fseek(file, 0, SEEK_SET); // Set the file position indicator to the beginning of the file
    while ((bytesRead = fread(buffer, 1, MAX_LINE_LENGTH, file)) > 0) {
        fwrite(buffer, 1, bytesRead, stdout); // Print buffer content to stdout
    }
    printf("\n");
}

/* Dumps the whole stream to stdout using fgets() + printf("%s"), then a newline. */
void printFileContent_fgets_printf(FILE *file)
{
    char line[MAX_LINE_LENGTH];

    fseek(file, 0, SEEK_SET); // Set the file position indicator to the beginning of the file
    while (fgets(line, MAX_LINE_LENGTH, file) != NULL) {
        printf("%s", line); // Print each line (or line fragment) to stdout
    }
    printf("\n");
}

/* Dumps the whole stream to stdout: fseek() to start + getc()/putchar(), then a newline. */
void printFileContent_getc_putchar(FILE *file)
{
    int c;

    fseek(file, 0, SEEK_SET); // Set the file position indicator to the beginning of the file
    while ((c = getc(file)) != EOF) {
        putchar(c); // Print each character to stdout
    }
    printf("\n");
}

/* Adds the letters found in chunk[0..n) to `counts`, case-insensitively. */
static void countBases(const char *chunk, size_t n, struct BaseCounts *counts)
{
    for (size_t i = 0; i < n; i++) {
        // <ctype.h> functions require a value representable as unsigned char.
        int c = toupper((unsigned char)chunk[i]);

        if (!isalpha(c)) {
            continue; // newlines, digits, gaps etc. are not sequence letters
        }
        counts->length++;
        switch (c) {
        case 'A': counts->a++; break;
        case 'T': counts->t++; break;
        case 'G': counts->g++; break;
        case 'C': counts->c++; break;
        default: break; // other letters count towards length only
        }
    }
}

/*
 * Scans a FASTA stream and writes the combined statistics to `output`.
 *
 * Every line starting with '>' is a record header: its identifier (the text
 * after '>' up to the first whitespace) is announced on stdout and the
 * chromosome counter is incremented. All other lines are sequence data whose
 * letters are accumulated into the global totals. After the last line the
 * number of records is printed to stdout and printStats() writes the report.
 *
 * Lines are read in MAX_LINE_LENGTH-sized chunks, so arbitrarily long lines
 * are supported; only the printed identifier is limited to what fits in the
 * first chunk of a header line.
 */
void processFasta(FILE *fasta, FILE *output)
{
    char chunk[MAX_LINE_LENGTH];
    struct BaseCounts totals = {0};
    int chromosome_counter = 0;
    int at_line_start = 1; // next chunk begins a new physical line
    int in_header = 0;     // current physical line is a '>' header

    while (fgets(chunk, sizeof chunk, fasta) != NULL) {
        size_t n = strlen(chunk);

        if (at_line_start) {
            in_header = (chunk[0] == '>');
            if (in_header) {
                // Identifier ends at the first blank or line terminator.
                int id_len = (int)strcspn(chunk + 1, " \t\r\n");

                printf("Processing chromosome: %.*s\n", id_len, chunk + 1);
                chromosome_counter++;
            }
        }

        // Continuation chunks of an over-long header stay excluded from the counts.
        if (!in_header) {
            countBases(chunk, n, &totals);
        }

        // A chunk without a trailing newline means the line continues in the next chunk.
        at_line_start = (n > 0 && chunk[n - 1] == '\n');
    }

    if (ferror(fasta)) {
        perror("Error reading FASTA input");
    }

    printf("Processed %d chromosomes:\n\n", chromosome_counter);
    printStats(chromosome_counter, totals.length, totals.a, totals.t, totals.g,
               totals.c, output);
}

/*
 * Usage: program [fasta_path [output_path]]
 *
 * Computes the statistics for the FASTA file, stores them in the output file
 * and then prints the output file to stdout. Missing arguments fall back to
 * DEFAULT_FASTA_PATH / DEFAULT_OUTPUT_PATH.
 */
int main(int argc, char *argv[])
{
    const char *fasta_path = (argc > 1) ? argv[1] : DEFAULT_FASTA_PATH;
    const char *output_path = (argc > 2) ? argv[2] : DEFAULT_OUTPUT_PATH;

    // Open and check each file separately so perror() reports the right
    // failure and an already opened stream is not leaked.
    FILE *fasta = fopen(fasta_path, "r");
    if (fasta == NULL) {
        perror("Error opening files");
        return EXIT_FAILURE;
    }

    FILE *output = fopen(output_path, "w");
    if (output == NULL) {
        perror("Error opening files");
        fclose(fasta);
        return EXIT_FAILURE;
    }

    processFasta(fasta, output);

    fclose(fasta);
    // fclose() flushes buffered data, so a failure here means the report is incomplete.
    if (fclose(output) != 0) {
        perror("Error writing output file");
        return EXIT_FAILURE;
    }

    // Display output file content
    FILE *outputFile = fopen(output_path, "r");
    if (outputFile == NULL) {
        perror("Error opening output file");
        return EXIT_FAILURE;
    }

    printf("\n\n=============================================\n\nContent of output file:\n\n");
    // Any other printFileContent_* variant can be substituted here.
    printFileContent_fgetc_putchar_fseek(outputFile);

    fclose(outputFile);
    return EXIT_SUCCESS;
}