Initial: Privacy Gateway Projekt mit Team-Implementierung
This commit is contained in:
@@ -0,0 +1,47 @@
|
||||
# Dockerfile for the anonymizer service.
# Runs inside the backend container or standalone.
#
# Stage graph:  base -> dev
#               base -> builder -> production
# `production` is declared last so that a plain `docker build` (no --target)
# yields the runtime image.

FROM node:20-alpine AS base

# dumb-init runs as PID 1 so signals are forwarded to the Node process
RUN apk add --no-cache dumb-init

WORKDIR /app

# Copy the manifests first so the dependency layer is cached independently of
# the sources. `npm ci` refuses to run without package-lock.json, so the lock
# file must be part of this COPY (the glob matches package.json and the lock).
COPY package*.json tsconfig.json ./

# Runtime dependencies only (`--omit=dev` replaces the deprecated `--only=production`)
RUN npm ci --omit=dev

# ---------- Development stage ----------
FROM base AS dev

# Re-install including devDependencies (tsx, typescript, ...)
RUN npm ci

COPY . .

EXPOSE 3000

CMD ["dumb-init", "npm", "run", "dev"]

# ---------- Builder stage ----------
# Must be declared BEFORE `production`: `COPY --from=<stage>` can only refer to
# a stage that already exists at that point of the Dockerfile.
FROM base AS builder

RUN npm ci

COPY . .

RUN npm run build

# ---------- Production stage ----------
FROM base AS production

# Compiled output from the builder stage; node_modules (runtime deps only)
# and package.json are inherited from `base`
COPY --from=builder /app/dist ./dist

USER node

EXPOSE 3000

CMD ["dumb-init", "node", "dist/index.js"]
|
||||
@@ -0,0 +1,161 @@
|
||||
# 🔐 Privacy Gateway Anonymizer
|
||||
|
||||
PII-Erkennungs- und Anonymisierungs-Service für das Privacy Gateway.
|
||||
Nutzt lokale LLMs (Ollama) zur Erkennung persönlicher Daten mit Pattern-basiertem Fallback.
|
||||
|
||||
## Features
|
||||
|
||||
- 🤖 **LLM-basierte PII-Erkennung** via Ollama
|
||||
- 🛡️ **10 PII-Typen** erkannt: Namen, Adressen, E-Mails, Telefonnummern, IBANs, etc.
|
||||
- 🔄 **Re-Identifizierung** für nachgelagerte Prozesse
|
||||
- ⚡ **Pattern-Fallback** bei LLM-Ausfall
|
||||
- 📊 **Sensitivitäts-Bewertung** (low/medium/high/critical)
|
||||
- 💾 **Mapping-Serialisierung** für Persistenz
|
||||
|
||||
## Schnellstart
|
||||
|
||||
```typescript
|
||||
import { anonymize, reIdentify } from './index.js';
|
||||
|
||||
// Anonymisieren
|
||||
const result = await anonymize(
|
||||
'Hallo Max Mustermann, kontaktiere mich unter max@beispiel.de'
|
||||
);
|
||||
|
||||
console.log(result.anonymizedText);
|
||||
// "Hallo [NAME_1], kontaktiere mich unter [EMAIL_1]"
|
||||
|
||||
// Re-Identifizieren
|
||||
const restored = reIdentify(result.anonymizedText, result.mapping);
|
||||
console.log(restored.reidentifiedText);
|
||||
// "Hallo Max Mustermann, kontaktiere mich unter max@beispiel.de"
|
||||
```
|
||||
|
||||
## Unterstützte PII-Typen
|
||||
|
||||
| Typ | Beschreibung | Beispiel | Sensitivität |
|
||||
|-----|-------------|----------|--------------|
|
||||
| `name_person` | Personenname | Max Mustermann | 🔴 high |
|
||||
| `name_company` | Firmenname | Musterfirma GmbH | 🟡 medium |
|
||||
| `address` | Vollständige Adresse | Musterstraße 1, 12345 Berlin | 🔴 high |
|
||||
| `email` | E-Mail-Adresse | max@beispiel.de | 🔴 high |
|
||||
| `phone` | Telefonnummer (DE) | +49 170 12345678 | 🟡 medium |
|
||||
| `birthdate` | Geburtsdatum | 15.03.1985 | 🔴 high |
|
||||
| `account_number` | Kontonummer | 1234567890 | 🔴 critical |
|
||||
| `iban` | IBAN | DE89 3704... | 🔴 critical |
|
||||
| `employee_id` | Personalnummer | EMP-12345 | 🟡 medium |
|
||||
| `credit_card` | Kreditkartennummer | 4111 1111... | 🔴 critical |
|
||||
|
||||
## API-Referenz
|
||||
|
||||
### `anonymize(text, config?)`
|
||||
|
||||
Anonymisiert einen Text durch Erkennung und Ersetzung von PII.
|
||||
|
||||
**Parameter:**
|
||||
- `text` (string): Zu anonymisierender Text
|
||||
- `config` (optional): Konfigurationsobjekt
|
||||
|
||||
**Rückgabe:** `AnonymizeResult`
|
||||
```typescript
|
||||
{
|
||||
success: boolean;
|
||||
anonymizedText: string;
|
||||
mapping: ReverseMapping;
|
||||
sensitivityLevel: 'low' | 'medium' | 'high' | 'critical';
|
||||
piiCount: number;
|
||||
processingTimeMs: number;
|
||||
error?: string;
|
||||
usedFallback?: boolean;
|
||||
}
|
||||
```
|
||||
|
||||
### `reIdentify(anonymizedText, mapping, options?)`
|
||||
|
||||
Stellt einen anonymisierten Text wieder her.
|
||||
|
||||
**Parameter:**
|
||||
- `anonymizedText` (string): Anonymisierter Text mit Platzhaltern
|
||||
- `mapping` (ReverseMapping): Mapping von Platzhalter zu Originalwert
|
||||
- `options` (optional): Optionen für die Re-Identifizierung
|
||||
|
||||
**Rückgabe:** `ReidentifyResult`
|
||||
```typescript
|
||||
{
|
||||
success: boolean;
|
||||
reidentifiedText: string;
|
||||
replacementsMade: number;
|
||||
errors: string[];
|
||||
}
|
||||
```
|
||||
|
||||
### `createAnonymizer(config?)`
|
||||
|
||||
Erstellt eine konfigurierte Anonymizer-Instanz.
|
||||
|
||||
```typescript
|
||||
const anonymizer = createAnonymizer({
|
||||
ollamaUrl: 'http://localhost:11434',
|
||||
model: 'llama3.2',
|
||||
timeoutMs: 30000,
|
||||
maxRetries: 2,
|
||||
fallbackEnabled: true,
|
||||
});
|
||||
```
|
||||
|
||||
## Konfiguration
|
||||
|
||||
Umgebungsvariablen:
|
||||
```bash
|
||||
OLLAMA_URL=http://192.168.2.122:11434
|
||||
OLLAMA_MODEL=llama3.2
|
||||
OLLAMA_TIMEOUT=30000
|
||||
```
|
||||
|
||||
## Integration im Backend
|
||||
|
||||
```typescript
|
||||
import { getAnonymizer, maskForLog } from '@privacy-gateway/anonymizer';
|
||||
|
||||
const anonymizer = getAnonymizer();
|
||||
|
||||
// Vor dem Senden an externe KI
|
||||
const { anonymizedText, mapping, sensitivityLevel, piiCount } = await anonymizer.anonymize(userInput);
|
||||
|
||||
// Nur Metadaten loggen – niemals die Original-PII (einzelne Werte ggf. mit maskForLog maskieren)
|
||||
console.log(`Sensitivität: ${sensitivityLevel}, PII: ${piiCount}`);
|
||||
|
||||
// Nach Antwort der KI
|
||||
const restored = anonymizer.reIdentify(aiResponse, mapping);
|
||||
```
|
||||
|
||||
## Tests
|
||||
|
||||
```bash
|
||||
npm test
|
||||
```
|
||||
|
||||
## Projektstruktur
|
||||
|
||||
```
|
||||
anonymizer/
|
||||
├── src/
|
||||
│ ├── index.ts # Haupt-Export
|
||||
│ ├── anonymizer.ts # Kern-Logik
|
||||
│ ├── reverser.ts # Re-Identifizierung
|
||||
│ ├── pii-types.ts # Typ-Definitionen
|
||||
│ ├── prompts/
|
||||
│ │ └── pii-detection.ts # LLM-Prompts
|
||||
│ ├── utils/
|
||||
│ │ └── text.ts # Hilfsfunktionen
|
||||
│ └── test/
|
||||
│ └── test-cases.ts # Testfälle
|
||||
├── package.json
|
||||
├── tsconfig.json
|
||||
├── Dockerfile
|
||||
└── README.md
|
||||
```
|
||||
|
||||
## Lizenz
|
||||
|
||||
MIT
|
||||
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"name": "@privacy-gateway/anonymizer",
|
||||
"version": "1.0.0",
|
||||
"description": "PII-Erkennungs- und Anonymisierungs-Service für das Privacy Gateway",
|
||||
"type": "module",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"dev": "tsx watch src/index.ts",
|
||||
"start": "node dist/index.js",
|
||||
"test": "node --test dist/test/*.js",
|
||||
"lint": "eslint src/**/*.ts",
|
||||
"typecheck": "tsc --noEmit"
|
||||
},
|
||||
"keywords": [
|
||||
"pii",
|
||||
"anonymization",
|
||||
"privacy",
|
||||
"gdpr",
|
||||
"dsgvo",
|
||||
"ollama",
|
||||
"llm"
|
||||
],
|
||||
"author": "Privacy Gateway Team",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.0.0",
|
||||
"tsx": "^4.0.0",
|
||||
"typescript": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,271 @@
|
||||
/**
|
||||
* Haupt-Logik für PII-Erkennung und Anonymisierung
|
||||
* Interagiert mit Ollama für LLM-basierte PII-Erkennung
|
||||
*/
|
||||
|
||||
import {
|
||||
PiiType,
|
||||
PiiInstance,
|
||||
PiiDetectionResult,
|
||||
ReverseMapping,
|
||||
PII_TYPE_METADATA,
|
||||
} from './pii-types.js';
|
||||
import {
|
||||
generatePlaceholder,
|
||||
replaceInText,
|
||||
cleanJsonResponse,
|
||||
validatePiiResult,
|
||||
calculateSensitivityLevel,
|
||||
mightContainPii,
|
||||
} from './utils/text.js';
|
||||
import { generatePrompt, PromptConfig } from './prompts/pii-detection.js';
|
||||
|
||||
/**
 * Settings accepted by the `Anonymizer` constructor (as a `Partial`, merged
 * over `DEFAULT_CONFIG`).
 */
export interface AnonymizerConfig {
  /** Base URL of the Ollama server; `/api/chat` is appended to it. */
  ollamaUrl: string;

  /** Model name sent to Ollama and used to select the prompt preset. */
  model: string;

  /** Timeout for the LLM call in milliseconds (default comes from `OLLAMA_TIMEOUT`). */
  timeoutMs: number;

  /** Number of retries for the LLM call. */
  maxRetries: number;

  /** When true, a failed LLM detection falls back to regex-based detection. */
  fallbackEnabled: boolean;
}
|
||||
|
||||
/**
 * Default anonymizer settings; every field can be overridden per instance.
 *
 * Environment variables read once at module load:
 * - `OLLAMA_URL`     – Ollama base URL
 * - `OLLAMA_MODEL`   – model name
 * - `OLLAMA_TIMEOUT` – timeout in milliseconds
 */
export const DEFAULT_CONFIG: AnonymizerConfig = {
  // `||` (not `??`) on purpose: an empty env value should also fall back
  ollamaUrl: process.env.OLLAMA_URL || 'http://192.168.2.122:11434',
  model: process.env.OLLAMA_MODEL || 'llama3.2',
  // A non-numeric or non-positive OLLAMA_TIMEOUT would otherwise end up as
  // NaN/0 in the config; fall back to 30 s in that case.
  timeoutMs: ((): number => {
    const parsed = Number.parseInt(process.env.OLLAMA_TIMEOUT ?? '', 10);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : 30000;
  })(),
  maxRetries: 2,
  fallbackEnabled: true,
};
|
||||
|
||||
/**
 * Outcome of `Anonymizer.anonymize`.
 */
export interface AnonymizeResult {
  /** False only when detection failed and no fallback was applied. */
  success: boolean;

  /**
   * Text with PII replaced by placeholders. When `success` is false this is
   * the unmodified input, so callers must check `success` before forwarding it.
   */
  anonymizedText: string;

  /** Placeholder -> PII entry, used to restore the original values. */
  mapping: ReverseMapping;

  /** Sensitivity level computed from the detected PII entries. */
  sensitivityLevel: 'low' | 'medium' | 'high' | 'critical';

  /** Number of detected PII entries. */
  piiCount: number;

  /** Wall-clock duration of the call in milliseconds. */
  processingTimeMs: number;

  /** Error message, set on the failure result. */
  error?: string;

  /** True when the regex fallback produced the result instead of the LLM. */
  usedFallback?: boolean;
}
|
||||
|
||||
/**
 * Detects PII in free text and replaces it with placeholders.
 *
 * Detection is delegated to an Ollama chat model. If that fails (after the
 * configured retries) and `fallbackEnabled` is set, a regex-based detector is
 * used instead.
 */
export class Anonymizer {
  /**
   * Regexes used by the pattern fallback. The array order defines the order
   * of the entries in the fallback result.
   */
  private static readonly FALLBACK_PATTERNS: ReadonlyArray<{ type: PiiType; regex: RegExp }> = [
    // TLD class is `[A-Za-z]`; a `|` inside a character class is a literal pipe
    { type: 'email', regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g },
    { type: 'phone', regex: /(?:\+49|0)[\s\-/]?\d{1,4}[\s\-/]?\d{1,4}[\s\-/]?\d{1,4}[\s\-/]?\d{0,4}\b/g },
    // Groups of four digits plus an optional shorter tail, so that e.g. the
    // 22-character German form "DE89 3704 0044 0532 0130 00" matches completely
    { type: 'iban', regex: /\b[A-Z]{2}\d{2}(?:\s?\d{4}){4,7}(?:\s?\d{1,3})?\b/g },
    { type: 'birthdate', regex: /\b(?:0?[1-9]|[12][0-9]|3[01])[\.\/\-](?:0?[1-9]|1[0-2])[\.\/\-](?:19|20)\d{2}\b/g },
    { type: 'credit_card', regex: /\b(?:\d{4}[\s-]?){3,4}\d{1,4}\b/g },
  ];

  /** Largest delay `setTimeout` handles without overflowing (2^31 - 1 ms). */
  private static readonly MAX_TIMER_DELAY_MS = 2147483647;

  private config: AnonymizerConfig;

  // Not consulted by the detection code in this class; kept because
  // `resetCounters()` is part of the public surface.
  private placeholderCounters: Map<PiiType, number>;

  /**
   * @param config Overrides merged over `DEFAULT_CONFIG`.
   */
  constructor(config: Partial<AnonymizerConfig> = {}) {
    this.config = { ...DEFAULT_CONFIG, ...config };
    this.placeholderCounters = new Map();
  }

  /**
   * Anonymizes `text` by detecting PII and replacing it with placeholders.
   *
   * Never rejects because of a failing LLM call: failures either trigger the
   * pattern fallback or are reported as `success: false`. In the latter case
   * `anonymizedText` is the UNMODIFIED input, so callers must check `success`.
   *
   * @param text Text to anonymize.
   * @returns Anonymized text, reverse mapping and statistics.
   */
  async anonymize(text: string): Promise<AnonymizeResult> {
    const startTime = Date.now();

    // Cheap pre-check: skip the LLM round trip when nothing looks like PII
    if (!mightContainPii(text)) {
      return {
        success: true,
        anonymizedText: text,
        mapping: {},
        sensitivityLevel: 'low',
        piiCount: 0,
        processingTimeMs: Date.now() - startTime,
      };
    }

    try {
      const result = await this.anonymizeWithRetries(text);
      // Built inside the try block so that a malformed LLM result that breaks
      // the post-processing also leads to the fallback
      return this.toSuccessResult(result, startTime);
    } catch (error) {
      if (this.config.fallbackEnabled) {
        const fallbackResult = this.anonymizeWithPatterns(text);
        return { ...this.toSuccessResult(fallbackResult, startTime), usedFallback: true };
      }

      return {
        success: false,
        anonymizedText: text,
        mapping: {},
        sensitivityLevel: 'low',
        piiCount: 0,
        processingTimeMs: Date.now() - startTime,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /** Converts a detection result into the public success result. */
  private toSuccessResult(result: PiiDetectionResult, startTime: number): AnonymizeResult {
    return {
      success: true,
      anonymizedText: result.anonymizedText,
      mapping: this.buildReverseMapping(result.piiFound),
      sensitivityLevel: calculateSensitivityLevel(result.piiFound),
      piiCount: result.piiFound.length,
      processingTimeMs: Date.now() - startTime,
    };
  }

  /**
   * Runs the LLM detection, retrying up to `config.maxRetries` additional
   * times; rethrows the last error when every attempt failed.
   */
  private async anonymizeWithRetries(text: string): Promise<PiiDetectionResult> {
    const { maxRetries } = this.config;
    const retries = Number.isInteger(maxRetries) && maxRetries > 0 ? maxRetries : 0;

    let lastError: unknown;
    for (let attempt = 0; attempt <= retries; attempt++) {
      try {
        return await this.anonymizeWithLlm(text);
      } catch (error) {
        lastError = error;
      }
    }
    throw lastError instanceof Error ? lastError : new Error(String(lastError));
  }

  /**
   * LLM-based PII detection via the Ollama chat API.
   *
   * The request is aborted after `config.timeoutMs` milliseconds; a
   * non-finite or non-positive value disables the timeout.
   *
   * @throws Error on timeout, non-2xx status or an unparsable model answer.
   */
  private async anonymizeWithLlm(text: string): Promise<PiiDetectionResult> {
    const prompt = generatePrompt(text, this.config.model);
    const { timeoutMs } = this.config;

    const controller = new AbortController();
    const timer =
      Number.isFinite(timeoutMs) && timeoutMs > 0
        ? setTimeout(() => controller.abort(), Math.min(timeoutMs, Anonymizer.MAX_TIMER_DELAY_MS))
        : undefined;

    // Tolerate a trailing slash in the configured base URL
    const baseUrl = this.config.ollamaUrl.replace(/\/+$/, '');

    try {
      const response = await fetch(`${baseUrl}/api/chat`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: this.config.model,
          messages: [
            { role: 'system', content: prompt.system },
            { role: 'user', content: prompt.user },
          ],
          stream: false,
          options: {
            temperature: prompt.config.temperature,
            num_predict: prompt.config.maxTokens,
          },
        }),
        signal: controller.signal,
      });

      if (!response.ok) {
        throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
      }

      const data = (await response.json()) as { message?: { content?: string } };
      const content = data.message?.content || '';

      return this.parseLlmResponse(content, text);
    } catch (error) {
      // Only our own timer aborts the controller, so this means "timed out"
      if (controller.signal.aborted) {
        throw new Error(`Ollama request timed out after ${timeoutMs} ms`);
      }
      throw error;
    } finally {
      if (timer !== undefined) clearTimeout(timer);
    }
  }

  /**
   * Parses the model answer and extracts the PII entries.
   *
   * @param response Raw text content returned by the model.
   * @param originalText The text that was sent for analysis.
   * @throws Error when no JSON can be parsed or it fails `validatePiiResult`.
   */
  private parseLlmResponse(response: string, originalText: string): PiiDetectionResult {
    const cleaned = cleanJsonResponse(response);

    let parsed: unknown;
    try {
      parsed = JSON.parse(cleaned);
    } catch {
      // Second chance: take the outermost `{ ... }` span out of surrounding text
      const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
      if (!jsonMatch) {
        throw new Error('No valid JSON found in LLM response');
      }
      try {
        parsed = JSON.parse(jsonMatch[0]);
      } catch (e) {
        throw new Error(`Failed to parse LLM response: ${e}`);
      }
    }

    if (!validatePiiResult(parsed)) {
      throw new Error('Invalid PII result structure');
    }

    // Normalize the PII list; generate a placeholder when the model omitted one
    const piiFound: PiiInstance[] = parsed.pii_found.map((item: unknown, index: number) => {
      const pii = item as Record<string, string>;
      return {
        type: pii.type as PiiType,
        original: pii.original,
        replacement: pii.replacement || generatePlaceholder(pii.type as PiiType, index + 1),
      };
    });

    return {
      piiFound,
      anonymizedText: parsed.anonymized_text,
      originalText,
    };
  }

  /**
   * Fallback: regex-based PII detection, used when the LLM is unavailable.
   *
   * Matches of different patterns may overlap (e.g. a digit run inside an
   * IBAN also looks like a phone or credit-card number). Per overlapping
   * region only the longest match is kept; on equal length the earlier
   * pattern in `FALLBACK_PATTERNS` wins.
   */
  private anonymizeWithPatterns(text: string): PiiDetectionResult {
    const candidates: Array<{
      type: PiiType;
      original: string;
      start: number;
      end: number;
      rank: number;
    }> = [];

    Anonymizer.FALLBACK_PATTERNS.forEach(({ type, regex }, rank) => {
      for (const match of text.matchAll(regex)) {
        const start = match.index ?? 0;
        candidates.push({ type, original: match[0], start, end: start + match[0].length, rank });
      }
    });

    // Longest match first (stable sort keeps pattern order on ties)
    const byLength = [...candidates].sort((a, b) => b.end - b.start - (a.end - a.start));
    const accepted: typeof candidates = [];
    for (const candidate of byLength) {
      const overlaps = accepted.some(a => candidate.start < a.end && a.start < candidate.end);
      if (!overlaps) accepted.push(candidate);
    }

    // Emit grouped by pattern, then by position, numbering each type from 1
    accepted.sort((a, b) => a.rank - b.rank || a.start - b.start);

    const piiFound: PiiInstance[] = [];
    const replacements: Array<{ original: string; replacement: string }> = [];
    const seenOriginals = new Set<string>();
    const counters = new Map<PiiType, number>();

    for (const { type, original } of accepted) {
      // The same value occurring several times gets a single placeholder
      if (seenOriginals.has(original)) continue;
      seenOriginals.add(original);

      const next = (counters.get(type) ?? 0) + 1;
      counters.set(type, next);

      const placeholder = generatePlaceholder(type, next);
      piiFound.push({ type, original, replacement: placeholder });
      replacements.push({ original, replacement: placeholder });
    }

    return {
      piiFound,
      anonymizedText: replaceInText(text, replacements),
      originalText: text,
    };
  }

  /**
   * Builds the placeholder -> PII entry mapping used for re-identification.
   */
  private buildReverseMapping(piiFound: PiiInstance[]): ReverseMapping {
    const mapping: ReverseMapping = {};
    for (const pii of piiFound) {
      mapping[pii.replacement] = pii;
    }
    return mapping;
  }

  /**
   * Clears the placeholder counters.
   */
  resetCounters(): void {
    this.placeholderCounters.clear();
  }
}
|
||||
|
||||
/**
 * One-shot helper: builds a fresh `Anonymizer` from `config` and anonymizes
 * `text` with it.
 *
 * @param text Text to anonymize.
 * @param config Optional overrides for the default configuration.
 */
export async function anonymizeText(
  text: string,
  config?: Partial<AnonymizerConfig>
): Promise<AnonymizeResult> {
  return new Anonymizer(config).anonymize(text);
}
|
||||
@@ -0,0 +1,220 @@
|
||||
/**
|
||||
* Haupt-Export für den Anonymizer Service
|
||||
* Einfache API für Integration in das Backend
|
||||
*/
|
||||
|
||||
import { Anonymizer, anonymizeText, AnonymizeResult, AnonymizerConfig } from './anonymizer.js';
|
||||
import {
|
||||
reidentify,
|
||||
ReidentifyResult,
|
||||
ReidentifyOptions,
|
||||
serializeMapping,
|
||||
deserializeMapping,
|
||||
validateMapping,
|
||||
mergeMappings,
|
||||
extractPlaceholders,
|
||||
getMappingStats,
|
||||
} from './reverser.js';
|
||||
import {
|
||||
PiiType,
|
||||
PiiInstance,
|
||||
PiiDetectionResult,
|
||||
ReverseMapping,
|
||||
PII_TYPE_METADATA,
|
||||
PII_PATTERNS,
|
||||
} from './pii-types.js';
|
||||
import {
|
||||
generatePrompt,
|
||||
generateShortPrompt,
|
||||
PromptConfig,
|
||||
} from './prompts/pii-detection.js';
|
||||
import {
|
||||
generatePlaceholder,
|
||||
replaceInText,
|
||||
cleanJsonResponse,
|
||||
calculateSensitivityLevel,
|
||||
maskPiiForLog,
|
||||
mightContainPii,
|
||||
truncateText,
|
||||
} from './utils/text.js';
|
||||
|
||||
// ==================== Haupt-API ====================
|
||||
|
||||
/**
 * Anonymizes `text`; simple entry point for the backend.
 *
 * Delegates to `anonymizeText`, which creates a new `Anonymizer` per call.
 *
 * @param text Text to anonymize.
 * @param config Optional overrides for the default configuration.
 */
export async function anonymize(
  text: string,
  config?: Partial<AnonymizerConfig>
): Promise<AnonymizeResult> {
  return anonymizeText(text, config);
}
|
||||
|
||||
/**
 * Restores the original values in an anonymized text; simple entry point for
 * the backend. Delegates to `reidentify` from the reverser module.
 *
 * @param anonymizedText Text containing placeholders.
 * @param mapping Placeholder -> PII entry mapping returned by `anonymize`.
 * @param options Optional re-identification options.
 */
export function reIdentify(
  anonymizedText: string,
  mapping: ReverseMapping,
  options?: Partial<ReidentifyOptions>
): ReidentifyResult {
  return reidentify(anonymizedText, mapping, options);
}
|
||||
|
||||
// ==================== Erweiterte API ====================
|
||||
|
||||
/**
 * Facade returned by `createAnonymizer`: bundles anonymization,
 * re-identification and the mapping helpers behind one object.
 */
export interface PrivacyGatewayAnonymizer {
  // ----- Core functions -----

  /** Anonymizes `text` with the instance's configuration. */
  anonymize(text: string): Promise<AnonymizeResult>;

  /** Restores the original values in `text` using `mapping`. */
  reIdentify(
    text: string,
    mapping: ReverseMapping,
    options?: Partial<ReidentifyOptions>
  ): ReidentifyResult;

  // ----- Helpers -----

  /** Validates a mapping and reports the problems found. */
  validateMapping(mapping: ReverseMapping): { valid: boolean; errors: string[] };

  /** Returns the statistics computed by `getMappingStats`. */
  getMappingInfo(mapping: ReverseMapping): ReturnType<typeof getMappingStats>;

  /** Serializes a mapping to a string. */
  serialize(mapping: ReverseMapping): string;

  /** Parses a string produced by `serialize` back into a mapping. */
  deserialize(serialized: string): ReverseMapping;
}
|
||||
|
||||
/**
 * Creates a configured anonymizer facade, for callers that need several
 * configurations side by side.
 *
 * Only `anonymize` uses the created `Anonymizer` instance; the remaining
 * members forward to the helpers imported from the reverser module.
 *
 * @param config Optional overrides for the default configuration.
 */
export function createAnonymizer(config?: Partial<AnonymizerConfig>): PrivacyGatewayAnonymizer {
  const engine = new Anonymizer(config);

  return {
    anonymize(text: string) {
      return engine.anonymize(text);
    },
    reIdentify(text: string, mapping: ReverseMapping, options?: Partial<ReidentifyOptions>) {
      return reidentify(text, mapping, options);
    },
    validateMapping(mapping: ReverseMapping) {
      return validateMapping(mapping);
    },
    getMappingInfo(mapping: ReverseMapping) {
      return getMappingStats(mapping);
    },
    serialize(mapping: ReverseMapping) {
      return serializeMapping(mapping);
    },
    deserialize(serialized: string) {
      return deserializeMapping(serialized);
    },
  };
}
|
||||
|
||||
// ==================== Integration für Backend ====================
|
||||
|
||||
/**
 * Default configuration used for the backend singleton.
 *
 * Reads `OLLAMA_URL`, `OLLAMA_MODEL` and `OLLAMA_TIMEOUT` once at module load.
 */
export const DEFAULT_BACKEND_CONFIG: AnonymizerConfig = {
  // `||` (not `??`) on purpose: an empty env value should also fall back
  ollamaUrl: process.env.OLLAMA_URL || 'http://192.168.2.122:11434',
  model: process.env.OLLAMA_MODEL || 'llama3.2',
  // A non-numeric or non-positive OLLAMA_TIMEOUT would otherwise end up as
  // NaN/0 in the config; fall back to 30 s in that case.
  timeoutMs: ((): number => {
    const parsed = Number.parseInt(process.env.OLLAMA_TIMEOUT ?? '', 10);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : 30000;
  })(),
  maxRetries: 2,
  fallbackEnabled: true,
};
|
||||
|
||||
/** Shared backend instance; stays `null` until it is first requested. */
let globalAnonymizer: PrivacyGatewayAnonymizer | null = null;

/**
 * Returns the shared anonymizer, creating it from `DEFAULT_BACKEND_CONFIG`
 * on the first call (lazy initialization).
 */
export function getAnonymizer(): PrivacyGatewayAnonymizer {
  globalAnonymizer ??= createAnonymizer(DEFAULT_BACKEND_CONFIG);
  return globalAnonymizer;
}
|
||||
|
||||
/**
 * Drops the shared instance so that the next `getAnonymizer()` call creates a
 * new one. Useful in tests or after configuration changes.
 */
export function resetAnonymizer(): void {
  globalAnonymizer = null;
}
|
||||
|
||||
// ==================== Schnelle Hilfsfunktionen ====================
|
||||
|
||||
/**
 * Fast pre-check whether `text` might contain PII (delegates to
 * `mightContainPii`).
 */
export function quickCheck(text: string): boolean {
  return mightContainPii(text);
}
|
||||
|
||||
/**
 * Lists the supported PII types with their display metadata.
 *
 * Each entry is a copy of the `PII_TYPE_METADATA` record without its `prefix`.
 */
export function getSupportedPiiTypes(): Array<{
  type: PiiType;
  label: string;
  description: string;
  example: string;
  sensitiveLevel: string;
}> {
  const entries = Object.values(PII_TYPE_METADATA);
  return entries.map(({ type, label, description, example, sensitiveLevel }) => ({
    type,
    label,
    description,
    example,
    sensitiveLevel,
  }));
}
|
||||
|
||||
/**
 * Masks a PII value so it can be written to logs (delegates to
 * `maskPiiForLog`).
 *
 * @param value The PII value to mask.
 * @param type The PII type of `value`.
 */
export function maskForLog(value: string, type: PiiType): string {
  return maskPiiForLog(value, type);
}
|
||||
|
||||
// ==================== Type-Exports ====================
|
||||
|
||||
// Runtime values
export {
  // Main class
  Anonymizer,

  // Re-identification
  reidentify,
  serializeMapping,
  deserializeMapping,
  validateMapping,
  mergeMappings,
  extractPlaceholders,
  getMappingStats,

  // Prompts
  generatePrompt,
  generateShortPrompt,

  // Utils
  generatePlaceholder,
  replaceInText,
  cleanJsonResponse,
  calculateSensitivityLevel,
  truncateText,

  // Constants
  PII_TYPE_METADATA,
  PII_PATTERNS,
};

// Type-only re-exports. They must not sit in a value `export {}`: per-file
// transpilers (isolatedModules, and the esbuild-based `tsx` used by the dev
// script) cannot tell that these names have no runtime binding.
// NOTE(review): ReidentifyResult / ReidentifyOptions are assumed to be pure
// types (they are only used in type positions here) — confirm in reverser.ts.
export type {
  AnonymizerConfig,
  AnonymizeResult,
  ReidentifyResult,
  ReidentifyOptions,
  PromptConfig,
  PiiType,
  PiiInstance,
  PiiDetectionResult,
  ReverseMapping,
};
|
||||
|
||||
// ==================== Default Export ====================
|
||||
|
||||
/**
 * Default export: the convenience API bundled in a single object, for
 * consumers that prefer `import anonymizer from ...` over named imports.
 */
export default {
  // Core
  anonymize,
  reIdentify,

  // Instances
  createAnonymizer,
  getAnonymizer,

  // Helpers
  quickCheck,
  getSupportedPiiTypes,
  maskForLog,
};
|
||||
@@ -0,0 +1,146 @@
|
||||
/**
|
||||
* PII-Typen Definitionen für das Privacy Gateway
|
||||
* Definiert alle erkennbaren persönlichen Informationen und ihre Metadaten
|
||||
*/
|
||||
|
||||
/** Identifier of every PII category the service can detect. */
export type PiiType =
  // Names
  | 'name_person'
  | 'name_company'
  // Contact data
  | 'address'
  | 'email'
  | 'phone'
  // Personal data
  | 'birthdate'
  // Financial and organizational identifiers
  | 'account_number'
  | 'iban'
  | 'employee_id'
  | 'credit_card';
|
||||
|
||||
/** A single detected piece of PII together with its placeholder. */
export interface PiiInstance {
  /** Category of the detected value. */
  type: PiiType;

  /** The value exactly as it appeared in the analyzed text. */
  original: string;

  /** Placeholder that replaces `original` in the anonymized text. */
  replacement: string;

  // NOTE(review): the three optional fields below are not populated by the
  // detection code visible in this package; the meanings given are presumed
  // from their names — confirm.

  /** Presumably the offset of the first character of the match. */
  startIndex?: number;

  /** Presumably the offset of the end of the match. */
  endIndex?: number;

  /** Presumably a detection confidence score. */
  confidence?: number;
}
|
||||
|
||||
/** Result of one detection pass over a text. */
export interface PiiDetectionResult {
  /** All detected PII entries. */
  piiFound: PiiInstance[];

  /** The text with the detected values replaced by placeholders. */
  anonymizedText: string;

  /** The text that was analyzed. */
  originalText: string;
}
|
||||
|
||||
/**
 * String-to-string map keyed by placeholder.
 * NOTE(review): presumably placeholder -> original value; not used by the
 * code visible here — confirm against callers.
 */
export interface ReplacementMap {
  [placeholder: string]: string;
}
|
||||
|
||||
/**
 * Maps each placeholder to the PII entry it stands for; this is what
 * re-identification needs to restore the original text.
 */
export interface ReverseMapping {
  [placeholder: string]: PiiInstance;
}
|
||||
|
||||
/** Descriptive metadata for one PII type. */
export interface PiiTypeMetadata {
  /** The PII type this record describes. */
  type: PiiType;

  /** Short human-readable name. */
  label: string;

  /** One-line explanation of what the type covers. */
  description: string;

  /** Example value. */
  example: string;

  /** Upper-case tag of the type, e.g. `NAME` or `EMAIL`. */
  prefix: string;

  /** How sensitive values of this type are. */
  sensitiveLevel: 'low' | 'medium' | 'high' | 'critical';
}
|
||||
|
||||
/**
 * Metadata for every PII type. The key order is the order in which the types
 * are listed when the record is enumerated.
 */
export const PII_TYPE_METADATA: Record<PiiType, PiiTypeMetadata> = (() => {
  // Builds one record; property order matches the PiiTypeMetadata declaration
  const entry = (
    type: PiiType,
    label: string,
    description: string,
    example: string,
    prefix: string,
    sensitiveLevel: PiiTypeMetadata['sensitiveLevel']
  ): PiiTypeMetadata => ({ type, label, description, example, prefix, sensitiveLevel });

  return {
    name_person: entry(
      'name_person',
      'Personenname',
      'Vor- und Nachname einer natürlichen Person',
      'Max Mustermann',
      'NAME',
      'high'
    ),
    name_company: entry(
      'name_company',
      'Firmenname',
      'Name eines Unternehmens oder Organisation',
      'Musterfirma GmbH',
      'COMPANY',
      'medium'
    ),
    address: entry(
      'address',
      'Adresse',
      'Straße, Hausnummer, PLZ und Ort',
      'Musterstraße 42, 12345 Berlin',
      'ADDRESS',
      'high'
    ),
    email: entry('email', 'E-Mail', 'E-Mail-Adresse', 'max@mustermann.de', 'EMAIL', 'high'),
    phone: entry(
      'phone',
      'Telefonnummer',
      'Deutsche Telefonnummern (Mobil/Festnetz)',
      '+49 170 12345678',
      'PHONE',
      'medium'
    ),
    birthdate: entry(
      'birthdate',
      'Geburtsdatum',
      'Geburtsdatum in verschiedenen Formaten',
      '01.01.1990',
      'BIRTHDATE',
      'high'
    ),
    account_number: entry(
      'account_number',
      'Kontonummer',
      'Bankkontonummer (nicht IBAN)',
      '1234567890',
      'ACCOUNT',
      'critical'
    ),
    iban: entry(
      'iban',
      'IBAN',
      'Internationale Bankkontonummer',
      'DE89 3704 0044 0532 0130 00',
      'IBAN',
      'critical'
    ),
    employee_id: entry(
      'employee_id',
      'Personalnummer',
      'Mitarbeiter- oder Personalnummer',
      'EMP-12345',
      'EMPID',
      'medium'
    ),
    credit_card: entry(
      'credit_card',
      'Kreditkarte',
      'Kreditkartennummer',
      '4111 1111 1111 1111',
      'CC',
      'critical'
    ),
  };
})();
|
||||
|
||||
/**
 * Regex patterns per PII type for a pre-validation in addition to the LLM.
 *
 * Every pattern carries the `g` flag, so a shared instance keeps `lastIndex`
 * state between `test()`/`exec()` calls; `String.prototype.match`/`matchAll`
 * are not affected by that.
 */
export const PII_PATTERNS: Record<PiiType, RegExp> = {
  // TLD class is `[A-Za-z]`: inside a character class `|` is a literal pipe,
  // so `[A-Z|a-z]` would let matches run across a `|` separator
  email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
  phone: /(?:\+49|0)[\s\-/]?[\d\s\-/]{7,15}\b/g,
  iban: /\b[A-Z]{2}\d{2}[\s]?[\d]{4}[\s]?[\d]{4}[\s]?[\d]{4}[\s]?[\d]{4}[\s]?[\d]{0,4}\b/g,
  credit_card: /\b(?:\d{4}[\s-]?){3,4}\d{1,4}\b/g,
  birthdate: /\b(?:0?[1-9]|[12][0-9]|3[01])[\.\/\-](?:0?[1-9]|1[0-2])[\.\/\-](?:19|20)\d{2}\b/g,
  account_number: /\b\d{8,12}\b/g,
  employee_id: /\b(?:EMP|PN|MA)[\-\s]?\d{4,10}\b/gi,
  name_person: /\b[A-ZÄÖÜ][a-zäöüß]+\s+[A-ZÄÖÜ][a-zäöüß]+\b/g,
  name_company: /\b[A-ZÄÖÜ][a-zäöüß]+(?:\s+(?:GmbH|AG|KG|OHG|e\.?V\.?|UG|Ltd|Inc|Corp))\b/gi,
  address: /\b[A-ZÄÖÜ][a-zäöüß]+(?:straße|str|weg|platz|allee|gasse)\s+\d+[\s,]*\d{5}\s+[A-ZÄÖÜ][a-zäöüß]+\b/gi,
};
|
||||
@@ -0,0 +1,172 @@
|
||||
/**
|
||||
* Prompt-Templates für PII-Erkennung
|
||||
* Optimiert für lokale LLMs (Ollama)
|
||||
*/
|
||||
|
||||
/** Prompt preset for one model. */
export interface PromptConfig {
  /** Model name this preset belongs to. */
  model: string;

  /** Sampling temperature passed to the model. */
  temperature: number;

  /** Upper bound for the number of generated tokens. */
  maxTokens: number;

  /** System message that defines role and rules. */
  systemPrompt: string;

  /** User message template; `{{TEXT}}` marks where the input text goes. */
  userPromptTemplate: string;
}
|
||||
|
||||
// System-Prompt: Definiert die Rolle und Regeln
|
||||
export const SYSTEM_PROMPT = `Du bist ein spezialisierter Datenschutz-Assistent für PII-Erkennung (Personally Identifiable Information).
|
||||
Deine Aufgabe ist es, persönliche Daten in Texten zu identifizieren und zu anonymisieren.
|
||||
|
||||
REGELN:
|
||||
1. Identifiziere ALLE persönlichen Informationen im Text
|
||||
2. Ersetze jede PII durch einen neutralen Platzhalter im Format [TYP_NUMMER]
|
||||
3. Antworte NUR mit gültigem JSON - kein Markdown, keine Erklärungen
|
||||
4. Sei gründlich: Es ist besser zu viel zu erkennen als zu wenig
|
||||
5. Bei Namen: Unterscheide zwischen Personen- und Firmennamen
|
||||
6. Bei Adressen: Erkenne komplette Adressen (Straße + Ort)
|
||||
7. Bei Telefonnummern: Erkenne deutsche Formate (+49, 0...)
|
||||
8. Bei Datumsangaben: Nur Geburtsdaten (nicht Termine)
|
||||
|
||||
PII-TYPEN:
|
||||
- name_person: Vor- und Nachname einer Person (z.B. "Max Mustermann")
|
||||
- name_company: Firmenname (z.B. "Musterfirma GmbH")
|
||||
- address: Vollständige Adresse (z.B. "Musterstraße 1, 12345 Berlin")
|
||||
- email: E-Mail-Adresse (z.B. "max@beispiel.de")
|
||||
- phone: Telefonnummer (z.B. "+49 170 12345678")
|
||||
- birthdate: Geburtsdatum (z.B. "15.03.1985")
|
||||
- account_number: Kontonummer (z.B. "1234567890")
|
||||
- iban: IBAN (z.B. "DE89 3704 0044 0532 0130 00")
|
||||
- employee_id: Personalnummer (z.B. "EMP-12345")
|
||||
- credit_card: Kreditkartennummer (z.B. "4111 1111 1111 1111")
|
||||
|
||||
WICHTIG: Der anonymisierte Text muss natürlich lesbar bleiben, nur die sensitiven Daten werden ersetzt.`;
|
||||
|
||||
// User-Prompt Template mit Platzhalter für den zu analysierenden Text
|
||||
export const USER_PROMPT_TEMPLATE = `Analysiere den folgenden Text und identifiziere ALLE persönlichen Informationen (PII).
|
||||
|
||||
Für jede gefundene PII gib zurück:
|
||||
- type: Der PII-Typ (siehe System-Regeln)
|
||||
- original: Der originale Wert im Text
|
||||
- replacement: Ein eindeutiger Platzhalter [TYP_INDEX]
|
||||
|
||||
ANTWORTE NUR mit diesem JSON-Format:
|
||||
{
|
||||
"pii_found": [
|
||||
{
|
||||
"type": "name_person",
|
||||
"original": "Max Mustermann",
|
||||
"replacement": "[NAME_1]"
|
||||
}
|
||||
],
|
||||
"anonymized_text": "Hallo [NAME_1], ..."
|
||||
}
|
||||
|
||||
Zu analysierender Text:
|
||||
---
|
||||
{{TEXT}}
|
||||
---`;
|
||||
|
||||
// Erweiterte Version mit Beispielen für bessere Few-Shot Performance
|
||||
export const USER_PROMPT_TEMPLATE_FEW_SHOT = `Analysiere den folgenden Text und identifiziere ALLE persönlichen Informationen (PII).
|
||||
|
||||
BEISPIELE:
|
||||
|
||||
Eingabe: "Herr Klaus Müller von der Firma Schmidt & Co GmbH wohnt in der Hauptstraße 42, 10115 Berlin. Erreichbar unter klaus.mueller@email.de oder 030-12345678."
|
||||
Ausgabe:
|
||||
{
|
||||
"pii_found": [
|
||||
{"type": "name_person", "original": "Klaus Müller", "replacement": "[NAME_1]"},
|
||||
{"type": "name_company", "original": "Schmidt & Co GmbH", "replacement": "[COMPANY_1]"},
|
||||
{"type": "address", "original": "Hauptstraße 42, 10115 Berlin", "replacement": "[ADDRESS_1]"},
|
||||
{"type": "email", "original": "klaus.mueller@email.de", "replacement": "[EMAIL_1]"},
|
||||
{"type": "phone", "original": "030-12345678", "replacement": "[PHONE_1]"}
|
||||
],
|
||||
"anonymized_text": "Herr [NAME_1] von der Firma [COMPANY_1] wohnt in der [ADDRESS_1]. Erreichbar unter [EMAIL_1] oder [PHONE_1]."
|
||||
}
|
||||
|
||||
Eingabe: "Überweisung an Max Mustermann, IBAN: DE89 3704 0044 0532 0130 00, Geburtsdatum: 15.03.1985"
|
||||
Ausgabe:
|
||||
{
|
||||
"pii_found": [
|
||||
{"type": "name_person", "original": "Max Mustermann", "replacement": "[NAME_1]"},
|
||||
{"type": "iban", "original": "DE89 3704 0044 0532 0130 00", "replacement": "[IBAN_1]"},
|
||||
{"type": "birthdate", "original": "15.03.1985", "replacement": "[BIRTHDATE_1]"}
|
||||
],
|
||||
"anonymized_text": "Überweisung an [NAME_1], IBAN: [IBAN_1], Geburtsdatum: [BIRTHDATE_1]"
|
||||
}
|
||||
|
||||
ANWENDUNG:
|
||||
|
||||
Eingabe: {{TEXT}}
|
||||
Ausgabe:`;
|
||||
|
||||
/**
 * Prompt presets per model name. All presets share temperature, token limit
 * and system prompt; they differ only in the user prompt template (plain vs.
 * few-shot). `default` is the entry used for unknown model names.
 */
export const MODEL_CONFIGS: Record<string, PromptConfig> = (() => {
  // Builds one preset with the shared settings
  const preset = (model: string, userPromptTemplate: string): PromptConfig => ({
    model,
    temperature: 0.1,
    maxTokens: 4096,
    systemPrompt: SYSTEM_PROMPT,
    userPromptTemplate,
  });

  return {
    'llama3.2': preset('llama3.2', USER_PROMPT_TEMPLATE),
    'mistral': preset('mistral', USER_PROMPT_TEMPLATE),
    'qwen2.5': preset('qwen2.5', USER_PROMPT_TEMPLATE_FEW_SHOT),
    'gemma2': preset('gemma2', USER_PROMPT_TEMPLATE),
    'phi4': preset('phi4', USER_PROMPT_TEMPLATE_FEW_SHOT),
    'default': preset('llama3.2', USER_PROMPT_TEMPLATE),
  };
})();
|
||||
|
||||
/**
 * Builds the system and user prompt for a model.
 *
 * @param text - Text to analyze; inserted verbatim into the template's `{{TEXT}}` slot.
 * @param modelName - Key into MODEL_CONFIGS. Unknown names use the `default` entry.
 * @returns The system prompt, the filled-in user prompt, and the config that was used.
 */
export function generatePrompt(
  text: string,
  modelName: string = 'default'
): { system: string; user: string; config: PromptConfig } {
  // Own-property lookup: names such as "constructor" or "toString" must not
  // resolve to members inherited from Object.prototype.
  const hasOwnConfig = Object.prototype.hasOwnProperty.call(MODEL_CONFIGS, modelName);
  const config = (hasOwnConfig ? MODEL_CONFIGS[modelName] : undefined) ?? MODEL_CONFIGS.default;

  // Replacer function instead of a replacement string: a string would have
  // `$&`, `$'`, `$$` etc. inside the user text expanded as replacement patterns.
  const userPrompt = config.userPromptTemplate.replace('{{TEXT}}', () => text);

  return {
    system: config.systemPrompt,
    user: userPrompt,
    config,
  };
}
|
||||
|
||||
// Compact system prompt intended for very short texts (faster processing).
// It names the JSON response shape and the accepted PII type identifiers.
export const SHORT_TEXT_SYSTEM_PROMPT = `Du bist ein PII-Erkennungs-Assistent. Erkenne alle persönlichen Daten.
Antworte nur mit JSON: {"pii_found":[],"anonymized_text":""}
Typen: name_person,name_company,address,email,phone,birthdate,account_number,iban,employee_id,credit_card`;
|
||||
|
||||
/**
 * Builds the compact prompt pair for short inputs.
 *
 * @param text - Text to analyze; embedded between double quotes without escaping.
 * @returns The short system prompt and a user prompt ending in `JSON:`.
 */
export function generateShortPrompt(text: string): { system: string; user: string } {
  const user = `Text: "${text}"\nJSON:`;
  return { system: SHORT_TEXT_SYSTEM_PROMPT, user };
}
|
||||
@@ -0,0 +1,261 @@
|
||||
/**
|
||||
* Re-Identifizierung (De-Anonymisierung)
|
||||
* Stellt anonymisierte Texte wieder her
|
||||
*/
|
||||
|
||||
import { PiiInstance, ReverseMapping, PII_TYPE_METADATA, PiiType } from './pii-types.js';
|
||||
import { extractTypeFromPlaceholder, extractIndexFromPlaceholder } from './utils/text.js';
|
||||
|
||||
/** Outcome of a re-identification run. */
export interface ReidentifyResult {
  /** False only in strict mode when at least one error was recorded. */
  success: boolean;
  /** Text after substituting placeholders with original values. */
  reidentifiedText: string;
  /** Number of placeholder occurrences that were substituted. */
  replacementsMade: number;
  /** Human-readable problems encountered while processing the mapping. */
  errors: string[];
}

/** Switches controlling how `reidentify` treats placeholders. */
export interface ReidentifyOptions {
  strictMode: boolean; // true: report an error when a placeholder is not found
  // NOTE(review): `partialMatch` is not read by `reidentify` in this file.
  partialMatch: boolean; // true: also accept partial matches
  caseSensitive: boolean; // false: ignore upper/lower case when matching
}

/** Defaults: lenient, exact, case-sensitive matching. */
export const DEFAULT_REIDENTIFY_OPTIONS: ReidentifyOptions = {
  strictMode: false,
  partialMatch: false,
  caseSensitive: true,
};
|
||||
|
||||
/**
 * Re-identifies an anonymized text by substituting placeholders with their
 * original values.
 *
 * All placeholders are substituted in one pass over the text. An original
 * value that itself looks like a placeholder is therefore never substituted a
 * second time, and original values are inserted literally (`$&`, `$1`, ...
 * are not interpreted as replacement patterns).
 *
 * @param anonymizedText - Text containing placeholders such as `[NAME_1]`.
 * @param mapping - Lookup from placeholder to the PII entry holding the original value.
 * @param options - Overrides for DEFAULT_REIDENTIFY_OPTIONS.
 * @returns Restored text, substitution count and collected errors. In strict
 *   mode a mapping entry without a usable original aborts: the input text is
 *   returned unchanged with `success: false` and `replacementsMade: 0`.
 *   Placeholders absent from the text are reported as errors in strict mode only.
 */
export function reidentify(
  anonymizedText: string,
  mapping: ReverseMapping,
  options: Partial<ReidentifyOptions> = {}
): ReidentifyResult {
  const opts = { ...DEFAULT_REIDENTIFY_OPTIONS, ...options };
  const errors: string[] = [];
  const normalize = (s: string): string => (opts.caseSensitive ? s : s.toLowerCase());

  // Longest placeholders first, so the alternation below prefers the longest
  // candidate at any position.
  const placeholders = Object.keys(mapping).sort((a, b) => b.length - a.length);
  const originals = new Map<string, string>();
  const usable: string[] = [];

  for (const placeholder of placeholders) {
    const pii = mapping[placeholder];

    // An empty key would compile to a pattern that matches at every position.
    if (placeholder === '' || !pii || !pii.original) {
      errors.push(
        placeholder === ''
          ? 'Empty placeholder key in mapping'
          : `Missing original value for placeholder: ${placeholder}`
      );
      if (opts.strictMode) {
        return {
          success: false,
          reidentifiedText: anonymizedText,
          replacementsMade: 0,
          errors,
        };
      }
      continue;
    }

    // With case-insensitive matching two keys may collide; the first one wins.
    const key = normalize(placeholder);
    if (!originals.has(key)) {
      originals.set(key, pii.original);
      usable.push(placeholder);
    }
  }

  let reidentifiedText = anonymizedText;
  let replacementsMade = 0;
  const seen = new Set<string>();

  if (usable.length > 0) {
    const pattern = new RegExp(
      usable.map(p => p.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|'),
      opts.caseSensitive ? 'g' : 'gi'
    );

    reidentifiedText = anonymizedText.replace(pattern, match => {
      const key = normalize(match);
      const original = originals.get(key);
      if (original === undefined) return match;
      seen.add(key);
      replacementsMade++;
      return original;
    });
  }

  if (opts.strictMode) {
    for (const placeholder of usable) {
      if (!seen.has(normalize(placeholder))) {
        errors.push(`Placeholder not found in text: ${placeholder}`);
      }
    }
  }

  return {
    success: errors.length === 0 || !opts.strictMode,
    reidentifiedText,
    replacementsMade,
    errors,
  };
}
|
||||
|
||||
/**
 * Incremental re-identification: applies an additional mapping to a text that
 * may already have been partially restored. Delegates to `reidentify` with the
 * arguments passed through unchanged.
 *
 * @param currentText - Text to process.
 * @param additionalMapping - Further placeholder entries to apply.
 * @param options - Optional overrides forwarded to `reidentify`.
 */
export function reidentifyIncremental(
  currentText: string,
  additionalMapping: ReverseMapping,
  options?: Partial<ReidentifyOptions>
): ReidentifyResult {
  const outcome = reidentify(currentText, additionalMapping, options);
  return outcome;
}
|
||||
|
||||
/**
 * Produces a JSON string of the mapping for storage or transfer.
 * Only `type`, `original` and `replacement` of each entry are kept; the output
 * is pretty-printed with two-space indentation.
 */
export function serializeMapping(mapping: ReverseMapping): string {
  const plain: Record<string, { type: string; original: string; replacement: string }> = {};

  Object.keys(mapping).forEach(key => {
    const { type, original, replacement } = mapping[key];
    plain[key] = { type, original, replacement };
  });

  return JSON.stringify(plain, null, 2);
}
|
||||
|
||||
/**
 * Parses a mapping from its JSON string form.
 *
 * The parsed value is checked structurally before use: it must be a JSON
 * object whose values are objects with string `type`, `original` and
 * `replacement` fields. Whether `type` names a known PII type is NOT checked
 * here.
 *
 * @param serialized - JSON text, e.g. as produced by `serializeMapping`.
 * @returns The reconstructed mapping.
 * @throws SyntaxError when `serialized` is not valid JSON.
 * @throws TypeError when the JSON does not have the expected structure.
 */
export function deserializeMapping(serialized: string): ReverseMapping {
  const parsed: unknown = JSON.parse(serialized);
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new TypeError('Serialized mapping must be a JSON object');
  }

  const mapping: ReverseMapping = {};

  for (const [placeholder, data] of Object.entries(parsed as Record<string, unknown>)) {
    if (typeof data !== 'object' || data === null) {
      throw new TypeError(`Invalid mapping entry for placeholder ${placeholder}`);
    }
    const { type, original, replacement } = data as Record<string, unknown>;
    if (typeof type !== 'string' || typeof original !== 'string' || typeof replacement !== 'string') {
      throw new TypeError(`Invalid mapping entry for placeholder ${placeholder}`);
    }

    // defineProperty rather than assignment: a "__proto__" key must become an
    // ordinary own entry instead of replacing the result's prototype.
    Object.defineProperty(mapping, placeholder, {
      value: { type: type as PiiType, original, replacement },
      enumerable: true,
      writable: true,
      configurable: true,
    });
  }

  return mapping;
}
|
||||
|
||||
/**
 * Checks a mapping for consistency and completeness.
 *
 * For every entry it verifies the placeholder format (`[PREFIX_n]`), that the
 * PII type is a key of PII_TYPE_METADATA, that a non-blank original value is
 * present, and that `replacement` equals the placeholder key. Malformed
 * entries (null, non-object, non-string original) are reported as errors
 * instead of raising.
 *
 * @returns `valid` is true when no errors were collected.
 */
export function validateMapping(mapping: ReverseMapping): { valid: boolean; errors: string[] } {
  const errors: string[] = [];

  for (const [placeholder, pii] of Object.entries(mapping)) {
    if (!/^\[[A-Z_]+_\d+\]$/.test(placeholder)) {
      errors.push(`Invalid placeholder format: ${placeholder}`);
    }

    // Data may come from persisted JSON; do not assume the entry is an object.
    if (typeof pii !== 'object' || pii === null) {
      errors.push(`Invalid mapping entry for placeholder ${placeholder}`);
      continue;
    }

    // Own-property check so that "constructor" etc. are not accepted as types.
    if (!Object.prototype.hasOwnProperty.call(PII_TYPE_METADATA, pii.type)) {
      errors.push(`Unknown PII type "${pii.type}" for placeholder ${placeholder}`);
    }

    if (typeof pii.original !== 'string' || pii.original.trim() === '') {
      errors.push(`Missing or empty original value for placeholder ${placeholder}`);
    }

    if (pii.replacement !== placeholder) {
      errors.push(`Mismatch: placeholder "${placeholder}" vs replacement "${pii.replacement}"`);
    }
  }

  return { valid: errors.length === 0, errors };
}
|
||||
|
||||
/**
 * Combines several mappings into one new object.
 * On key conflicts the entry from the later mapping replaces the earlier one.
 */
export function mergeMappings(...mappings: ReverseMapping[]): ReverseMapping {
  const combined: ReverseMapping = {};

  mappings.forEach(source => {
    Object.keys(source).forEach(key => {
      combined[key] = source[key];
    });
  });

  return combined;
}
|
||||
|
||||
/**
 * Collects the distinct placeholders (`[PREFIX_n]`) occurring in a text,
 * in order of first appearance.
 */
export function extractPlaceholders(text: string): string[] {
  const pattern = /\[[A-Z_]+_\d+\]/g;
  const unique = new Set<string>();

  for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
    unique.add(hit[0]);
  }

  return Array.from(unique);
}
|
||||
|
||||
/**
 * Returns a new mapping containing only entries whose PII type is listed in
 * `types`. Useful when only certain PII types should be re-identified.
 */
export function filterMappingByTypes(
  mapping: ReverseMapping,
  types: PiiType[]
): ReverseMapping {
  return Object.keys(mapping).reduce<ReverseMapping>((kept, key) => {
    const entry = mapping[key];
    if (types.includes(entry.type)) {
      kept[key] = entry;
    }
    return kept;
  }, {});
}
|
||||
|
||||
/**
 * Returns a new mapping containing only entries whose placeholder key starts
 * with `[<prefix>_`, e.g. prefix `NAME` keeps `[NAME_1]`, `[NAME_2]`, ...
 */
export function filterMappingByPlaceholder(
  mapping: ReverseMapping,
  prefix: string
): ReverseMapping {
  const lead = `[${prefix}_`;

  return Object.keys(mapping).reduce<ReverseMapping>((kept, key) => {
    if (key.startsWith(lead)) {
      kept[key] = mapping[key];
    }
    return kept;
  }, {});
}
|
||||
|
||||
/**
 * Computes statistics over a mapping: total entry count, count per PII type,
 * and an overall sensitivity level.
 *
 * Level rules, first match wins:
 * - `critical`: at least one `credit_card` or `iban` entry
 * - `high`: three or more of name_person/address/birthdate/email, or five or more entries
 * - `medium`: at least one of those four types, or two or more entries
 * - `low`: otherwise
 */
export function getMappingStats(mapping: ReverseMapping): {
  totalPlaceholders: number;
  byType: Record<string, number>;
  sensitiveLevel: 'low' | 'medium' | 'high' | 'critical';
} {
  const entries = Object.values(mapping);
  const totalPlaceholders = entries.length;

  const byType: Record<string, number> = {};
  for (const { type } of entries) {
    byType[type] = (byType[type] || 0) + 1;
  }

  const highTypes: string[] = ['name_person', 'address', 'birthdate', 'email'];
  const criticalCount = entries.filter(e => e.type === 'credit_card' || e.type === 'iban').length;
  const highCount = entries.filter(e => highTypes.includes(e.type)).length;

  const classify = (): 'low' | 'medium' | 'high' | 'critical' => {
    if (criticalCount > 0) return 'critical';
    if (highCount >= 3 || totalPlaceholders >= 5) return 'high';
    if (highCount > 0 || totalPlaceholders >= 2) return 'medium';
    return 'low';
  };

  return { totalPlaceholders, byType, sensitiveLevel: classify() };
}
|
||||
@@ -0,0 +1,370 @@
|
||||
/**
|
||||
* Testfälle für das PII-Anonymisierungs-System
|
||||
* Umfassende Testabdeckung für verschiedene Szenarien
|
||||
*/
|
||||
|
||||
import { PiiDetectionResult, ReverseMapping } from '../pii-types.js';
|
||||
|
||||
/** One anonymization scenario: an input text and the PII expected in it. */
export interface TestCase {
  /** Short identifier of the scenario. */
  name: string;
  /** Human-readable description. */
  description: string;
  /** Text handed to the anonymizer. */
  input: string;
  /** PII the detector is expected to report (type plus exact original value). */
  expectedPii: Array<{
    type: string;
    original: string;
  }>;
  /** Whether the anonymized text is expected to differ from `input`. */
  shouldAnonymize: boolean;
  /** Grouping of the scenario. */
  category: 'basic' | 'advanced' | 'edge' | 'realworld';
}

/** Outcome of running one test case through `runTestCase`. */
export interface TestResult {
  testCase: TestCase;
  /** True when no errors were collected. */
  success: boolean;
  /** Anonymizer output; absent when the run ended with an exception. */
  anonymizedText?: string;
  /** Detected PII; absent when the run ended with an exception. */
  detectedPii?: PiiDetectionResult['piiFound'];
  /** Result of re-identifying the anonymized text; absent on exception. */
  reidentifiedText?: string;
  /** Validation and runtime errors collected during the run. */
  errors: string[];
}
|
||||
|
||||
// Basic test cases: one PII type per input.
// Every expected `original` is an exact substring of `input`, and every PII
// value present in an input is listed, because `validateResult` treats
// unlisted detections as false positives.
export const BASIC_TEST_CASES: TestCase[] = [
  {
    name: 'einfache_email',
    description: 'Einfache E-Mail-Adresse',
    input: 'Kontaktieren Sie mich unter max.mustermann@beispiel.de',
    expectedPii: [
      { type: 'email', original: 'max.mustermann@beispiel.de' },
    ],
    shouldAnonymize: true,
    category: 'basic',
  },
  {
    name: 'deutsche_telefonnummer',
    description: 'Deutsche Telefonnummer mit Vorwahl',
    input: 'Rufen Sie mich an: 030-12345678 oder +49 170 12345678',
    expectedPii: [
      { type: 'phone', original: '030-12345678' },
      { type: 'phone', original: '+49 170 12345678' },
    ],
    shouldAnonymize: true,
    category: 'basic',
  },
  {
    name: 'personenname',
    description: 'Vollständiger Personenname',
    input: 'Herr Dr. Klaus Müller wird Sie betreuen.',
    expectedPii: [
      { type: 'name_person', original: 'Klaus Müller' },
    ],
    shouldAnonymize: true,
    category: 'basic',
  },
  {
    name: 'vollstaendige_adresse',
    description: 'Vollständige deutsche Adresse',
    input: 'Meine Adresse lautet: Musterstraße 42, 10115 Berlin',
    expectedPii: [
      { type: 'address', original: 'Musterstraße 42, 10115 Berlin' },
    ],
    shouldAnonymize: true,
    category: 'basic',
  },
  {
    name: 'iban',
    description: 'Deutsche IBAN',
    input: 'Bitte überweisen Sie auf DE89 3704 0044 0532 0130 00',
    expectedPii: [
      { type: 'iban', original: 'DE89 3704 0044 0532 0130 00' },
    ],
    shouldAnonymize: true,
    category: 'basic',
  },
  {
    name: 'geburtsdatum',
    description: 'Geburtsdatum verschiedener Formate',
    input: 'Geboren am 15.03.1985, zuvor am 15/03/1985',
    expectedPii: [
      { type: 'birthdate', original: '15.03.1985' },
      // The slash-formatted date is part of the input and must be expected too.
      { type: 'birthdate', original: '15/03/1985' },
    ],
    shouldAnonymize: true,
    category: 'basic',
  },
];
|
||||
|
||||
// Advanced test cases: several PII types per input and less common formats.
// Long inputs are assembled by concatenation so that source indentation can
// never leak into the test text.
export const ADVANCED_TEST_CASES: TestCase[] = [
  {
    name: 'kombination_mehrere_pii',
    description: 'Mehrere PII-Typen in einem Text',
    input:
      'Hallo, ich bin Anna Schmidt von der Firma Tech Solutions GmbH. ' +
      'Sie erreichen mich unter anna.schmidt@tech-solutions.de oder +49 89 12345678. ' +
      'Meine Adresse: Hauptstraße 1, 80331 München. IBAN: DE12 3456 7890 1234 5678 90',
    expectedPii: [
      { type: 'name_person', original: 'Anna Schmidt' },
      { type: 'name_company', original: 'Tech Solutions GmbH' },
      { type: 'email', original: 'anna.schmidt@tech-solutions.de' },
      { type: 'phone', original: '+49 89 12345678' },
      { type: 'address', original: 'Hauptstraße 1, 80331 München' },
      { type: 'iban', original: 'DE12 3456 7890 1234 5678 90' },
    ],
    shouldAnonymize: true,
    category: 'advanced',
  },
  {
    name: 'kreditkarte',
    description: 'Kreditkartennummer',
    input: 'Meine Karte: 4111 1111 1111 1111 läuft ab 12/25',
    expectedPii: [
      { type: 'credit_card', original: '4111 1111 1111 1111' },
    ],
    shouldAnonymize: true,
    category: 'advanced',
  },
  {
    name: 'personalnummer',
    description: 'Personalnummer verschiedener Formate',
    input: 'Meine Personalnummer ist EMP-12345 oder MA-98765',
    expectedPii: [
      { type: 'employee_id', original: 'EMP-12345' },
      // Second format from the input; unlisted it would count as a false positive.
      { type: 'employee_id', original: 'MA-98765' },
    ],
    shouldAnonymize: true,
    category: 'advanced',
  },
  {
    name: 'kontonummer',
    description: 'Alte Kontonummer (nicht IBAN)',
    input: 'Bitte überweisen Sie auf Konto 1234567890, BLZ 70090100',
    expectedPii: [
      { type: 'account_number', original: '1234567890' },
    ],
    shouldAnonymize: true,
    category: 'advanced',
  },
];
|
||||
|
||||
// Edge cases: inputs without PII, empty or whitespace-only inputs, very long
// inputs, repeated values, and PII look-alikes.
export const EDGE_TEST_CASES: TestCase[] = [
  {
    name: 'keine_pii',
    description: 'Text ohne erkennbare PII',
    input: 'Das Wetter ist heute wunderschön und ich plane einen Spaziergang.',
    expectedPii: [],
    shouldAnonymize: false,
    category: 'edge',
  },
  {
    name: 'leerer_text',
    description: 'Leerer Eingabetext',
    input: '',
    expectedPii: [],
    shouldAnonymize: false,
    category: 'edge',
  },
  {
    name: 'nur_whitespace',
    description: 'Nur Leerzeichen',
    input: ' \n\t ',
    expectedPii: [],
    shouldAnonymize: false,
    category: 'edge',
  },
  {
    name: 'sehr_langer_text',
    description: 'Sehr langer Text mit verstreuten PII',
    // One e-mail address between 100 repetitions of filler on each side.
    input: 'Lorem ipsum '.repeat(100) + ' max.mustermann@beispiel.de ' + 'dolor sit '.repeat(100),
    expectedPii: [
      { type: 'email', original: 'max.mustermann@beispiel.de' },
    ],
    shouldAnonymize: true,
    category: 'edge',
  },
  {
    name: 'gleiche_pii_mehrfach',
    description: 'Selbe E-Mail mehrfach im Text',
    // The same value occurs twice but is listed once in expectedPii.
    input: 'Schreiben Sie an kontakt@firma.de oder kontakt@firma.de',
    expectedPii: [
      { type: 'email', original: 'kontakt@firma.de' },
    ],
    shouldAnonymize: true,
    category: 'edge',
  },
  {
    name: 'aehnliche_worte',
    description: 'Wörter die wie PII aussehen aber keine sind',
    input: 'Das Passwort lautet test@123 und die Adresse ist localhost',
    expectedPii: [],
    shouldAnonymize: false,
    category: 'edge',
  },
];
|
||||
|
||||
// Real-world scenarios: multi-line texts as they occur in support chats,
// invoices and appointment mails.
// Inputs are built from explicit lines joined with '\n' so the exact text does
// not depend on source indentation. Every expected `original` is an exact
// substring of its input, including line breaks.
export const REALWORLD_TEST_CASES: TestCase[] = [
  {
    name: 'support_ticket',
    description: 'Typischer Support-Chat mit Kundendaten',
    input: [
      'Kunde: Hallo, ich habe ein Problem mit meinem Konto.',
      'Support: Guten Tag, wie kann ich helfen?',
      'Kunde: Mein Name ist Sabine Weber, ich wohne in Berlin und meine Mail ist s.weber@web.de',
      'Support: Danke Frau Weber, ich schaue mir das an.',
    ].join('\n'),
    expectedPii: [
      { type: 'name_person', original: 'Sabine Weber' },
      { type: 'email', original: 's.weber@web.de' },
    ],
    shouldAnonymize: true,
    category: 'realworld',
  },
  {
    name: 'rechnung_kontext',
    description: 'Rechnungsinformationen',
    input: [
      'Rechnung Nr. 2024-001 an',
      'Herrn Peter Schmidt',
      'Musterweg 15',
      '20095 Hamburg',
      '',
      'Betrag: 1.234,56 EUR',
      'IBAN: DE44 2001 0020 0123 4567 89',
    ].join('\n'),
    expectedPii: [
      { type: 'name_person', original: 'Peter Schmidt' },
      // Street and city are on separate lines in the input, so the expected
      // value carries the line break rather than ", ".
      { type: 'address', original: 'Musterweg 15\n20095 Hamburg' },
      { type: 'iban', original: 'DE44 2001 0020 0123 4567 89' },
    ],
    shouldAnonymize: true,
    category: 'realworld',
  },
  {
    name: 'terminplanung',
    description: 'Terminplanung mit Kontaktdaten',
    input: [
      'Hallo Frau Dr. Angela Müller,',
      '',
      'wir haben Ihren Termin am 15.03.2024 um 14:30 Uhr bestätigt.',
      '',
      'Bei Fragen erreichen Sie uns unter:',
      '- E-Mail: praxis@mueller-med.de',
      '- Telefon: 089 / 12345678',
      '',
      'Mit freundlichen Grüßen',
      'Praxis Dr. Müller',
    ].join('\n'),
    expectedPii: [
      { type: 'name_person', original: 'Angela Müller' },
      { type: 'email', original: 'praxis@mueller-med.de' },
      { type: 'phone', original: '089 / 12345678' },
    ],
    shouldAnonymize: true,
    category: 'realworld',
  },
];
|
||||
|
||||
// Every test case in one list, ordered basic → advanced → edge → real-world.
export const ALL_TEST_CASES: TestCase[] = BASIC_TEST_CASES.concat(
  ADVANCED_TEST_CASES,
  EDGE_TEST_CASES,
  REALWORLD_TEST_CASES
);
|
||||
|
||||
/**
 * Validates an anonymization result against a test case's expectations.
 *
 * Checks, in order:
 * 1. the text changed when anonymization was expected,
 * 2. every expected PII (type and original) was detected,
 * 3. no detection has an original value outside the expected set
 *    (false positives) — independent of how many detections there are, so
 *    repeated detections of an expected value are fine and a wrong detection
 *    is reported even when the counts happen to match,
 * 4. no detected original value is still present in the anonymized text.
 *
 * @returns `valid` is true when no errors were collected.
 */
export function validateResult(
  result: PiiDetectionResult,
  testCase: TestCase
): { valid: boolean; errors: string[] } {
  const errors: string[] = [];

  if (testCase.shouldAnonymize && result.anonymizedText === testCase.input) {
    errors.push('Text wurde nicht anonymisiert');
  }

  for (const expected of testCase.expectedPii) {
    const found = result.piiFound.some(
      p => p.type === expected.type && p.original === expected.original
    );
    if (!found) {
      errors.push(`Erwartete PII nicht gefunden: ${expected.type} = "${expected.original}"`);
    }
  }

  // False positives: detections whose original is not expected at all.
  const expectedOriginals = new Set(testCase.expectedPii.map(e => e.original));
  const extra = result.piiFound.filter(p => !expectedOriginals.has(p.original));
  if (extra.length > 0) {
    errors.push(`Unerwartete PII gefunden: ${extra.map(p => `${p.type}="${p.original}"`).join(', ')}`);
  }

  // Leak check: a detected value must no longer appear in the anonymized text.
  for (const pii of result.piiFound) {
    if (result.anonymizedText.includes(pii.original)) {
      errors.push(`Originalwert "${pii.original}" noch im anonymisierten Text enthalten`);
    }
  }

  return { valid: errors.length === 0, errors };
}
|
||||
|
||||
/**
 * Runs a single test case end to end: anonymize, validate against the
 * expectations, then re-identify and compare with the original input.
 *
 * @param testCase - Scenario to run.
 * @param anonymizeFn - Anonymizer under test.
 * @param reidentifyFn - Function restoring a text from a placeholder mapping.
 * @returns The collected outcome. Any exception thrown along the way is
 *   recorded as an `Exception: ...` error and yields `success: false`.
 */
export async function runTestCase(
  testCase: TestCase,
  anonymizeFn: (text: string) => Promise<PiiDetectionResult>,
  reidentifyFn: (text: string, mapping: ReverseMapping) => string
): Promise<TestResult> {
  const collected: string[] = [];

  try {
    const detection = await anonymizeFn(testCase.input);

    const { errors: validationErrors } = validateResult(detection, testCase);
    validationErrors.forEach(message => collected.push(message));

    // Placeholder → entry lookup built from the detections themselves.
    const lookup: ReverseMapping = {};
    detection.piiFound.forEach(entry => {
      lookup[entry.replacement] = entry;
    });

    const restored = reidentifyFn(detection.anonymizedText, lookup);
    if (restored !== testCase.input) {
      collected.push('Re-Identifizierung ergab nicht den Originaltext');
    }

    return {
      testCase,
      success: collected.length === 0,
      anonymizedText: detection.anonymizedText,
      detectedPii: detection.piiFound,
      reidentifiedText: restored,
      errors: collected,
    };
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    collected.push(`Exception: ${detail}`);
    return {
      testCase,
      success: false,
      errors: collected,
    };
  }
}
|
||||
|
||||
// Sample mapping for manual tests: one entry per placeholder, keyed by the
// placeholder, with `replacement` repeating the key.
export const SAMPLE_MAPPING: ReverseMapping = {
  '[NAME_1]': { type: 'name_person', original: 'Max Mustermann', replacement: '[NAME_1]' },
  '[EMAIL_1]': { type: 'email', original: 'max@beispiel.de', replacement: '[EMAIL_1]' },
  '[PHONE_1]': { type: 'phone', original: '030-12345678', replacement: '[PHONE_1]' },
  '[ADDRESS_1]': { type: 'address', original: 'Musterstraße 1, 12345 Berlin', replacement: '[ADDRESS_1]' },
  '[IBAN_1]': { type: 'iban', original: 'DE89 3704 0044 0532 0130 00', replacement: '[IBAN_1]' },
  '[COMPANY_1]': { type: 'name_company', original: 'Musterfirma GmbH', replacement: '[COMPANY_1]' },
};
|
||||
|
||||
/** Anonymized sample text using each placeholder of SAMPLE_MAPPING once. */
export const SAMPLE_ANONYMIZED_TEXT =
  'Hallo [NAME_1], kontaktiere mich unter [EMAIL_1] oder [PHONE_1]. ' +
  'Ich wohne in [ADDRESS_1]. Überweise an [IBAN_1]. Arbeite bei [COMPANY_1].';
|
||||
@@ -0,0 +1,158 @@
|
||||
/**
|
||||
* Hilfsfunktionen für Textverarbeitung und PII-Handling
|
||||
*/
|
||||
|
||||
import { PiiInstance, PiiType, PII_TYPE_METADATA } from '../pii-types.js';
|
||||
|
||||
/**
 * Builds the placeholder for a PII type and running index, using the prefix
 * registered for that type in PII_TYPE_METADATA, e.g. `[NAME_1]`.
 */
export function generatePlaceholder(type: PiiType, index: number): string {
  const { prefix } = PII_TYPE_METADATA[type];
  return `[${prefix}_${index}]`;
}
|
||||
|
||||
/**
 * Resolves the PII type behind a placeholder by looking its prefix up in
 * PII_TYPE_METADATA, e.g. "[NAME_1]" -> "name_person".
 *
 * @returns The matching type, or null when the string contains no placeholder
 *   or the prefix is not registered.
 */
export function extractTypeFromPlaceholder(placeholder: string): PiiType | null {
  const parsed = /\[([A-Z_]+)_\d+\]/.exec(placeholder);
  if (parsed === null) return null;

  const wantedPrefix = parsed[1];
  const hit = Object.entries(PII_TYPE_METADATA).find(([, meta]) => meta.prefix === wantedPrefix);

  return hit ? (hit[0] as PiiType) : null;
}
|
||||
|
||||
/**
 * Reads the numeric index from a placeholder, e.g. "[NAME_1]" -> 1.
 *
 * @returns The index, or 0 when the string contains no placeholder.
 */
export function extractIndexFromPlaceholder(placeholder: string): number {
  const parsed = /\[[A-Z_]+_(\d+)\]/.exec(placeholder);
  if (parsed === null) {
    return 0;
  }
  return parseInt(parsed[1], 10);
}
|
||||
|
||||
/**
 * Replaces every occurrence of each `original` with its `replacement`.
 *
 * Longer originals are applied first so that a shorter original contained in
 * a longer one cannot break it up. Text inserted as a replacement is final:
 * later originals are matched only against the remaining untouched text, so a
 * short original (e.g. "1") can never corrupt an already inserted placeholder
 * (e.g. "[NAME_1]"). Replacements are inserted literally, and entries with an
 * empty `original` are ignored.
 *
 * @param text - Input text.
 * @param replacements - Pairs of text to find and text to insert.
 * @returns The text with all replacements applied.
 */
export function replaceInText(
  text: string,
  replacements: Array<{ original: string; replacement: string }>
): string {
  const sorted = [...replacements].sort((a, b) => b.original.length - a.original.length);

  // The text is kept as segments; `done` marks inserted replacements that
  // must not be scanned again.
  let segments: Array<{ value: string; done: boolean }> = [{ value: text, done: false }];

  for (const { original, replacement } of sorted) {
    // An empty search string would match between every pair of characters.
    if (original === '') continue;

    segments = segments.flatMap(segment => {
      if (segment.done || !segment.value.includes(original)) return [segment];

      return segment.value.split(original).flatMap((part, index) =>
        index === 0
          ? [{ value: part, done: false }]
          : [
              { value: replacement, done: true },
              { value: part, done: false },
            ]
      );
    });
  }

  return segments.map(segment => segment.value).join('');
}
|
||||
|
||||
/**
 * Strips Markdown code fences (```json and bare ```) and surrounding
 * whitespace from a response so that the remainder can be parsed as JSON.
 */
export function cleanJsonResponse(response: string): string {
  const withoutJsonFence = response.replace(/```json\n?/g, '');
  const withoutFences = withoutJsonFence.replace(/```\n?/g, '');
  return withoutFences.trim();
}
|
||||
|
||||
/**
 * Type guard for a parsed PII result: accepts any non-null object that has an
 * array `pii_found` and a string `anonymized_text`. The array's elements are
 * not inspected.
 */
export function validatePiiResult(result: unknown): result is { pii_found: unknown[]; anonymized_text: string } {
  if (result === null || typeof result !== 'object') {
    return false;
  }

  const { pii_found, anonymized_text } = result as Record<string, unknown>;
  if (!Array.isArray(pii_found)) {
    return false;
  }
  return typeof anonymized_text === 'string';
}
|
||||
|
||||
/**
 * Derives a sensitivity level from the number and kind of detected PII.
 *
 * First matching rule wins:
 * - `critical`: at least one `credit_card` or `iban`
 * - `high`: three or more of name_person/address/birthdate/email, or five or more items
 * - `medium`: at least one of those four types, or two or more items
 * - `low`: otherwise (including an empty list)
 */
export function calculateSensitivityLevel(piiList: PiiInstance[]): 'low' | 'medium' | 'high' | 'critical' {
  const total = piiList.length;
  if (total === 0) return 'low';

  let critical = 0;
  let high = 0;
  for (const { type } of piiList) {
    if (type === 'credit_card' || type === 'iban') {
      critical += 1;
    }
    if (type === 'name_person' || type === 'address' || type === 'birthdate' || type === 'email') {
      high += 1;
    }
  }

  if (critical > 0) return 'critical';
  if (high >= 3 || total >= 5) return 'high';
  if (high > 0 || total >= 2) return 'medium';
  return 'low';
}
|
||||
|
||||
/**
 * Masks a PII value for logging/display.
 * e.g. "max.mustermann@email.de" -> "m***@e***.de"
 *
 * - Values of up to 4 characters are fully masked.
 * - E-mail: first character of the local part, first character of the domain,
 *   and only the last domain label (TLD) are kept, so intermediate labels of
 *   multi-part domains are not revealed.
 * - Phone / credit card / IBAN: first and last four characters are kept.
 *   Values of up to 8 characters are fully masked, since head and tail would
 *   otherwise overlap and reveal the whole value.
 * - Everything else: first two and last two characters are kept.
 *
 * @param value - The raw PII value.
 * @param type - PII type deciding the masking scheme.
 */
export function maskPiiForLog(value: string, type: PiiType): string {
  if (value.length <= 4) return '****';

  if (type === 'email') {
    const [local, domain] = value.split('@');
    if (!domain) return value.substring(0, 2) + '***';

    const firstLabel = domain.split('.')[0];
    const lastDot = domain.lastIndexOf('.');
    const tld = lastDot >= 0 ? domain.substring(lastDot + 1) : '';
    return `${local.substring(0, 1)}***@${firstLabel.substring(0, 1)}***.${tld}`;
  }

  if (type === 'phone' || type === 'credit_card' || type === 'iban') {
    if (value.length <= 8) return '****';
    return value.substring(0, 4) + ' **** **** ' + value.substring(value.length - 4);
  }

  // Default: first 2 and last 2 characters
  return value.substring(0, 2) + '***' + value.substring(value.length - 2);
}
|
||||
|
||||
/**
 * Counts non-overlapping, case-sensitive occurrences of `search` in `text`.
 * An empty search string counts as zero occurrences.
 */
export function countOccurrences(text: string, search: string): number {
  if (!search) return 0;

  let total = 0;
  let position = text.indexOf(search);
  while (position !== -1) {
    total += 1;
    position = text.indexOf(search, position + search.length);
  }
  return total;
}
|
||||
|
||||
/**
 * Quick pre-check whether a text potentially contains PII.
 *
 * Heuristic only: looks for an `@`, a German-style phone number (`+49...` or
 * leading `0`), a numeric date, or an IBAN-like start (two capitals followed
 * by two digits, spaced or compact). Free-text PII such as names or addresses
 * is not covered by these patterns.
 */
export function mightContainPii(text: string): boolean {
  const hasEmail = text.includes('@');
  // `\b` applies to the leading-0 form only: before `+` a word boundary never
  // matches at the start of the text or after whitespace.
  const hasPhone = /(?:\+49|\b0)[\d\s\-/]{7,}\b/.test(text);
  const hasDate = /\b\d{1,2}[\.\/\-]\d{1,2}[\.\/\-]\d{2,4}\b/.test(text);
  // Accept both "DE89 3704 ..." and the compact "DE893704..."; only a
  // following lowercase letter or underscore rules the match out.
  const hasIban = /\b[A-Z]{2}\d{2}(?![a-z_])/.test(text);

  return hasEmail || hasPhone || hasDate || hasIban;
}
|
||||
|
||||
/**
 * Truncates text for display purposes.
 *
 * Text longer than `maxLength` is cut and ends in "..."; the result never
 * exceeds `maxLength` characters. For `maxLength` of 3 or less only (part of)
 * the ellipsis fits.
 *
 * @param text - Text to shorten.
 * @param maxLength - Maximum length of the result (default 100).
 */
export function truncateText(text: string, maxLength: number = 100): string {
  if (text.length <= maxLength) return text;

  const ellipsis = '...';
  if (maxLength <= ellipsis.length) {
    return ellipsis.substring(0, Math.max(0, maxLength));
  }
  return text.substring(0, maxLength - ellipsis.length) + ellipsis;
}
|
||||
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"lib": ["ES2022"],
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"resolveJsonModule": true,
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true,
|
||||
"noImplicitReturns": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"isolatedModules": true,
|
||||
"verbatimModuleSyntax": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
Reference in New Issue
Block a user